1 /* Character set conversion with error handling.
2 Copyright (C) 2001-2024 Free Software Foundation, Inc.
3 Written by Bruno Haible and Simon Josefsson.
5 This file is free software: you can redistribute it and/or modify
6 it under the terms of the GNU Lesser General Public License as
7 published by the Free Software Foundation; either version 2.1 of the
8 License, or (at your option) any later version.
10 This file is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public License
16 along with this program. If not, see <https://www.gnu.org/licenses/>. */
21 #include "striconveh.h"
32 #include "c-strcase.h"
33 #include "c-strcaseeq.h"
36 # define SIZE_MAX ((size_t) -1)
42 /* The caller must provide an iconveh_t, not just an iconv_t, because when a
43 conversion error occurs, we may have to determine the Unicode representation
44 of the inconvertible character. */
/* Open the iconv descriptors used to convert FROM_CODESET to TO_CODESET with
   error handling: a direct descriptor (cd), a FROM_CODESET -> UTF-8
   descriptor (cd1) and a UTF-8 -> TO_CODESET descriptor (cd2).
   Callers in this file treat a negative return value as failure.
   NOTE(review): the lines that store the descriptors through CDP and the
   return statements are not part of this excerpt - confirm against
   striconveh.h. */
47 iconveh_open (const char *to_codeset
, const char *from_codeset
, iconveh_t
*cdp
)
53 /* Avoid glibc-2.1 bug with EUC-KR. */
54 # if ((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
55 && !defined _LIBICONV_VERSION
56 if (c_strcasecmp (from_codeset
, "EUC-KR") == 0
57 || c_strcasecmp (to_codeset
, "EUC-KR") == 0)
/* Direct conversion descriptor. In the visible code its value is compared
   against (iconv_t)(-1) only in the cleanup paths further down. */
64 cd
= iconv_open (to_codeset
, from_codeset
);
/* First stage of the route through UTF-8. The iconv_open below is
   presumably the branch taken when FROM_CODESET is not UTF-8; other code in
   this file treats cd1 == (iconv_t)(-1) as "FROM_CODESET is UTF-8". */
66 if (STRCASEEQ (from_codeset
, "UTF-8", 'U','T','F','-','8',0,0,0,0))
70 cd1
= iconv_open ("UTF-8", from_codeset
);
71 if (cd1
== (iconv_t
)(-1))
/* Capture errno from the failed iconv_open before cleaning up cd.
   NOTE(review): the close/restore/return lines are not shown here. */
73 int saved_errno
= errno
;
74 if (cd
!= (iconv_t
)(-1))
/* Second stage of the route through UTF-8. The test covers TO_CODESET being
   UTF-8 and, on the iconv implementations selected by the #if below, also
   "UTF-8//TRANSLIT"; the iconv_open that follows opens UTF-8 -> TO_CODESET. */
81 if (STRCASEEQ (to_codeset
, "UTF-8", 'U','T','F','-','8',0,0,0,0)
82 # if (((__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2) || __GLIBC__ > 2) \
83 && !defined __UCLIBC__) \
84 || _LIBICONV_VERSION >= 0x0105 \
85 || defined ICONV_SET_TRANSLITERATE
86 || c_strcasecmp (to_codeset
, "UTF-8//TRANSLIT") == 0
92 cd2
= iconv_open (to_codeset
, "UTF-8");
93 if (cd2
== (iconv_t
)(-1))
/* Same pattern as above: capture errno first, then deal with whichever of
   cd1 and cd were opened successfully. */
95 int saved_errno
= errno
;
96 if (cd1
!= (iconv_t
)(-1))
98 if (cd
!= (iconv_t
)(-1))
/* Close the descriptors held in *CD, in the order cd2, cd1, cd. A descriptor
   equal to (iconv_t)(-1) is treated as not open and skipped.
   When one iconv_close fails, the descriptors not yet closed are still
   closed (their results are ignored) and, per the comments below, -1 is
   returned with the errno of the failing call.
   NOTE(review): the return statements and errno restores are on lines not
   included in this excerpt. */
112 iconveh_close (const iconveh_t
*cd
)
/* Step 1: cd2. On failure, still close cd1 and cd. */
114 if (cd
->cd2
!= (iconv_t
)(-1) && iconv_close (cd
->cd2
) < 0)
116 /* Return -1, but preserve the errno from iconv_close. */
117 int saved_errno
= errno
;
118 if (cd
->cd1
!= (iconv_t
)(-1))
119 iconv_close (cd
->cd1
);
120 if (cd
->cd
!= (iconv_t
)(-1))
121 iconv_close (cd
->cd
);
/* Step 2: cd1. On failure, still close cd. */
125 if (cd
->cd1
!= (iconv_t
)(-1) && iconv_close (cd
->cd1
) < 0)
127 /* Return -1, but preserve the errno from iconv_close. */
128 int saved_errno
= errno
;
129 if (cd
->cd
!= (iconv_t
)(-1))
130 iconv_close (cd
->cd
);
/* Step 3: cd, the last descriptor; nothing else is left to clean up. */
134 if (cd
->cd
!= (iconv_t
)(-1) && iconv_close (cd
->cd
) < 0)
139 /* iconv_carefully is like iconv, except that it stops as soon as it encounters
140 a conversion error, and it returns in *INCREMENTED a boolean telling whether
141 it has incremented the input pointers past the error location. */
142 # if !(defined _LIBICONV_VERSION && !(_LIBICONV_VERSION == 0x10b && defined __APPLE__)) \
143 && !(defined __GLIBC__ && !defined __UCLIBC__)
144 /* Irix iconv() inserts a NUL byte if it cannot convert.
145 NetBSD iconv() inserts a question mark if it cannot convert.
146 Only GNU libiconv (excluding the bastard Apple iconv) and GNU libc are
147 known to prefer to fail rather than doing a lossy conversion. */
149 iconv_carefully (iconv_t cd
,
150 const char **inbuf
, size_t *inbytesleft
,
151 char **outbuf
, size_t *outbytesleft
,
/* Work on local copies of the caller's cursors; the remaining byte counts
   are written back through INBYTESLEFT and OUTBYTESLEFT further down. */
154 const char *inptr
= *inbuf
;
155 const char *inptr_end
= inptr
+ *inbytesleft
;
156 char *outptr
= *outbuf
;
157 size_t outsize
= *outbytesleft
;
158 const char *inptr_before
;
/* Remember where this round started, so that it can be detected below
   whether iconv consumed any input. */
165 inptr_before
= inptr
;
/* Offer iconv a prefix of the remaining input that grows one byte at a
   time. The test below singles out EINVAL; any other outcome is handled
   differently from "try a longer prefix".
   NOTE(review): the iconv call itself and the loop exits are on lines not
   shown in this excerpt. */
168 for (insize
= 1; inptr
+ insize
<= inptr_end
; insize
++)
171 (ICONV_CONST
char **) &inptr
, &insize
,
173 if (!(res
== (size_t)(-1) && errno
== EINVAL
))
175 /* iconv can eat up a shift sequence but give EINVAL while attempting
176 to convert the first character. E.g. libiconv does this. */
177 if (inptr
> inptr_before
)
187 *outbytesleft
= outsize
;
/* Keep going only while the last result was exactly 0 and input remains. */
190 while (res
== 0 && inptr
< inptr_end
);
193 *inbytesleft
= inptr_end
- inptr
;
/* A positive, non-error result: per the comment preceding this function,
   some iconv implementations substitute a character instead of failing. */
194 if (res
!= (size_t)(-1) && res
> 0)
196 /* iconv() has already incremented INPTR. We cannot go back to a
197 previous INPTR, otherwise the state inside CD would become invalid,
198 if FROM_CODESET is a stateful encoding. So, tell the caller that
199 *INBUF has already been incremented. */
200 *incremented
= (inptr
> inptr_before
);
/* NOTE(review): presumably the remaining case - report that the input
   pointer was not moved past an error location. */
206 *incremented
= false;
/* Macro form of iconv_carefully: a plain iconv() call that first sets
   *INCREMENTED to false; the value of the expression is iconv's result.
   NOTE(review): presumably the alternative branch of the platform test that
   guards the function form above - the #else line is not visible here. */
211 # define iconv_carefully(cd, inbuf, inbytesleft, outbuf, outbytesleft, incremented) \
212 (*(incremented) = false, \
213 iconv (cd, (ICONV_CONST char **) (inbuf), inbytesleft, outbuf, outbytesleft))
216 /* iconv_carefully_1 is like iconv_carefully, except that it stops after
217 converting one character or one shift sequence. */
219 iconv_carefully_1 (iconv_t cd
,
220 const char **inbuf
, size_t *inbytesleft
,
221 char **outbuf
, size_t *outbytesleft
,
/* Local copies of the caller's cursors. inptr_before stays fixed at the
   starting position for the whole call. */
224 const char *inptr_before
= *inbuf
;
225 const char *inptr
= inptr_before
;
226 const char *inptr_end
= inptr_before
+ *inbytesleft
;
227 char *outptr
= *outbuf
;
228 size_t outsize
= *outbytesleft
;
/* If the loop below never runs (no input bytes left), res keeps this
   failure value. */
229 size_t res
= (size_t)(-1);
/* Try prefixes of 1, 2, ... bytes, restarting from the same input position
   each time. The test below singles out EINVAL from every other outcome.
   NOTE(review): the iconv call and the loop exits are on lines not shown in
   this excerpt. */
232 for (insize
= 1; inptr_before
+ insize
<= inptr_end
; insize
++)
234 inptr
= inptr_before
;
236 (ICONV_CONST
char **) &inptr
, &insize
,
238 if (!(res
== (size_t)(-1) && errno
== EINVAL
))
240 /* iconv can eat up a shift sequence but give EINVAL while attempting
241 to convert the first character. E.g. libiconv does this. */
242 if (inptr
> inptr_before
)
/* Report how much input is left after whatever iconv consumed. */
250 *inbytesleft
= inptr_end
- inptr
;
251 # if !(defined _LIBICONV_VERSION && !(_LIBICONV_VERSION == 0x10b && defined __APPLE__)) \
252 && !(defined __GLIBC__ && !defined __UCLIBC__)
253 /* Irix iconv() inserts a NUL byte if it cannot convert.
254 NetBSD iconv() inserts a question mark if it cannot convert.
255 Only GNU libiconv (excluding the bastard Apple iconv) and GNU libc are
256 known to prefer to fail rather than doing a lossy conversion. */
257 if (res
!= (size_t)(-1) && res
> 0)
259 /* iconv() has already incremented INPTR. We cannot go back to a
260 previous INPTR, otherwise the state inside CD would become invalid,
261 if FROM_CODESET is a stateful encoding. So, tell the caller that
262 *INBUF has already been incremented. */
263 *incremented
= (inptr
> inptr_before
);
/* The remaining output space is published to the caller only when the
   conversion did not fail.
   NOTE(review): the matching store to *outbuf is not visible here. */
269 if (res
!= (size_t)(-1))
272 *outbytesleft
= outsize
;
274 *incremented
= false;
278 /* utf8conv_carefully is like iconv, except that
279 - it converts from UTF-8 to UTF-8,
280 - it stops as soon as it encounters a conversion error, and it returns
281 in *INCREMENTED a boolean telling whether it has incremented the input
282 pointers past the error location,
283 - if one_character_only is true, it stops after converting one
284 character. */
286 utf8conv_carefully (bool one_character_only
,
287 const char **inbuf
, size_t *inbytesleft
,
288 char **outbuf
, size_t *outbytesleft
,
/* Local copies of the caller's cursors; the remaining byte counts are
   written back through INBYTESLEFT and OUTBYTESLEFT at the end. */
291 const char *inptr
= *inbuf
;
292 size_t insize
= *inbytesleft
;
293 char *outptr
= *outbuf
;
294 size_t outsize
= *outbytesleft
;
/* Decode one character from the input with u8_mbtoucr.
   NOTE(review): the test of n that guards the error handling below is on a
   line not shown in this excerpt. */
304 n
= u8_mbtoucr (&uc
, (const uint8_t *) inptr
, insize
);
/* Translate the decoder's result into an iconv-style errno: -2 becomes
   EINVAL, anything else reaching this point becomes EILSEQ. */
307 errno
= (n
== -2 ? EINVAL
: EILSEQ
);
/* Decode the same bytes again with u8_mbtouc.
   NOTE(review): presumably to learn how many bytes to step over - the lines
   that use n afterwards are not visible. */
308 n
= u8_mbtouc (&uc
, (const uint8_t *) inptr
, insize
);
319 *incremented
= false;
/* Re-encode the character into the output, bounded by the space left. */
322 m
= u8_uctomb ((uint8_t *) outptr
, uc
, outsize
);
327 *incremented
= false;
/* Loop again only if more than one character may be converted and input
   remains. */
342 while (!one_character_only
&& insize
> 0);
345 *inbytesleft
= insize
;
347 *outbytesleft
= outsize
;
352 mem_cd_iconveh_internal (const char *src
, size_t srclen
,
353 iconv_t cd
, iconv_t cd1
, iconv_t cd2
,
354 enum iconv_ilseq_handler handler
,
357 char **resultp
, size_t *lengthp
)
359 /* When a conversion error occurs, we cannot start using CD1 and CD2 at
360 this point: FROM_CODESET may be a stateful encoding like ISO-2022-KR.
361 Instead, we have to start afresh from the beginning of SRC. */
362 /* Use a temporary buffer, so that for small strings, a single malloc()
363 call will be sufficient. */
364 # define tmpbufsize 4096
365 /* The alignment is needed when converting e.g. to glibc's WCHAR_T or
366 libiconv's UCS-4-INTERNAL encoding. */
367 union { unsigned int align
; char buf
[tmpbufsize
]; } tmp
;
368 # define tmpbuf tmp.buf
370 char *initial_result
;
374 size_t last_length
= (size_t)(-1); /* only needed if offsets != NULL */
376 if (*resultp
!= NULL
&& *lengthp
>= sizeof (tmpbuf
))
378 initial_result
= *resultp
;
379 allocated
= *lengthp
;
383 initial_result
= tmpbuf
;
384 allocated
= sizeof (tmpbuf
);
386 result
= initial_result
;
388 /* Test whether a direct conversion is possible at all. */
389 if (cd
== (iconv_t
)(-1))
396 for (i
= 0; i
< srclen
; i
++)
397 offsets
[i
] = (size_t)(-1);
399 last_length
= (size_t)(-1);
403 /* First, try a direct conversion, and see whether a conversion error
404 occurs at all. */
406 const char *inptr
= src
;
407 size_t insize
= srclen
;
409 /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug. */
410 # if defined _LIBICONV_VERSION \
411 || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
413 /* Set to the initial state. */
414 iconv (cd
, NULL
, NULL
, NULL
, NULL
);
419 char *outptr
= result
+ length
;
420 size_t outsize
= allocated
- extra_alloc
- length
;
427 if (length
!= last_length
) /* ensure that offset[] be increasing */
429 offsets
[inptr
- src
] = length
;
430 last_length
= length
;
432 res
= iconv_carefully_1 (cd
,
438 /* Use iconv_carefully instead of iconv here, because:
439 - If TO_CODESET is UTF-8, we can do the error handling in this
440 loop, no need for a second loop,
441 - With iconv() implementations other than GNU libiconv and GNU
442 libc, if we use iconv() in a big swoop, checking for an E2BIG
443 return, we lose the number of irreversible conversions. */
444 res
= iconv_carefully (cd
,
449 length
= outptr
- result
;
450 grow
= (length
+ extra_alloc
> allocated
/ 2);
451 if (res
== (size_t)(-1))
455 else if (errno
== EINVAL
)
457 else if (errno
== EILSEQ
&& handler
!= iconveh_error
)
459 if (cd2
== (iconv_t
)(-1))
461 /* TO_CODESET is UTF-8. */
462 /* Error handling can produce up to 1 or 3 bytes of
463 output. */
465 (handler
== iconveh_replacement_character
? 3 : 1);
466 if (length
+ extra_need
+ extra_alloc
> allocated
)
470 allocated
= 2 * allocated
;
471 if (length
+ extra_need
+ extra_alloc
> allocated
)
472 allocated
= 2 * allocated
;
473 if (length
+ extra_need
+ extra_alloc
> allocated
)
475 if (result
== initial_result
)
476 memory
= (char *) malloc (allocated
);
478 memory
= (char *) realloc (result
, allocated
);
481 if (result
!= initial_result
)
486 if (result
== initial_result
)
487 memcpy (memory
, initial_result
, length
);
491 /* The input is invalid in FROM_CODESET. Eat up one byte
492 and emit a replacement character or a question mark. */
500 if (handler
== iconveh_replacement_character
)
502 /* U+FFFD in UTF-8 encoding. */
503 result
[length
+0] = '\357';
504 result
[length
+1] = '\277';
505 result
[length
+2] = '\275';
510 result
[length
] = '?';
519 if (result
!= initial_result
)
530 allocated
= 2 * allocated
;
531 if (result
== initial_result
)
532 memory
= (char *) malloc (allocated
);
534 memory
= (char *) realloc (result
, allocated
);
537 if (result
!= initial_result
)
542 if (result
== initial_result
)
543 memcpy (memory
, initial_result
, length
);
549 /* Now get the conversion state back to the initial state.
550 But avoid glibc-2.1 bug and Solaris 2.7 bug. */
551 #if defined _LIBICONV_VERSION \
552 || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
556 char *outptr
= result
+ length
;
557 size_t outsize
= allocated
- extra_alloc
- length
;
560 res
= iconv (cd
, NULL
, NULL
, &outptr
, &outsize
);
561 length
= outptr
- result
;
562 if (res
== (size_t)(-1))
568 allocated
= 2 * allocated
;
569 if (result
== initial_result
)
570 memory
= (char *) malloc (allocated
);
572 memory
= (char *) realloc (result
, allocated
);
575 if (result
!= initial_result
)
580 if (result
== initial_result
)
581 memcpy (memory
, initial_result
, length
);
586 if (result
!= initial_result
)
596 /* The direct conversion succeeded. */
600 /* The direct conversion failed.
601 Use a conversion through UTF-8. */
606 for (i
= 0; i
< srclen
; i
++)
607 offsets
[i
] = (size_t)(-1);
609 last_length
= (size_t)(-1);
613 const bool slowly
= (offsets
!= NULL
|| handler
== iconveh_error
);
614 # define utf8bufsize 4096 /* may also be smaller or larger than tmpbufsize */
615 char utf8buf
[utf8bufsize
+ 3];
617 const char *in1ptr
= src
;
618 size_t in1size
= srclen
;
619 bool do_final_flush1
= true;
620 bool do_final_flush2
= true;
622 /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug. */
623 # if defined _LIBICONV_VERSION \
624 || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
626 /* Set to the initial state. */
627 if (cd1
!= (iconv_t
)(-1))
628 iconv (cd1
, NULL
, NULL
, NULL
, NULL
);
629 if (cd2
!= (iconv_t
)(-1))
630 iconv (cd2
, NULL
, NULL
, NULL
, NULL
);
633 while (in1size
> 0 || do_final_flush1
|| utf8len
> 0 || do_final_flush2
)
635 char *out1ptr
= utf8buf
+ utf8len
;
636 size_t out1size
= utf8bufsize
- utf8len
;
641 /* Conversion step 1: from FROM_CODESET to UTF-8. */
645 && length
!= last_length
) /* ensure that offset[] be increasing */
647 offsets
[in1ptr
- src
] = length
;
648 last_length
= length
;
650 if (cd1
!= (iconv_t
)(-1))
653 res1
= iconv_carefully_1 (cd1
,
658 res1
= iconv_carefully (cd1
,
665 /* FROM_CODESET is UTF-8. */
666 res1
= utf8conv_carefully (slowly
,
672 else if (do_final_flush1
)
674 /* Now get the conversion state of CD1 back to the initial state.
675 But avoid glibc-2.1 bug and Solaris 2.7 bug. */
676 # if defined _LIBICONV_VERSION \
677 || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
679 if (cd1
!= (iconv_t
)(-1))
680 res1
= iconv (cd1
, NULL
, NULL
, &out1ptr
, &out1size
);
684 do_final_flush1
= false;
692 if (res1
== (size_t)(-1)
693 && !(errno
== E2BIG
|| errno
== EINVAL
|| errno
== EILSEQ
))
695 if (result
!= initial_result
)
699 if (res1
== (size_t)(-1)
700 && errno
== EILSEQ
&& handler
!= iconveh_error
)
702 /* The input is invalid in FROM_CODESET. Eat up one byte and
703 emit a U+FFFD character or a question mark. Room for this
704 character was allocated at the end of utf8buf. */
712 if (handler
== iconveh_replacement_character
)
714 /* U+FFFD in UTF-8 encoding. */
725 utf8len
= out1ptr
- utf8buf
;
729 || utf8len
> utf8bufsize
/ 2
730 || (res1
== (size_t)(-1) && errno1
== E2BIG
))
732 /* Conversion step 2: from UTF-8 to TO_CODESET. */
733 const char *in2ptr
= utf8buf
;
734 size_t in2size
= utf8len
;
737 || (in1size
== 0 && !do_final_flush1
&& do_final_flush2
))
739 char *out2ptr
= result
+ length
;
740 size_t out2size
= allocated
- extra_alloc
- length
;
747 if (cd2
!= (iconv_t
)(-1))
748 res2
= iconv_carefully (cd2
,
753 /* TO_CODESET is UTF-8. */
754 res2
= utf8conv_carefully (false,
759 else /* in1size == 0 && !do_final_flush1
760 && in2size == 0 && do_final_flush2 */
762 /* Now get the conversion state of CD2 back to the initial
763 state. But avoid glibc-2.1 bug and Solaris 2.7 bug. */
764 # if defined _LIBICONV_VERSION \
765 || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
767 if (cd2
!= (iconv_t
)(-1))
768 res2
= iconv (cd2
, NULL
, NULL
, &out2ptr
, &out2size
);
772 do_final_flush2
= false;
776 length
= out2ptr
- result
;
777 grow
= (length
+ extra_alloc
> allocated
/ 2);
778 if (res2
== (size_t)(-1))
782 else if (errno
== EINVAL
)
784 else if (errno
== EILSEQ
&& handler
!= iconveh_error
)
786 /* Error handling can produce up to 10 bytes of UTF-8
787 output. But TO_CODESET may be UCS-2, UTF-16 or
788 UCS-4, so use CD2 here as well. */
798 if (u8_prev (&uc
, (const uint8_t *) in2ptr
,
799 (const uint8_t *) utf8buf
)
808 n
= u8_mbtouc_unsafe (&uc
, (const uint8_t *) in2ptr
,
814 if (handler
== iconveh_escape_sequence
)
816 static char const hex
[16] = "0123456789ABCDEF";
818 scratchbuf
[scratchlen
++] = '\\';
820 scratchbuf
[scratchlen
++] = 'u';
823 scratchbuf
[scratchlen
++] = 'U';
824 scratchbuf
[scratchlen
++] = hex
[(uc
>>28) & 15];
825 scratchbuf
[scratchlen
++] = hex
[(uc
>>24) & 15];
826 scratchbuf
[scratchlen
++] = hex
[(uc
>>20) & 15];
827 scratchbuf
[scratchlen
++] = hex
[(uc
>>16) & 15];
829 scratchbuf
[scratchlen
++] = hex
[(uc
>>12) & 15];
830 scratchbuf
[scratchlen
++] = hex
[(uc
>>8) & 15];
831 scratchbuf
[scratchlen
++] = hex
[(uc
>>4) & 15];
832 scratchbuf
[scratchlen
++] = hex
[uc
& 15];
834 else if (handler
== iconveh_replacement_character
)
836 /* U+FFFD in UTF-8 encoding. */
837 scratchbuf
[0] = '\357';
838 scratchbuf
[1] = '\277';
839 scratchbuf
[2] = '\275';
850 if (cd2
!= (iconv_t
)(-1))
852 char *out2ptr_try
= out2ptr
;
853 size_t out2size_try
= out2size
;
855 (ICONV_CONST
char **) &inptr
, &insize
,
856 &out2ptr_try
, &out2size_try
);
857 if (handler
== iconveh_replacement_character
858 && (res
== (size_t)(-1)
860 /* FreeBSD iconv(), NetBSD iconv(), and
861 Solaris 11 iconv() insert a '?' if they
862 cannot convert. This is what we want.
863 But IRIX iconv() inserts a NUL byte if it
864 cannot convert.
865 And musl libc iconv() inserts a '*' if it
866 cannot convert. */
868 && !(out2ptr_try
- out2ptr
== 1
869 && *out2ptr
== '?'))))
871 /* The iconv() call failed.
872 U+FFFD can't be converted to TO_CODESET.
873 Use '?' instead. */
879 (ICONV_CONST
char **) &inptr
, &insize
,
880 &out2ptr
, &out2size
);
884 /* Accept the results of the iconv() call. */
885 out2ptr
= out2ptr_try
;
886 out2size
= out2size_try
;
892 /* TO_CODESET is UTF-8. */
893 if (out2size
>= insize
)
895 memcpy (out2ptr
, inptr
, insize
);
908 length
= out2ptr
- result
;
909 if (res
== (size_t)(-1) && errno
== E2BIG
)
913 allocated
= 2 * allocated
;
914 if (length
+ 1 + extra_alloc
> allocated
)
916 if (result
== initial_result
)
917 memory
= (char *) malloc (allocated
);
919 memory
= (char *) realloc (result
, allocated
);
922 if (result
!= initial_result
)
927 if (result
== initial_result
)
928 memcpy (memory
, initial_result
, length
);
932 out2ptr
= result
+ length
;
933 out2size
= allocated
- extra_alloc
- length
;
934 if (cd2
!= (iconv_t
)(-1))
936 (ICONV_CONST
char **) &inptr
,
938 &out2ptr
, &out2size
);
941 /* TO_CODESET is UTF-8. */
942 if (!(out2size
>= insize
))
944 memcpy (out2ptr
, inptr
, insize
);
951 length
= out2ptr
- result
;
953 # if !(defined _LIBICONV_VERSION && !(_LIBICONV_VERSION == 0x10b && defined __APPLE__)) \
954 && !(defined __GLIBC__ && !defined __UCLIBC__)
955 /* IRIX iconv() inserts a NUL byte if it cannot convert.
956 FreeBSD iconv(), NetBSD iconv(), and Solaris 11
957 iconv() insert a '?' if they cannot convert.
958 musl libc iconv() inserts a '*' if it cannot convert.
959 Only GNU libiconv (excluding the bastard Apple iconv)
960 and GNU libc are known to prefer to fail rather than
961 doing a lossy conversion. */
962 if (res
!= (size_t)(-1) && res
> 0)
968 if (res
== (size_t)(-1))
970 /* Failure converting the ASCII replacement. */
971 if (result
!= initial_result
)
978 if (result
!= initial_result
)
984 || (in1size
== 0 && !do_final_flush1
&& do_final_flush2
)))
990 allocated
= 2 * allocated
;
991 if (result
== initial_result
)
992 memory
= (char *) malloc (allocated
);
994 memory
= (char *) realloc (result
, allocated
);
997 if (result
!= initial_result
)
1002 if (result
== initial_result
)
1003 memcpy (memory
, initial_result
, length
);
1008 /* Move the remaining bytes to the beginning of utf8buf. */
1010 memmove (utf8buf
, in2ptr
, in2size
);
1014 if (res1
== (size_t)(-1))
1016 if (errno1
== EINVAL
)
1018 else if (errno1
== EILSEQ
)
1020 if (result
!= initial_result
)
1031 /* Now the final memory allocation. */
1032 if (result
== tmpbuf
)
1034 size_t memsize
= length
+ extra_alloc
;
1036 if (*resultp
!= NULL
&& *lengthp
>= memsize
)
1042 memory
= (char *) malloc (memsize
> 0 ? memsize
: 1);
1051 memcpy (result
, tmpbuf
, length
);
1053 else if (result
!= *resultp
&& length
+ extra_alloc
< allocated
)
1055 /* Shrink the allocated memory if possible. */
1056 size_t memsize
= length
+ extra_alloc
;
1059 memory
= (char *) realloc (result
, memsize
> 0 ? memsize
: 1);
/* Convert SRCLEN bytes at SRC using the descriptors in *CD.
   Thin wrapper: unpacks cd, cd1 and cd2 from *CD and forwards everything to
   mem_cd_iconveh_internal, returning its result unchanged. The literal 0 is
   passed in the position where str_cd_iconveh passes 1.
   NOTE(review): that argument appears to be the extra_alloc count used
   throughout mem_cd_iconveh_internal - confirm against its parameter list,
   which is not fully visible in this excerpt. */
1071 mem_cd_iconveh (const char *src
, size_t srclen
,
1072 const iconveh_t
*cd
,
1073 enum iconv_ilseq_handler handler
,
1075 char **resultp
, size_t *lengthp
)
1077 return mem_cd_iconveh_internal (src
, srclen
, cd
->cd
, cd
->cd1
, cd
->cd2
,
1078 handler
, 0, offsets
, resultp
, lengthp
);
/* Convert the NUL-terminated string SRC using the descriptors in *CD and
   NUL-terminate the converted result.
   NOTE(review): the failure check on retval and the return statement are on
   lines not included in this excerpt. */
1082 str_cd_iconveh (const char *src
,
1083 const iconveh_t
*cd
,
1084 enum iconv_ilseq_handler handler
)
1086 /* For most encodings, a trailing NUL byte in the input will be converted
1087 to a trailing NUL byte in the output. But not for UTF-7. So that this
1088 function is usable for UTF-7, we have to exclude the NUL byte from the
1089 conversion and add it by hand afterwards. */
/* No caller-supplied buffer is passed in: result starts out NULL. */
1090 char *result
= NULL
;
/* Convert strlen (src) bytes, i.e. without the terminator, and request no
   offsets array (NULL). The 1 after HANDLER is presumably the number of
   spare bytes to keep beyond the converted data, which is where the NUL
   written below goes - confirm against mem_cd_iconveh_internal. */
1092 int retval
= mem_cd_iconveh_internal (src
, strlen (src
),
1093 cd
->cd
, cd
->cd1
, cd
->cd2
, handler
, 1,
1094 NULL
, &result
, &length
);
1102 /* Add the terminating NUL byte. */
1103 result
[length
] = '\0';
/* Convert SRCLEN bytes at SRC from FROM_CODESET to TO_CODESET.
   The visible code has three routes: a "nothing to convert" case, a plain
   copy when both codeset names are equal and no offsets were requested, and
   the general route via iconveh_open / mem_cd_iconveh / iconveh_close.
   NOTE(review): several lines (conditions, stores through RESULTP and
   LENGTHP, return statements) are not included in this excerpt. */
1111 mem_iconveh (const char *src
, size_t srclen
,
1112 const char *from_codeset
, const char *to_codeset
,
1113 enum iconv_ilseq_handler handler
,
1115 char **resultp
, size_t *lengthp
)
1119 /* Nothing to convert. */
/* Same codeset name (compared with c_strcasecmp) and no offsets array
   requested: the bytes are copied without going through iconv. */
1123 else if (offsets
== NULL
&& c_strcasecmp (from_codeset
, to_codeset
) == 0)
/* Check whether the caller's buffer is present and large enough; the
   malloc below is presumably the alternative when it is not. */
1127 if (*resultp
!= NULL
&& *lengthp
>= srclen
)
1131 result
= (char *) malloc (srclen
);
1138 memcpy (result
, src
, srclen
);
/* General route: open the descriptors, convert, then close them. */
1151 if (iconveh_open (to_codeset
, from_codeset
, &cd
) < 0)
1156 retval
= mem_cd_iconveh (src
, srclen
, &cd
, handler
, offsets
,
1161 /* Close cd, but preserve the errno from mem_cd_iconveh. */
1162 int saved_errno
= errno
;
1163 iconveh_close (&cd
);
1164 errno
= saved_errno
;
/* Closing can fail as well. The comparison with *RESULTP distinguishes a
   buffer obtained during the conversion from the caller's own buffer.
   NOTE(review): the statement it guards is not shown here. */
1168 if (iconveh_close (&cd
) < 0)
1170 if (result
!= *resultp
)
1179 /* This is a different error code than if iconv_open existed but didn't
1180 support from_codeset and to_codeset, so that the caller can emit
1181 an error message such as
1182 "iconv() is not supported. Installing GNU libiconv and
1183 then reinstalling this package would fix this." */
/* Convert the NUL-terminated string SRC from FROM_CODESET to TO_CODESET.
   An empty string, or equal codeset names, is handled with strdup; anything
   else goes through iconveh_open / str_cd_iconveh / iconveh_close.
   NOTE(review): the return statements and some error-path lines are not
   included in this excerpt. */
1191 str_iconveh (const char *src
,
1192 const char *from_codeset
, const char *to_codeset
,
1193 enum iconv_ilseq_handler handler
)
/* Shortcut: nothing to convert, so a copy of SRC is made. */
1195 if (*src
== '\0' || c_strcasecmp (from_codeset
, to_codeset
) == 0)
1197 char *result
= strdup (src
);
/* General route: open the descriptors, convert, then close them. */
1209 if (iconveh_open (to_codeset
, from_codeset
, &cd
) < 0)
1212 result
= str_cd_iconveh (src
, &cd
, handler
);
1216 /* Close cd, but preserve the errno from str_cd_iconveh. */
1217 int saved_errno
= errno
;
1218 iconveh_close (&cd
);
1219 errno
= saved_errno
;
/* Closing the descriptors can fail too and is checked separately. */
1223 if (iconveh_close (&cd
) < 0)
1231 /* This is a different error code than if iconv_open existed but didn't
1232 support from_codeset and to_codeset, so that the caller can emit
1233 an error message such as
1234 "iconv() is not supported. Installing GNU libiconv and
1235 then reinstalling this package would fix this." */