/* Charset handling for GNU tar.

   Copyright 2004-2024 Free Software Foundation, Inc.

   This file is part of GNU tar.

   GNU tar is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   GNU tar is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
22 #include <localcharset.h>
/* Stand-ins for the iconv API that always report failure.
   NOTE(review): presumably these are meant to be compiled only when the
   platform has no usable iconv; the enclosing conditional is not visible
   here -- confirm.  Each name is #undef'd first so the definition does not
   clash if a header already provides it as a macro.  */

/* Never yields a usable descriptor.  */
# undef iconv_open
# define iconv_open(tocode, fromcode) ((iconv_t) -1)

/* Fails with ENOSYS; the arguments are not evaluated.  */
# undef iconv
# define iconv(cd, inbuf, inbytesleft, outbuf, outbytesleft) (errno = ENOSYS, (size_t) -1)

/* Nothing to release; always reports success.  */
# undef iconv_close
# define iconv_close(cd) 0
/* Cached conversion descriptors, one per direction, indexed by the
   to_utf flag converted to int.  (iconv_t) -1 marks a slot for which no
   descriptor is currently held.  */
static iconv_t conv_desc[2] = { (iconv_t) -1, (iconv_t) -1 };
54 utf8_init (bool to_utf
)
56 if (conv_desc
[(int) to_utf
] == (iconv_t
) -1)
59 conv_desc
[(int) to_utf
] = iconv_open ("UTF-8", locale_charset ());
61 conv_desc
[(int) to_utf
] = iconv_open (locale_charset (), "UTF-8");
63 return conv_desc
[(int) to_utf
];
67 utf8_convert (bool to_utf
, char const *input
, char **output
)
73 iconv_t cd
= utf8_init (to_utf
);
77 *output
= xstrdup (input
);
80 else if (cd
== (iconv_t
)-1)
83 inlen
= strlen (input
) + 1;
84 outlen
= inlen
* MB_LEN_MAX
+ 1;
85 ob
= ret
= xmalloc (outlen
);
86 ib
= (char ICONV_CONST
*) input
;
  /* According to POSIX, "if iconv() encounters a character in the input
     buffer that is valid, but for which an identical character does not
     exist in the target codeset, iconv() shall perform an
     implementation-defined conversion on this character."  It will "update
     the variables pointed to by the arguments to reflect the extent of the
     conversion and return the number of non-identical conversions performed".
     On error, it returns -1.
     In other words, non-zero return always indicates failure, either because
     the input was not fully converted, or because it was converted in a
     non-reversible way.  */
98 if (iconv (cd
, &ib
, &inlen
, &ob
, &outlen
) != 0)
110 string_ascii_p (char const *p
)