Fix description of the CACHEDIR.TAG file.
[tar.git] / src / utf8.c
blob568143203642d406318ba1649761f03cd9975d85
/* Charset handling for GNU tar.

   Copyright 2004-2024 Free Software Foundation, Inc.

   This file is part of GNU tar.

   GNU tar is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   GNU tar is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */

20 #include <system.h>
21 #include <quotearg.h>
22 #include <localcharset.h>
23 #include "common.h"
24 #ifdef HAVE_ICONV_H
25 # include <iconv.h>
26 #endif
/* ICONV_CONST qualifies the input-buffer pointer handed to iconv in this
   file.  Default it to nothing when the build has not defined it.  */
#ifndef ICONV_CONST
# define ICONV_CONST
#endif

#ifndef HAVE_ICONV

/* No iconv on this build: provide stand-ins so the code below still
   compiles.  Opening a descriptor always yields (iconv_t) -1, converting
   always fails with errno set to ENOSYS, closing is a no-op that
   evaluates to 0, and iconv_t is a plain int.  */

# undef iconv_open
# define iconv_open(tocode, fromcode) ((iconv_t) -1)

# undef iconv
# define iconv(cd, inbuf, inbytesleft, outbuf, outbytesleft) (errno = ENOSYS, (size_t) -1)

# undef iconv_close
# define iconv_close(cd) 0

# undef iconv_t
# define iconv_t int

#endif

/* Cached conversion descriptors, indexed by direction: slot 0 converts
   UTF-8 to the locale charset, slot 1 converts the locale charset to
   UTF-8.  (iconv_t) -1 marks a slot with no usable descriptor.  */
static iconv_t conv_desc[2] = { (iconv_t) -1, (iconv_t) -1 };

/* Return the conversion descriptor for the requested direction, opening
   it on first use.  If TO_UTF, the descriptor converts from the locale
   charset to UTF-8; otherwise from UTF-8 to the locale charset.

   Return (iconv_t) -1 if iconv_open fails.  Because that value is also
   the "not yet opened" marker, a failed open is attempted again on the
   next call.  */
static iconv_t
utf8_init (bool to_utf)
{
  if (conv_desc[(int) to_utf] == (iconv_t) -1)
    {
      if (to_utf)
        conv_desc[(int) to_utf] = iconv_open ("UTF-8", locale_charset ());
      else
        conv_desc[(int) to_utf] = iconv_open (locale_charset (), "UTF-8");
    }
  return conv_desc[(int) to_utf];
}

66 bool
67 utf8_convert (bool to_utf, char const *input, char **output)
69 char ICONV_CONST *ib;
70 char *ob, *ret;
71 size_t inlen;
72 size_t outlen;
73 iconv_t cd = utf8_init (to_utf);
75 if (cd == 0)
77 *output = xstrdup (input);
78 return true;
80 else if (cd == (iconv_t)-1)
81 return false;
83 inlen = strlen (input) + 1;
84 outlen = inlen * MB_LEN_MAX + 1;
85 ob = ret = xmalloc (outlen);
86 ib = (char ICONV_CONST *) input;
87 /* According to POSIX, "if iconv() encounters a character in the input
88 buffer that is valid, but for which an identical character does not
89 exist in the target codeset, iconv() shall perform an
90 implementation-defined conversion on this character." It will "update
91 the variables pointed to by the arguments to reflect the extent of the
92 conversion and return the number of non-identical conversions performed".
93 On error, it returns -1.
94 In other words, non-zero return always indicates failure, either because
95 the input was not fully converted, or because it was converted in a
96 non-reversible way.
98 if (iconv (cd, &ib, &inlen, &ob, &outlen) != 0)
100 free (ret);
101 return false;
103 *ob = 0;
104 *output = ret;
105 return true;
/* Return true if every byte of the NUL-terminated string P is in the
   7-bit ASCII range (0x00..0x7f), false as soon as a byte with the high
   bit set is found.  The empty string yields true.  */
bool
string_ascii_p (char const *p)
{
  for (; *p; p++)
    {
      /* Look at the byte as unsigned so the result does not depend on
         whether plain char is signed.  */
      if ((unsigned char) *p > 0x7f)
        return false;
    }
  return true;
}