1 /* Copyright (C) 2000-2020 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Bruno Haible <haible@clisp.cons.org>, 2000.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
19 /* Create a table from CHARSET to Unicode.
20 This is a good test for CHARSET's iconv() module, in particular the
21 FROM_LOOP BODY macro. */
30 /* If nonzero, ignore conversions outside Unicode plane 0. */
/* Converts a byte buffer to a hexadecimal string.

   BUF holds BUFLEN bytes.  The result is "0x" followed by two uppercase
   hexadecimal digits per byte, in buffer order, e.g. { 0xA1, 0x40 } gives
   "0xA140".  Callers in this file pass 1 to 4 bytes; longer inputs are
   cut off at the number of bytes that fit in the result buffer.

   The returned string lives in a static buffer and is overwritten by the
   next call, so it must be consumed (or copied) before calling again.  */
static const char *
hexbuf (const unsigned char buf[], unsigned int buflen)
{
  static char msg[50];
  /* Two digits per byte, plus room for the "0x" prefix and the NUL.  */
  const unsigned int max_bytes = (sizeof msg - 3) / 2;
  size_t used;
  unsigned int i;

  if (buflen > max_bytes)
    buflen = max_bytes;

  used = (size_t) snprintf (msg, sizeof msg, "0x");
  for (i = 0; i < buflen; i++)
    used += (size_t) snprintf (msg + used, sizeof msg - used, "%02X",
                               (unsigned int) buf[i]);

  return msg;
}
/* Attempts to convert a byte buffer BUF (BUFLEN bytes) to OUT (12 bytes)
   using the conversion descriptor CD.  Returns the number of written bytes,
   or 0 if ambiguous, or -1 if invalid.

   "Ambiguous" means iconv reported EINVAL, i.e. BUF is only the beginning
   of a longer multibyte sequence; "invalid" means iconv reported EILSEQ.
   Any other iconv failure, or a successful conversion that did not consume
   all of BUF, is reported on stderr and terminates the program.  */
static int
try (iconv_t cd, unsigned char buf[], unsigned int buflen, unsigned char *out)
{
  enum { OUT_SIZE = 12 };
  /* iconv wants a plain 'char **'; declaring the cursor that way avoids
     casting the address of a 'const char *' object.  */
  char *inbuf = (char *) buf;
  size_t inbytesleft = buflen;
  char *outbuf = (char *) out;
  size_t outbytesleft = OUT_SIZE;
  size_t result;

  /* Put CD back into its initial state before each attempt.  */
  iconv (cd, NULL, NULL, NULL, NULL);
  result = iconv (cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
  if (result != (size_t) (-1))
    /* Flush whatever the converter still holds back into OUT.  */
    result = iconv (cd, NULL, NULL, &outbuf, &outbytesleft);

  if (result == (size_t) (-1))
    {
      /* Capture errno before any stdio call can overwrite it.  */
      int saved_errno = errno;

      if (saved_errno == EILSEQ)
        return -1;
      if (saved_errno == EINVAL)
        return 0;

      fprintf (stderr, "%s: iconv error: ", hexbuf (buf, buflen));
      errno = saved_errno;
      perror ("");
      exit (1);
    }

  if (inbytesleft != 0)
    {
      /* The conversion succeeded but left input unread.  */
      fprintf (stderr, "%s: inbytes = %ld, outbytes = %ld\n",
               hexbuf (buf, buflen),
               (long) (buflen - inbytesleft),
               (long) (OUT_SIZE - outbytesleft));
      exit (1);
    }

  return (int) (OUT_SIZE - outbytesleft);
}
109 /* Returns the out[] buffer as a Unicode value, formatted as 0x%04X. */
111 utf8_decode (const unsigned char *out
, unsigned int outlen
)
113 static char hexbuf
[84];
123 sprintf (p
, "0x%04X", out
[0]);
124 out
+= 1; outlen
-= 1;
126 else if (out
[0] >= 0xc0 && out
[0] < 0xe0 && outlen
>= 2)
128 sprintf (p
, "0x%04X", ((out
[0] & 0x1f) << 6) + (out
[1] & 0x3f));
129 out
+= 2; outlen
-= 2;
131 else if (out
[0] >= 0xe0 && out
[0] < 0xf0 && outlen
>= 3)
133 sprintf (p
, "0x%04X", ((out
[0] & 0x0f) << 12)
134 + ((out
[1] & 0x3f) << 6) + (out
[2] & 0x3f));
135 out
+= 3; outlen
-= 3;
137 else if (out
[0] >= 0xf0 && out
[0] < 0xf8 && outlen
>= 4)
139 sprintf (p
, "0x%04X", ((out
[0] & 0x07) << 18)
140 + ((out
[1] & 0x3f) << 12)
141 + ((out
[2] & 0x3f) << 6) + (out
[3] & 0x3f));
142 out
+= 4; outlen
-= 4;
144 else if (out
[0] >= 0xf8 && out
[0] < 0xfc && outlen
>= 5)
146 sprintf (p
, "0x%04X", ((out
[0] & 0x03) << 24)
147 + ((out
[1] & 0x3f) << 18)
148 + ((out
[2] & 0x3f) << 12)
149 + ((out
[3] & 0x3f) << 6) + (out
[4] & 0x3f));
150 out
+= 5; outlen
-= 5;
152 else if (out
[0] >= 0xfc && out
[0] < 0xfe && outlen
>= 6)
154 sprintf (p
, "0x%04X", ((out
[0] & 0x01) << 30)
155 + ((out
[1] & 0x3f) << 24)
156 + ((out
[2] & 0x3f) << 18)
157 + ((out
[3] & 0x3f) << 12)
158 + ((out
[4] & 0x3f) << 6) + (out
[5] & 0x3f));
159 out
+= 6; outlen
-= 6;
163 sprintf (p
, "0x????");
164 out
+= 1; outlen
-= 1;
167 if (bmp_only
&& strlen (p
) > 6)
168 /* Ignore conversions outside Unicode plane 0. */
178 main (int argc
, char *argv
[])
186 fprintf (stderr
, "Usage: tst-table-from charset\n");
191 cd
= iconv_open ("UTF-8", charset
);
192 if (cd
== (iconv_t
)(-1))
194 perror ("iconv_open");
198 /* When testing UTF-8 or GB18030, stop at 0x10000, otherwise the output
199 file gets too big. */
200 bmp_only
= (strcmp (charset
, "UTF-8") == 0
201 || strcmp (charset
, "GB18030") == 0);
202 search_depth
= (strcmp (charset
, "UTF-8") == 0 ? 3 : 4);
205 unsigned char out
[12];
206 unsigned char buf
[4];
207 unsigned int i0
, i1
, i2
, i3
;
210 for (i0
= 0; i0
< 0x100; i0
++)
213 result
= try (cd
, buf
, 1, out
);
219 const char *unicode
= utf8_decode (out
, result
);
221 printf ("0x%02X\t%s\n", i0
, unicode
);
225 for (i1
= 0; i1
< 0x100; i1
++)
228 result
= try (cd
, buf
, 2, out
);
234 const char *unicode
= utf8_decode (out
, result
);
236 printf ("0x%02X%02X\t%s\n", i0
, i1
, unicode
);
240 for (i2
= 0; i2
< 0x100; i2
++)
243 result
= try (cd
, buf
, 3, out
);
249 const char *unicode
= utf8_decode (out
, result
);
251 printf ("0x%02X%02X%02X\t%s\n",
252 i0
, i1
, i2
, unicode
);
254 else if (search_depth
> 3)
256 for (i3
= 0; i3
< 0x100; i3
++)
259 result
= try (cd
, buf
, 4, out
);
265 const char *unicode
=
266 utf8_decode (out
, result
);
268 printf ("0x%02X%02X%02X%02X\t%s\n",
269 i0
, i1
, i2
, i3
, unicode
);
274 "%s: incomplete byte sequence\n",
287 if (iconv_close (cd
) < 0)
289 perror ("iconv_close");
293 if (ferror (stdin
) || fflush (stdout
) || ferror (stdout
))
295 fprintf (stderr
, "I/O error\n");