Update, sync with libidn.texi.
[libidn.git] / examples / example2.c
blob6076071db318a4c3c4d563ca019aac8c2d5c7d48
/* example2.c --- Example code showing how to use punycode.
 * Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007 Simon Josefsson
 * Copyright (C) 2002 Adam M. Costello
 *
 * This file is part of GNU Libidn.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 *
 */
#include <locale.h>		/* setlocale() */

/*
 * This file is derived from RFC 3492 written by Adam M. Costello.
 *
 * Disclaimer and license: Regarding this entire document or any
 * portion of it (including the pseudocode and C code), the author
 * makes no guarantees and is not responsible for any damage resulting
 * from its use. The author grants irrevocable permission to anyone
 * to use, modify, and distribute it in any way that does not diminish
 * the rights of anyone else to use, modify, and distribute it,
 * provided that redistributed derivative works do not contain
 * misleading author or version information. Derivative works need
 * not be licensed under similar terms.
 *
 */

#include <assert.h>
#include <stdint.h>		/* uint32_t */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <punycode.h>
/* For testing, we'll just set some compile-time limits rather than */
/* use malloc(), and set a compile-time option rather than using a */
/* command-line option. */

enum
{
  /* Number of elements in the code-point and case-flag arrays in main(). */
  unicode_max_length = 256,
  /* Number of Punycode characters main() accepts or produces, not
     counting the terminator/newline slots it adds on top. */
  ace_max_length = 256
};
/* Print a short description of the -e and -d modes to stderr and
   terminate the program with EXIT_FAILURE.  Does not return.

   ARGV is main()'s argument vector; only argv[0] is read, and it is
   printed as the program name.  */
static void
usage (char **argv)
{
  /* main() calls this whenever argc != 2, which includes an empty
     argument vector where argv[0] is a null pointer.  Passing a null
     pointer to %s is undefined, so substitute a fixed name.  */
  const char *progname =
    (argv != NULL && argv[0] != NULL) ? argv[0] : "example2";

  fprintf (stderr,
	   "\n"
	   "%s -e reads code points and writes a Punycode string.\n"
	   "%s -d reads a Punycode string and writes code points.\n"
	   "\n"
	   "Input and output are plain text in the native character set.\n"
	   "Code points are in the form u+hex separated by whitespace.\n"
	   "Although the specification allows Punycode strings to contain\n"
	   "any characters from the ASCII repertoire, this test code\n"
	   "supports only the printable characters, and needs the Punycode\n"
	   "string to be followed by a newline.\n"
	   "The case of the u in u+hex is the force-to-uppercase flag.\n",
	   progname, progname);
  exit (EXIT_FAILURE);
}
/* Write the diagnostic MSG to stderr verbatim (no newline is appended)
   and terminate the program with EXIT_FAILURE.  Does not return.  */
static void
fail (const char *msg)
{
  fputs (msg, stderr);
  exit (EXIT_FAILURE);
}
/* Diagnostics handed to fail().  Each one already ends in a newline
   because fail() prints its argument as-is.  */
static const char too_big[] =
  "input or output is too large, recompile with larger limits\n";
static const char invalid_input[] = "invalid input\n";
static const char overflow[] = "arithmetic overflow\n";
static const char io_error[] = "I/O error\n";
/* The following string is used to convert printable */
/* characters between ASCII and the native charset: */
/* element N holds the native character for ASCII code N, and every */
/* non-printable code (0..31 and 127) holds '\n'.  The table must */
/* contain exactly 128 characters and no embedded NUL, because */
/* main() both indexes it by ASCII code and searches it with strchr(). */

static const char print_ascii[] =
  "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n"
  "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n"
  " !\"#$%&'()*+,-./"
  "0123456789:;<=>?"
  /* This was "\0x40", i.e. a NUL followed by the three characters
     'x', '4', '0'.  That shifted every later entry by three and made
     strchr() stop at index 64.  It stays a hex escape in its own
     literal so the escape cannot absorb the following 'A'.
     NOTE(review): presumably written as an escape to keep a literal
     at sign out of this file -- confirm before replacing with "@".  */
  "\x40"			/* at sign */
  "ABCDEFGHIJKLMNO"
  "PQRSTUVWXYZ[\\]^_"
  "`abcdefghijklmno"
  "pqrstuvwxyz{|}~\n";
95 int
96 main (int argc, char **argv)
98 enum punycode_status status;
99 int r;
100 size_t input_length, output_length, j;
101 unsigned char case_flags[unicode_max_length];
103 setlocale (LC_ALL, "");
105 if (argc != 2)
106 usage (argv);
107 if (argv[1][0] != '-')
108 usage (argv);
109 if (argv[1][2] != 0)
110 usage (argv);
112 if (argv[1][1] == 'e')
114 uint32_t input[unicode_max_length];
115 unsigned long codept;
116 char output[ace_max_length + 1], uplus[3];
117 int c;
119 /* Read the input code points: */
121 input_length = 0;
123 for (;;)
125 r = scanf ("%2s%lx", uplus, &codept);
126 if (ferror (stdin))
127 fail (io_error);
128 if (r == EOF || r == 0)
129 break;
131 if (r != 2 || uplus[1] != '+' || codept > (uint32_t) - 1)
133 fail (invalid_input);
136 if (input_length == unicode_max_length)
137 fail (too_big);
139 if (uplus[0] == 'u')
140 case_flags[input_length] = 0;
141 else if (uplus[0] == 'U')
142 case_flags[input_length] = 1;
143 else
144 fail (invalid_input);
146 input[input_length++] = codept;
149 /* Encode: */
151 output_length = ace_max_length;
152 status = punycode_encode (input_length, input, case_flags,
153 &output_length, output);
154 if (status == punycode_bad_input)
155 fail (invalid_input);
156 if (status == punycode_big_output)
157 fail (too_big);
158 if (status == punycode_overflow)
159 fail (overflow);
160 assert (status == punycode_success);
162 /* Convert to native charset and output: */
164 for (j = 0; j < output_length; ++j)
166 c = output[j];
167 assert (c >= 0 && c <= 127);
168 if (print_ascii[c] == 0)
169 fail (invalid_input);
170 output[j] = print_ascii[c];
173 output[j] = 0;
174 r = puts (output);
175 if (r == EOF)
176 fail (io_error);
177 return EXIT_SUCCESS;
180 if (argv[1][1] == 'd')
182 char input[ace_max_length + 2], *p, *pp;
183 uint32_t output[unicode_max_length];
185 /* Read the Punycode input string and convert to ASCII: */
187 fgets (input, ace_max_length + 2, stdin);
188 if (ferror (stdin))
189 fail (io_error);
190 if (feof (stdin))
191 fail (invalid_input);
192 input_length = strlen (input) - 1;
193 if (input[input_length] != '\n')
194 fail (too_big);
195 input[input_length] = 0;
197 for (p = input; *p != 0; ++p)
199 pp = strchr (print_ascii, *p);
200 if (pp == 0)
201 fail (invalid_input);
202 *p = pp - print_ascii;
205 /* Decode: */
207 output_length = unicode_max_length;
208 status = punycode_decode (input_length, input, &output_length,
209 output, case_flags);
210 if (status == punycode_bad_input)
211 fail (invalid_input);
212 if (status == punycode_big_output)
213 fail (too_big);
214 if (status == punycode_overflow)
215 fail (overflow);
216 assert (status == punycode_success);
218 /* Output the result: */
220 for (j = 0; j < output_length; ++j)
222 r = printf ("%s+%04lX\n",
223 case_flags[j] ? "U" : "u", (unsigned long) output[j]);
224 if (r < 0)
225 fail (io_error);
228 return EXIT_SUCCESS;
231 usage (argv);
232 return EXIT_SUCCESS; /* not reached, but quiets compiler warning */