examples/example2.c

   1 /* example2.c --- Example code showing how to use punycode.
   2  * Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007  Simon Josefsson
   3  * Copyright (C) 2002  Adam M. Costello
   4  *
   5  * This file is part of GNU Libidn.
   6  *
   7  * This program is free software: you can redistribute it and/or modify
   8  * it under the terms of the GNU General Public License as published by
   9  * the Free Software Foundation, either version 3 of the License, or
  10  * (at your option) any later version.
  11  *
  12  * This program is distributed in the hope that it will be useful,
  13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15  * GNU General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU General Public License
  18  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  19  *
  20  */
  21
  22 #include <locale.h>             /* setlocale() */
  23
  24 /*
  25  * This file is derived from RFC 3492 written by Adam M. Costello.
  26  *
  27  * Disclaimer and license: Regarding this entire document or any
  28  * portion of it (including the pseudocode and C code), the author
  29  * makes no guarantees and is not responsible for any damage resulting
  30  * from its use.  The author grants irrevocable permission to anyone
  31  * to use, modify, and distribute it in any way that does not diminish
  32  * the rights of anyone else to use, modify, and distribute it,
  33  * provided that redistributed derivative works do not contain
  34  * misleading author or version information.  Derivative works need
  35  * not be licensed under similar terms.
  36  *
  37  */
  38
  39 #include <assert.h>
  40 #include <stdio.h>
  41 #include <stdlib.h>
  42 #include <string.h>
  43
  44 #include <punycode.h>
  45
  /* Buffer capacities are fixed at compile time so that this example   */
  /* needs no malloc(); rebuild with larger values if the program       */
  /* reports that its input or output is too large.                     */

  enum
  {
    ace_max_length = 256,       /* most characters in a Punycode string */
    unicode_max_length = 256    /* most code points in a Unicode string */
  };
  55
  /* Print the command-line help text to stderr and terminate the
     program with EXIT_FAILURE.  ARGV is the argument vector passed to
     main(); only its first element (the program name) is used.  This
     function does not return.  */
  static void
  usage (char **argv)
  {
    /* The program name may be a null pointer when the caller started us
       with an empty argument vector (argc == 0); handing that to a %s
       conversion would be undefined behavior, so fall back to a fixed
       name.  */
    const char *prog = "example2";

    if (argv != NULL && argv[0] != NULL)
      prog = argv[0];

    fprintf (stderr,
             "\n"
             "%s -e reads code points and writes a Punycode string.\n"
             "%s -d reads a Punycode string and writes code points.\n"
             "\n"
             "Input and output are plain text in the native character set.\n"
             "Code points are in the form u+hex separated by whitespace.\n"
             "Although the specification allows Punycode strings to contain\n"
             "any characters from the ASCII repertoire, this test code\n"
             "supports only the printable characters, and needs the Punycode\n"
             "string to be followed by a newline.\n"
             "The case of the u in u+hex is the force-to-uppercase flag.\n",
             prog, prog);
    exit (EXIT_FAILURE);
  }
  74
  /* Write the diagnostic MSG to stderr exactly as given (no newline is
     appended) and terminate the program with EXIT_FAILURE.  This
     function does not return.  */
  static void
  fail (const char *msg)
  {
    fprintf (stderr, "%s", msg);
    exit (EXIT_FAILURE);
  }
  81
  /* Diagnostics handed to fail(); each one already ends with a newline.  */
  static const char io_error[] = "I/O error\n";
  static const char overflow[] = "arithmetic overflow\n";
  static const char invalid_input[] = "invalid input\n";
  static const char too_big[] =
    "input or output is too large, " "recompile with larger limits\n";
  87
  /* The following string is used to convert printable characters       */
  /* between ASCII and the native charset: the element at index N is    */
  /* the native character for ASCII code N.  Codes without a printable  */
  /* equivalent (0x00-0x1F and 0x7F) hold '\n', and the terminating     */
  /* null character sits at index 128.                                  */
  /*                                                                    */
  /* The at sign has to occupy exactly one element.  Spelling it as     */
  /* "\0x40" would instead store a NUL followed by the characters x, 4  */
  /* and 0, which shifts every later entry by three positions and ends  */
  /* the string, as seen by strchr(), at index 64.                      */

  static const char print_ascii[] =
    "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n"    /* 0x00 - 0x0F */
    "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n"    /* 0x10 - 0x1F */
    " !\"#$%&'()*+,-./"                    /* 0x20 - 0x2F */
    "0123456789:;<=>?"                     /* 0x30 - 0x3F */
    "@ABCDEFGHIJKLMNO"                     /* 0x40 - 0x4F */
    "PQRSTUVWXYZ[\\]^_"                    /* 0x50 - 0x5F */
    "`abcdefghijklmno"                     /* 0x60 - 0x6F */
    "pqrstuvwxyz{|}~\n";                   /* 0x70 - 0x7F */
  94
  95 int
  96 main (int argc, char **argv)
  97 {
  98   enum punycode_status status;
  99   int r;
 100   size_t input_length, output_length, j;
 101   unsigned char case_flags[unicode_max_length];
 102
 103   setlocale (LC_ALL, "");
 104
 105   if (argc != 2)
 106     usage (argv);
 107   if (argv[1][0] != '-')
 108     usage (argv);
 109   if (argv[1][2] != 0)
 110     usage (argv);
 111
 112   if (argv[1][1] == 'e')
 113     {
 114       uint32_t input[unicode_max_length];
 115       unsigned long codept;
 116       char output[ace_max_length + 1], uplus[3];
 117       int c;
 118
 119       /* Read the input code points: */
 120
 121       input_length = 0;
 122
 123       for (;;)
 124         {
 125           r = scanf ("%2s%lx", uplus, &codept);
 126           if (ferror (stdin))
 127             fail (io_error);
 128           if (r == EOF || r == 0)
 129             break;
 130
 131           if (r != 2 || uplus[1] != '+' || codept > (uint32_t) - 1)
 132             {
 133               fail (invalid_input);
 134             }
 135
 136           if (input_length == unicode_max_length)
 137             fail (too_big);
 138
 139           if (uplus[0] == 'u')
 140             case_flags[input_length] = 0;
 141           else if (uplus[0] == 'U')
 142             case_flags[input_length] = 1;
 143           else
 144             fail (invalid_input);
 145
 146           input[input_length++] = codept;
 147         }
 148
 149       /* Encode: */
 150
 151       output_length = ace_max_length;
 152       status = punycode_encode (input_length, input, case_flags,
 153                                 &output_length, output);
 154       if (status == punycode_bad_input)
 155         fail (invalid_input);
 156       if (status == punycode_big_output)
 157         fail (too_big);
 158       if (status == punycode_overflow)
 159         fail (overflow);
 160       assert (status == punycode_success);
 161
 162       /* Convert to native charset and output: */
 163
 164       for (j = 0; j < output_length; ++j)
 165         {
 166           c = output[j];
 167           assert (c >= 0 && c <= 127);
 168           if (print_ascii[c] == 0)
 169             fail (invalid_input);
 170           output[j] = print_ascii[c];
 171         }
 172
 173       output[j] = 0;
 174       r = puts (output);
 175       if (r == EOF)
 176         fail (io_error);
 177       return EXIT_SUCCESS;
 178     }
 179
 180   if (argv[1][1] == 'd')
 181     {
 182       char input[ace_max_length + 2], *p, *pp;
 183       uint32_t output[unicode_max_length];
 184
 185       /* Read the Punycode input string and convert to ASCII: */
 186
 187       fgets (input, ace_max_length + 2, stdin);
 188       if (ferror (stdin))
 189         fail (io_error);
 190       if (feof (stdin))
 191         fail (invalid_input);
 192       input_length = strlen (input) - 1;
 193       if (input[input_length] != '\n')
 194         fail (too_big);
 195       input[input_length] = 0;
 196
 197       for (p = input; *p != 0; ++p)
 198         {
 199           pp = strchr (print_ascii, *p);
 200           if (pp == 0)
 201             fail (invalid_input);
 202           *p = pp - print_ascii;
 203         }
 204
 205       /* Decode: */
 206
 207       output_length = unicode_max_length;
 208       status = punycode_decode (input_length, input, &output_length,
 209                                 output, case_flags);
 210       if (status == punycode_bad_input)
 211         fail (invalid_input);
 212       if (status == punycode_big_output)
 213         fail (too_big);
 214       if (status == punycode_overflow)
 215         fail (overflow);
 216       assert (status == punycode_success);
 217
 218       /* Output the result: */
 219
 220       for (j = 0; j < output_length; ++j)
 221         {
 222           r = printf ("%s+%04lX\n",
 223                       case_flags[j] ? "U" : "u", (unsigned long) output[j]);
 224           if (r < 0)
 225             fail (io_error);
 226         }
 227
 228       return EXIT_SUCCESS;
 229     }
 230
 231   usage (argv);
 232   return EXIT_SUCCESS;          /* not reached, but quiets compiler warning */
 233 }