example2.c

   1 /* example2.c   Example code showing how to use punycode.
   2  * Copyright (C) 2002, 2003  Simon Josefsson
   3  * Copyright (C) 2002  Adam M. Costello
   4  *
   5  * This file is part of GNU Libidn.
   6  *
   7  * GNU Libidn is free software; you can redistribute it and/or
   8  * modify it under the terms of the GNU Lesser General Public
   9  * License as published by the Free Software Foundation; either
  10  * version 2.1 of the License, or (at your option) any later version.
  11  *
  12  * GNU Libidn is distributed in the hope that it will be useful,
  13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15  * Lesser General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU Lesser General Public
  18  * License along with GNU Libidn; if not, write to the Free Software
  19  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  20  *
  21  */
  22
  23 /*
  24  * This file is derived from RFC 3492 written by Adam M. Costello.
  25  *
  26  * Disclaimer and license: Regarding this entire document or any
  27  * portion of it (including the pseudocode and C code), the author
  28  * makes no guarantees and is not responsible for any damage resulting
  29  * from its use.  The author grants irrevocable permission to anyone
  30  * to use, modify, and distribute it in any way that does not diminish
  31  * the rights of anyone else to use, modify, and distribute it,
  32  * provided that redistributed derivative works do not contain
  33  * misleading author or version information.  Derivative works need
  34  * not be licensed under similar terms.
  35  *
  36  */
  37
  38 #include <assert.h>
  39 #include <stdio.h>
  40 #include <stdlib.h>
  41 #include <string.h>
  42
  43 #include <punycode.h>
  44
  /*
   * Fixed buffer capacities for this test program.  Compile-time limits
   * are used instead of malloc(); inputs or outputs that do not fit are
   * rejected with the "too large" diagnostic.
   */
  enum
  {
    ace_max_length = 256,       /* characters in a Punycode string buffer */
    unicode_max_length = 256    /* code points (and their case flags)     */
  };
  54
  /*
   * Print a short help text to stderr and terminate the process with
   * EXIT_FAILURE.  This function does not return.
   *
   * argv: the argument vector received by main().  argv[0] is printed as
   *       the program name.  main() calls this function whenever argc is
   *       not 2, which includes argc == 0, where argv[0] is a null
   *       pointer; handing a null pointer to %s is undefined behavior, so
   *       a fixed fallback name is printed in that case.
   */
  static void
  usage (char **argv)
  {
    const char *progname = "example2";

    if (argv != NULL && argv[0] != NULL)
      progname = argv[0];

    fprintf (stderr,
             "\n"
             "%s -e reads code points and writes a Punycode string.\n"
             "%s -d reads a Punycode string and writes code points.\n"
             "\n"
             "Input and output are plain text in the native character set.\n"
             "Code points are in the form u+hex separated by whitespace.\n"
             "Although the specification allows Punycode strings to contain\n"
             "any characters from the ASCII repertoire, this test code\n"
             "supports only the printable characters, and needs the Punycode\n"
             "string to be followed by a newline.\n"
             "The case of the u in u+hex is the force-to-uppercase flag.\n",
             progname, progname);
    exit (EXIT_FAILURE);
  }
  73
  /*
   * Report a fatal error: write msg verbatim to stderr (no newline is
   * appended) and terminate the process with EXIT_FAILURE.
   */
  static void
  fail (const char *msg)
  {
    fprintf (stderr, "%s", msg);
    exit (EXIT_FAILURE);
  }
  80
  /* Diagnostic texts passed to fail(); each already ends with a newline. */
  static const char io_error[] = "I/O error\n";
  static const char overflow[] = "arithmetic overflow\n";
  static const char invalid_input[] = "invalid input\n";
  static const char too_big[] =
    "input or output is too large, "
    "recompile with larger limits\n";
  86
  /*
   * Translation table between ASCII and the native character set.
   *
   * The table is indexed by ASCII code (0..127); each entry holds the
   * corresponding character in the native charset.  The slots for the
   * non-printable codes (0-31 and 127) hold '\n' as a placeholder: the
   * decoder strips the line's newline before looking characters up with
   * strchr(), so those slots are never matched.
   *
   * The at sign must be written as a plain "@".  Spelling it "\0x40"
   * yields a NUL byte followed by the characters 'x', '4', '0', which
   * shifts every later entry by three positions and makes strchr()
   * stop before it reaches any letter.
   */
  static const char print_ascii[] =
    "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n"          /* 0x00 - 0x0F */
    "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n"          /* 0x10 - 0x1F */
    " !\"#$%&'()*+,-./"                         /* 0x20 - 0x2F */
    "0123456789:;<=>?"                          /* 0x30 - 0x3F */
    "@"                                         /* 0x40, at sign */
    "ABCDEFGHIJKLMNO"                           /* 0x41 - 0x4F */
    "PQRSTUVWXYZ[\\]^_"                         /* 0x50 - 0x5F */
    "`abcdefghijklmno"                          /* 0x60 - 0x6F */
    "pqrstuvwxyz{|}~\n";                        /* 0x70 - 0x7F */
  93
  94 int
  95 main (int argc, char **argv)
  96 {
  97   enum punycode_status status;
  98   int r;
  99   size_t input_length, output_length, j;
 100   unsigned char case_flags[unicode_max_length];
 101
 102   if (argc != 2)
 103     usage (argv);
 104   if (argv[1][0] != '-')
 105     usage (argv);
 106   if (argv[1][2] != 0)
 107     usage (argv);
 108
 109   if (argv[1][1] == 'e')
 110     {
 111       uint32_t input[unicode_max_length];
 112       unsigned long codept;
 113       char output[ace_max_length + 1], uplus[3];
 114       int c;
 115
 116       /* Read the input code points: */
 117
 118       input_length = 0;
 119
 120       for (;;)
 121         {
 122           r = scanf ("%2s%lx", uplus, &codept);
 123           if (ferror (stdin))
 124             fail (io_error);
 125           if (r == EOF || r == 0)
 126             break;
 127
 128           if (r != 2 || uplus[1] != '+' || codept > (uint32_t) - 1)
 129             {
 130               fail (invalid_input);
 131             }
 132
 133           if (input_length == unicode_max_length)
 134             fail (too_big);
 135
 136           if (uplus[0] == 'u')
 137             case_flags[input_length] = 0;
 138           else if (uplus[0] == 'U')
 139             case_flags[input_length] = 1;
 140           else
 141             fail (invalid_input);
 142
 143           input[input_length++] = codept;
 144         }
 145
 146       /* Encode: */
 147
 148       output_length = ace_max_length;
 149       status = punycode_encode (input_length, input, case_flags,
 150                                 &output_length, output);
 151       if (status == punycode_bad_input)
 152         fail (invalid_input);
 153       if (status == punycode_big_output)
 154         fail (too_big);
 155       if (status == punycode_overflow)
 156         fail (overflow);
 157       assert (status == punycode_success);
 158
 159       /* Convert to native charset and output: */
 160
 161       for (j = 0; j < output_length; ++j)
 162         {
 163           c = output[j];
 164           assert (c >= 0 && c <= 127);
 165           if (print_ascii[c] == 0)
 166             fail (invalid_input);
 167           output[j] = print_ascii[c];
 168         }
 169
 170       output[j] = 0;
 171       r = puts (output);
 172       if (r == EOF)
 173         fail (io_error);
 174       return EXIT_SUCCESS;
 175     }
 176
 177   if (argv[1][1] == 'd')
 178     {
 179       char input[ace_max_length + 2], *p, *pp;
 180       uint32_t output[unicode_max_length];
 181
 182       /* Read the Punycode input string and convert to ASCII: */
 183
 184       fgets (input, ace_max_length + 2, stdin);
 185       if (ferror (stdin))
 186         fail (io_error);
 187       if (feof (stdin))
 188         fail (invalid_input);
 189       input_length = strlen (input) - 1;
 190       if (input[input_length] != '\n')
 191         fail (too_big);
 192       input[input_length] = 0;
 193
 194       for (p = input; *p != 0; ++p)
 195         {
 196           pp = strchr (print_ascii, *p);
 197           if (pp == 0)
 198             fail (invalid_input);
 199           *p = pp - print_ascii;
 200         }
 201
 202       /* Decode: */
 203
 204       output_length = unicode_max_length;
 205       status = punycode_decode (input_length, input, &output_length,
 206                                 output, case_flags);
 207       if (status == punycode_bad_input)
 208         fail (invalid_input);
 209       if (status == punycode_big_output)
 210         fail (too_big);
 211       if (status == punycode_overflow)
 212         fail (overflow);
 213       assert (status == punycode_success);
 214
 215       /* Output the result: */
 216
 217       for (j = 0; j < output_length; ++j)
 218         {
 219           r = printf ("%s+%04lX\n",
 220                       case_flags[j] ? "U" : "u", (unsigned long) output[j]);
 221           if (r < 0)
 222             fail (io_error);
 223         }
 224
 225       return EXIT_SUCCESS;
 226     }
 227
 228   usage (argv);
 229   return EXIT_SUCCESS;          /* not reached, but quiets compiler warning */
 230 }