example2.c

   1 /* example2.c   Example code showing how to use punycode.
   2  * Copyright (C) 2002  Adam M. Costello
   3  * Copyright (C) 2002  Simon Josefsson
   4  *
   5  * This file is part of Libstringprep.
   6  *
   7  * Libstringprep is free software; you can redistribute it and/or
   8  * modify it under the terms of the GNU Lesser General Public
   9  * License as published by the Free Software Foundation; either
  10  * version 2.1 of the License, or (at your option) any later version.
  11  *
  12  * Libstringprep is distributed in the hope that it will be useful,
  13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15  * Lesser General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU Lesser General Public
  18  * License along with Libstringprep; if not, write to the Free Software
  19  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  20  *
  21  */
  22
  23 /*
   24  * This file is derived from draft-ietf-idn-punycode-03.txt by
  25  * Adam M. Costello.
  26  *
  27  * Disclaimer and license: Regarding this entire document or any
  28  * portion of it (including the pseudocode and C code), the author
  29  * makes no guarantees and is not responsible for any damage resulting
  30  * from its use.  The author grants irrevocable permission to anyone
  31  * to use, modify, and distribute it in any way that does not diminish
  32  * the rights of anyone else to use, modify, and distribute it,
  33  * provided that redistributed derivative works do not contain
  34  * misleading author or version information.  Derivative works need
  35  * not be licensed under similar terms.
  36  *
  37  */
  38
  39 #include <assert.h>
  40 #include <stdio.h>
  41 #include <stdlib.h>
  42 #include <string.h>
  43
  44 #include "punycode.h"
  45
  46 /* For testing, we'll just set some compile-time limits rather than */
  47 /* use malloc(), and set a compile-time option rather than using a  */
  48 /* command-line option.                                             */
  49
  50 enum
  51 {
  52   unicode_max_length = 256,
  53   ace_max_length = 256
  54 };
  55
  56 static void
  57 usage (char **argv)
  58 {
  59   fprintf (stderr,
  60            "\n"
  61            "%s -e reads code points and writes a Punycode string.\n"
  62            "%s -d reads a Punycode string and writes code points.\n"
  63            "\n"
  64            "Input and output are plain text in the native character set.\n"
  65            "Code points are in the form u+hex separated by whitespace.\n"
  66            "Although the specification allows Punycode strings to contain\n"
  67            "any characters from the ASCII repertoire, this test code\n"
  68            "supports only the printable characters, and needs the Punycode\n"
  69            "string to be followed by a newline.\n"
  70            "The case of the u in u+hex is the force-to-uppercase flag.\n",
  71            argv[0], argv[0]);
  72   exit (EXIT_FAILURE);
  73 }
  74
  75
  76 static void
  77 fail (const char *msg)
  78 {
  79   fputs (msg, stderr);
  80   exit (EXIT_FAILURE);
  81 }
  82
  83 static const char too_big[] =
  84   "input or output is too large, recompile with larger limits\n";
  85 static const char invalid_input[] = "invalid input\n";
  86 static const char overflow[] = "arithmetic overflow\n";
  87 static const char io_error[] = "I/O error\n";
  88
  89
  90 /* The following string is used to convert printable */
  91 /* characters between ASCII and the native charset:  */
  92
  93 static const char print_ascii[] =
  94   "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n"
  95   "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n"
  96   " !\"#$%&'()*+,-./"
  97   "0123456789:;<=>?"
  98   "@ABCDEFGHIJKLMNO"
  99   "PQRSTUVWXYZ[\\]^_" "`abcdefghijklmno" "pqrstuvwxyz{|}~\n";
 100
 101
 102 int
 103 main (int argc, char **argv)
 104 {
 105   int status;
 106   int r;
 107   unsigned int input_length, output_length, j;
 108   unsigned char case_flags[unicode_max_length];
 109
 110   if (argc != 2)
 111     usage (argv);
 112   if (argv[1][0] != '-')
 113     usage (argv);
 114   if (argv[1][2] != 0)
 115     usage (argv);
 116
 117   if (argv[1][1] == 'e')
 118     {
 119       unsigned long input[unicode_max_length];
 120       unsigned long codept;
 121       char output[ace_max_length + 1], uplus[3];
 122       int c;
 123
 124       /* Read the input code points: */
 125
 126       input_length = 0;
 127
 128       for (;;)
 129         {
 130           r = scanf ("%2s%lx", uplus, &codept);
 131           if (ferror (stdin))
 132             fail (io_error);
 133           if (r == EOF || r == 0)
 134             break;
 135
 136           if (r != 2 || uplus[1] != '+' || codept > (unsigned long) -1)
 137             {
 138               fail (invalid_input);
 139             }
 140
 141           if (input_length == unicode_max_length)
 142             fail (too_big);
 143
 144           if (uplus[0] == 'u')
 145             case_flags[input_length] = 0;
 146           else if (uplus[0] == 'U')
 147             case_flags[input_length] = 1;
 148           else
 149             fail (invalid_input);
 150
 151           input[input_length++] = codept;
 152         }
 153
 154       /* Encode: */
 155
 156       output_length = ace_max_length;
 157       status = punycode_encode (input_length, input, case_flags,
 158                                 &output_length, output);
 159       if (status == PUNYCODE_BAD_INPUT)
 160         fail (invalid_input);
 161       if (status == PUNYCODE_BIG_OUTPUT)
 162         fail (too_big);
 163       if (status == PUNYCODE_OVERFLOW)
 164         fail (overflow);
 165       assert (status == PUNYCODE_SUCCESS);
 166
 167       /* Convert to native charset and output: */
 168
 169       for (j = 0; j < output_length; ++j)
 170         {
 171           c = output[j];
 172           assert (c >= 0 && c <= 127);
 173           if (print_ascii[c] == 0)
 174             fail (invalid_input);
 175           output[j] = print_ascii[c];
 176         }
 177
 178       output[j] = 0;
 179       r = puts (output);
 180       if (r == EOF)
 181         fail (io_error);
 182       return EXIT_SUCCESS;
 183     }
 184
 185   if (argv[1][1] == 'd')
 186     {
 187       char input[ace_max_length + 2], *p, *pp;
 188       unsigned long output[unicode_max_length];
 189
 190       /* Read the Punycode input string and convert to ASCII: */
 191
 192       fgets (input, ace_max_length + 2, stdin);
 193       if (ferror (stdin))
 194         fail (io_error);
 195       if (feof (stdin))
 196         fail (invalid_input);
 197       input_length = strlen (input) - 1;
 198       if (input[input_length] != '\n')
 199         fail (too_big);
 200       input[input_length] = 0;
 201
 202       for (p = input; *p != 0; ++p)
 203         {
 204           pp = strchr (print_ascii, *p);
 205           if (pp == 0)
 206             fail (invalid_input);
 207           *p = pp - print_ascii;
 208         }
 209
 210       /* Decode: */
 211
 212       output_length = unicode_max_length;
 213       status = punycode_decode (input_length, input, &output_length,
 214                                 output, case_flags);
 215       if (status == PUNYCODE_BAD_INPUT)
 216         fail (invalid_input);
 217       if (status == PUNYCODE_BIG_OUTPUT)
 218         fail (too_big);
 219       if (status == PUNYCODE_OVERFLOW)
 220         fail (overflow);
 221       assert (status == PUNYCODE_SUCCESS);
 222
 223       /* Output the result: */
 224
 225       for (j = 0; j < output_length; ++j)
 226         {
 227           r = printf ("%s+%04lX\n",
 228                       case_flags[j] ? "U" : "u", (unsigned long) output[j]);
 229           if (r < 0)
 230             fail (io_error);
 231         }
 232
 233       return EXIT_SUCCESS;
 234     }
 235
 236   usage (argv);
 237   return EXIT_SUCCESS;          /* not reached, but quiets compiler warning */
 238
 239 }