example2.c

   1 /* example2.c   Example code showing how to use punycode.
   2  * Copyright (C) 2002, 2003  Simon Josefsson
   3  * Copyright (C) 2002  Adam M. Costello
   4  *
   5  * This file is part of GNU Libidn.
   6  *
   7  * GNU Libidn is free software; you can redistribute it and/or
   8  * modify it under the terms of the GNU Lesser General Public
   9  * License as published by the Free Software Foundation; either
  10  * version 2.1 of the License, or (at your option) any later version.
  11  *
  12  * GNU Libidn is distributed in the hope that it will be useful,
  13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15  * Lesser General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU Lesser General Public
  18  * License along with GNU Libidn; if not, write to the Free Software
  19  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  20  *
  21  */
  22
  23 /*
   24  * This file is derived from draft-ietf-idn-punycode-03.txt by
  25  * Adam M. Costello.
  26  *
  27  * Disclaimer and license: Regarding this entire document or any
  28  * portion of it (including the pseudocode and C code), the author
  29  * makes no guarantees and is not responsible for any damage resulting
  30  * from its use.  The author grants irrevocable permission to anyone
  31  * to use, modify, and distribute it in any way that does not diminish
  32  * the rights of anyone else to use, modify, and distribute it,
  33  * provided that redistributed derivative works do not contain
  34  * misleading author or version information.  Derivative works need
  35  * not be licensed under similar terms.
  36  *
  37  */
  38
  39 #include <assert.h>
  40 #include <stdio.h>
  41 #include <stdlib.h>
  42 #include <string.h>
  43
  44 #include "punycode.h"
  45
  46 /* For testing, we'll just set some compile-time limits rather than */
  47 /* use malloc(), and set a compile-time option rather than using a  */
  48 /* command-line option.                                             */
  49
  50 enum
  51 {
  52   unicode_max_length = 256,
  53   ace_max_length = 256
  54 };
  55
  56 static void
  57 usage (char **argv)
  58 {
  59   fprintf (stderr,
  60            "\n"
  61            "%s -e reads code points and writes a Punycode string.\n"
  62            "%s -d reads a Punycode string and writes code points.\n"
  63            "\n"
  64            "Input and output are plain text in the native character set.\n"
  65            "Code points are in the form u+hex separated by whitespace.\n"
  66            "Although the specification allows Punycode strings to contain\n"
  67            "any characters from the ASCII repertoire, this test code\n"
  68            "supports only the printable characters, and needs the Punycode\n"
  69            "string to be followed by a newline.\n"
  70            "The case of the u in u+hex is the force-to-uppercase flag.\n",
  71            argv[0], argv[0]);
  72   exit (EXIT_FAILURE);
  73 }
  74
  75 static void
  76 fail (const char *msg)
  77 {
  78   fputs (msg, stderr);
  79   exit (EXIT_FAILURE);
  80 }
  81
  82 static const char too_big[] =
  83   "input or output is too large, recompile with larger limits\n";
  84 static const char invalid_input[] = "invalid input\n";
  85 static const char overflow[] = "arithmetic overflow\n";
  86 static const char io_error[] = "I/O error\n";
  87
  88
  89 /* The following string is used to convert printable */
  90 /* characters between ASCII and the native charset:  */
  91
  92 static const char print_ascii[] =
  93   "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n"
  94   "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n"
  95   " !\"#$%&'()*+,-./"
  96   "0123456789:;<=>?"
  97   "\x40""ABCDEFGHIJKLMNO"
  98   "PQRSTUVWXYZ[\\]^_" "`abcdefghijklmno" "pqrstuvwxyz{|}~\n";
  99
 100
 101 int
 102 main (int argc, char **argv)
 103 {
 104   int status;
 105   int r;
 106   size_t input_length, output_length, j;
 107   unsigned char case_flags[unicode_max_length];
 108
 109   if (argc != 2)
 110     usage (argv);
 111   if (argv[1][0] != '-')
 112     usage (argv);
 113   if (argv[1][2] != 0)
 114     usage (argv);
 115
 116   if (argv[1][1] == 'e')
 117     {
 118       unsigned long input[unicode_max_length];
 119       unsigned long codept;
 120       char output[ace_max_length + 1], uplus[3];
 121       int c;
 122
 123       /* Read the input code points: */
 124
 125       input_length = 0;
 126
 127       for (;;)
 128         {
 129           r = scanf ("%2s%lx", uplus, &codept);
 130           if (ferror (stdin))
 131             fail (io_error);
 132           if (r == EOF || r == 0)
 133             break;
 134
 135           if (r != 2 || uplus[1] != '+' || codept > (unsigned long) -1)
 136             {
 137               fail (invalid_input);
 138             }
 139
 140           if (input_length == unicode_max_length)
 141             fail (too_big);
 142
 143           if (uplus[0] == 'u')
 144             case_flags[input_length] = 0;
 145           else if (uplus[0] == 'U')
 146             case_flags[input_length] = 1;
 147           else
 148             fail (invalid_input);
 149
 150           input[input_length++] = codept;
 151         }
 152
 153       /* Encode: */
 154
 155       output_length = ace_max_length;
 156       status = punycode_encode (input_length, input, case_flags,
 157                                 &output_length, output);
 158       if (status == PUNYCODE_BAD_INPUT)
 159         fail (invalid_input);
 160       if (status == PUNYCODE_BIG_OUTPUT)
 161         fail (too_big);
 162       if (status == PUNYCODE_OVERFLOW)
 163         fail (overflow);
 164       assert (status == PUNYCODE_SUCCESS);
 165
 166       /* Convert to native charset and output: */
 167
 168       for (j = 0; j < output_length; ++j)
 169         {
 170           c = output[j];
 171           assert (c >= 0 && c <= 127);
 172           if (print_ascii[c] == 0)
 173             fail (invalid_input);
 174           output[j] = print_ascii[c];
 175         }
 176
 177       output[j] = 0;
 178       r = puts (output);
 179       if (r == EOF)
 180         fail (io_error);
 181       return EXIT_SUCCESS;
 182     }
 183
 184   if (argv[1][1] == 'd')
 185     {
 186       char input[ace_max_length + 2], *p, *pp;
 187       unsigned long output[unicode_max_length];
 188
 189       /* Read the Punycode input string and convert to ASCII: */
 190
 191       fgets (input, ace_max_length + 2, stdin);
 192       if (ferror (stdin))
 193         fail (io_error);
 194       if (feof (stdin))
 195         fail (invalid_input);
 196       input_length = strlen (input) - 1;
 197       if (input[input_length] != '\n')
 198         fail (too_big);
 199       input[input_length] = 0;
 200
 201       for (p = input; *p != 0; ++p)
 202         {
 203           pp = strchr (print_ascii, *p);
 204           if (pp == 0)
 205             fail (invalid_input);
 206           *p = pp - print_ascii;
 207         }
 208
 209       /* Decode: */
 210
 211       output_length = unicode_max_length;
 212       status = punycode_decode (input_length, input, &output_length,
 213                                 output, case_flags);
 214       if (status == PUNYCODE_BAD_INPUT)
 215         fail (invalid_input);
 216       if (status == PUNYCODE_BIG_OUTPUT)
 217         fail (too_big);
 218       if (status == PUNYCODE_OVERFLOW)
 219         fail (overflow);
 220       assert (status == PUNYCODE_SUCCESS);
 221
 222       /* Output the result: */
 223
 224       for (j = 0; j < output_length; ++j)
 225         {
 226           r = printf ("%s+%04lX\n",
 227                       case_flags[j] ? "U" : "u", (unsigned long) output[j]);
 228           if (r < 0)
 229             fail (io_error);
 230         }
 231
 232       return EXIT_SUCCESS;
 233     }
 234
 235   usage (argv);
 236   return EXIT_SUCCESS;          /* not reached, but quiets compiler warning */
 237
 238 }