tests/uninorm/test-u32-normalize-big.c

   1 /* Test of Unicode compliance of normalization of UTF-32 strings.
   2    Copyright (C) 2009-2018 Free Software Foundation, Inc.
   3
   4    This program is free software: you can redistribute it and/or modify
   5    it under the terms of the GNU General Public License as published by
   6    the Free Software Foundation; either version 3 of the License, or
   7    (at your option) any later version.
   8
   9    This program is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12    GNU General Public License for more details.
  13
  14    You should have received a copy of the GNU General Public License
  15    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
  16
  17 /* Written by Bruno Haible <bruno@clisp.org>, 2009.  */
  18
  19 #include <config.h>
  20
  21 /* Specification.  */
  22 #include "test-u32-normalize-big.h"
  23
  24 #if GNULIB_TEST_UNINORM_U32_NORMALIZE
  25
  26 #include <stdio.h>
  27 #include <stdlib.h>
  28
  29 #include "xalloc.h"
  30 #include "unistr.h"
  31 #include "macros.h"
  32
  33 #define ASSERT_WITH_LINE(expr, file, line) \
  34   do                                                                         \
  35     {                                                                        \
  36       if (!(expr))                                                           \
  37         {                                                                    \
  38           fprintf (stderr, "%s:%d: assertion failed for %s:%u\n",            \
  39                    __FILE__, __LINE__, file, line);                          \
  40           fflush (stderr);                                                   \
  41           abort ();                                                          \
  42         }                                                                    \
  43     }                                                                        \
  44   while (0)
  45
  46 static int
  47 cmp_ucs4_t (const void *a, const void *b)
  48 {
  49   ucs4_t a_value = *(const ucs4_t *)a;
  50   ucs4_t b_value = *(const ucs4_t *)b;
  51   return (a_value < b_value ? -1 : a_value > b_value ? 1 : 0);
  52 }
  53
  54 void
  55 read_normalization_test_file (const char *filename,
  56                               struct normalization_test_file *file)
  57 {
  58   FILE *stream;
  59   unsigned int lineno;
  60   int part_index;
  61   struct normalization_test_line *lines;
  62   size_t lines_length;
  63   size_t lines_allocated;
  64
  65   stream = fopen (filename, "r");
  66   if (stream == NULL)
  67     {
  68       fprintf (stderr, "error during fopen of '%s'\n", filename);
  69       exit (1);
  70     }
  71
  72   for (part_index = 0; part_index < 4; part_index++)
  73     {
  74       file->parts[part_index].lines = NULL;
  75       file->parts[part_index].lines_length = 0;
  76     }
  77
  78   lineno = 0;
  79
  80   part_index = -1;
  81   lines = NULL;
  82   lines_length = 0;
  83   lines_allocated = 0;
  84
  85   for (;;)
  86     {
  87       char buf[1000+1];
  88       char *ptr;
  89       int c;
  90       struct normalization_test_line line;
  91       size_t sequence_index;
  92
  93       lineno++;
  94
  95       /* Read a line.  */
  96       ptr = buf;
  97       do
  98         {
  99           c = getc (stream);
 100           if (c == EOF || c == '\n')
 101             break;
 102           *ptr++ = c;
 103         }
 104       while (ptr < buf + 1000);
 105       *ptr = '\0';
 106       if (c == EOF)
 107         break;
 108
 109       /* Ignore empty lines and comment lines.  */
 110       if (buf[0] == '\0' || buf[0] == '#')
 111         continue;
 112
 113       /* Handle lines that introduce a new part.  */
 114       if (buf[0] == '@')
 115         {
 116           /* Switch to the next part.  */
 117           if (part_index >= 0)
 118             {
 119               lines =
 120                 (struct normalization_test_line *)
 121                 xnrealloc (lines, lines_length, sizeof (struct normalization_test_line));
 122               file->parts[part_index].lines = lines;
 123               file->parts[part_index].lines_length = lines_length;
 124             }
 125           part_index++;
 126           lines = NULL;
 127           lines_length = 0;
 128           lines_allocated = 0;
 129           continue;
 130         }
 131
 132       /* It's a line containing 5 sequences of Unicode characters.
 133          Parse it and append it to the current part.  */
 134       if (!(part_index >= 0 && part_index < 4))
 135         {
 136           fprintf (stderr, "unexpected structure of '%s'\n", filename);
 137           exit (1);
 138         }
 139       ptr = buf;
 140       line.lineno = lineno;
 141       for (sequence_index = 0; sequence_index < 5; sequence_index++)
 142         line.sequences[sequence_index] = NULL;
 143       for (sequence_index = 0; sequence_index < 5; sequence_index++)
 144         {
 145           uint32_t *sequence = XNMALLOC (1, uint32_t);
 146           size_t sequence_length = 0;
 147
 148           for (;;)
 149             {
 150               char *endptr;
 151               unsigned int uc;
 152
 153               uc = strtoul (ptr, &endptr, 16);
 154               if (endptr == ptr)
 155                 break;
 156               ptr = endptr;
 157
 158               /* Append uc to the sequence.  */
 159               sequence =
 160                 (uint32_t *)
 161                 xnrealloc (sequence, sequence_length + 2, sizeof (uint32_t));
 162               sequence[sequence_length] = uc;
 163               sequence_length++;
 164
 165               if (*ptr == ' ')
 166                 ptr++;
 167             }
 168           if (sequence_length == 0)
 169             {
 170               fprintf (stderr, "empty character sequence in '%s'\n", filename);
 171               exit (1);
 172             }
 173           sequence[sequence_length] = 0; /* terminator */
 174
 175           line.sequences[sequence_index] = sequence;
 176
 177           if (*ptr != ';')
 178             {
 179               fprintf (stderr, "error parsing '%s'\n", filename);
 180               exit (1);
 181             }
 182           ptr++;
 183         }
 184
 185       /* Append the line to the current part.  */
 186       if (lines_length == lines_allocated)
 187         {
 188           lines_allocated = 2 * lines_allocated;
 189           if (lines_allocated < 7)
 190             lines_allocated = 7;
 191           lines =
 192             (struct normalization_test_line *)
 193             xnrealloc (lines, lines_allocated, sizeof (struct normalization_test_line));
 194         }
 195       lines[lines_length] = line;
 196       lines_length++;
 197     }
 198
 199   if (part_index >= 0)
 200     {
 201       lines =
 202         (struct normalization_test_line *)
 203         xnrealloc (lines, lines_length, sizeof (struct normalization_test_line));
 204       file->parts[part_index].lines = lines;
 205       file->parts[part_index].lines_length = lines_length;
 206     }
 207
 208   {
 209     /* Collect all c1 values from the part 1 in an array.  */
 210     const struct normalization_test_part *p = &file->parts[1];
 211     ucs4_t *c1_array = XNMALLOC (p->lines_length + 1, ucs4_t);
 212     size_t line_index;
 213
 214     for (line_index = 0; line_index < p->lines_length; line_index++)
 215       {
 216         const uint32_t *sequence = p->lines[line_index].sequences[0];
 217         /* In part 1, every sequences[0] consists of a single character.  */
 218         if (!(sequence[0] != 0 && sequence[1] == 0))
 219           abort ();
 220         c1_array[line_index] = sequence[0];
 221       }
 222
 223     /* Sort this array.  */
 224     qsort (c1_array, p->lines_length, sizeof (ucs4_t), cmp_ucs4_t);
 225
 226     /* Add the sentinel at the end.  */
 227     c1_array[p->lines_length] = 0x110000;
 228
 229     file->part1_c1_sorted = c1_array;
 230   }
 231
 232   file->filename = xstrdup (filename);
 233
 234   if (ferror (stream) || fclose (stream))
 235     {
 236       fprintf (stderr, "error reading from '%s'\n", filename);
 237       exit (1);
 238     }
 239 }
 240
 241 void
 242 test_specific (const struct normalization_test_file *file,
 243                int (*check) (const uint32_t *c1, size_t c1_length,
 244                              const uint32_t *c2, size_t c2_length,
 245                              const uint32_t *c3, size_t c3_length,
 246                              const uint32_t *c4, size_t c4_length,
 247                              const uint32_t *c5, size_t c5_length))
 248 {
 249   size_t part_index;
 250
 251   for (part_index = 0; part_index < 4; part_index++)
 252     {
 253       const struct normalization_test_part *p = &file->parts[part_index];
 254       size_t line_index;
 255
 256       for (line_index = 0; line_index < p->lines_length; line_index++)
 257         {
 258           const struct normalization_test_line *l = &p->lines[line_index];
 259
 260           ASSERT_WITH_LINE (check (l->sequences[0], u32_strlen (l->sequences[0]),
 261                                    l->sequences[1], u32_strlen (l->sequences[1]),
 262                                    l->sequences[2], u32_strlen (l->sequences[2]),
 263                                    l->sequences[3], u32_strlen (l->sequences[3]),
 264                                    l->sequences[4], u32_strlen (l->sequences[4]))
 265                             == 0,
 266                             file->filename, l->lineno);
 267         }
 268     }
 269 }
 270
 271 void
 272 test_other (const struct normalization_test_file *file, uninorm_t nf)
 273 {
 274   /* Check that for every character not listed in part 1 of the
 275      NormalizationTest.txt file, the character maps to itself in each
 276      of the four normalization forms.  */
 277   const ucs4_t *p = file->part1_c1_sorted;
 278   ucs4_t uc;
 279
 280   for (uc = 0; uc < 0x110000; uc++)
 281     {
 282       if (uc >= 0xD800 && uc < 0xE000)
 283         {
 284           /* A surrogate, not a character.  Skip uc.  */
 285         }
 286       else if (uc == *p)
 287         {
 288           /* Skip uc.  */
 289           p++;
 290         }
 291       else
 292         {
 293           uint32_t input[1];
 294           size_t length;
 295           uint32_t *result;
 296
 297           input[0] = uc;
 298           result = u32_normalize (nf, input, 1, NULL, &length);
 299           ASSERT (result != NULL && length == 1 && result[0] == uc);
 300         }
 301     }
 302 }
 303
 304 #endif