tests/test-c32ispunct.c

   1 /* Test of c32ispunct() function.
   2    Copyright (C) 2020 Free Software Foundation, Inc.
   3
   4    This program is free software: you can redistribute it and/or modify
   5    it under the terms of the GNU General Public License as published by
   6    the Free Software Foundation; either version 3 of the License, or
   7    (at your option) any later version.
   8
   9    This program is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12    GNU General Public License for more details.
  13
  14    You should have received a copy of the GNU General Public License
  15    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
  16
  17 #include <config.h>
  18
  19 #include <uchar.h>
  20
  21 #include "signature.h"
  22 SIGNATURE_CHECK (c32ispunct, int, (wint_t));
  23
  24 #include <locale.h>
  25 #include <stdlib.h>
  26 #include <string.h>
  27 #include <wchar.h>
  28
  29 #include "macros.h"
  30
  31 /* Returns the value of c32ispunct for the multibyte character s[0..n-1].  */
  32 static int
  33 for_character (const char *s, size_t n)
  34 {
  35   mbstate_t state;
  36   char32_t wc;
  37   size_t ret;
  38
  39   memset (&state, '\0', sizeof (mbstate_t));
  40   wc = (char32_t) 0xBADFACE;
  41   ret = mbrtoc32 (&wc, s, n, &state);
  42   ASSERT (ret == n);
  43
  44   return c32ispunct (wc);
  45 }
  46
  47 int
  48 main (int argc, char *argv[])
  49 {
  50   int is;
  51   char buf[4];
  52
  53   /* configure should already have checked that the locale is supported.  */
  54   if (setlocale (LC_ALL, "") == NULL)
  55     return 1;
  56
  57   /* Test WEOF.  */
  58   is = c32ispunct (WEOF);
  59   ASSERT (is == 0);
  60
  61   /* Test single-byte characters.
  62      POSIX specifies in
  63        <https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap07.html>
  64      no explicit list of punctuation or symbol characters.  */
  65   {
  66     int c;
  67
  68     for (c = 0; c < 0x100; c++)
  69       switch (c)
  70         {
  71         case '\t': case '\v': case '\f':
  72         case ' ': case '!': case '"': case '#': case '%':
  73         case '&': case '\'': case '(': case ')': case '*':
  74         case '+': case ',': case '-': case '.': case '/':
  75         case '0': case '1': case '2': case '3': case '4':
  76         case '5': case '6': case '7': case '8': case '9':
  77         case ':': case ';': case '<': case '=': case '>':
  78         case '?':
  79         case 'A': case 'B': case 'C': case 'D': case 'E':
  80         case 'F': case 'G': case 'H': case 'I': case 'J':
  81         case 'K': case 'L': case 'M': case 'N': case 'O':
  82         case 'P': case 'Q': case 'R': case 'S': case 'T':
  83         case 'U': case 'V': case 'W': case 'X': case 'Y':
  84         case 'Z':
  85         case '[': case '\\': case ']': case '^': case '_':
  86         case 'a': case 'b': case 'c': case 'd': case 'e':
  87         case 'f': case 'g': case 'h': case 'i': case 'j':
  88         case 'k': case 'l': case 'm': case 'n': case 'o':
  89         case 'p': case 'q': case 'r': case 's': case 't':
  90         case 'u': case 'v': case 'w': case 'x': case 'y':
  91         case 'z': case '{': case '|': case '}': case '~':
  92           /* c is in the ISO C "basic character set".  */
  93           buf[0] = (unsigned char) c;
  94           is = for_character (buf, 1);
  95           switch (c)
  96             {
  97             case ' ':
  98             case '0': case '1': case '2': case '3': case '4':
  99             case '5': case '6': case '7': case '8': case '9':
 100             case 'A': case 'B': case 'C': case 'D': case 'E':
 101             case 'F': case 'G': case 'H': case 'I': case 'J':
 102             case 'K': case 'L': case 'M': case 'N': case 'O':
 103             case 'P': case 'Q': case 'R': case 'S': case 'T':
 104             case 'U': case 'V': case 'W': case 'X': case 'Y':
 105             case 'Z':
 106             case 'a': case 'b': case 'c': case 'd': case 'e':
 107             case 'f': case 'g': case 'h': case 'i': case 'j':
 108             case 'k': case 'l': case 'm': case 'n': case 'o':
 109             case 'p': case 'q': case 'r': case 's': case 't':
 110             case 'u': case 'v': case 'w': case 'x': case 'y':
 111             case 'z':
 112               /* c is an alphanumeric or space character.  */
 113               ASSERT (is == 0);
 114               break;
 115             case '!': case '"': case '#': case '%':
 116             case '&': case '\'': case '(': case ')': case '*':
 117             case '+': case ',': case '-': case '.': case '/':
 118             case ':': case ';': case '<': case '=': case '>':
 119             case '?':
 120             case '[': case '\\': case ']': case '^': case '_':
 121             case '{': case '|': case '}': case '~':
 122               /* These characters are usually expected to be punctuation or
 123                  symbol characters.  */
 124               ASSERT (is != 0);
 125               break;
 126             default:
 127               ASSERT (is == 0);
 128               break;
 129             }
 130           break;
 131         }
 132   }
 133
 134   if (argc > 1)
 135     switch (argv[1][0])
 136       {
 137       case '0':
 138         /* C locale; tested above.  */
 139         return 0;
 140
 141       case '1':
 142         /* Locale encoding is ISO-8859-1 or ISO-8859-15.  */
 143         {
 144         #if !(defined __FreeBSD__ || defined __DragonFly__)
 145           /* U+00BF INVERTED QUESTION MARK */
 146           is = for_character ("\277", 1);
 147           ASSERT (is != 0);
 148         #endif
 149         #if !(defined __FreeBSD__ || defined __DragonFly__ || defined __sun)
 150           /* U+00D7 MULTIPLICATION SIGN */
 151           is = for_character ("\327", 1);
 152           ASSERT (is != 0);
 153         #endif
 154           /* U+00D8 LATIN CAPITAL LETTER O WITH STROKE */
 155           is = for_character ("\330", 1);
 156           ASSERT (is == 0);
 157           /* U+00DF LATIN SMALL LETTER SHARP S */
 158           is = for_character ("\337", 1);
 159           ASSERT (is == 0);
 160         }
 161         return 0;
 162
 163       case '2':
 164         /* Locale encoding is EUC-JP.  */
 165         {
 166         #if !((defined __APPLE__ && defined __MACH__) || defined __FreeBSD__ || defined __DragonFly__ || defined __NetBSD__)
 167           /* U+00BF INVERTED QUESTION MARK */
 168           is = for_character ("\217\242\304", 3);
 169           ASSERT (is != 0);
 170         #endif
 171         #if !(defined __FreeBSD__ || defined __DragonFly__)
 172           /* U+00D7 MULTIPLICATION SIGN */
 173           is = for_character ("\241\337", 2);
 174           ASSERT (is != 0);
 175         #endif
 176           /* U+00D8 LATIN CAPITAL LETTER O WITH STROKE */
 177           is = for_character ("\217\251\254", 3);
 178           ASSERT (is == 0);
 179           /* U+00DF LATIN SMALL LETTER SHARP S */
 180           is = for_character ("\217\251\316", 3);
 181           ASSERT (is == 0);
 182           /* U+0141 LATIN CAPITAL LETTER L WITH STROKE */
 183           is = for_character ("\217\251\250", 3);
 184           ASSERT (is == 0);
 185         #if !((defined __APPLE__ && defined __MACH__) || defined __FreeBSD__ || defined __DragonFly__ || defined __NetBSD__)
 186           /* U+2192 RIGHTWARDS ARROW */
 187           is = for_character ("\242\252", 2);
 188           ASSERT (is != 0);
 189         #endif
 190         #if !(defined __FreeBSD__ || defined __DragonFly__)
 191           /* U+3001 IDEOGRAPHIC COMMA */
 192           is = for_character ("\241\242", 2);
 193           ASSERT (is != 0);
 194         #endif
 195           /* U+FF11 FULLWIDTH DIGIT ONE */
 196           is = for_character ("\243\261", 2);
 197           ASSERT (is == 0);
 198           /* U+FF4D FULLWIDTH LATIN SMALL LETTER M */
 199           is = for_character ("\243\355", 2);
 200           ASSERT (is == 0);
 201         }
 202         return 0;
 203
 204       case '3':
 205         /* Locale encoding is UTF-8.  */
 206         {
 207           /* U+00BF INVERTED QUESTION MARK */
 208           is = for_character ("\302\277", 2);
 209           ASSERT (is != 0);
 210         #if !defined __sun
 211           /* U+00D7 MULTIPLICATION SIGN */
 212           is = for_character ("\303\227", 2);
 213           ASSERT (is != 0);
 214         #endif
 215           /* U+00D8 LATIN CAPITAL LETTER O WITH STROKE */
 216           is = for_character ("\303\230", 2);
 217           ASSERT (is == 0);
 218           /* U+00DF LATIN SMALL LETTER SHARP S */
 219           is = for_character ("\303\237", 2);
 220           ASSERT (is == 0);
 221           /* U+0141 LATIN CAPITAL LETTER L WITH STROKE */
 222           is = for_character ("\305\201", 2);
 223           ASSERT (is == 0);
 224           /* U+05F3 HEBREW PUNCTUATION GERESH */
 225           is = for_character ("\327\263", 2);
 226           ASSERT (is != 0);
 227         #if !(defined __sun || (defined _WIN32 && !defined __CYGWIN__))
 228           /* U+2192 RIGHTWARDS ARROW */
 229           is = for_character ("\342\206\222", 3);
 230           ASSERT (is != 0);
 231         #endif
 232           /* U+3001 IDEOGRAPHIC COMMA */
 233           is = for_character ("\343\200\201", 3);
 234           ASSERT (is != 0);
 235           /* U+FF11 FULLWIDTH DIGIT ONE */
 236           is = for_character ("\357\274\221", 3);
 237           ASSERT (is == 0);
 238           /* U+FF4D FULLWIDTH LATIN SMALL LETTER M */
 239           is = for_character ("\357\275\215", 3);
 240           ASSERT (is == 0);
 241           /* U+10330 GOTHIC LETTER AHSA */
 242           is = for_character ("\360\220\214\260", 4);
 243           ASSERT (is == 0);
 244         #if !defined __sun
 245           /* U+1D100 MUSICAL SYMBOL SINGLE BARLINE */
 246           is = for_character ("\360\235\204\200", 4);
 247           ASSERT (is != 0);
 248         #endif
 249         #if !(defined __GLIBC__ || defined _AIX || defined __CYGWIN__ || (defined _WIN32 && !defined __CYGWIN__))
 250           /* U+E003A TAG COLON */
 251           is = for_character ("\363\240\200\272", 4);
 252           ASSERT (is == 0);
 253         #endif
 254         }
 255         return 0;
 256
 257       case '4':
 258         /* Locale encoding is GB18030.  */
 259         {
 260         #if !(defined __FreeBSD__ || defined __DragonFly__ || defined __sun)
 261           /* U+00BF INVERTED QUESTION MARK */
 262           is = for_character ("\201\060\206\067", 4);
 263           ASSERT (is != 0);
 264         #endif
 265         #if !(defined __FreeBSD__ || defined __DragonFly__)
 266           /* U+00D7 MULTIPLICATION SIGN */
 267           is = for_character ("\241\301", 2);
 268           ASSERT (is != 0);
 269         #endif
 270           /* U+00D8 LATIN CAPITAL LETTER O WITH STROKE */
 271           is = for_character ("\201\060\211\061", 4);
 272           ASSERT (is == 0);
 273           /* U+00DF LATIN SMALL LETTER SHARP S */
 274           is = for_character ("\201\060\211\070", 4);
 275           ASSERT (is == 0);
 276           /* U+0141 LATIN CAPITAL LETTER L WITH STROKE */
 277           is = for_character ("\201\060\221\071", 4);
 278           ASSERT (is == 0);
 279         #if !(defined __FreeBSD__ || defined __DragonFly__ || defined __sun)
 280           /* U+05F3 HEBREW PUNCTUATION GERESH */
 281           is = for_character ("\201\060\374\067", 4);
 282           ASSERT (is != 0);
 283         #endif
 284         #if !(defined __FreeBSD__ || defined __DragonFly__)
 285           /* U+2192 RIGHTWARDS ARROW */
 286           is = for_character ("\241\372", 2);
 287           ASSERT (is != 0);
 288           /* U+3001 IDEOGRAPHIC COMMA */
 289           is = for_character ("\241\242", 2);
 290           ASSERT (is != 0);
 291         #endif
 292           /* U+FF11 FULLWIDTH DIGIT ONE */
 293           is = for_character ("\243\261", 2);
 294           ASSERT (is == 0);
 295           /* U+FF4D FULLWIDTH LATIN SMALL LETTER M */
 296           is = for_character ("\243\355", 2);
 297           ASSERT (is == 0);
 298           /* U+10330 GOTHIC LETTER AHSA */
 299           is = for_character ("\220\060\322\066", 4);
 300           ASSERT (is == 0);
 301         #if !((defined __APPLE__ && defined __MACH__) || defined __FreeBSD__ || defined __DragonFly__ || defined __NetBSD__ || defined __sun)
 302           /* U+1D100 MUSICAL SYMBOL SINGLE BARLINE */
 303           is = for_character ("\224\062\273\064", 4);
 304           ASSERT (is != 0);
 305         #endif
 306         #if !defined __GLIBC__
 307           /* U+E003A TAG COLON */
 308           is = for_character ("\323\066\233\066", 4);
 309           ASSERT (is == 0);
 310         #endif
 311         }
 312         return 0;
 313
 314       }
 315
 316   return 1;
 317 }