tests/test-mbrlen.c

   1 /* Test of determining the length of a multibyte character (mbrlen).
   2    Copyright (C) 2008-2024 Free Software Foundation, Inc.
   3
   4    This program is free software: you can redistribute it and/or modify
   5    it under the terms of the GNU General Public License as published by
   6    the Free Software Foundation, either version 3 of the License, or
   7    (at your option) any later version.
   8
   9    This program is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12    GNU General Public License for more details.
  13
  14    You should have received a copy of the GNU General Public License
  15    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
  16
  17 /* Written by Bruno Haible <bruno@clisp.org>, 2023.  */
  18
  19 #include <config.h>
  20
  21 #include <wchar.h>
  22
  23 #include "signature.h"
  24 SIGNATURE_CHECK (mbrlen, size_t, (char const *, size_t, mbstate_t *));
  25
  26 #include <locale.h>
  27 #include <stdio.h>
  28 #include <stdlib.h>
  29 #include <string.h>
  30
  31 #include "macros.h"
  32
  33 int
  34 main (int argc, char *argv[])
  35 {
  36   mbstate_t state;
  37   size_t ret;
  38
  39   /* configure should already have checked that the locale is supported.  */
  40   if (setlocale (LC_ALL, "") == NULL)
  41     return 1;
  42
  43   /* Test zero-length input.  */
  44   {
  45     memset (&state, '\0', sizeof (mbstate_t));
  46     ret = mbrlen ("x", 0, &state);
  47     ASSERT (ret == (size_t)(-2));
  48     ASSERT (mbsinit (&state));
  49   }
  50
  51   /* Test NUL byte input.  */
  52   {
  53     memset (&state, '\0', sizeof (mbstate_t));
  54     ret = mbrlen ("", 1, &state);
  55     ASSERT (ret == 0);
  56     ASSERT (mbsinit (&state));
  57   }
  58
  59   /* Test single-byte input.  */
  60   {
  61     int c;
  62     char buf[1];
  63
  64     memset (&state, '\0', sizeof (mbstate_t));
  65     for (c = 0; c < 0x100; c++)
  66       switch (c)
  67         {
  68         case '\t': case '\v': case '\f':
  69         case ' ': case '!': case '"': case '#': case '%':
  70         case '&': case '\'': case '(': case ')': case '*':
  71         case '+': case ',': case '-': case '.': case '/':
  72         case '0': case '1': case '2': case '3': case '4':
  73         case '5': case '6': case '7': case '8': case '9':
  74         case ':': case ';': case '<': case '=': case '>':
  75         case '?':
  76         case 'A': case 'B': case 'C': case 'D': case 'E':
  77         case 'F': case 'G': case 'H': case 'I': case 'J':
  78         case 'K': case 'L': case 'M': case 'N': case 'O':
  79         case 'P': case 'Q': case 'R': case 'S': case 'T':
  80         case 'U': case 'V': case 'W': case 'X': case 'Y':
  81         case 'Z':
  82         case '[': case '\\': case ']': case '^': case '_':
  83         case 'a': case 'b': case 'c': case 'd': case 'e':
  84         case 'f': case 'g': case 'h': case 'i': case 'j':
  85         case 'k': case 'l': case 'm': case 'n': case 'o':
  86         case 'p': case 'q': case 'r': case 's': case 't':
  87         case 'u': case 'v': case 'w': case 'x': case 'y':
  88         case 'z': case '{': case '|': case '}': case '~':
  89           /* c is in the ISO C "basic character set".  */
  90           ASSERT (c < 0x80);
  91           /* c is an ASCII character.  */
  92           buf[0] = c;
  93
  94           ret = mbrlen (buf, 1, &state);
  95           ASSERT (ret == 1);
  96           ASSERT (mbsinit (&state));
  97
  98           break;
  99         default:
 100           break;
 101         }
 102   }
 103
 104   /* Test special calling convention, passing a NULL pointer.  */
 105   {
 106     memset (&state, '\0', sizeof (mbstate_t));
 107     ret = mbrlen (NULL, 5, &state);
 108     ASSERT (ret == 0);
 109     ASSERT (mbsinit (&state));
 110   }
 111
 112 #ifdef __ANDROID__
 113   /* On Android ≥ 5.0, the default locale is the "C.UTF-8" locale, not the
 114      "C" locale.  Furthermore, when you attempt to set the "C" or "POSIX"
 115      locale via setlocale(), what you get is a "C" locale with UTF-8 encoding,
 116      that is, effectively the "C.UTF-8" locale.  */
 117   if (argc > 1 && strcmp (argv[1], "1") == 0 && MB_CUR_MAX > 1)
 118     argv[1] = "3";
 119 #endif
 120
 121   if (argc > 1)
 122     switch (argv[1][0])
 123       {
 124       case '1':
 125         /* C or POSIX locale.  */
 126         {
 127           int c;
 128           char buf[1];
 129
 130           memset (&state, '\0', sizeof (mbstate_t));
 131           for (c = 0; c < 0x100; c++)
 132             if (c != 0)
 133               {
 134                 /* We are testing all nonnull bytes.  */
 135                 buf[0] = c;
 136
 137                 ret = mbrlen (buf, 1, &state);
 138                 /* POSIX:2018 says: "In the POSIX locale an [EILSEQ] error
 139                    cannot occur since all byte values are valid characters."  */
 140                 ASSERT (ret == 1);
 141                 ASSERT (mbsinit (&state));
 142               }
 143         }
 144         return test_exit_status;
 145
 146       case '2':
 147         /* Locale encoding is ISO-8859-1 or ISO-8859-15.  */
 148         {
 149           char input[] = "B\374\337er"; /* "Büßer" */
 150           memset (&state, '\0', sizeof (mbstate_t));
 151
 152           ret = mbrlen (input, 1, &state);
 153           ASSERT (ret == 1);
 154           ASSERT (mbsinit (&state));
 155           input[0] = '\0';
 156
 157           ret = mbrlen (input + 1, 1, &state);
 158           ASSERT (ret == 1);
 159           ASSERT (mbsinit (&state));
 160           input[1] = '\0';
 161
 162           ret = mbrlen (input + 2, 3, &state);
 163           ASSERT (ret == 1);
 164           ASSERT (mbsinit (&state));
 165           input[2] = '\0';
 166
 167           ret = mbrlen (input + 3, 2, &state);
 168           ASSERT (ret == 1);
 169           ASSERT (mbsinit (&state));
 170           input[3] = '\0';
 171
 172           ret = mbrlen (input + 4, 1, &state);
 173           ASSERT (ret == 1);
 174           ASSERT (mbsinit (&state));
 175         }
 176         return test_exit_status;
 177
 178       case '3':
 179         /* Locale encoding is UTF-8.  */
 180         {
 181           char input[] = "B\303\274\303\237er"; /* "Büßer" */
 182           memset (&state, '\0', sizeof (mbstate_t));
 183
 184           ret = mbrlen (input, 1, &state);
 185           ASSERT (ret == 1);
 186           ASSERT (mbsinit (&state));
 187           input[0] = '\0';
 188
 189           ret = mbrlen (input + 1, 1, &state);
 190           ASSERT (ret == (size_t)(-2));
 191           ASSERT (!mbsinit (&state));
 192           input[1] = '\0';
 193
 194           ret = mbrlen (input + 2, 5, &state);
 195           ASSERT (ret == 1);
 196           ASSERT (mbsinit (&state));
 197           input[2] = '\0';
 198
 199           ret = mbrlen (input + 3, 4, &state);
 200           ASSERT (ret == 2);
 201           ASSERT (mbsinit (&state));
 202           input[3] = '\0';
 203           input[4] = '\0';
 204
 205           ret = mbrlen (input + 5, 2, &state);
 206           ASSERT (ret == 1);
 207           ASSERT (mbsinit (&state));
 208           input[5] = '\0';
 209
 210           ret = mbrlen (input + 6, 1, &state);
 211           ASSERT (ret == 1);
 212           ASSERT (mbsinit (&state));
 213         }
 214         return test_exit_status;
 215
 216       case '4':
 217         /* Locale encoding is EUC-JP.  */
 218         {
 219           char input[] = "<\306\374\313\334\270\354>"; /* "<日本語>" */
 220           memset (&state, '\0', sizeof (mbstate_t));
 221
 222           ret = mbrlen (input, 1, &state);
 223           ASSERT (ret == 1);
 224           ASSERT (mbsinit (&state));
 225           input[0] = '\0';
 226
 227           ret = mbrlen (input + 1, 2, &state);
 228           ASSERT (ret == 2);
 229           ASSERT (mbsinit (&state));
 230           input[1] = '\0';
 231           input[2] = '\0';
 232
 233           ret = mbrlen (input + 3, 1, &state);
 234           ASSERT (ret == (size_t)(-2));
 235           ASSERT (!mbsinit (&state));
 236           input[3] = '\0';
 237
 238           ret = mbrlen (input + 4, 4, &state);
 239           ASSERT (ret == 1);
 240           ASSERT (mbsinit (&state));
 241           input[4] = '\0';
 242
 243           ret = mbrlen (input + 5, 3, &state);
 244           ASSERT (ret == 2);
 245           ASSERT (mbsinit (&state));
 246           input[5] = '\0';
 247           input[6] = '\0';
 248
 249           ret = mbrlen (input + 7, 1, &state);
 250           ASSERT (ret == 1);
 251           ASSERT (mbsinit (&state));
 252         }
 253         return test_exit_status;
 254
 255       case '5':
 256         /* Locale encoding is GB18030.  */
 257         {
 258           char input[] = "B\250\271\201\060\211\070er"; /* "Büßer" */
 259           memset (&state, '\0', sizeof (mbstate_t));
 260
 261           ret = mbrlen (input, 1, &state);
 262           ASSERT (ret == 1);
 263           ASSERT (mbsinit (&state));
 264           input[0] = '\0';
 265
 266           ret = mbrlen (input + 1, 1, &state);
 267           ASSERT (ret == (size_t)(-2));
 268           ASSERT (!mbsinit (&state));
 269           input[1] = '\0';
 270
 271           ret = mbrlen (input + 2, 7, &state);
 272           ASSERT (ret == 1);
 273           ASSERT (mbsinit (&state));
 274           input[2] = '\0';
 275
 276           ret = mbrlen (input + 3, 6, &state);
 277           ASSERT (ret == 4);
 278           ASSERT (mbsinit (&state));
 279           input[3] = '\0';
 280           input[4] = '\0';
 281           input[5] = '\0';
 282           input[6] = '\0';
 283
 284           ret = mbrlen (input + 7, 2, &state);
 285           ASSERT (ret == 1);
 286           ASSERT (mbsinit (&state));
 287           input[7] = '\0';
 288
 289           ret = mbrlen (input + 8, 1, &state);
 290           ASSERT (ret == 1);
 291           ASSERT (mbsinit (&state));
 292         }
 293         return test_exit_status;
 294       }
 295
 296   return 1;
 297 }