lib/fnmatch.c

   1 /* Copyright (C) 1991-2024 Free Software Foundation, Inc.
   2    This file is part of the GNU C Library.
   3
   4    The GNU C Library is free software; you can redistribute it and/or
   5    modify it under the terms of the GNU Lesser General Public
   6    License as published by the Free Software Foundation; either
   7    version 2.1 of the License, or (at your option) any later version.
   8
   9    The GNU C Library is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  12    Lesser General Public License for more details.
  13
  14    You should have received a copy of the GNU Lesser General Public
  15    License along with the GNU C Library; if not, see
  16    <https://www.gnu.org/licenses/>.  */
  17
  18 #ifndef _LIBC
  19 # include <libc-config.h>
  20 #endif
  21
  22 /* Enable GNU extensions in fnmatch.h.  */
  23 #ifndef _GNU_SOURCE
  24 # define _GNU_SOURCE    1
  25 #endif
  26
  27 #include <fnmatch.h>
  28
  29 #include <assert.h>
  30 #include <errno.h>
  31 #include <ctype.h>
  32 #include <string.h>
  33 #include <stdckdint.h>
  34 #include <stdlib.h>
  35 #if defined _LIBC || HAVE_ALLOCA
  36 # include <alloca.h>
  37 #endif
  38 #include <stddef.h>
  39 #include <uchar.h>
/* Select the wide-character type and helper functions used by the
   wide-character half of this file.  Everything below refers only to the
   generic upper-case names (WCHAR_T, WINT_T, BTOWC, ...), so the choice
   made here is the only place the two back ends differ.  */
#if defined _LIBC || !_GL_SMALL_WCHAR_T
/* It's OK to use wchar_t, since it's wide enough.  */
# include <wchar.h>
# include <wctype.h>
# define WCHAR_T wchar_t
# define WINT_T wint_t
# define BTOWC btowc
# define MBSRTOWCS mbsrtowcs
# define WCSLEN wcslen
# define WCSCAT wcscat
# define WMEMPCPY wmempcpy
# define WMEMCHR wmemchr
# define TOWLOWER towlower
# define WCTYPE_T wctype_t
# define WCTYPE wctype
# define ISWCTYPE iswctype
#else
/* wchar_t is too small, use char32_t instead.  */
# include "unistr.h"
# define WCHAR_T char32_t
# define WINT_T char32_t
# define BTOWC btoc32
# define MBSRTOWCS mbsrtoc32s
# define WCSLEN u32_strlen
# define WCSCAT u32_strcat
# define WMEMPCPY u32_pcpy
/* WMEMCHR keeps the (S, C, N) argument order of wmemchr at the call site
   and hands the arguments to u32_chr as (S, N, C).  */
# define WMEMCHR(S, C, N) u32_chr (S, N, C)
# define TOWLOWER c32tolower
# define WCTYPE_T c32_type_test_t
# define WCTYPE c32_get_type_test
# define ISWCTYPE c32_apply_type_test
#endif
  72
/* We need some of the locale data (the collation sequence information)
   but there is no interface to get this information in general.  Therefore
   we support a correct implementation only in glibc.  */
#ifdef _LIBC
# include "../locale/localeinfo.h"
# include "../locale/coll-lookup.h"
# include <shlib-compat.h>

/* Inside glibc, redirect each library function used in this file to its
   double-underscore internal name.  */
# define CONCAT(a,b) __CONCAT(a,b)
# define btowc __btowc
# define iswctype __iswctype
# define mbsrtowcs __mbsrtowcs
# define mempcpy __mempcpy
# define strnlen __strnlen
# define towlower __towlower
# define wcscat __wcscat
# define wcslen __wcslen
# define wctype __wctype
# define wmemchr __wmemchr
# define wmempcpy __wmempcpy
/* The function defined later in this file as "fnmatch" is therefore
   really named __fnmatch; the rename is undone with #undef right after
   that definition, where the public symbol is attached.  */
# define fnmatch __fnmatch
extern int fnmatch (const char *pattern, const char *string, int flags);
#endif
  96
/* FALLTHROUGH marks an intentional fall-through between switch cases.
   In glibc it expands to the __fallthrough__ attribute when the compiler
   reports support for it and to a no-op expression otherwise.  Outside
   glibc nothing is defined here; "attribute.h" is included instead,
   presumably to supply the macro -- TODO confirm against that header.  */
#ifdef _LIBC
# if __glibc_has_attribute (__fallthrough__)
#  define FALLTHROUGH __attribute__ ((__fallthrough__))
# else
#  define FALLTHROUGH ((void) 0)
# endif
#else
# include "attribute.h"
#endif
 106
 107 #include <flexmember.h>
 108
/* idx_t: in glibc it is a plain typedef of ptrdiff_t; elsewhere the
   definition comes from "idx.h".  */
#ifdef _LIBC
typedef ptrdiff_t idx_t;
#else
# include "idx.h"
#endif
 114
 115 /* We often have to test for FNM_FILE_NAME and FNM_PERIOD being both set.  */
 116 #define NO_LEADING_PERIOD(flags) \
 117   ((flags & (FNM_FILE_NAME | FNM_PERIOD)) == (FNM_FILE_NAME | FNM_PERIOD))
 118
/* Outside glibc, supply the stack-allocation helpers this file expects:
   __libc_use_alloca (N) tells whether N bytes may be taken from the stack,
   and alloca_account (SIZE, AVAR) adds SIZE to the running total AVAR and
   then yields alloca (SIZE).  */
#ifndef _LIBC
# if HAVE_ALLOCA
/* The OS usually guarantees only one guard page at the bottom of the stack,
   and a page size can be as small as 4096 bytes.  So we cannot safely
   allocate anything larger than 4096 bytes.  Also care for the possibility
   of a few compiler-allocated temporary stack slots.  */
#  define __libc_use_alloca(n) ((n) < 4032)
# else
/* Just use malloc.
   NOTE(review): with this fallback alloca (N) is really malloc (N), so its
   result may be NULL and is not released automatically.  Because
   __libc_use_alloca is always false here, code that consults it before
   calling alloca never reaches this definition.  */
#  define __libc_use_alloca(n) false
#  undef alloca
#  define alloca(n) malloc (n)
# endif
/* Note that AVAR is modified and SIZE is evaluated twice.  */
# define alloca_account(size, avar) ((avar) += (size), alloca (size))
#endif
 134
/* Provide support for user-defined character classes, based on the functions
   from ISO C 90 amendment 1.
   CHAR_CLASS_MAX_LENGTH is the longest character class name accepted; it
   sizes the conversion buffer in is_char_class later in this file.  */
#ifdef CHARCLASS_NAME_MAX
# define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX
#else
/* This shouldn't happen but some implementation might still have this
   problem.  Use a reasonable default value.  */
# define CHAR_CLASS_MAX_LENGTH 256
#endif

/* Look up a character class by name.  This definition, which passes the
   name straight to WCTYPE, is the one in effect for the first inclusion
   of fnmatch_loop.c; it is replaced before the second inclusion.  */
#define IS_CHAR_CLASS(string) WCTYPE (string)
 146
 147 /* Avoid depending on library functions or files
 148    whose names are inconsistent.  */
 149
/* File-scope flag, implicitly zero-initialized.  Nothing in the visible
   part of this file assigns or reads it; presumably the included
   fnmatch_loop.c does -- TODO confirm.  */
static int posixly_correct;
 152
/* First inclusion of fnmatch_loop.c: the single-byte ('char') variant.
   The macros in this group parameterize that file; FCT, EXT and END
   supply the identifiers internal_fnmatch, ext_match and end_pattern,
   and STRUCT supplies fnmatch_struct.  */

/* Note that this evaluates C many times.
   FOLD also refers to a variable named 'flags' that must be in scope
   wherever the macro is expanded.  */
#define FOLD(c) ((flags & FNM_CASEFOLD) ? tolower (c) : (c))
#define CHAR    char
#define UCHAR   unsigned char
#define INT     int
#define FCT     internal_fnmatch
#define EXT     ext_match
#define END     end_pattern
#define STRUCT  fnmatch_struct
/* L_ wraps character and string literals; it is the identity here.  */
#define L_(CS)  CS
#define UCHAR_TO_WCHAR(C) BTOWC (C)
#define STRLEN(S) strlen (S)
#define STRCAT(D, S) strcat (D, S)
#define MEMPCPY(D, S, N) mempcpy (D, S, N)
#define MEMCHR(S, C, N) memchr (S, C, N)
#define WIDE_CHAR_VERSION 0
#ifdef _LIBC
/* In glibc, FINDIDX names the findidx provided by <locale/weight.h>.  */
# include <locale/weight.h>
# define FINDIDX findidx
#endif
#include "fnmatch_loop.c"
 174
 175
/* Macro set for the second inclusion of fnmatch_loop.c: the
   wide-character (WCHAR_T) variant, which yields internal_fnwmatch,
   ext_wmatch and end_wpattern.  These names are defined again here
   without a preceding #undef, so the first inclusion of fnmatch_loop.c
   presumably undefines them when it finishes -- TODO confirm.  STRUCT is
   not redefined in this group.  */
#define FOLD(c) ((flags & FNM_CASEFOLD) ? TOWLOWER (c) : (c))
#define CHAR    WCHAR_T
#define UCHAR   WINT_T
#define INT     WINT_T
#define FCT     internal_fnwmatch
#define EXT     ext_wmatch
#define END     end_wpattern
/* L_ turns a literal into its wide form by pasting the L prefix.  */
#define L_(CS)  L##CS
#define UCHAR_TO_WCHAR(C) (C)
#define STRLEN(S) WCSLEN (S)
#define STRCAT(D, S) WCSCAT (D, S)
#define MEMPCPY(D, S, N) WMEMPCPY (D, S, N)
#define MEMCHR(S, C, N) WMEMCHR (S, C, N)
#define WIDE_CHAR_VERSION 1
#ifdef _LIBC
/* Change the name the header defines so it doesn't conflict with
   the <locale/weight.h> version included above.  */
# define findidx findidxwc
# include <locale/weightwc.h>
# undef findidx
# define FINDIDX findidxwc
#endif
 198
 199 #undef IS_CHAR_CLASS
 200 /* We have to convert the wide character string in a multibyte string.  But
 201    we know that the character class names consist of alphanumeric characters
 202    from the portable character set, and since the wide character encoding
 203    for a member of the portable character set is the same code point as
 204    its single-byte encoding, we can use a simplified method to convert the
 205    string to a multibyte character string.  */
 206 static WCTYPE_T
 207 is_char_class (const WCHAR_T *wcs)
 208 {
 209   char s[CHAR_CLASS_MAX_LENGTH + 1];
 210   char *cp = s;
 211
 212   do
 213     {
 214       /* Test for a printable character from the portable character set.  */
 215 #ifdef _LIBC
 216       if (*wcs < 0x20 || *wcs > 0x7e
 217           || *wcs == 0x24 || *wcs == 0x40 || *wcs == 0x60)
 218         return (WCTYPE_T) 0;
 219 #else
 220       switch (*wcs)
 221         {
 222         case L' ': case L'!': case L'"': case L'#': case L'%':
 223         case L'&': case L'\'': case L'(': case L')': case L'*':
 224         case L'+': case L',': case L'-': case L'.': case L'/':
 225         case L'0': case L'1': case L'2': case L'3': case L'4':
 226         case L'5': case L'6': case L'7': case L'8': case L'9':
 227         case L':': case L';': case L'<': case L'=': case L'>':
 228         case L'?':
 229         case L'A': case L'B': case L'C': case L'D': case L'E':
 230         case L'F': case L'G': case L'H': case L'I': case L'J':
 231         case L'K': case L'L': case L'M': case L'N': case L'O':
 232         case L'P': case L'Q': case L'R': case L'S': case L'T':
 233         case L'U': case L'V': case L'W': case L'X': case L'Y':
 234         case L'Z':
 235         case L'[': case L'\\': case L']': case L'^': case L'_':
 236         case L'a': case L'b': case L'c': case L'd': case L'e':
 237         case L'f': case L'g': case L'h': case L'i': case L'j':
 238         case L'k': case L'l': case L'm': case L'n': case L'o':
 239         case L'p': case L'q': case L'r': case L's': case L't':
 240         case L'u': case L'v': case L'w': case L'x': case L'y':
 241         case L'z': case L'{': case L'|': case L'}': case L'~':
 242           break;
 243         default:
 244           return (WCTYPE_T) 0;
 245         }
 246 #endif
 247
 248       /* Avoid overrunning the buffer.  */
 249       if (cp == s + CHAR_CLASS_MAX_LENGTH)
 250         return (WCTYPE_T) 0;
 251
 252       *cp++ = (char) *wcs++;
 253     }
 254   while (*wcs != L'\0');
 255
 256   *cp = '\0';
 257
 258   return WCTYPE (s);
 259 }
 260 #define IS_CHAR_CLASS(string) is_char_class (string)
 261
 262 #include "fnmatch_loop.c"
 263
 264
 265 int
 266 fnmatch (const char *pattern, const char *string, int flags)
 267 {
 268   if (__glibc_unlikely (MB_CUR_MAX != 1))
 269     {
 270       mbstate_t ps;
 271       size_t n;
 272       const char *p;
 273       WCHAR_T *wpattern_malloc = NULL;
 274       WCHAR_T *wpattern;
 275       WCHAR_T *wstring_malloc = NULL;
 276       WCHAR_T *wstring;
 277       size_t alloca_used = 0;
 278
 279       /* Convert the strings into wide characters.  */
 280       memset (&ps, '\0', sizeof (ps));
 281       p = pattern;
 282       n = strnlen (pattern, 1024);
 283       if (__glibc_likely (n < 1024))
 284         {
 285           wpattern = (WCHAR_T *) alloca_account ((n + 1) * sizeof (WCHAR_T),
 286                                                  alloca_used);
 287           n = MBSRTOWCS (wpattern, &p, n + 1, &ps);
 288           if (__glibc_unlikely (n == (size_t) -1))
 289             /* Something wrong.
 290                XXX Do we have to set 'errno' to something which mbsrtows hasn't
 291                already done?  */
 292             return -1;
 293           if (p)
 294             {
 295               memset (&ps, '\0', sizeof (ps));
 296               goto prepare_wpattern;
 297             }
 298         }
 299       else
 300         {
 301         prepare_wpattern:
 302           n = MBSRTOWCS (NULL, &pattern, 0, &ps);
 303           if (__glibc_unlikely (n == (size_t) -1))
 304             /* Something wrong.
 305                XXX Do we have to set 'errno' to something which mbsrtows hasn't
 306                already done?  */
 307             return -1;
 308           if (__glibc_unlikely (n >= (size_t) -1 / sizeof (WCHAR_T)))
 309             {
 310               __set_errno (ENOMEM);
 311               return -2;
 312             }
 313           wpattern_malloc = wpattern
 314             = (WCHAR_T *) malloc ((n + 1) * sizeof (WCHAR_T));
 315           assert (mbsinit (&ps));
 316           if (wpattern == NULL)
 317             return -2;
 318           (void) MBSRTOWCS (wpattern, &pattern, n + 1, &ps);
 319         }
 320
 321       assert (mbsinit (&ps));
 322       n = strnlen (string, 1024);
 323       p = string;
 324       if (__glibc_likely (n < 1024))
 325         {
 326           wstring = (WCHAR_T *) alloca_account ((n + 1) * sizeof (WCHAR_T),
 327                                                 alloca_used);
 328           n = MBSRTOWCS (wstring, &p, n + 1, &ps);
 329           if (__glibc_unlikely (n == (size_t) -1))
 330             {
 331               /* Something wrong.
 332                  XXX Do we have to set 'errno' to something which
 333                  mbsrtows hasn't already done?  */
 334             free_return:
 335               free (wpattern_malloc);
 336               return -1;
 337             }
 338           if (p)
 339             {
 340               memset (&ps, '\0', sizeof (ps));
 341               goto prepare_wstring;
 342             }
 343         }
 344       else
 345         {
 346         prepare_wstring:
 347           n = MBSRTOWCS (NULL, &string, 0, &ps);
 348           if (__glibc_unlikely (n == (size_t) -1))
 349             /* Something wrong.
 350                XXX Do we have to set 'errno' to something which mbsrtows hasn't
 351                already done?  */
 352             goto free_return;
 353           if (__glibc_unlikely (n >= (size_t) -1 / sizeof (WCHAR_T)))
 354             {
 355               free (wpattern_malloc);
 356               __set_errno (ENOMEM);
 357               return -2;
 358             }
 359
 360           wstring_malloc = wstring
 361             = (WCHAR_T *) malloc ((n + 1) * sizeof (WCHAR_T));
 362           if (wstring == NULL)
 363             {
 364               free (wpattern_malloc);
 365               return -2;
 366             }
 367           assert (mbsinit (&ps));
 368           (void) MBSRTOWCS (wstring, &string, n + 1, &ps);
 369         }
 370
 371       int res = internal_fnwmatch (wpattern, wstring, wstring + n,
 372                                    flags & FNM_PERIOD, flags, NULL,
 373                                    alloca_used);
 374
 375       free (wstring_malloc);
 376       free (wpattern_malloc);
 377
 378       return res;
 379     }
 380
 381   return internal_fnmatch (pattern, string, string + strlen (string),
 382                            flags & FNM_PERIOD, flags, NULL, 0);
 383 }
 384
/* Remove the fnmatch -> __fnmatch rename (made under _LIBC near the top
   of this file) so that the plain name 'fnmatch' can be passed to the
   symbol-management macros below.
   NOTE(review): versioned_symbol, strong_alias, compat_symbol and
   libc_hidden_ver are not defined in this file; presumably they bind
   __fnmatch to the public, versioned 'fnmatch' symbol in glibc and
   expand to nothing elsewhere -- confirm against the headers that
   provide them.  */
#undef fnmatch
versioned_symbol (libc, __fnmatch, fnmatch, GLIBC_2_2_3);
#if SHLIB_COMPAT(libc, GLIBC_2_0, GLIBC_2_2_3)
/* Keep an alias of the same implementation under the older version tag.  */
strong_alias (__fnmatch, __fnmatch_old)
compat_symbol (libc, __fnmatch_old, fnmatch, GLIBC_2_0);
#endif
libc_hidden_ver (__fnmatch, fnmatch)