posix/fnmatch_loop.c

   1 /* Copyright (C) 1991-1993,1996-2001,2003-2005,2007
   2    Free Software Foundation, Inc.
   3    This file is part of the GNU C Library.
   4
   5    The GNU C Library is free software; you can redistribute it and/or
   6    modify it under the terms of the GNU Lesser General Public
   7    License as published by the Free Software Foundation; either
   8    version 2.1 of the License, or (at your option) any later version.
   9
  10    The GNU C Library is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13    Lesser General Public License for more details.
  14
  15    You should have received a copy of the GNU Lesser General Public
  16    License along with the GNU C Library; if not, write to the Free
  17    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
  18    02111-1307 USA.  */
  19
  20 struct STRUCT
  21 {
  22   const CHAR *pattern;
  23   const CHAR *string;
  24   int no_leading_period;
  25 };
  26
  27 /* Match STRING against the filename pattern PATTERN, returning zero if
  28    it matches, nonzero if not.  */
  29 static int FCT (const CHAR *pattern, const CHAR *string,
  30                 const CHAR *string_end, int no_leading_period, int flags,
  31                 struct STRUCT *ends)
  32      internal_function;
  33 static int EXT (INT opt, const CHAR *pattern, const CHAR *string,
  34                 const CHAR *string_end, int no_leading_period, int flags)
  35      internal_function;
  36 static const CHAR *END (const CHAR *patternp) internal_function;
  37
  38 static int
  39 internal_function
  40 FCT (pattern, string, string_end, no_leading_period, flags, ends)
  41      const CHAR *pattern;
  42      const CHAR *string;
  43      const CHAR *string_end;
  44      int no_leading_period;
  45      int flags;
  46      struct STRUCT *ends;
  47 {
  48   register const CHAR *p = pattern, *n = string;
  49   register UCHAR c;
  50 #ifdef _LIBC
  51 # if WIDE_CHAR_VERSION
  52   const char *collseq = (const char *)
  53     _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC);
  54 # else
  55   const UCHAR *collseq = (const UCHAR *)
  56     _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQMB);
  57 # endif
  58 #endif
  59
  60   while ((c = *p++) != L('\0'))
  61     {
  62       int new_no_leading_period = 0;
  63       c = FOLD (c);
  64
  65       switch (c)
  66         {
  67         case L('?'):
  68           if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
  69             {
  70               int res;
  71
  72               res = EXT (c, p, n, string_end, no_leading_period,
  73                          flags);
  74               if (res != -1)
  75                 return res;
  76             }
  77
  78           if (n == string_end)
  79             return FNM_NOMATCH;
  80           else if (*n == L('/') && (flags & FNM_FILE_NAME))
  81             return FNM_NOMATCH;
  82           else if (*n == L('.') && no_leading_period)
  83             return FNM_NOMATCH;
  84           break;
  85
  86         case L('\\'):
  87           if (!(flags & FNM_NOESCAPE))
  88             {
  89               c = *p++;
  90               if (c == L('\0'))
  91                 /* Trailing \ loses.  */
  92                 return FNM_NOMATCH;
  93               c = FOLD (c);
  94             }
  95           if (n == string_end || FOLD ((UCHAR) *n) != c)
  96             return FNM_NOMATCH;
  97           break;
  98
  99         case L('*'):
 100           if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
 101             {
 102               int res;
 103
 104               res = EXT (c, p, n, string_end, no_leading_period,
 105                          flags);
 106               if (res != -1)
 107                 return res;
 108             }
 109           else if (ends != NULL)
 110             {
 111               ends->pattern = p - 1;
 112               ends->string = n;
 113               ends->no_leading_period = no_leading_period;
 114               return 0;
 115             }
 116
 117           if (n != string_end && *n == L('.') && no_leading_period)
 118             return FNM_NOMATCH;
 119
 120           for (c = *p++; c == L('?') || c == L('*'); c = *p++)
 121             {
 122               if (*p == L('(') && (flags & FNM_EXTMATCH) != 0)
 123                 {
 124                   const CHAR *endp = END (p);
 125                   if (endp != p)
 126                     {
 127                       /* This is a pattern.  Skip over it.  */
 128                       p = endp;
 129                       continue;
 130                     }
 131                 }
 132
 133               if (c == L('?'))
 134                 {
 135                   /* A ? needs to match one character.  */
 136                   if (n == string_end)
 137                     /* There isn't another character; no match.  */
 138                     return FNM_NOMATCH;
 139                   else if (*n == L('/')
 140                            && __builtin_expect (flags & FNM_FILE_NAME, 0))
 141                     /* A slash does not match a wildcard under
 142                        FNM_FILE_NAME.  */
 143                     return FNM_NOMATCH;
 144                   else
 145                     /* One character of the string is consumed in matching
 146                        this ? wildcard, so *??? won't match if there are
 147                        less than three characters.  */
 148                     ++n;
 149                 }
 150             }
 151
 152           if (c == L('\0'))
 153             /* The wildcard(s) is/are the last element of the pattern.
 154                If the name is a file name and contains another slash
 155                this means it cannot match, unless the FNM_LEADING_DIR
 156                flag is set.  */
 157             {
 158               int result = (flags & FNM_FILE_NAME) == 0 ? 0 : FNM_NOMATCH;
 159
 160               if (flags & FNM_FILE_NAME)
 161                 {
 162                   if (flags & FNM_LEADING_DIR)
 163                     result = 0;
 164                   else
 165                     {
 166                       if (MEMCHR (n, L('/'), string_end - n) == NULL)
 167                         result = 0;
 168                     }
 169                 }
 170
 171               return result;
 172             }
 173           else
 174             {
 175               const CHAR *endp;
 176               struct STRUCT end;
 177
 178               end.pattern = NULL;
 179               endp = MEMCHR (n, (flags & FNM_FILE_NAME) ? L('/') : L('\0'),
 180                              string_end - n);
 181               if (endp == NULL)
 182                 endp = string_end;
 183
 184               if (c == L('[')
 185                   || (__builtin_expect (flags & FNM_EXTMATCH, 0) != 0
 186                       && (c == L('@') || c == L('+') || c == L('!'))
 187                       && *p == L('(')))
 188                 {
 189                   int flags2 = ((flags & FNM_FILE_NAME)
 190                                 ? flags : (flags & ~FNM_PERIOD));
 191
 192                   for (--p; n < endp; ++n, no_leading_period = 0)
 193                     if (FCT (p, n, string_end, no_leading_period, flags2,
 194                              &end) == 0)
 195                       goto found;
 196                 }
 197               else if (c == L('/') && (flags & FNM_FILE_NAME))
 198                 {
 199                   while (n < string_end && *n != L('/'))
 200                     ++n;
 201                   if (n < string_end && *n == L('/')
 202                       && (FCT (p, n + 1, string_end, flags & FNM_PERIOD, flags,
 203                                NULL) == 0))
 204                     return 0;
 205                 }
 206               else
 207                 {
 208                   int flags2 = ((flags & FNM_FILE_NAME)
 209                                 ? flags : (flags & ~FNM_PERIOD));
 210
 211                   if (c == L('\\') && !(flags & FNM_NOESCAPE))
 212                     c = *p;
 213                   c = FOLD (c);
 214                   for (--p; n < endp; ++n, no_leading_period = 0)
 215                     if (FOLD ((UCHAR) *n) == c
 216                         && (FCT (p, n, string_end, no_leading_period, flags2,
 217                                  &end) == 0))
 218                       {
 219                       found:
 220                         if (end.pattern == NULL)
 221                           return 0;
 222                         break;
 223                       }
 224                   if (end.pattern != NULL)
 225                     {
 226                       p = end.pattern;
 227                       n = end.string;
 228                       no_leading_period = end.no_leading_period;
 229                       continue;
 230                     }
 231                 }
 232             }
 233
 234           /* If we come here no match is possible with the wildcard.  */
 235           return FNM_NOMATCH;
 236
 237         case L('['):
 238           {
 239             /* Nonzero if the sense of the character class is inverted.  */
 240             register int not;
 241             CHAR cold;
 242             UCHAR fn;
 243
 244             if (posixly_correct == 0)
 245               posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
 246
 247             if (n == string_end)
 248               return FNM_NOMATCH;
 249
 250             if (*n == L('.') && no_leading_period)
 251               return FNM_NOMATCH;
 252
 253             if (*n == L('/') && (flags & FNM_FILE_NAME))
 254               /* `/' cannot be matched.  */
 255               return FNM_NOMATCH;
 256
 257             not = (*p == L('!') || (posixly_correct < 0 && *p == L('^')));
 258             if (not)
 259               ++p;
 260
 261             fn = FOLD ((UCHAR) *n);
 262
 263             c = *p++;
 264             for (;;)
 265               {
 266                 if (!(flags & FNM_NOESCAPE) && c == L('\\'))
 267                   {
 268                     if (*p == L('\0'))
 269                       return FNM_NOMATCH;
 270                     c = FOLD ((UCHAR) *p);
 271                     ++p;
 272
 273                     goto normal_bracket;
 274                   }
 275                 else if (c == L('[') && *p == L(':'))
 276                   {
 277                     /* Leave room for the null.  */
 278                     CHAR str[CHAR_CLASS_MAX_LENGTH + 1];
 279                     size_t c1 = 0;
 280 #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
 281                     wctype_t wt;
 282 #endif
 283                     const CHAR *startp = p;
 284
 285                     for (;;)
 286                       {
 287                         if (c1 == CHAR_CLASS_MAX_LENGTH)
 288                           /* The name is too long and therefore the pattern
 289                              is ill-formed.  */
 290                           return FNM_NOMATCH;
 291
 292                         c = *++p;
 293                         if (c == L(':') && p[1] == L(']'))
 294                           {
 295                             p += 2;
 296                             break;
 297                           }
 298                         if (c < L('a') || c >= L('z'))
 299                           {
 300                             /* This cannot possibly be a character class name.
 301                                Match it as a normal range.  */
 302                             p = startp;
 303                             c = L('[');
 304                             goto normal_bracket;
 305                           }
 306                         str[c1++] = c;
 307                       }
 308                     str[c1] = L('\0');
 309
 310 #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
 311                     wt = IS_CHAR_CLASS (str);
 312                     if (wt == 0)
 313                       /* Invalid character class name.  */
 314                       return FNM_NOMATCH;
 315
 316 # if defined _LIBC && ! WIDE_CHAR_VERSION
 317                     /* The following code is glibc specific but does
 318                        there a good job in speeding up the code since
 319                        we can avoid the btowc() call.  */
 320                     if (_ISCTYPE ((UCHAR) *n, wt))
 321                       goto matched;
 322 # else
 323                     if (ISWCTYPE (BTOWC ((UCHAR) *n), wt))
 324                       goto matched;
 325 # endif
 326 #else
 327                     if ((STREQ (str, L("alnum")) && ISALNUM ((UCHAR) *n))
 328                         || (STREQ (str, L("alpha")) && ISALPHA ((UCHAR) *n))
 329                         || (STREQ (str, L("blank")) && ISBLANK ((UCHAR) *n))
 330                         || (STREQ (str, L("cntrl")) && ISCNTRL ((UCHAR) *n))
 331                         || (STREQ (str, L("digit")) && ISDIGIT ((UCHAR) *n))
 332                         || (STREQ (str, L("graph")) && ISGRAPH ((UCHAR) *n))
 333                         || (STREQ (str, L("lower")) && ISLOWER ((UCHAR) *n))
 334                         || (STREQ (str, L("print")) && ISPRINT ((UCHAR) *n))
 335                         || (STREQ (str, L("punct")) && ISPUNCT ((UCHAR) *n))
 336                         || (STREQ (str, L("space")) && ISSPACE ((UCHAR) *n))
 337                         || (STREQ (str, L("upper")) && ISUPPER ((UCHAR) *n))
 338                         || (STREQ (str, L("xdigit")) && ISXDIGIT ((UCHAR) *n)))
 339                       goto matched;
 340 #endif
 341                     c = *p++;
 342                   }
 343 #ifdef _LIBC
 344                 else if (c == L('[') && *p == L('='))
 345                   {
 346                     UCHAR str[1];
 347                     uint32_t nrules =
 348                       _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
 349                     const CHAR *startp = p;
 350
 351                     c = *++p;
 352                     if (c == L('\0'))
 353                       {
 354                         p = startp;
 355                         c = L('[');
 356                         goto normal_bracket;
 357                       }
 358                     str[0] = c;
 359
 360                     c = *++p;
 361                     if (c != L('=') || p[1] != L(']'))
 362                       {
 363                         p = startp;
 364                         c = L('[');
 365                         goto normal_bracket;
 366                       }
 367                     p += 2;
 368
 369                     if (nrules == 0)
 370                       {
 371                         if ((UCHAR) *n == str[0])
 372                           goto matched;
 373                       }
 374                     else
 375                       {
 376                         const int32_t *table;
 377 # if WIDE_CHAR_VERSION
 378                         const int32_t *weights;
 379                         const int32_t *extra;
 380 # else
 381                         const unsigned char *weights;
 382                         const unsigned char *extra;
 383 # endif
 384                         const int32_t *indirect;
 385                         int32_t idx;
 386                         const UCHAR *cp = (const UCHAR *) str;
 387
 388                         /* This #include defines a local function!  */
 389 # if WIDE_CHAR_VERSION
 390 #  include <locale/weightwc.h>
 391 # else
 392 #  include <locale/weight.h>
 393 # endif
 394
 395 # if WIDE_CHAR_VERSION
 396                         table = (const int32_t *)
 397                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
 398                         weights = (const int32_t *)
 399                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
 400                         extra = (const int32_t *)
 401                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
 402                         indirect = (const int32_t *)
 403                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
 404 # else
 405                         table = (const int32_t *)
 406                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
 407                         weights = (const unsigned char *)
 408                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
 409                         extra = (const unsigned char *)
 410                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
 411                         indirect = (const int32_t *)
 412                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
 413 # endif
 414
 415                         idx = findidx (&cp);
 416                         if (idx != 0)
 417                           {
 418                             /* We found a table entry.  Now see whether the
 419                                character we are currently at has the same
 420                                equivalance class value.  */
 421                             int len = weights[idx & 0xffffff];
 422                             int32_t idx2;
 423                             const UCHAR *np = (const UCHAR *) n;
 424
 425                             idx2 = findidx (&np);
 426                             if (idx2 != 0
 427                                 && (idx >> 24) == (idx2 >> 24)
 428                                 && len == weights[idx2 & 0xffffff])
 429                               {
 430                                 int cnt = 0;
 431
 432                                 idx &= 0xffffff;
 433                                 idx2 &= 0xffffff;
 434
 435                                 while (cnt < len
 436                                        && (weights[idx + 1 + cnt]
 437                                            == weights[idx2 + 1 + cnt]))
 438                                   ++cnt;
 439
 440                                 if (cnt == len)
 441                                   goto matched;
 442                               }
 443                           }
 444                       }
 445
 446                     c = *p++;
 447                   }
 448 #endif
 449                 else if (c == L('\0'))
 450                   /* [ (unterminated) loses.  */
 451                   return FNM_NOMATCH;
 452                 else
 453                   {
 454                     int is_range = 0;
 455
 456 #ifdef _LIBC
 457                     int is_seqval = 0;
 458
 459                     if (c == L('[') && *p == L('.'))
 460                       {
 461                         uint32_t nrules =
 462                           _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
 463                         const CHAR *startp = p;
 464                         size_t c1 = 0;
 465
 466                         while (1)
 467                           {
 468                             c = *++p;
 469                             if (c == L('.') && p[1] == L(']'))
 470                               {
 471                                 p += 2;
 472                                 break;
 473                               }
 474                             if (c == '\0')
 475                               return FNM_NOMATCH;
 476                             ++c1;
 477                           }
 478
 479                         /* We have to handling the symbols differently in
 480                            ranges since then the collation sequence is
 481                            important.  */
 482                         is_range = *p == L('-') && p[1] != L('\0');
 483
 484                         if (nrules == 0)
 485                           {
 486                             /* There are no names defined in the collation
 487                                data.  Therefore we only accept the trivial
 488                                names consisting of the character itself.  */
 489                             if (c1 != 1)
 490                               return FNM_NOMATCH;
 491
 492                             if (!is_range && *n == startp[1])
 493                               goto matched;
 494
 495                             cold = startp[1];
 496                             c = *p++;
 497                           }
 498                         else
 499                           {
 500                             int32_t table_size;
 501                             const int32_t *symb_table;
 502 # ifdef WIDE_CHAR_VERSION
 503                             char str[c1];
 504                             unsigned int strcnt;
 505 # else
 506 #  define str (startp + 1)
 507 # endif
 508                             const unsigned char *extra;
 509                             int32_t idx;
 510                             int32_t elem;
 511                             int32_t second;
 512                             int32_t hash;
 513
 514 # ifdef WIDE_CHAR_VERSION
 515                             /* We have to convert the name to a single-byte
 516                                string.  This is possible since the names
 517                                consist of ASCII characters and the internal
 518                                representation is UCS4.  */
 519                             for (strcnt = 0; strcnt < c1; ++strcnt)
 520                               str[strcnt] = startp[1 + strcnt];
 521 #endif
 522
 523                             table_size =
 524                               _NL_CURRENT_WORD (LC_COLLATE,
 525                                                 _NL_COLLATE_SYMB_HASH_SIZEMB);
 526                             symb_table = (const int32_t *)
 527                               _NL_CURRENT (LC_COLLATE,
 528                                            _NL_COLLATE_SYMB_TABLEMB);
 529                             extra = (const unsigned char *)
 530                               _NL_CURRENT (LC_COLLATE,
 531                                            _NL_COLLATE_SYMB_EXTRAMB);
 532
 533                             /* Locate the character in the hashing table.  */
 534                             hash = elem_hash (str, c1);
 535
 536                             idx = 0;
 537                             elem = hash % table_size;
 538                             if (symb_table[2 * elem] != 0)
 539                               {
 540                                 second = hash % (table_size - 2) + 1;
 541
 542                                 do
 543                                   {
 544                                     /* First compare the hashing value.  */
 545                                     if (symb_table[2 * elem] == hash
 546                                         && (c1
 547                                             == extra[symb_table[2 * elem + 1]])
 548                                         && memcmp (str,
 549                                                    &extra[symb_table[2 * elem
 550                                                                      + 1]
 551                                                           + 1], c1) == 0)
 552                                       {
 553                                         /* Yep, this is the entry.  */
 554                                         idx = symb_table[2 * elem + 1];
 555                                         idx += 1 + extra[idx];
 556                                         break;
 557                                       }
 558
 559                                     /* Next entry.  */
 560                                     elem += second;
 561                                   }
 562                                 while (symb_table[2 * elem] != 0);
 563                               }
 564
 565                             if (symb_table[2 * elem] != 0)
 566                               {
 567                                 /* Compare the byte sequence but only if
 568                                    this is not part of a range.  */
 569 # ifdef WIDE_CHAR_VERSION
 570                                 int32_t *wextra;
 571
 572                                 idx += 1 + extra[idx];
 573                                 /* Adjust for the alignment.  */
 574                                 idx = (idx + 3) & ~3;
 575
 576                                 wextra = (int32_t *) &extra[idx + 4];
 577 # endif
 578
 579                                 if (! is_range)
 580                                   {
 581 # ifdef WIDE_CHAR_VERSION
 582                                     for (c1 = 0;
 583                                          (int32_t) c1 < wextra[idx];
 584                                          ++c1)
 585                                       if (n[c1] != wextra[1 + c1])
 586                                         break;
 587
 588                                     if ((int32_t) c1 == wextra[idx])
 589                                       goto matched;
 590 # else
 591                                     for (c1 = 0; c1 < extra[idx]; ++c1)
 592                                       if (n[c1] != extra[1 + c1])
 593                                         break;
 594
 595                                     if (c1 == extra[idx])
 596                                       goto matched;
 597 # endif
 598                                   }
 599
 600                                 /* Get the collation sequence value.  */
 601                                 is_seqval = 1;
 602 # ifdef WIDE_CHAR_VERSION
 603                                 cold = wextra[1 + wextra[idx]];
 604 # else
 605                                 /* Adjust for the alignment.  */
 606                                 idx += 1 + extra[idx];
 607                                 idx = (idx + 3) & ~4;
 608                                 cold = *((int32_t *) &extra[idx]);
 609 # endif
 610
 611                                 c = *p++;
 612                               }
 613                             else if (c1 == 1)
 614                               {
 615                                 /* No valid character.  Match it as a
 616                                    single byte.  */
 617                                 if (!is_range && *n == str[0])
 618                                   goto matched;
 619
 620                                 cold = str[0];
 621                                 c = *p++;
 622                               }
 623                             else
 624                               return FNM_NOMATCH;
 625                           }
 626                       }
 627                     else
 628 # undef str
 629 #endif
 630                       {
 631                         c = FOLD (c);
 632                       normal_bracket:
 633
 634                         /* We have to handling the symbols differently in
 635                            ranges since then the collation sequence is
 636                            important.  */
 637                         is_range = (*p == L('-') && p[1] != L('\0')
 638                                     && p[1] != L(']'));
 639
 640                         if (!is_range && c == fn)
 641                           goto matched;
 642
 643                         /* This is needed if we goto normal_bracket; from
 644                            outside of is_seqval's scope.  */
 645                         is_seqval = 0;
 646                         cold = c;
 647                         c = *p++;
 648                       }
 649
 650                     if (c == L('-') && *p != L(']'))
 651                       {
 652 #if _LIBC
 653                         /* We have to find the collation sequence
 654                            value for C.  Collation sequence is nothing
 655                            we can regularly access.  The sequence
 656                            value is defined by the order in which the
 657                            definitions of the collation values for the
 658                            various characters appear in the source
 659                            file.  A strange concept, nowhere
 660                            documented.  */
 661                         uint32_t fcollseq;
 662                         uint32_t lcollseq;
 663                         UCHAR cend = *p++;
 664
 665 # ifdef WIDE_CHAR_VERSION
 666                         /* Search in the `names' array for the characters.  */
 667                         fcollseq = __collseq_table_lookup (collseq, fn);
 668                         if (fcollseq == ~((uint32_t) 0))
 669                           /* XXX We don't know anything about the character
 670                              we are supposed to match.  This means we are
 671                              failing.  */
 672                           goto range_not_matched;
 673
 674                         if (is_seqval)
 675                           lcollseq = cold;
 676                         else
 677                           lcollseq = __collseq_table_lookup (collseq, cold);
 678 # else
 679                         fcollseq = collseq[fn];
 680                         lcollseq = is_seqval ? cold : collseq[(UCHAR) cold];
 681 # endif
 682
 683                         is_seqval = 0;
 684                         if (cend == L('[') && *p == L('.'))
 685                           {
 686                             uint32_t nrules =
 687                               _NL_CURRENT_WORD (LC_COLLATE,
 688                                                 _NL_COLLATE_NRULES);
 689                             const CHAR *startp = p;
 690                             size_t c1 = 0;
 691
 692                             while (1)
 693                               {
 694                                 c = *++p;
 695                                 if (c == L('.') && p[1] == L(']'))
 696                                   {
 697                                     p += 2;
 698                                     break;
 699                                   }
 700                                 if (c == '\0')
 701                                   return FNM_NOMATCH;
 702                                 ++c1;
 703                               }
 704
 705                             if (nrules == 0)
 706                               {
 707                                 /* There are no names defined in the
 708                                    collation data.  Therefore we only
 709                                    accept the trivial names consisting
 710                                    of the character itself.  */
 711                                 if (c1 != 1)
 712                                   return FNM_NOMATCH;
 713
 714                                 cend = startp[1];
 715                               }
 716                             else
 717                               {
 718                                 int32_t table_size;
 719                                 const int32_t *symb_table;
 720 # ifdef WIDE_CHAR_VERSION
 721                                 char str[c1];
 722                                 unsigned int strcnt;
 723 # else
 724 #  define str (startp + 1)
 725 # endif
 726                                 const unsigned char *extra;
 727                                 int32_t idx;
 728                                 int32_t elem;
 729                                 int32_t second;
 730                                 int32_t hash;
 731
 732 # ifdef WIDE_CHAR_VERSION
 733                                 /* We have to convert the name to a single-byte
 734                                    string.  This is possible since the names
 735                                    consist of ASCII characters and the internal
 736                                    representation is UCS4.  */
 737                                 for (strcnt = 0; strcnt < c1; ++strcnt)
 738                                   str[strcnt] = startp[1 + strcnt];
 739 # endif
 740
 741                                 table_size =
 742                                   _NL_CURRENT_WORD (LC_COLLATE,
 743                                                     _NL_COLLATE_SYMB_HASH_SIZEMB);
 744                                 symb_table = (const int32_t *)
 745                                   _NL_CURRENT (LC_COLLATE,
 746                                                _NL_COLLATE_SYMB_TABLEMB);
 747                                 extra = (const unsigned char *)
 748                                   _NL_CURRENT (LC_COLLATE,
 749                                                _NL_COLLATE_SYMB_EXTRAMB);
 750
 751                                 /* Locate the character in the hashing
 752                                    table.  */
 753                                 hash = elem_hash (str, c1);
 754
 755                                 idx = 0;
 756                                 elem = hash % table_size;
 757                                 if (symb_table[2 * elem] != 0)
 758                                   {
 759                                     second = hash % (table_size - 2) + 1;
 760
 761                                     do
 762                                       {
 763                                         /* First compare the hashing value.  */
 764                                         if (symb_table[2 * elem] == hash
 765                                             && (c1
 766                                                 == extra[symb_table[2 * elem + 1]])
 767                                             && memcmp (str,
 768                                                        &extra[symb_table[2 * elem + 1]
 769                                                               + 1], c1) == 0)
 770                                           {
 771                                             /* Yep, this is the entry.  */
 772                                             idx = symb_table[2 * elem + 1];
 773                                             idx += 1 + extra[idx];
 774                                             break;
 775                                           }
 776
 777                                         /* Next entry.  */
 778                                         elem += second;
 779                                       }
 780                                     while (symb_table[2 * elem] != 0);
 781                                   }
 782
 783                                 if (symb_table[2 * elem] != 0)
 784                                   {
 785                                     /* Compare the byte sequence but only if
 786                                        this is not part of a range.  */
 787 # ifdef WIDE_CHAR_VERSION
 788                                     int32_t *wextra;
 789
 790                                     idx += 1 + extra[idx];
 791                                     /* Adjust for the alignment.  */
 792                                     idx = (idx + 3) & ~4;
 793
 794                                     wextra = (int32_t *) &extra[idx + 4];
 795 # endif
 796                                     /* Get the collation sequence value.  */
 797                                     is_seqval = 1;
 798 # ifdef WIDE_CHAR_VERSION
 799                                     cend = wextra[1 + wextra[idx]];
 800 # else
 801                                     /* Adjust for the alignment.  */
 802                                     idx += 1 + extra[idx];
 803                                     idx = (idx + 3) & ~4;
 804                                     cend = *((int32_t *) &extra[idx]);
 805 # endif
 806                                   }
 807                                 else if (symb_table[2 * elem] != 0 && c1 == 1)
 808                                   {
 809                                     cend = str[0];
 810                                     c = *p++;
 811                                   }
 812                                 else
 813                                   return FNM_NOMATCH;
 814                               }
 815 # undef str
 816                           }
 817                         else
 818                           {
 819                             if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
 820                               cend = *p++;
 821                             if (cend == L('\0'))
 822                               return FNM_NOMATCH;
 823                             cend = FOLD (cend);
 824                           }
 825
 826                         /* XXX It is not entirely clear to me how to handle
 827                            characters which are not mentioned in the
 828                            collation specification.  */
 829                         if (
 830 # ifdef WIDE_CHAR_VERSION
 831                             lcollseq == 0xffffffff ||
 832 # endif
 833                             lcollseq <= fcollseq)
 834                           {
 835                             /* We have to look at the upper bound.  */
 836                             uint32_t hcollseq;
 837
 838                             if (is_seqval)
 839                               hcollseq = cend;
 840                             else
 841                               {
 842 # ifdef WIDE_CHAR_VERSION
 843                                 hcollseq =
 844                                   __collseq_table_lookup (collseq, cend);
 845                                 if (hcollseq == ~((uint32_t) 0))
 846                                   {
 847                                     /* Hum, no information about the upper
 848                                        bound.  The matching succeeds if the
 849                                        lower bound is matched exactly.  */
 850                                     if (lcollseq != fcollseq)
 851                                       goto range_not_matched;
 852
 853                                     goto matched;
 854                                   }
 855 # else
 856                                 hcollseq = collseq[cend];
 857 # endif
 858                               }
 859
 860                             if (lcollseq <= hcollseq && fcollseq <= hcollseq)
 861                               goto matched;
 862                           }
 863 # ifdef WIDE_CHAR_VERSION
 864                       range_not_matched:
 865 # endif
 866 #else
 867                         /* We use a boring value comparison of the character
 868                            values.  This is better than comparing using
 869                            `strcoll' since the latter would have surprising
 870                            and sometimes fatal consequences.  */
 871                         UCHAR cend = *p++;
 872
 873                         if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
 874                           cend = *p++;
 875                         if (cend == L('\0'))
 876                           return FNM_NOMATCH;
 877
 878                         /* It is a range.  */
 879                         if (cold <= fn && fn <= cend)
 880                           goto matched;
 881 #endif
 882
 883                         c = *p++;
 884                       }
 885                   }
 886
 887                 if (c == L(']'))
 888                   break;
 889               }
 890
 891             if (!not)
 892               return FNM_NOMATCH;
 893             break;
 894
 895           matched:
 896             /* Skip the rest of the [...] that already matched.  */
 897             do
 898               {
 899               ignore_next:
 900                 c = *p++;
 901
 902                 if (c == L('\0'))
 903                   /* [... (unterminated) loses.  */
 904                   return FNM_NOMATCH;
 905
 906                 if (!(flags & FNM_NOESCAPE) && c == L('\\'))
 907                   {
 908                     if (*p == L('\0'))
 909                       return FNM_NOMATCH;
 910                     /* XXX 1003.2d11 is unclear if this is right.  */
 911                     ++p;
 912                   }
 913                 else if (c == L('[') && *p == L(':'))
 914                   {
 915                     int c1 = 0;
 916                     const CHAR *startp = p;
 917
 918                     while (1)
 919                       {
 920                         c = *++p;
 921                         if (++c1 == CHAR_CLASS_MAX_LENGTH)
 922                           return FNM_NOMATCH;
 923
 924                         if (*p == L(':') && p[1] == L(']'))
 925                           break;
 926
 927                         if (c < L('a') || c >= L('z'))
 928                           {
 929                             p = startp;
 930                             goto ignore_next;
 931                           }
 932                       }
 933                     p += 2;
 934                     c = *p++;
 935                   }
 936                 else if (c == L('[') && *p == L('='))
 937                   {
 938                     c = *++p;
 939                     if (c == L('\0'))
 940                       return FNM_NOMATCH;
 941                     c = *++p;
 942                     if (c != L('=') || p[1] != L(']'))
 943                       return FNM_NOMATCH;
 944                     p += 2;
 945                     c = *p++;
 946                   }
 947                 else if (c == L('[') && *p == L('.'))
 948                   {
 949                     ++p;
 950                     while (1)
 951                       {
 952                         c = *++p;
 953                         if (c == '\0')
 954                           return FNM_NOMATCH;
 955
 956                         if (*p == L('.') && p[1] == L(']'))
 957                           break;
 958                       }
 959                     p += 2;
 960                     c = *p++;
 961                   }
 962               }
 963             while (c != L(']'));
 964             if (not)
 965               return FNM_NOMATCH;
 966           }
 967           break;
 968
 969         case L('+'):
 970         case L('@'):
 971         case L('!'):
 972           if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
 973             {
 974               int res;
 975
 976               res = EXT (c, p, n, string_end, no_leading_period, flags);
 977               if (res != -1)
 978                 return res;
 979             }
 980           goto normal_match;
 981
 982         case L('/'):
 983           if (NO_LEADING_PERIOD (flags))
 984             {
 985               if (n == string_end || c != (UCHAR) *n)
 986                 return FNM_NOMATCH;
 987
 988               new_no_leading_period = 1;
 989               break;
 990             }
 991           /* FALLTHROUGH */
 992         default:
 993         normal_match:
 994           if (n == string_end || c != FOLD ((UCHAR) *n))
 995             return FNM_NOMATCH;
 996         }
 997
 998       no_leading_period = new_no_leading_period;
 999       ++n;
1000     }
1001
1002   if (n == string_end)
1003     return 0;
1004
1005   if ((flags & FNM_LEADING_DIR) && n != string_end && *n == L('/'))
1006     /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz".  */
1007     return 0;
1008
1009   return FNM_NOMATCH;
1010 }
1011
1012
1013 static const CHAR *
1014 internal_function
1015 END (const CHAR *pattern)
1016 {
1017   const CHAR *p = pattern;
1018
1019   while (1)
1020     if (*++p == L('\0'))
1021       /* This is an invalid pattern.  */
1022       return pattern;
1023     else if (*p == L('['))
1024       {
1025         /* Handle brackets special.  */
1026         if (posixly_correct == 0)
1027           posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
1028
1029         /* Skip the not sign.  We have to recognize it because of a possibly
1030            following ']'.  */
1031         if (*++p == L('!') || (posixly_correct < 0 && *p == L('^')))
1032           ++p;
1033         /* A leading ']' is recognized as such.  */
1034         if (*p == L(']'))
1035           ++p;
1036         /* Skip over all characters of the list.  */
1037         while (*p != L(']'))
1038           if (*p++ == L('\0'))
1039             /* This is no valid pattern.  */
1040             return pattern;
1041       }
1042     else if ((*p == L('?') || *p == L('*') || *p == L('+') || *p == L('@')
1043               || *p == L('!')) && p[1] == L('('))
1044       p = END (p + 1);
1045     else if (*p == L(')'))
1046       break;
1047
1048   return p + 1;
1049 }
1050
1051
1052 static int
1053 internal_function
1054 EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end,
1055      int no_leading_period, int flags)
1056 {
1057   const CHAR *startp;
1058   int level;
1059   struct patternlist
1060   {
1061     struct patternlist *next;
1062     CHAR str[0];
1063   } *list = NULL;
1064   struct patternlist **lastp = &list;
1065   size_t pattern_len = STRLEN (pattern);
1066   const CHAR *p;
1067   const CHAR *rs;
1068
1069   /* Parse the pattern.  Store the individual parts in the list.  */
1070   level = 0;
1071   for (startp = p = pattern + 1; level >= 0; ++p)
1072     if (*p == L('\0'))
1073       /* This is an invalid pattern.  */
1074       return -1;
1075     else if (*p == L('['))
1076       {
1077         /* Handle brackets special.  */
1078         if (posixly_correct == 0)
1079           posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
1080
1081         /* Skip the not sign.  We have to recognize it because of a possibly
1082            following ']'.  */
1083         if (*++p == L('!') || (posixly_correct < 0 && *p == L('^')))
1084           ++p;
1085         /* A leading ']' is recognized as such.  */
1086         if (*p == L(']'))
1087           ++p;
1088         /* Skip over all characters of the list.  */
1089         while (*p != L(']'))
1090           if (*p++ == L('\0'))
1091             /* This is no valid pattern.  */
1092             return -1;
1093       }
1094     else if ((*p == L('?') || *p == L('*') || *p == L('+') || *p == L('@')
1095               || *p == L('!')) && p[1] == L('('))
1096       /* Remember the nesting level.  */
1097       ++level;
1098     else if (*p == L(')'))
1099       {
1100         if (level-- == 0)
1101           {
1102             /* This means we found the end of the pattern.  */
1103 #define NEW_PATTERN \
1104             struct patternlist *newp;                                         \
1105                                                                               \
1106             if (opt == L('?') || opt == L('@'))                               \
1107               newp = alloca (sizeof (struct patternlist)                      \
1108                              + (pattern_len * sizeof (CHAR)));                \
1109             else                                                              \
1110               newp = alloca (sizeof (struct patternlist)                      \
1111                              + ((p - startp + 1) * sizeof (CHAR)));           \
1112             *((CHAR *) MEMPCPY (newp->str, startp, p - startp)) = L('\0');    \
1113             newp->next = NULL;                                                \
1114             *lastp = newp;                                                    \
1115             lastp = &newp->next
1116             NEW_PATTERN;
1117           }
1118       }
1119     else if (*p == L('|'))
1120       {
1121         if (level == 0)
1122           {
1123             NEW_PATTERN;
1124             startp = p + 1;
1125           }
1126       }
1127   assert (list != NULL);
1128   assert (p[-1] == L(')'));
1129 #undef NEW_PATTERN
1130
1131   switch (opt)
1132     {
1133     case L('*'):
1134       if (FCT (p, string, string_end, no_leading_period, flags, NULL) == 0)
1135         return 0;
1136       /* FALLTHROUGH */
1137
1138     case L('+'):
1139       do
1140         {
1141           for (rs = string; rs <= string_end; ++rs)
1142             /* First match the prefix with the current pattern with the
1143                current pattern.  */
1144             if (FCT (list->str, string, rs, no_leading_period,
1145                      flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1146                      NULL) == 0
1147                 /* This was successful.  Now match the rest with the rest
1148                    of the pattern.  */
1149                 && (FCT (p, rs, string_end,
1150                          rs == string
1151                          ? no_leading_period
1152                          : rs[-1] == '/' && NO_LEADING_PERIOD (flags) ? 1 : 0,
1153                          flags & FNM_FILE_NAME
1154                          ? flags : flags & ~FNM_PERIOD, NULL) == 0
1155                     /* This didn't work.  Try the whole pattern.  */
1156                     || (rs != string
1157                         && FCT (pattern - 1, rs, string_end,
1158                                 rs == string
1159                                 ? no_leading_period
1160                                 : (rs[-1] == '/' && NO_LEADING_PERIOD (flags)
1161                                    ? 1 : 0),
1162                                 flags & FNM_FILE_NAME
1163                                 ? flags : flags & ~FNM_PERIOD, NULL) == 0)))
1164               /* It worked.  Signal success.  */
1165               return 0;
1166         }
1167       while ((list = list->next) != NULL);
1168
1169       /* None of the patterns lead to a match.  */
1170       return FNM_NOMATCH;
1171
1172     case L('?'):
1173       if (FCT (p, string, string_end, no_leading_period, flags, NULL) == 0)
1174         return 0;
1175       /* FALLTHROUGH */
1176
1177     case L('@'):
1178       do
1179         /* I cannot believe it but `strcat' is actually acceptable
1180            here.  Match the entire string with the prefix from the
1181            pattern list and the rest of the pattern following the
1182            pattern list.  */
1183         if (FCT (STRCAT (list->str, p), string, string_end,
1184                  no_leading_period,
1185                  flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1186                  NULL) == 0)
1187           /* It worked.  Signal success.  */
1188           return 0;
1189       while ((list = list->next) != NULL);
1190
1191       /* None of the patterns lead to a match.  */
1192       return FNM_NOMATCH;
1193
1194     case L('!'):
1195       for (rs = string; rs <= string_end; ++rs)
1196         {
1197           struct patternlist *runp;
1198
1199           for (runp = list; runp != NULL; runp = runp->next)
1200             if (FCT (runp->str, string, rs,  no_leading_period,
1201                      flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1202                      NULL) == 0)
1203               break;
1204
1205           /* If none of the patterns matched see whether the rest does.  */
1206           if (runp == NULL
1207               && (FCT (p, rs, string_end,
1208                        rs == string
1209                        ? no_leading_period
1210                        : rs[-1] == '/' && NO_LEADING_PERIOD (flags) ? 1 : 0,
1211                        flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1212                        NULL) == 0))
1213             /* This is successful.  */
1214             return 0;
1215         }
1216
1217       /* None of the patterns together with the rest of the pattern
1218          lead to a match.  */
1219       return FNM_NOMATCH;
1220
1221     default:
1222       assert (! "Invalid extended matching operator");
1223       break;
1224     }
1225
1226   return -1;
1227 }
1228
1229
1230 #undef FOLD
1231 #undef CHAR
1232 #undef UCHAR
1233 #undef INT
1234 #undef FCT
1235 #undef EXT
1236 #undef END
1237 #undef STRUCT
1238 #undef MEMPCPY
1239 #undef MEMCHR
1240 #undef STRCOLL
1241 #undef STRLEN
1242 #undef STRCAT
1243 #undef L
1244 #undef BTOWC