posix/fnmatch_loop.c

   1 /* Copyright (C) 1991,1992,1993,1996,1997,1998,1999,2000,2001,2003,2004,2005
   2    Free Software Foundation, Inc.
   3    This file is part of the GNU C Library.
   4
   5    The GNU C Library is free software; you can redistribute it and/or
   6    modify it under the terms of the GNU Lesser General Public
   7    License as published by the Free Software Foundation; either
   8    version 2.1 of the License, or (at your option) any later version.
   9
  10    The GNU C Library is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13    Lesser General Public License for more details.
  14
  15    You should have received a copy of the GNU Lesser General Public
  16    License along with the GNU C Library; if not, write to the Free
  17    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
  18    02111-1307 USA.  */
  19
  20 /* Match STRING against the filename pattern PATTERN, returning zero if
  21    it matches, nonzero if not.  */
     /* STRING_END marks the end of STRING; the matcher treats reaching it as
        running out of input.  When NO_LEADING_PERIOD is nonzero, a `.' at
        the current position of STRING is not matched by `?', `*' or a
        bracket expression.  FLAGS carries the FNM_* option bits.  A failed
        match is reported as FNM_NOMATCH.  */
  22 static int FCT (const CHAR *pattern, const CHAR *string,
  23                 const CHAR *string_end, int no_leading_period, int flags)
  24      internal_function;
     /* Called by FCT when FNM_EXTMATCH is set in FLAGS and a `?' or `*' in the
        pattern is directly followed by `('; OPT is that pattern character
        and the remaining arguments are passed through from FCT.  A result
        of -1 makes FCT fall back to its ordinary wildcard handling; any
        other result is returned to FCT's caller unchanged.
        NOTE(review): inferred from the call sites in FCT; confirm against
        the definition of EXT.  */
  25 static int EXT (INT opt, const CHAR *pattern, const CHAR *string,
  26                 const CHAR *string_end, int no_leading_period, int flags)
  27      internal_function;
     /* Called by FCT with PATTERNP pointing at a `(' that follows a `?' or
        `*' while FNM_EXTMATCH is set.  When the returned pointer differs
        from PATTERNP, FCT treats the text up to it as an extended pattern
        and continues scanning from the returned position.
        NOTE(review): inferred from the call site in FCT; confirm against
        the definition of END.  */
  28 static const CHAR *END (const CHAR *patternp) internal_function;
  29
  30 static int
  31 internal_function
  32 FCT (pattern, string, string_end, no_leading_period, flags)
  33      const CHAR *pattern;
  34      const CHAR *string;
  35      const CHAR *string_end;
  36      int no_leading_period;
  37      int flags;
  38 {
  39   register const CHAR *p = pattern, *n = string;
  40   register UCHAR c;
  41 #ifdef _LIBC
  42 # if WIDE_CHAR_VERSION
  43   const char *collseq = (const char *)
  44     _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC);
  45 # else
  46   const UCHAR *collseq = (const UCHAR *)
  47     _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQMB);
  48 # endif
  49 #endif
  50
  51   while ((c = *p++) != L('\0'))
  52     {
  53       int new_no_leading_period = 0;
  54       c = FOLD (c);
  55
  56       switch (c)
  57         {
  58         case L('?'):
  59           if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
  60             {
  61               int res;
  62
  63               res = EXT (c, p, n, string_end, no_leading_period,
  64                          flags);
  65               if (res != -1)
  66                 return res;
  67             }
  68
  69           if (n == string_end)
  70             return FNM_NOMATCH;
  71           else if (*n == L('/') && (flags & FNM_FILE_NAME))
  72             return FNM_NOMATCH;
  73           else if (*n == L('.') && no_leading_period)
  74             return FNM_NOMATCH;
  75           break;
  76
  77         case L('\\'):
  78           if (!(flags & FNM_NOESCAPE))
  79             {
  80               c = *p++;
  81               if (c == L('\0'))
  82                 /* Trailing \ loses.  */
  83                 return FNM_NOMATCH;
  84               c = FOLD (c);
  85             }
  86           if (n == string_end || FOLD ((UCHAR) *n) != c)
  87             return FNM_NOMATCH;
  88           break;
  89
  90         case L('*'):
  91           if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
  92             {
  93               int res;
  94
  95               res = EXT (c, p, n, string_end, no_leading_period,
  96                          flags);
  97               if (res != -1)
  98                 return res;
  99             }
 100
 101           if (n != string_end && *n == L('.') && no_leading_period)
 102             return FNM_NOMATCH;
 103
 104           for (c = *p++; c == L('?') || c == L('*'); c = *p++)
 105             {
 106               if (*p == L('(') && (flags & FNM_EXTMATCH) != 0)
 107                 {
 108                   const CHAR *endp = END (p);
 109                   if (endp != p)
 110                     {
 111                       /* This is a pattern.  Skip over it.  */
 112                       p = endp;
 113                       continue;
 114                     }
 115                 }
 116
 117               if (c == L('?'))
 118                 {
 119                   /* A ? needs to match one character.  */
 120                   if (n == string_end)
 121                     /* There isn't another character; no match.  */
 122                     return FNM_NOMATCH;
 123                   else if (*n == L('/')
 124                            && __builtin_expect (flags & FNM_FILE_NAME, 0))
 125                     /* A slash does not match a wildcard under
 126                        FNM_FILE_NAME.  */
 127                     return FNM_NOMATCH;
 128                   else
 129                     /* One character of the string is consumed in matching
 130                        this ? wildcard, so *??? won't match if there are
 131                        fewer than three characters.  */
 132                     ++n;
 133                 }
 134             }
 135
 136           if (c == L('\0'))
 137             /* The wildcard(s) is/are the last element of the pattern.
 138                If the name is a file name and contains another slash
 139                this means it cannot match, unless the FNM_LEADING_DIR
 140                flag is set.  */
 141             {
 142               int result = (flags & FNM_FILE_NAME) == 0 ? 0 : FNM_NOMATCH;
 143
 144               if (flags & FNM_FILE_NAME)
 145                 {
 146                   if (flags & FNM_LEADING_DIR)
 147                     result = 0;
 148                   else
 149                     {
 150                       if (MEMCHR (n, L('/'), string_end - n) == NULL)
 151                         result = 0;
 152                     }
 153                 }
 154
 155               return result;
 156             }
 157           else
 158             {
 159               const CHAR *endp;
 160
 161               endp = MEMCHR (n, (flags & FNM_FILE_NAME) ? L('/') : L('\0'),
 162                              string_end - n);
 163               if (endp == NULL)
 164                 endp = string_end;
 165
 166               if (c == L('[')
 167                   || (__builtin_expect (flags & FNM_EXTMATCH, 0) != 0
 168                       && (c == L('@') || c == L('+') || c == L('!'))
 169                       && *p == L('(')))
 170                 {
 171                   int flags2 = ((flags & FNM_FILE_NAME)
 172                                 ? flags : (flags & ~FNM_PERIOD));
 173                   int no_leading_period2 = no_leading_period;
 174
 175                   for (--p; n < endp; ++n, no_leading_period2 = 0)
 176                     if (FCT (p, n, string_end, no_leading_period2, flags2)
 177                         == 0)
 178                       return 0;
 179                 }
 180               else if (c == L('/') && (flags & FNM_FILE_NAME))
 181                 {
 182                   while (n < string_end && *n != L('/'))
 183                     ++n;
 184                   if (n < string_end && *n == L('/')
 185                       && (FCT (p, n + 1, string_end, flags & FNM_PERIOD, flags)
 186                           == 0))
 187                     return 0;
 188                 }
 189               else
 190                 {
 191                   int flags2 = ((flags & FNM_FILE_NAME)
 192                                 ? flags : (flags & ~FNM_PERIOD));
 193                   int no_leading_period2 = no_leading_period;
 194
 195                   if (c == L('\\') && !(flags & FNM_NOESCAPE))
 196                     c = *p;
 197                   c = FOLD (c);
 198                   for (--p; n < endp; ++n, no_leading_period2 = 0)
 199                     if (FOLD ((UCHAR) *n) == c
 200                         && (FCT (p, n, string_end, no_leading_period2, flags2)
 201                             == 0))
 202                       return 0;
 203                 }
 204             }
 205
 206           /* If we come here no match is possible with the wildcard.  */
 207           return FNM_NOMATCH;
 208
 209         case L('['):
 210           {
 211             /* Nonzero if the sense of the character class is inverted.  */
 212             register int not;
 213             CHAR cold;
 214             UCHAR fn;
 215
 216             if (posixly_correct == 0)
 217               posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
 218
 219             if (n == string_end)
 220               return FNM_NOMATCH;
 221
 222             if (*n == L('.') && no_leading_period)
 223               return FNM_NOMATCH;
 224
 225             if (*n == L('/') && (flags & FNM_FILE_NAME))
 226               /* `/' cannot be matched.  */
 227               return FNM_NOMATCH;
 228
 229             not = (*p == L('!') || (posixly_correct < 0 && *p == L('^')));
 230             if (not)
 231               ++p;
 232
 233             fn = FOLD ((UCHAR) *n);
 234
 235             c = *p++;
 236             for (;;)
 237               {
 238                 if (!(flags & FNM_NOESCAPE) && c == L('\\'))
 239                   {
 240                     if (*p == L('\0'))
 241                       return FNM_NOMATCH;
 242                     c = FOLD ((UCHAR) *p);
 243                     ++p;
 244
 245                     goto normal_bracket;
 246                   }
 247                 else if (c == L('[') && *p == L(':'))
 248                   {
 249                     /* Leave room for the null.  */
 250                     CHAR str[CHAR_CLASS_MAX_LENGTH + 1];
 251                     size_t c1 = 0;
 252 #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
 253                     wctype_t wt;
 254 #endif
 255                     const CHAR *startp = p;
 256
 257                     for (;;)
 258                       {
 259                         if (c1 == CHAR_CLASS_MAX_LENGTH)
 260                           /* The name is too long and therefore the pattern
 261                              is ill-formed.  */
 262                           return FNM_NOMATCH;
 263
 264                         c = *++p;
 265                         if (c == L(':') && p[1] == L(']'))
 266                           {
 267                             p += 2;
 268                             break;
 269                           }
 270                         if (c < L('a') || c >= L('z'))
 271                           {
 272                             /* This cannot possibly be a character class name.
 273                                Match it as a normal range.  */
 274                             p = startp;
 275                             c = L('[');
 276                             goto normal_bracket;
 277                           }
 278                         str[c1++] = c;
 279                       }
 280                     str[c1] = L('\0');
 281
 282 #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
 283                     wt = IS_CHAR_CLASS (str);
 284                     if (wt == 0)
 285                       /* Invalid character class name.  */
 286                       return FNM_NOMATCH;
 287
 288 # if defined _LIBC && ! WIDE_CHAR_VERSION
 289                     /* The following code is glibc specific, but it does a
 290                        good job of speeding things up there since we can
 291                        avoid the btowc() call.  */
 292                     if (_ISCTYPE ((UCHAR) *n, wt))
 293                       goto matched;
 294 # else
 295                     if (ISWCTYPE (BTOWC ((UCHAR) *n), wt))
 296                       goto matched;
 297 # endif
 298 #else
 299                     if ((STREQ (str, L("alnum")) && ISALNUM ((UCHAR) *n))
 300                         || (STREQ (str, L("alpha")) && ISALPHA ((UCHAR) *n))
 301                         || (STREQ (str, L("blank")) && ISBLANK ((UCHAR) *n))
 302                         || (STREQ (str, L("cntrl")) && ISCNTRL ((UCHAR) *n))
 303                         || (STREQ (str, L("digit")) && ISDIGIT ((UCHAR) *n))
 304                         || (STREQ (str, L("graph")) && ISGRAPH ((UCHAR) *n))
 305                         || (STREQ (str, L("lower")) && ISLOWER ((UCHAR) *n))
 306                         || (STREQ (str, L("print")) && ISPRINT ((UCHAR) *n))
 307                         || (STREQ (str, L("punct")) && ISPUNCT ((UCHAR) *n))
 308                         || (STREQ (str, L("space")) && ISSPACE ((UCHAR) *n))
 309                         || (STREQ (str, L("upper")) && ISUPPER ((UCHAR) *n))
 310                         || (STREQ (str, L("xdigit")) && ISXDIGIT ((UCHAR) *n)))
 311                       goto matched;
 312 #endif
 313                     c = *p++;
 314                   }
 315 #ifdef _LIBC
 316                 else if (c == L('[') && *p == L('='))
 317                   {
 318                     UCHAR str[1];
 319                     uint32_t nrules =
 320                       _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
 321                     const CHAR *startp = p;
 322
 323                     c = *++p;
 324                     if (c == L('\0'))
 325                       {
 326                         p = startp;
 327                         c = L('[');
 328                         goto normal_bracket;
 329                       }
 330                     str[0] = c;
 331
 332                     c = *++p;
 333                     if (c != L('=') || p[1] != L(']'))
 334                       {
 335                         p = startp;
 336                         c = L('[');
 337                         goto normal_bracket;
 338                       }
 339                     p += 2;
 340
 341                     if (nrules == 0)
 342                       {
 343                         if ((UCHAR) *n == str[0])
 344                           goto matched;
 345                       }
 346                     else
 347                       {
 348                         const int32_t *table;
 349 # if WIDE_CHAR_VERSION
 350                         const int32_t *weights;
 351                         const int32_t *extra;
 352 # else
 353                         const unsigned char *weights;
 354                         const unsigned char *extra;
 355 # endif
 356                         const int32_t *indirect;
 357                         int32_t idx;
 358                         const UCHAR *cp = (const UCHAR *) str;
 359
 360                         /* This #include defines a local function!  */
 361 # if WIDE_CHAR_VERSION
 362 #  include <locale/weightwc.h>
 363 # else
 364 #  include <locale/weight.h>
 365 # endif
 366
 367 # if WIDE_CHAR_VERSION
 368                         table = (const int32_t *)
 369                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
 370                         weights = (const int32_t *)
 371                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
 372                         extra = (const int32_t *)
 373                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
 374                         indirect = (const int32_t *)
 375                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
 376 # else
 377                         table = (const int32_t *)
 378                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
 379                         weights = (const unsigned char *)
 380                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
 381                         extra = (const unsigned char *)
 382                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
 383                         indirect = (const int32_t *)
 384                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
 385 # endif
 386
 387                         idx = findidx (&cp);
 388                         if (idx != 0)
 389                           {
 390                             /* We found a table entry.  Now see whether the
 391                                character we are currently at has the same
 392                                equivalence class value.  */
 393                             int len = weights[idx];
 394                             int32_t idx2;
 395                             const UCHAR *np = (const UCHAR *) n;
 396
 397                             idx2 = findidx (&np);
 398                             if (idx2 != 0 && len == weights[idx2])
 399                               {
 400                                 int cnt = 0;
 401
 402                                 while (cnt < len
 403                                        && (weights[idx + 1 + cnt]
 404                                            == weights[idx2 + 1 + cnt]))
 405                                   ++cnt;
 406
 407                                 if (cnt == len)
 408                                   goto matched;
 409                               }
 410                           }
 411                       }
 412
 413                     c = *p++;
 414                   }
 415 #endif
 416                 else if (c == L('\0'))
 417                   /* [ (unterminated) loses.  */
 418                   return FNM_NOMATCH;
 419                 else
 420                   {
 421                     int is_range = 0;
 422
 423 #ifdef _LIBC
 424                     int is_seqval = 0;
 425
 426                     if (c == L('[') && *p == L('.'))
 427                       {
 428                         uint32_t nrules =
 429                           _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
 430                         const CHAR *startp = p;
 431                         size_t c1 = 0;
 432
 433                         while (1)
 434                           {
 435                             c = *++p;
 436                             if (c == L('.') && p[1] == L(']'))
 437                               {
 438                                 p += 2;
 439                                 break;
 440                               }
 441                             if (c == '\0')
 442                               return FNM_NOMATCH;
 443                             ++c1;
 444                           }
 445
 446                         /* We have to handle the symbols differently in
 447                            ranges since there the collation sequence is
 448                            important.  */
 449                         is_range = *p == L('-') && p[1] != L('\0');
 450
 451                         if (nrules == 0)
 452                           {
 453                             /* There are no names defined in the collation
 454                                data.  Therefore we only accept the trivial
 455                                names consisting of the character itself.  */
 456                             if (c1 != 1)
 457                               return FNM_NOMATCH;
 458
 459                             if (!is_range && *n == startp[1])
 460                               goto matched;
 461
 462                             cold = startp[1];
 463                             c = *p++;
 464                           }
 465                         else
 466                           {
 467                             int32_t table_size;
 468                             const int32_t *symb_table;
 469 # ifdef WIDE_CHAR_VERSION
 470                             char str[c1];
 471                             unsigned int strcnt;
 472 # else
 473 #  define str (startp + 1)
 474 # endif
 475                             const unsigned char *extra;
 476                             int32_t idx;
 477                             int32_t elem;
 478                             int32_t second;
 479                             int32_t hash;
 480
 481 # ifdef WIDE_CHAR_VERSION
 482                             /* We have to convert the name to a single-byte
 483                                string.  This is possible since the names
 484                                consist of ASCII characters and the internal
 485                                representation is UCS4.  */
 486                             for (strcnt = 0; strcnt < c1; ++strcnt)
 487                               str[strcnt] = startp[1 + strcnt];
 488 #endif
 489
 490                             table_size =
 491                               _NL_CURRENT_WORD (LC_COLLATE,
 492                                                 _NL_COLLATE_SYMB_HASH_SIZEMB);
 493                             symb_table = (const int32_t *)
 494                               _NL_CURRENT (LC_COLLATE,
 495                                            _NL_COLLATE_SYMB_TABLEMB);
 496                             extra = (const unsigned char *)
 497                               _NL_CURRENT (LC_COLLATE,
 498                                            _NL_COLLATE_SYMB_EXTRAMB);
 499
 500                             /* Locate the character in the hashing table.  */
 501                             hash = elem_hash (str, c1);
 502
 503                             idx = 0;
 504                             elem = hash % table_size;
 505                             if (symb_table[2 * elem] != 0)
 506                               {
 507                                 second = hash % (table_size - 2) + 1;
 508
 509                                 do
 510                                   {
 511                                     /* First compare the hashing value.  */
 512                                     if (symb_table[2 * elem] == hash
 513                                         && (c1
 514                                             == extra[symb_table[2 * elem + 1]])
 515                                         && memcmp (str,
 516                                                    &extra[symb_table[2 * elem
 517                                                                      + 1]
 518                                                           + 1], c1) == 0)
 519                                       {
 520                                         /* Yep, this is the entry.  */
 521                                         idx = symb_table[2 * elem + 1];
 522                                         idx += 1 + extra[idx];
 523                                         break;
 524                                       }
 525
 526                                     /* Next entry.  */
 527                                     elem += second;
 528                                   }
 529                                 while (symb_table[2 * elem] != 0);
 530                               }
 531
 532                             if (symb_table[2 * elem] != 0)
 533                               {
 534                                 /* Compare the byte sequence but only if
 535                                    this is not part of a range.  */
 536 # ifdef WIDE_CHAR_VERSION
 537                                 int32_t *wextra;
 538
 539                                 idx += 1 + extra[idx];
 540                                 /* Adjust for the alignment.  */
 541                                 idx = (idx + 3) & ~3;
 542
 543                                 wextra = (int32_t *) &extra[idx + 4];
 544 # endif
 545
 546                                 if (! is_range)
 547                                   {
 548 # ifdef WIDE_CHAR_VERSION
 549                                     for (c1 = 0;
 550                                          (int32_t) c1 < wextra[idx];
 551                                          ++c1)
 552                                       if (n[c1] != wextra[1 + c1])
 553                                         break;
 554
 555                                     if ((int32_t) c1 == wextra[idx])
 556                                       goto matched;
 557 # else
 558                                     for (c1 = 0; c1 < extra[idx]; ++c1)
 559                                       if (n[c1] != extra[1 + c1])
 560                                         break;
 561
 562                                     if (c1 == extra[idx])
 563                                       goto matched;
 564 # endif
 565                                   }
 566
 567                                 /* Get the collation sequence value.  */
 568                                 is_seqval = 1;
 569 # ifdef WIDE_CHAR_VERSION
 570                                 cold = wextra[1 + wextra[idx]];
 571 # else
 572                                 /* Adjust for the alignment.  */
 573                                 idx += 1 + extra[idx];
 574                                 idx = (idx + 3) & ~4;
 575                                 cold = *((int32_t *) &extra[idx]);
 576 # endif
 577
 578                                 c = *p++;
 579                               }
 580                             else if (c1 == 1)
 581                               {
 582                                 /* No valid character.  Match it as a
 583                                    single byte.  */
 584                                 if (!is_range && *n == str[0])
 585                                   goto matched;
 586
 587                                 cold = str[0];
 588                                 c = *p++;
 589                               }
 590                             else
 591                               return FNM_NOMATCH;
 592                           }
 593                       }
 594                     else
 595 # undef str
 596 #endif
 597                       {
 598                         c = FOLD (c);
 599                       normal_bracket:
 600
 601                         /* We have to handle the symbols differently in
 602                            ranges since there the collation sequence is
 603                            important.  */
 604                         is_range = (*p == L('-') && p[1] != L('\0')
 605                                     && p[1] != L(']'));
 606
 607                         if (!is_range && c == fn)
 608                           goto matched;
 609
 610                         /* This is needed if we goto normal_bracket; from
 611                            outside of is_seqval's scope.  */
 612                         is_seqval = 0;
 613                         cold = c;
 614                         c = *p++;
 615                       }
 616
 617                     if (c == L('-') && *p != L(']'))
 618                       {
 619 #if _LIBC
 620                         /* We have to find the collation sequence
 621                            value for C.  Collation sequence is nothing
 622                            we can regularly access.  The sequence
 623                            value is defined by the order in which the
 624                            definitions of the collation values for the
 625                            various characters appear in the source
 626                            file.  A strange concept, nowhere
 627                            documented.  */
 628                         uint32_t fcollseq;
 629                         uint32_t lcollseq;
 630                         UCHAR cend = *p++;
 631
 632 # ifdef WIDE_CHAR_VERSION
 633                         /* Search in the `names' array for the characters.  */
 634                         fcollseq = __collseq_table_lookup (collseq, fn);
 635                         if (fcollseq == ~((uint32_t) 0))
 636                           /* XXX We don't know anything about the character
 637                              we are supposed to match.  This means we are
 638                              failing.  */
 639                           goto range_not_matched;
 640
 641                         if (is_seqval)
 642                           lcollseq = cold;
 643                         else
 644                           lcollseq = __collseq_table_lookup (collseq, cold);
 645 # else
 646                         fcollseq = collseq[fn];
 647                         lcollseq = is_seqval ? cold : collseq[(UCHAR) cold];
 648 # endif
 649
 650                         is_seqval = 0;
 651                         if (cend == L('[') && *p == L('.'))
 652                           {
 653                             uint32_t nrules =
 654                               _NL_CURRENT_WORD (LC_COLLATE,
 655                                                 _NL_COLLATE_NRULES);
 656                             const CHAR *startp = p;
 657                             size_t c1 = 0;
 658
 659                             while (1)
 660                               {
 661                                 c = *++p;
 662                                 if (c == L('.') && p[1] == L(']'))
 663                                   {
 664                                     p += 2;
 665                                     break;
 666                                   }
 667                                 if (c == '\0')
 668                                   return FNM_NOMATCH;
 669                                 ++c1;
 670                               }
 671
 672                             if (nrules == 0)
 673                               {
 674                                 /* There are no names defined in the
 675                                    collation data.  Therefore we only
 676                                    accept the trivial names consisting
 677                                    of the character itself.  */
 678                                 if (c1 != 1)
 679                                   return FNM_NOMATCH;
 680
 681                                 cend = startp[1];
 682                               }
 683                             else
 684                               {
 685                                 int32_t table_size;
 686                                 const int32_t *symb_table;
 687 # ifdef WIDE_CHAR_VERSION
 688                                 char str[c1];
 689                                 unsigned int strcnt;
 690 # else
 691 #  define str (startp + 1)
 692 # endif
 693                                 const unsigned char *extra;
 694                                 int32_t idx;
 695                                 int32_t elem;
 696                                 int32_t second;
 697                                 int32_t hash;
 698
 699 # ifdef WIDE_CHAR_VERSION
 700                                 /* We have to convert the name to a single-byte
 701                                    string.  This is possible since the names
 702                                    consist of ASCII characters and the internal
 703                                    representation is UCS4.  */
 704                                 for (strcnt = 0; strcnt < c1; ++strcnt)
 705                                   str[strcnt] = startp[1 + strcnt];
 706 # endif
 707
 708                                 table_size =
 709                                   _NL_CURRENT_WORD (LC_COLLATE,
 710                                                     _NL_COLLATE_SYMB_HASH_SIZEMB);
 711                                 symb_table = (const int32_t *)
 712                                   _NL_CURRENT (LC_COLLATE,
 713                                                _NL_COLLATE_SYMB_TABLEMB);
 714                                 extra = (const unsigned char *)
 715                                   _NL_CURRENT (LC_COLLATE,
 716                                                _NL_COLLATE_SYMB_EXTRAMB);
 717
 718                                 /* Locate the character in the hashing
 719                                    table.  */
 720                                 hash = elem_hash (str, c1);
 721
 722                                 idx = 0;
 723                                 elem = hash % table_size;
 724                                 if (symb_table[2 * elem] != 0)
 725                                   {
 726                                     second = hash % (table_size - 2) + 1;
 727
 728                                     do
 729                                       {
 730                                         /* First compare the hashing value.  */
 731                                         if (symb_table[2 * elem] == hash
 732                                             && (c1
 733                                                 == extra[symb_table[2 * elem + 1]])
 734                                             && memcmp (str,
 735                                                        &extra[symb_table[2 * elem + 1]
 736                                                               + 1], c1) == 0)
 737                                           {
 738                                             /* Yep, this is the entry.  */
 739                                             idx = symb_table[2 * elem + 1];
 740                                             idx += 1 + extra[idx];
 741                                             break;
 742                                           }
 743
 744                                         /* Next entry.  */
 745                                         elem += second;
 746                                       }
 747                                     while (symb_table[2 * elem] != 0);
 748                                   }
 749
 750                                 if (symb_table[2 * elem] != 0)
 751                                   {
 752                                     /* Compare the byte sequence but only if
 753                                        this is not part of a range.  */
 754 # ifdef WIDE_CHAR_VERSION
 755                                     int32_t *wextra;
 756
 757                                     idx += 1 + extra[idx];
 758                                     /* Adjust for the alignment.  */
 759                                     idx = (idx + 3) & ~4;
 760
 761                                     wextra = (int32_t *) &extra[idx + 4];
 762 # endif
 763                                     /* Get the collation sequence value.  */
 764                                     is_seqval = 1;
 765 # ifdef WIDE_CHAR_VERSION
 766                                     cend = wextra[1 + wextra[idx]];
 767 # else
 768                                     /* Adjust for the alignment.  */
 769                                     idx += 1 + extra[idx];
 770                                     idx = (idx + 3) & ~4;
 771                                     cend = *((int32_t *) &extra[idx]);
 772 # endif
 773                                   }
 774                                 else if (symb_table[2 * elem] != 0 && c1 == 1)
 775                                   {
 776                                     cend = str[0];
 777                                     c = *p++;
 778                                   }
 779                                 else
 780                                   return FNM_NOMATCH;
 781                               }
 782 # undef str
 783                           }
 784                         else
 785                           {
 786                             if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
 787                               cend = *p++;
 788                             if (cend == L('\0'))
 789                               return FNM_NOMATCH;
 790                             cend = FOLD (cend);
 791                           }
 792
 793                         /* XXX It is not entirely clear to me how to handle
 794                            characters which are not mentioned in the
 795                            collation specification.  */
 796                         if (
 797 # ifdef WIDE_CHAR_VERSION
 798                             lcollseq == 0xffffffff ||
 799 # endif
 800                             lcollseq <= fcollseq)
 801                           {
 802                             /* We have to look at the upper bound.  */
 803                             uint32_t hcollseq;
 804
 805                             if (is_seqval)
 806                               hcollseq = cend;
 807                             else
 808                               {
 809 # ifdef WIDE_CHAR_VERSION
 810                                 hcollseq =
 811                                   __collseq_table_lookup (collseq, cend);
 812                                 if (hcollseq == ~((uint32_t) 0))
 813                                   {
 814                                     /* Hum, no information about the upper
 815                                        bound.  The matching succeeds if the
 816                                        lower bound is matched exactly.  */
 817                                     if (lcollseq != fcollseq)
 818                                       goto range_not_matched;
 819
 820                                     goto matched;
 821                                   }
 822 # else
 823                                 hcollseq = collseq[cend];
 824 # endif
 825                               }
 826
 827                             if (lcollseq <= hcollseq && fcollseq <= hcollseq)
 828                               goto matched;
 829                           }
 830 # ifdef WIDE_CHAR_VERSION
 831                       range_not_matched:
 832 # endif
 833 #else
 834                         /* We use a boring value comparison of the character
 835                            values.  This is better than comparing using
 836                            `strcoll' since the latter would have surprising
 837                            and sometimes fatal consequences.  */
 838                         UCHAR cend = *p++;
 839
 840                         if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
 841                           cend = *p++;
 842                         if (cend == L('\0'))
 843                           return FNM_NOMATCH;
 844
 845                         /* It is a range.  */
 846                         if (cold <= fn && fn <= cend)
 847                           goto matched;
 848 #endif
 849
 850                         c = *p++;
 851                       }
 852                   }
 853
 854                 if (c == L(']'))
 855                   break;
 856               }
 857
 858             if (!not)
 859               return FNM_NOMATCH;
 860             break;
 861
 862           matched:
 863             /* Skip the rest of the [...] that already matched.  */
 864             do
 865               {
 866               ignore_next:
 867                 c = *p++;
 868
 869                 if (c == L('\0'))
 870                   /* [... (unterminated) loses.  */
 871                   return FNM_NOMATCH;
 872
 873                 if (!(flags & FNM_NOESCAPE) && c == L('\\'))
 874                   {
 875                     if (*p == L('\0'))
 876                       return FNM_NOMATCH;
 877                     /* XXX 1003.2d11 is unclear if this is right.  */
 878                     ++p;
 879                   }
 880                 else if (c == L('[') && *p == L(':'))
 881                   {
 882                     int c1 = 0;
 883                     const CHAR *startp = p;
 884
 885                     while (1)
 886                       {
 887                         c = *++p;
 888                         if (++c1 == CHAR_CLASS_MAX_LENGTH)
 889                           return FNM_NOMATCH;
 890
 891                         if (*p == L(':') && p[1] == L(']'))
 892                           break;
 893
 894                         if (c < L('a') || c >= L('z'))
 895                           {
 896                             p = startp;
 897                             goto ignore_next;
 898                           }
 899                       }
 900                     p += 2;
 901                     c = *p++;
 902                   }
 903                 else if (c == L('[') && *p == L('='))
 904                   {
 905                     c = *++p;
 906                     if (c == L('\0'))
 907                       return FNM_NOMATCH;
 908                     c = *++p;
 909                     if (c != L('=') || p[1] != L(']'))
 910                       return FNM_NOMATCH;
 911                     p += 2;
 912                     c = *p++;
 913                   }
 914                 else if (c == L('[') && *p == L('.'))
 915                   {
 916                     ++p;
 917                     while (1)
 918                       {
 919                         c = *++p;
 920                         if (c == '\0')
 921                           return FNM_NOMATCH;
 922
 923                         if (*p == L('.') && p[1] == L(']'))
 924                           break;
 925                       }
 926                     p += 2;
 927                     c = *p++;
 928                   }
 929               }
 930             while (c != L(']'));
 931             if (not)
 932               return FNM_NOMATCH;
 933           }
 934           break;
 935
 936         case L('+'):
 937         case L('@'):
 938         case L('!'):
 939           if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
 940             {
 941               int res;
 942
 943               res = EXT (c, p, n, string_end, no_leading_period, flags);
 944               if (res != -1)
 945                 return res;
 946             }
 947           goto normal_match;
 948
 949         case L('/'):
 950           if (NO_LEADING_PERIOD (flags))
 951             {
 952               if (n == string_end || c != (UCHAR) *n)
 953                 return FNM_NOMATCH;
 954
 955               new_no_leading_period = 1;
 956               break;
 957             }
 958           /* FALLTHROUGH */
 959         default:
 960         normal_match:
 961           if (n == string_end || c != FOLD ((UCHAR) *n))
 962             return FNM_NOMATCH;
 963         }
 964
 965       no_leading_period = new_no_leading_period;
 966       ++n;
 967     }
 968
 969   if (n == string_end)
 970     return 0;
 971
 972   if ((flags & FNM_LEADING_DIR) && n != string_end && *n == L('/'))
 973     /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz".  */
 974     return 0;
 975
 976   return FNM_NOMATCH;
 977 }
 978
 979
 980 static const CHAR *
 981 internal_function
 982 END (const CHAR *pattern)
 983 {
 984   const CHAR *p = pattern;
 985
 986   while (1)
 987     if (*++p == L('\0'))
 988       /* This is an invalid pattern.  */
 989       return pattern;
 990     else if (*p == L('['))
 991       {
 992         /* Handle brackets special.  */
 993         if (posixly_correct == 0)
 994           posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
 995
 996         /* Skip the not sign.  We have to recognize it because of a possibly
 997            following ']'.  */
 998         if (*++p == L('!') || (posixly_correct < 0 && *p == L('^')))
 999           ++p;
1000         /* A leading ']' is recognized as such.  */
1001         if (*p == L(']'))
1002           ++p;
1003         /* Skip over all characters of the list.  */
1004         while (*p != L(']'))
1005           if (*p++ == L('\0'))
1006             /* This is no valid pattern.  */
1007             return pattern;
1008       }
1009     else if ((*p == L('?') || *p == L('*') || *p == L('+') || *p == L('@')
1010               || *p == L('!')) && p[1] == L('('))
1011       p = END (p + 1);
1012     else if (*p == L(')'))
1013       break;
1014
1015   return p + 1;
1016 }
1017
1018
1019 static int
1020 internal_function
1021 EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end,
1022      int no_leading_period, int flags)
1023 {
1024   const CHAR *startp;
1025   int level;
1026   struct patternlist
1027   {
1028     struct patternlist *next;
1029     CHAR str[0];
1030   } *list = NULL;
1031   struct patternlist **lastp = &list;
1032   size_t pattern_len = STRLEN (pattern);
1033   const CHAR *p;
1034   const CHAR *rs;
1035
1036   /* Parse the pattern.  Store the individual parts in the list.  */
1037   level = 0;
1038   for (startp = p = pattern + 1; level >= 0; ++p)
1039     if (*p == L('\0'))
1040       /* This is an invalid pattern.  */
1041       return -1;
1042     else if (*p == L('['))
1043       {
1044         /* Handle brackets special.  */
1045         if (posixly_correct == 0)
1046           posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
1047
1048         /* Skip the not sign.  We have to recognize it because of a possibly
1049            following ']'.  */
1050         if (*++p == L('!') || (posixly_correct < 0 && *p == L('^')))
1051           ++p;
1052         /* A leading ']' is recognized as such.  */
1053         if (*p == L(']'))
1054           ++p;
1055         /* Skip over all characters of the list.  */
1056         while (*p != L(']'))
1057           if (*p++ == L('\0'))
1058             /* This is no valid pattern.  */
1059             return -1;
1060       }
1061     else if ((*p == L('?') || *p == L('*') || *p == L('+') || *p == L('@')
1062               || *p == L('!')) && p[1] == L('('))
1063       /* Remember the nesting level.  */
1064       ++level;
1065     else if (*p == L(')'))
1066       {
1067         if (level-- == 0)
1068           {
1069             /* This means we found the end of the pattern.  */
1070 #define NEW_PATTERN \
1071             struct patternlist *newp;                                         \
1072                                                                               \
1073             if (opt == L('?') || opt == L('@'))                               \
1074               newp = alloca (sizeof (struct patternlist)                      \
1075                              + (pattern_len * sizeof (CHAR)));                \
1076             else                                                              \
1077               newp = alloca (sizeof (struct patternlist)                      \
1078                              + ((p - startp + 1) * sizeof (CHAR)));           \
1079             *((CHAR *) MEMPCPY (newp->str, startp, p - startp)) = L('\0');    \
1080             newp->next = NULL;                                                \
1081             *lastp = newp;                                                    \
1082             lastp = &newp->next
1083             NEW_PATTERN;
1084           }
1085       }
1086     else if (*p == L('|'))
1087       {
1088         if (level == 0)
1089           {
1090             NEW_PATTERN;
1091             startp = p + 1;
1092           }
1093       }
1094   assert (list != NULL);
1095   assert (p[-1] == L(')'));
1096 #undef NEW_PATTERN
1097
1098   switch (opt)
1099     {
1100     case L('*'):
1101       if (FCT (p, string, string_end, no_leading_period, flags) == 0)
1102         return 0;
1103       /* FALLTHROUGH */
1104
1105     case L('+'):
1106       do
1107         {
1108           for (rs = string; rs <= string_end; ++rs)
1109             /* First match the prefix with the current pattern with the
1110                current pattern.  */
1111             if (FCT (list->str, string, rs, no_leading_period,
1112                      flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0
1113                 /* This was successful.  Now match the rest with the rest
1114                    of the pattern.  */
1115                 && (FCT (p, rs, string_end,
1116                          rs == string
1117                          ? no_leading_period
1118                          : rs[-1] == '/' && NO_LEADING_PERIOD (flags) ? 1 : 0,
1119                          flags & FNM_FILE_NAME
1120                          ? flags : flags & ~FNM_PERIOD) == 0
1121                     /* This didn't work.  Try the whole pattern.  */
1122                     || (rs != string
1123                         && FCT (pattern - 1, rs, string_end,
1124                                 rs == string
1125                                 ? no_leading_period
1126                                 : (rs[-1] == '/' && NO_LEADING_PERIOD (flags)
1127                                    ? 1 : 0),
1128                                 flags & FNM_FILE_NAME
1129                                 ? flags : flags & ~FNM_PERIOD) == 0)))
1130               /* It worked.  Signal success.  */
1131               return 0;
1132         }
1133       while ((list = list->next) != NULL);
1134
1135       /* None of the patterns lead to a match.  */
1136       return FNM_NOMATCH;
1137
1138     case L('?'):
1139       if (FCT (p, string, string_end, no_leading_period, flags) == 0)
1140         return 0;
1141       /* FALLTHROUGH */
1142
1143     case L('@'):
1144       do
1145         /* I cannot believe it but `strcat' is actually acceptable
1146            here.  Match the entire string with the prefix from the
1147            pattern list and the rest of the pattern following the
1148            pattern list.  */
1149         if (FCT (STRCAT (list->str, p), string, string_end,
1150                  no_leading_period,
1151                  flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0)
1152           /* It worked.  Signal success.  */
1153           return 0;
1154       while ((list = list->next) != NULL);
1155
1156       /* None of the patterns lead to a match.  */
1157       return FNM_NOMATCH;
1158
1159     case L('!'):
1160       for (rs = string; rs <= string_end; ++rs)
1161         {
1162           struct patternlist *runp;
1163
1164           for (runp = list; runp != NULL; runp = runp->next)
1165             if (FCT (runp->str, string, rs,  no_leading_period,
1166                      flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0)
1167               break;
1168
1169           /* If none of the patterns matched see whether the rest does.  */
1170           if (runp == NULL
1171               && (FCT (p, rs, string_end,
1172                        rs == string
1173                        ? no_leading_period
1174                        : rs[-1] == '/' && NO_LEADING_PERIOD (flags) ? 1 : 0,
1175                        flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD)
1176                   == 0))
1177             /* This is successful.  */
1178             return 0;
1179         }
1180
1181       /* None of the patterns together with the rest of the pattern
1182          lead to a match.  */
1183       return FNM_NOMATCH;
1184
1185     default:
1186       assert (! "Invalid extended matching operator");
1187       break;
1188     }
1189
1190   return -1;
1191 }
1192
1193
1194 #undef FOLD
1195 #undef CHAR
1196 #undef UCHAR
1197 #undef INT
1198 #undef FCT
1199 #undef EXT
1200 #undef END
1201 #undef MEMPCPY
1202 #undef MEMCHR
1203 #undef STRCOLL
1204 #undef STRLEN
1205 #undef STRCAT
1206 #undef L
1207 #undef BTOWC