posix/fnmatch_loop.c

   1 /* Copyright (C) 1991,1992,1993,1996,1997,1998,1999,2000,2001,2003,2004
   2         Free Software Foundation, Inc.
   3    This file is part of the GNU C Library.
   4
   5    The GNU C Library is free software; you can redistribute it and/or
   6    modify it under the terms of the GNU Lesser General Public
   7    License as published by the Free Software Foundation; either
   8    version 2.1 of the License, or (at your option) any later version.
   9
  10    The GNU C Library is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13    Lesser General Public License for more details.
  14
  15    You should have received a copy of the GNU Lesser General Public
  16    License along with the GNU C Library; if not, write to the Free
  17    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
  18    02111-1307 USA.  */
  19
  20 /* Match STRING against the filename pattern PATTERN, returning zero if
  21    it matches, nonzero if not.  STRING_END marks the end of STRING;
        FCT compares its position against STRING_END rather than looking
        for a terminating null.  While NO_LEADING_PERIOD is nonzero, a `.'
        at the current position of STRING is rejected by `?', `*' and `['.
        FLAGS holds the FNM_* option bits.  */
  22 static int FCT (const CHAR *pattern, const CHAR *string,
  23                 const CHAR *string_end, int no_leading_period, int flags)
  24      internal_function;
     /* Extended-pattern matcher.  FCT calls it when FNM_EXTMATCH is set and
        a `?' or `*' in the pattern is directly followed by `(', passing that
        pattern character as OPT.  FCT returns the result as its own unless
        it is -1, in which case it carries on with ordinary wildcard
        handling.  NOTE(review): the definition is not visible here; the
        exact meaning of -1 should be confirmed there.  */
  25 static int EXT (INT opt, const CHAR *pattern, const CHAR *string,
  26                 const CHAR *string_end, int no_leading_period, int flags)
  27      internal_function;
     /* FCT calls this with PATTERNP pointing at a `(' that follows `?' or
        `*' while FNM_EXTMATCH is set.  A result different from PATTERNP is
        used as the place to continue scanning the pattern (the parenthesized
        part is skipped); a result equal to PATTERNP makes FCT treat the
        wildcard normally.  */
  28 static const CHAR *END (const CHAR *patternp) internal_function;
  29
  30 static int
  31 internal_function
  32 FCT (pattern, string, string_end, no_leading_period, flags)
  33      const CHAR *pattern;
  34      const CHAR *string;
  35      const CHAR *string_end;
  36      int no_leading_period;
  37      int flags;
  38 {
  39   register const CHAR *p = pattern, *n = string;
  40   register UCHAR c;
  41 #ifdef _LIBC
  42 # if WIDE_CHAR_VERSION
  43   const char *collseq = (const char *)
  44     _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC);
  45 # else
  46   const UCHAR *collseq = (const UCHAR *)
  47     _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQMB);
  48 # endif
  49 #endif
  50
  51   while ((c = *p++) != L('\0'))
  52     {
  53       int new_no_leading_period = 0;
  54       c = FOLD (c);
  55
  56       switch (c)
  57         {
  58         case L('?'):
  59           if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
  60             {
  61               int res;
  62
  63               res = EXT (c, p, n, string_end, no_leading_period,
  64                          flags);
  65               if (res != -1)
  66                 return res;
  67             }
  68
  69           if (n == string_end)
  70             return FNM_NOMATCH;
  71           else if (*n == L('/') && (flags & FNM_FILE_NAME))
  72             return FNM_NOMATCH;
  73           else if (*n == L('.') && no_leading_period)
  74             return FNM_NOMATCH;
  75           break;
  76
  77         case L('\\'):
  78           if (!(flags & FNM_NOESCAPE))
  79             {
  80               c = *p++;
  81               if (c == L('\0'))
  82                 /* Trailing \ loses.  */
  83                 return FNM_NOMATCH;
  84               c = FOLD (c);
  85             }
  86           if (n == string_end || FOLD ((UCHAR) *n) != c)
  87             return FNM_NOMATCH;
  88           break;
  89
  90         case L('*'):
  91           if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
  92             {
  93               int res;
  94
  95               res = EXT (c, p, n, string_end, no_leading_period,
  96                          flags);
  97               if (res != -1)
  98                 return res;
  99             }
 100
 101           if (n != string_end && *n == L('.') && no_leading_period)
 102             return FNM_NOMATCH;
 103
 104           for (c = *p++; c == L('?') || c == L('*'); c = *p++)
 105             {
 106               if (*p == L('(') && (flags & FNM_EXTMATCH) != 0)
 107                 {
 108                   const CHAR *endp = END (p);
 109                   if (endp != p)
 110                     {
 111                       /* This is a pattern.  Skip over it.  */
 112                       p = endp;
 113                       continue;
 114                     }
 115                 }
 116
 117               if (c == L('?'))
 118                 {
 119                   /* A ? needs to match one character.  */
 120                   if (n == string_end)
 121                     /* There isn't another character; no match.  */
 122                     return FNM_NOMATCH;
 123                   else if (*n == L('/')
 124                            && __builtin_expect (flags & FNM_FILE_NAME, 0))
 125                     /* A slash does not match a wildcard under
 126                        FNM_FILE_NAME.  */
 127                     return FNM_NOMATCH;
 128                   else
 129                     /* One character of the string is consumed in matching
 130                        this ? wildcard, so *??? won't match if there are
 131                        less than three characters.  */
 132                     ++n;
 133                 }
 134             }
 135
 136           if (c == L('\0'))
 137             /* The wildcard(s) is/are the last element of the pattern.
 138                If the name is a file name and contains another slash
 139                this means it cannot match, unless the FNM_LEADING_DIR
 140                flag is set.  */
 141             {
 142               int result = (flags & FNM_FILE_NAME) == 0 ? 0 : FNM_NOMATCH;
 143
 144               if (flags & FNM_FILE_NAME)
 145                 {
 146                   if (flags & FNM_LEADING_DIR)
 147                     result = 0;
 148                   else
 149                     {
 150                       if (MEMCHR (n, L('/'), string_end - n) == NULL)
 151                         result = 0;
 152                     }
 153                 }
 154
 155               return result;
 156             }
 157           else
 158             {
 159               const CHAR *endp;
 160
 161               endp = MEMCHR (n, (flags & FNM_FILE_NAME) ? L('/') : L('\0'),
 162                              string_end - n);
 163               if (endp == NULL)
 164                 endp = string_end;
 165
 166               if (c == L('[')
 167                   || (__builtin_expect (flags & FNM_EXTMATCH, 0) != 0
 168                       && (c == L('@') || c == L('+') || c == L('!'))
 169                       && *p == L('(')))
 170                 {
 171                   int flags2 = ((flags & FNM_FILE_NAME)
 172                                 ? flags : (flags & ~FNM_PERIOD));
 173                   int no_leading_period2 = no_leading_period;
 174
 175                   for (--p; n < endp; ++n, no_leading_period2 = 0)
 176                     if (FCT (p, n, string_end, no_leading_period2, flags2)
 177                         == 0)
 178                       return 0;
 179                 }
 180               else if (c == L('/') && (flags & FNM_FILE_NAME))
 181                 {
 182                   while (n < string_end && *n != L('/'))
 183                     ++n;
 184                   if (n < string_end && *n == L('/')
 185                       && (FCT (p, n + 1, string_end, flags & FNM_PERIOD, flags)
 186                           == 0))
 187                     return 0;
 188                 }
 189               else
 190                 {
 191                   int flags2 = ((flags & FNM_FILE_NAME)
 192                                 ? flags : (flags & ~FNM_PERIOD));
 193                   int no_leading_period2 = no_leading_period;
 194
 195                   if (c == L('\\') && !(flags & FNM_NOESCAPE))
 196                     c = *p;
 197                   c = FOLD (c);
 198                   for (--p; n < endp; ++n, no_leading_period2 = 0)
 199                     if (FOLD ((UCHAR) *n) == c
 200                         && (FCT (p, n, string_end, no_leading_period2, flags2)
 201                             == 0))
 202                       return 0;
 203                 }
 204             }
 205
 206           /* If we come here no match is possible with the wildcard.  */
 207           return FNM_NOMATCH;
 208
 209         case L('['):
 210           {
 211             /* Nonzero if the sense of the character class is inverted.  */
 212             register int not;
 213             CHAR cold;
 214             UCHAR fn;
 215
 216             if (posixly_correct == 0)
 217               posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
 218
 219             if (n == string_end)
 220               return FNM_NOMATCH;
 221
 222             if (*n == L('.') && no_leading_period)
 223               return FNM_NOMATCH;
 224
 225             if (*n == L('/') && (flags & FNM_FILE_NAME))
 226               /* `/' cannot be matched.  */
 227               return FNM_NOMATCH;
 228
 229             not = (*p == L('!') || (posixly_correct < 0 && *p == L('^')));
 230             if (not)
 231               ++p;
 232
 233             fn = FOLD ((UCHAR) *n);
 234
 235             c = *p++;
 236             for (;;)
 237               {
 238                 if (!(flags & FNM_NOESCAPE) && c == L('\\'))
 239                   {
 240                     if (*p == L('\0'))
 241                       return FNM_NOMATCH;
 242                     c = FOLD ((UCHAR) *p);
 243                     ++p;
 244
 245                     goto normal_bracket;
 246                   }
 247                 else if (c == L('[') && *p == L(':'))
 248                   {
 249                     /* Leave room for the null.  */
 250                     CHAR str[CHAR_CLASS_MAX_LENGTH + 1];
 251                     size_t c1 = 0;
 252 #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
 253                     wctype_t wt;
 254 #endif
 255                     const CHAR *startp = p;
 256
 257                     for (;;)
 258                       {
 259                         if (c1 == CHAR_CLASS_MAX_LENGTH)
 260                           /* The name is too long and therefore the pattern
 261                              is ill-formed.  */
 262                           return FNM_NOMATCH;
 263
 264                         c = *++p;
 265                         if (c == L(':') && p[1] == L(']'))
 266                           {
 267                             p += 2;
 268                             break;
 269                           }
 270                         if (c < L('a') || c >= L('z'))
 271                           {
 272                             /* This cannot possibly be a character class name.
 273                                Match it as a normal range.  */
 274                             p = startp;
 275                             c = L('[');
 276                             goto normal_bracket;
 277                           }
 278                         str[c1++] = c;
 279                       }
 280                     str[c1] = L('\0');
 281
 282 #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
 283                     wt = IS_CHAR_CLASS (str);
 284                     if (wt == 0)
 285                       /* Invalid character class name.  */
 286                       return FNM_NOMATCH;
 287
 288 # if defined _LIBC && ! WIDE_CHAR_VERSION
 289                     /* The following code is glibc specific, but it
 290                        does a good job of speeding things up here since
 291                        we can avoid the btowc() call.  */
 292                     if (_ISCTYPE ((UCHAR) *n, wt))
 293                       goto matched;
 294 # else
 295                     if (ISWCTYPE (BTOWC ((UCHAR) *n), wt))
 296                       goto matched;
 297 # endif
 298 #else
 299                     if ((STREQ (str, L("alnum")) && ISALNUM ((UCHAR) *n))
 300                         || (STREQ (str, L("alpha")) && ISALPHA ((UCHAR) *n))
 301                         || (STREQ (str, L("blank")) && ISBLANK ((UCHAR) *n))
 302                         || (STREQ (str, L("cntrl")) && ISCNTRL ((UCHAR) *n))
 303                         || (STREQ (str, L("digit")) && ISDIGIT ((UCHAR) *n))
 304                         || (STREQ (str, L("graph")) && ISGRAPH ((UCHAR) *n))
 305                         || (STREQ (str, L("lower")) && ISLOWER ((UCHAR) *n))
 306                         || (STREQ (str, L("print")) && ISPRINT ((UCHAR) *n))
 307                         || (STREQ (str, L("punct")) && ISPUNCT ((UCHAR) *n))
 308                         || (STREQ (str, L("space")) && ISSPACE ((UCHAR) *n))
 309                         || (STREQ (str, L("upper")) && ISUPPER ((UCHAR) *n))
 310                         || (STREQ (str, L("xdigit")) && ISXDIGIT ((UCHAR) *n)))
 311                       goto matched;
 312 #endif
 313                     c = *p++;
 314                   }
 315 #ifdef _LIBC
 316                 else if (c == L('[') && *p == L('='))
 317                   {
 318                     UCHAR str[1];
 319                     uint32_t nrules =
 320                       _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
 321                     const CHAR *startp = p;
 322
 323                     c = *++p;
 324                     if (c == L('\0'))
 325                       {
 326                         p = startp;
 327                         c = L('[');
 328                         goto normal_bracket;
 329                       }
 330                     str[0] = c;
 331
 332                     c = *++p;
 333                     if (c != L('=') || p[1] != L(']'))
 334                       {
 335                         p = startp;
 336                         c = L('[');
 337                         goto normal_bracket;
 338                       }
 339                     p += 2;
 340
 341                     if (nrules == 0)
 342                       {
 343                         if ((UCHAR) *n == str[0])
 344                           goto matched;
 345                       }
 346                     else
 347                       {
 348                         const int32_t *table;
 349 # if WIDE_CHAR_VERSION
 350                         const int32_t *weights;
 351                         const int32_t *extra;
 352 # else
 353                         const unsigned char *weights;
 354                         const unsigned char *extra;
 355 # endif
 356                         const int32_t *indirect;
 357                         int32_t idx;
 358                         const UCHAR *cp = (const UCHAR *) str;
 359
 360                         /* This #include defines a local function!  */
 361 # if WIDE_CHAR_VERSION
 362 #  include <locale/weightwc.h>
 363 # else
 364 #  include <locale/weight.h>
 365 # endif
 366
 367 # if WIDE_CHAR_VERSION
 368                         table = (const int32_t *)
 369                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
 370                         weights = (const int32_t *)
 371                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
 372                         extra = (const int32_t *)
 373                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
 374                         indirect = (const int32_t *)
 375                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
 376 # else
 377                         table = (const int32_t *)
 378                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
 379                         weights = (const unsigned char *)
 380                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
 381                         extra = (const unsigned char *)
 382                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
 383                         indirect = (const int32_t *)
 384                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
 385 # endif
 386
 387                         idx = findidx (&cp);
 388                         if (idx != 0)
 389                           {
 390                             /* We found a table entry.  Now see whether the
 391                                character we are currently at has the same
 392                                equivalence class value.  */
 393                             int len = weights[idx];
 394                             int32_t idx2;
 395                             const UCHAR *np = (const UCHAR *) n;
 396
 397                             idx2 = findidx (&np);
 398                             if (idx2 != 0 && len == weights[idx2])
 399                               {
 400                                 int cnt = 0;
 401
 402                                 while (cnt < len
 403                                        && (weights[idx + 1 + cnt]
 404                                            == weights[idx2 + 1 + cnt]))
 405                                   ++cnt;
 406
 407                                 if (cnt == len)
 408                                   goto matched;
 409                               }
 410                           }
 411                       }
 412
 413                     c = *p++;
 414                   }
 415 #endif
 416                 else if (c == L('\0'))
 417                   /* [ (unterminated) loses.  */
 418                   return FNM_NOMATCH;
 419                 else
 420                   {
 421                     int is_range = 0;
 422
 423 #ifdef _LIBC
 424                     int is_seqval = 0;
 425
 426                     if (c == L('[') && *p == L('.'))
 427                       {
 428                         uint32_t nrules =
 429                           _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
 430                         const CHAR *startp = p;
 431                         size_t c1 = 0;
 432
 433                         while (1)
 434                           {
 435                             c = *++p;
 436                             if (c == L('.') && p[1] == L(']'))
 437                               {
 438                                 p += 2;
 439                                 break;
 440                               }
 441                             if (c == '\0')
 442                               return FNM_NOMATCH;
 443                             ++c1;
 444                           }
 445
 446                         /* We have to handle the symbols differently in
 447                            ranges since then the collation sequence is
 448                            important.  */
 449                         is_range = *p == L('-') && p[1] != L('\0');
 450
 451                         if (nrules == 0)
 452                           {
 453                             /* There are no names defined in the collation
 454                                data.  Therefore we only accept the trivial
 455                                names consisting of the character itself.  */
 456                             if (c1 != 1)
 457                               return FNM_NOMATCH;
 458
 459                             if (!is_range && *n == startp[1])
 460                               goto matched;
 461
 462                             cold = startp[1];
 463                             c = *p++;
 464                           }
 465                         else
 466                           {
 467                             int32_t table_size;
 468                             const int32_t *symb_table;
 469 # ifdef WIDE_CHAR_VERSION
 470                             char str[c1];
 471                             unsigned int strcnt;
 472 # else
 473 #  define str (startp + 1)
 474 # endif
 475                             const unsigned char *extra;
 476                             int32_t idx;
 477                             int32_t elem;
 478                             int32_t second;
 479                             int32_t hash;
 480
 481 # ifdef WIDE_CHAR_VERSION
 482                             /* We have to convert the name to a single-byte
 483                                string.  This is possible since the names
 484                                consist of ASCII characters and the internal
 485                                representation is UCS4.  */
 486                             for (strcnt = 0; strcnt < c1; ++strcnt)
 487                               str[strcnt] = startp[1 + strcnt];
 488 #endif
 489
 490                             table_size =
 491                               _NL_CURRENT_WORD (LC_COLLATE,
 492                                                 _NL_COLLATE_SYMB_HASH_SIZEMB);
 493                             symb_table = (const int32_t *)
 494                               _NL_CURRENT (LC_COLLATE,
 495                                            _NL_COLLATE_SYMB_TABLEMB);
 496                             extra = (const unsigned char *)
 497                               _NL_CURRENT (LC_COLLATE,
 498                                            _NL_COLLATE_SYMB_EXTRAMB);
 499
 500                             /* Locate the character in the hashing table.  */
 501                             hash = elem_hash (str, c1);
 502
 503                             idx = 0;
 504                             elem = hash % table_size;
 505                             second = hash % (table_size - 2);
 506                             while (symb_table[2 * elem] != 0)
 507                               {
 508                                 /* First compare the hashing value.  */
 509                                 if (symb_table[2 * elem] == hash
 510                                     && c1 == extra[symb_table[2 * elem + 1]]
 511                                     && memcmp (str,
 512                                                &extra[symb_table[2 * elem + 1]
 513                                                      + 1], c1) == 0)
 514                                   {
 515                                     /* Yep, this is the entry.  */
 516                                     idx = symb_table[2 * elem + 1];
 517                                     idx += 1 + extra[idx];
 518                                     break;
 519                                   }
 520
 521                                 /* Next entry.  */
 522                                 elem += second;
 523                               }
 524
 525                             if (symb_table[2 * elem] != 0)
 526                               {
 527                                 /* Compare the byte sequence but only if
 528                                    this is not part of a range.  */
 529 # ifdef WIDE_CHAR_VERSION
 530                                 int32_t *wextra;
 531
 532                                 idx += 1 + extra[idx];
 533                                 /* Adjust for the alignment.  */
 534                                 idx = (idx + 3) & ~3;
 535
 536                                 wextra = (int32_t *) &extra[idx + 4];
 537 # endif
 538
 539                                 if (! is_range)
 540                                   {
 541 # ifdef WIDE_CHAR_VERSION
 542                                     for (c1 = 0;
 543                                          (int32_t) c1 < wextra[idx];
 544                                          ++c1)
 545                                       if (n[c1] != wextra[1 + c1])
 546                                         break;
 547
 548                                     if ((int32_t) c1 == wextra[idx])
 549                                       goto matched;
 550 # else
 551                                     for (c1 = 0; c1 < extra[idx]; ++c1)
 552                                       if (n[c1] != extra[1 + c1])
 553                                         break;
 554
 555                                     if (c1 == extra[idx])
 556                                       goto matched;
 557 # endif
 558                                   }
 559
 560                                 /* Get the collation sequence value.  */
 561                                 is_seqval = 1;
 562 # ifdef WIDE_CHAR_VERSION
 563                                 cold = wextra[1 + wextra[idx]];
 564 # else
 565                                 /* Adjust for the alignment.  */
 566                                 idx += 1 + extra[idx];
 567                                 idx = (idx + 3) & ~4;
 568                                 cold = *((int32_t *) &extra[idx]);
 569 # endif
 570
 571                                 c = *p++;
 572                               }
 573                             else if (c1 == 1)
 574                               {
 575                                 /* No valid character.  Match it as a
 576                                    single byte.  */
 577                                 if (!is_range && *n == str[0])
 578                                   goto matched;
 579
 580                                 cold = str[0];
 581                                 c = *p++;
 582                               }
 583                             else
 584                               return FNM_NOMATCH;
 585                           }
 586                       }
 587                     else
 588 # undef str
 589 #endif
 590                       {
 591                         c = FOLD (c);
 592                       normal_bracket:
 593
 594                         /* We have to handle the symbols differently in
 595                            ranges since then the collation sequence is
 596                            important.  */
 597                         is_range = (*p == L('-') && p[1] != L('\0')
 598                                     && p[1] != L(']'));
 599
 600                         if (!is_range && c == fn)
 601                           goto matched;
 602
 603                         /* This is needed if we goto normal_bracket; from
 604                            outside of is_seqval's scope.  */
 605                         is_seqval = 0;
 606                         cold = c;
 607                         c = *p++;
 608                       }
 609
 610                     if (c == L('-') && *p != L(']'))
 611                       {
 612 #if _LIBC
 613                         /* We have to find the collation sequence
 614                            value for C.  Collation sequence is nothing
 615                            we can regularly access.  The sequence
 616                            value is defined by the order in which the
 617                            definitions of the collation values for the
 618                            various characters appear in the source
 619                            file.  A strange concept, nowhere
 620                            documented.  */
 621                         uint32_t fcollseq;
 622                         uint32_t lcollseq;
 623                         UCHAR cend = *p++;
 624
 625 # ifdef WIDE_CHAR_VERSION
 626                         /* Search in the `names' array for the characters.  */
 627                         fcollseq = __collseq_table_lookup (collseq, fn);
 628                         if (fcollseq == ~((uint32_t) 0))
 629                           /* XXX We don't know anything about the character
 630                              we are supposed to match.  This means we are
 631                              failing.  */
 632                           goto range_not_matched;
 633
 634                         if (is_seqval)
 635                           lcollseq = cold;
 636                         else
 637                           lcollseq = __collseq_table_lookup (collseq, cold);
 638 # else
 639                         fcollseq = collseq[fn];
 640                         lcollseq = is_seqval ? cold : collseq[(UCHAR) cold];
 641 # endif
 642
 643                         is_seqval = 0;
 644                         if (cend == L('[') && *p == L('.'))
 645                           {
 646                             uint32_t nrules =
 647                               _NL_CURRENT_WORD (LC_COLLATE,
 648                                                 _NL_COLLATE_NRULES);
 649                             const CHAR *startp = p;
 650                             size_t c1 = 0;
 651
 652                             while (1)
 653                               {
 654                                 c = *++p;
 655                                 if (c == L('.') && p[1] == L(']'))
 656                                   {
 657                                     p += 2;
 658                                     break;
 659                                   }
 660                                 if (c == '\0')
 661                                   return FNM_NOMATCH;
 662                                 ++c1;
 663                               }
 664
 665                             if (nrules == 0)
 666                               {
 667                                 /* There are no names defined in the
 668                                    collation data.  Therefore we only
 669                                    accept the trivial names consisting
 670                                    of the character itself.  */
 671                                 if (c1 != 1)
 672                                   return FNM_NOMATCH;
 673
 674                                 cend = startp[1];
 675                               }
 676                             else
 677                               {
 678                                 int32_t table_size;
 679                                 const int32_t *symb_table;
 680 # ifdef WIDE_CHAR_VERSION
 681                                 char str[c1];
 682                                 unsigned int strcnt;
 683 # else
 684 #  define str (startp + 1)
 685 # endif
 686                                 const unsigned char *extra;
 687                                 int32_t idx;
 688                                 int32_t elem;
 689                                 int32_t second;
 690                                 int32_t hash;
 691
 692 # ifdef WIDE_CHAR_VERSION
 693                                 /* We have to convert the name to a single-byte
 694                                    string.  This is possible since the names
 695                                    consist of ASCII characters and the internal
 696                                    representation is UCS4.  */
 697                                 for (strcnt = 0; strcnt < c1; ++strcnt)
 698                                   str[strcnt] = startp[1 + strcnt];
 699 # endif
 700
 701                                 table_size =
 702                                   _NL_CURRENT_WORD (LC_COLLATE,
 703                                                     _NL_COLLATE_SYMB_HASH_SIZEMB);
 704                                 symb_table = (const int32_t *)
 705                                   _NL_CURRENT (LC_COLLATE,
 706                                                _NL_COLLATE_SYMB_TABLEMB);
 707                                 extra = (const unsigned char *)
 708                                   _NL_CURRENT (LC_COLLATE,
 709                                                _NL_COLLATE_SYMB_EXTRAMB);
 710
 711                                 /* Locate the character in the hashing
 712                                    table.  */
 713                                 hash = elem_hash (str, c1);
 714
 715                                 idx = 0;
 716                                 elem = hash % table_size;
 717                                 second = hash % (table_size - 2);
 718                                 while (symb_table[2 * elem] != 0)
 719                                   {
 720                                 /* First compare the hashing value.  */
 721                                     if (symb_table[2 * elem] == hash
 722                                         && (c1
 723                                             == extra[symb_table[2 * elem + 1]])
 724                                         && memcmp (str,
 725                                                    &extra[symb_table[2 * elem + 1]
 726                                                          + 1], c1) == 0)
 727                                       {
 728                                         /* Yep, this is the entry.  */
 729                                         idx = symb_table[2 * elem + 1];
 730                                         idx += 1 + extra[idx];
 731                                         break;
 732                                       }
 733
 734                                     /* Next entry.  */
 735                                     elem += second;
 736                                   }
 737
 738                                 if (symb_table[2 * elem] != 0)
 739                                   {
 740                                     /* Compare the byte sequence but only if
 741                                        this is not part of a range.  */
 742 # ifdef WIDE_CHAR_VERSION
 743                                     int32_t *wextra;
 744
 745                                     idx += 1 + extra[idx];
 746                                     /* Adjust for the alignment.  */
 747                                     idx = (idx + 3) & ~4;
 748
 749                                     wextra = (int32_t *) &extra[idx + 4];
 750 # endif
 751                                     /* Get the collation sequence value.  */
 752                                     is_seqval = 1;
 753 # ifdef WIDE_CHAR_VERSION
 754                                     cend = wextra[1 + wextra[idx]];
 755 # else
 756                                     /* Adjust for the alignment.  */
 757                                     idx += 1 + extra[idx];
 758                                     idx = (idx + 3) & ~4;
 759                                     cend = *((int32_t *) &extra[idx]);
 760 # endif
 761                                   }
 762                                 else if (symb_table[2 * elem] != 0 && c1 == 1)
 763                                   {
 764                                     cend = str[0];
 765                                     c = *p++;
 766                                   }
 767                                 else
 768                                   return FNM_NOMATCH;
 769                               }
 770 # undef str
 771                           }
 772                         else
 773                           {
 774                             if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
 775                               cend = *p++;
 776                             if (cend == L('\0'))
 777                               return FNM_NOMATCH;
 778                             cend = FOLD (cend);
 779                           }
 780
 781                         /* XXX It is not entirely clear to me how to handle
 782                            characters which are not mentioned in the
 783                            collation specification.  */
 784                         if (
 785 # ifdef WIDE_CHAR_VERSION
 786                             lcollseq == 0xffffffff ||
 787 # endif
 788                             lcollseq <= fcollseq)
 789                           {
 790                             /* We have to look at the upper bound.  */
 791                             uint32_t hcollseq;
 792
 793                             if (is_seqval)
 794                               hcollseq = cend;
 795                             else
 796                               {
 797 # ifdef WIDE_CHAR_VERSION
 798                                 hcollseq =
 799                                   __collseq_table_lookup (collseq, cend);
 800                                 if (hcollseq == ~((uint32_t) 0))
 801                                   {
 802                                     /* Hum, no information about the upper
 803                                        bound.  The matching succeeds if the
 804                                        lower bound is matched exactly.  */
 805                                     if (lcollseq != fcollseq)
 806                                       goto range_not_matched;
 807
 808                                     goto matched;
 809                                   }
 810 # else
 811                                 hcollseq = collseq[cend];
 812 # endif
 813                               }
 814
 815                             if (lcollseq <= hcollseq && fcollseq <= hcollseq)
 816                               goto matched;
 817                           }
 818 # ifdef WIDE_CHAR_VERSION
 819                       range_not_matched:
 820 # endif
 821 #else
 822                         /* We use a boring value comparison of the character
 823                            values.  This is better than comparing using
 824                            `strcoll' since the latter would have surprising
 825                            and sometimes fatal consequences.  */
 826                         UCHAR cend = *p++;
 827
 828                         if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
 829                           cend = *p++;
 830                         if (cend == L('\0'))
 831                           return FNM_NOMATCH;
 832
 833                         /* It is a range.  */
 834                         if (cold <= fn && fn <= cend)
 835                           goto matched;
 836 #endif
 837
 838                         c = *p++;
 839                       }
 840                   }
 841
 842                 if (c == L(']'))
 843                   break;
 844               }
 845
 846             if (!not)
 847               return FNM_NOMATCH;
 848             break;
 849
 850           matched:
 851             /* Skip the rest of the [...] that already matched.  */
 852             do
 853               {
 854               ignore_next:
 855                 c = *p++;
 856
 857                 if (c == L('\0'))
 858                   /* [... (unterminated) loses.  */
 859                   return FNM_NOMATCH;
 860
 861                 if (!(flags & FNM_NOESCAPE) && c == L('\\'))
 862                   {
 863                     if (*p == L('\0'))
 864                       return FNM_NOMATCH;
 865                     /* XXX 1003.2d11 is unclear if this is right.  */
 866                     ++p;
 867                   }
 868                 else if (c == L('[') && *p == L(':'))
 869                   {
 870                     int c1 = 0;
 871                     const CHAR *startp = p;
 872
 873                     while (1)
 874                       {
 875                         c = *++p;
 876                         if (++c1 == CHAR_CLASS_MAX_LENGTH)
 877                           return FNM_NOMATCH;
 878
 879                         if (*p == L(':') && p[1] == L(']'))
 880                           break;
 881
 882                         if (c < L('a') || c >= L('z'))
 883                           {
 884                             p = startp;
 885                             goto ignore_next;
 886                           }
 887                       }
 888                     p += 2;
 889                     c = *p++;
 890                   }
 891                 else if (c == L('[') && *p == L('='))
 892                   {
 893                     c = *++p;
 894                     if (c == L('\0'))
 895                       return FNM_NOMATCH;
 896                     c = *++p;
 897                     if (c != L('=') || p[1] != L(']'))
 898                       return FNM_NOMATCH;
 899                     p += 2;
 900                     c = *p++;
 901                   }
 902                 else if (c == L('[') && *p == L('.'))
 903                   {
 904                     ++p;
 905                     while (1)
 906                       {
 907                         c = *++p;
 908                         if (c == '\0')
 909                           return FNM_NOMATCH;
 910
 911                         if (*p == L('.') && p[1] == L(']'))
 912                           break;
 913                       }
 914                     p += 2;
 915                     c = *p++;
 916                   }
 917               }
 918             while (c != L(']'));
 919             if (not)
 920               return FNM_NOMATCH;
 921           }
 922           break;
 923
 924         case L('+'):
 925         case L('@'):
 926         case L('!'):
 927           if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
 928             {
 929               int res;
 930
 931               res = EXT (c, p, n, string_end, no_leading_period, flags);
 932               if (res != -1)
 933                 return res;
 934             }
 935           goto normal_match;
 936
 937         case L('/'):
 938           if (NO_LEADING_PERIOD (flags))
 939             {
 940               if (n == string_end || c != (UCHAR) *n)
 941                 return FNM_NOMATCH;
 942
 943               new_no_leading_period = 1;
 944               break;
 945             }
 946           /* FALLTHROUGH */
 947         default:
 948         normal_match:
 949           if (n == string_end || c != FOLD ((UCHAR) *n))
 950             return FNM_NOMATCH;
 951         }
 952
 953       no_leading_period = new_no_leading_period;
 954       ++n;
 955     }
 956
 957   if (n == string_end)
 958     return 0;
 959
 960   if ((flags & FNM_LEADING_DIR) && n != string_end && *n == L('/'))
 961     /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz".  */
 962     return 0;
 963
 964   return FNM_NOMATCH;
 965 }
 966
 967
 968 static const CHAR *
 969 internal_function
 970 END (const CHAR *pattern)
 971 {
 972   const CHAR *p = pattern;
 973
 974   while (1)
 975     if (*++p == L('\0'))
 976       /* This is an invalid pattern.  */
 977       return pattern;
 978     else if (*p == L('['))
 979       {
 980         /* Handle brackets special.  */
 981         if (posixly_correct == 0)
 982           posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
 983
 984         /* Skip the not sign.  We have to recognize it because of a possibly
 985            following ']'.  */
 986         if (*++p == L('!') || (posixly_correct < 0 && *p == L('^')))
 987           ++p;
 988         /* A leading ']' is recognized as such.  */
 989         if (*p == L(']'))
 990           ++p;
 991         /* Skip over all characters of the list.  */
 992         while (*p != L(']'))
 993           if (*p++ == L('\0'))
 994             /* This is no valid pattern.  */
 995             return pattern;
 996       }
 997     else if ((*p == L('?') || *p == L('*') || *p == L('+') || *p == L('@')
 998               || *p == L('!')) && p[1] == L('('))
 999       p = END (p + 1);
1000     else if (*p == L(')'))
1001       break;
1002
1003   return p + 1;
1004 }
1005
1006
1007 static int
1008 internal_function
1009 EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end,
1010      int no_leading_period, int flags)
1011 {
1012   const CHAR *startp;
1013   int level;
1014   struct patternlist
1015   {
1016     struct patternlist *next;
1017     CHAR str[0];
1018   } *list = NULL;
1019   struct patternlist **lastp = &list;
1020   size_t pattern_len = STRLEN (pattern);
1021   const CHAR *p;
1022   const CHAR *rs;
1023
1024   /* Parse the pattern.  Store the individual parts in the list.  */
1025   level = 0;
1026   for (startp = p = pattern + 1; level >= 0; ++p)
1027     if (*p == L('\0'))
1028       /* This is an invalid pattern.  */
1029       return -1;
1030     else if (*p == L('['))
1031       {
1032         /* Handle brackets special.  */
1033         if (posixly_correct == 0)
1034           posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
1035
1036         /* Skip the not sign.  We have to recognize it because of a possibly
1037            following ']'.  */
1038         if (*++p == L('!') || (posixly_correct < 0 && *p == L('^')))
1039           ++p;
1040         /* A leading ']' is recognized as such.  */
1041         if (*p == L(']'))
1042           ++p;
1043         /* Skip over all characters of the list.  */
1044         while (*p != L(']'))
1045           if (*p++ == L('\0'))
1046             /* This is no valid pattern.  */
1047             return -1;
1048       }
1049     else if ((*p == L('?') || *p == L('*') || *p == L('+') || *p == L('@')
1050               || *p == L('!')) && p[1] == L('('))
1051       /* Remember the nesting level.  */
1052       ++level;
1053     else if (*p == L(')'))
1054       {
1055         if (level-- == 0)
1056           {
1057             /* This means we found the end of the pattern.  */
1058 #define NEW_PATTERN \
1059             struct patternlist *newp;                                         \
1060                                                                               \
1061             if (opt == L('?') || opt == L('@'))                               \
1062               newp = alloca (sizeof (struct patternlist)                      \
1063                              + (pattern_len * sizeof (CHAR)));                \
1064             else                                                              \
1065               newp = alloca (sizeof (struct patternlist)                      \
1066                              + ((p - startp + 1) * sizeof (CHAR)));           \
1067             *((CHAR *) MEMPCPY (newp->str, startp, p - startp)) = L('\0');    \
1068             newp->next = NULL;                                                \
1069             *lastp = newp;                                                    \
1070             lastp = &newp->next
1071             NEW_PATTERN;
1072           }
1073       }
1074     else if (*p == L('|'))
1075       {
1076         if (level == 0)
1077           {
1078             NEW_PATTERN;
1079             startp = p + 1;
1080           }
1081       }
1082   assert (list != NULL);
1083   assert (p[-1] == L(')'));
1084 #undef NEW_PATTERN
1085
1086   switch (opt)
1087     {
1088     case L('*'):
1089       if (FCT (p, string, string_end, no_leading_period, flags) == 0)
1090         return 0;
1091       /* FALLTHROUGH */
1092
1093     case L('+'):
1094       do
1095         {
1096           for (rs = string; rs <= string_end; ++rs)
1097             /* First match the prefix with the current pattern with the
1098                current pattern.  */
1099             if (FCT (list->str, string, rs, no_leading_period,
1100                      flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0
1101                 /* This was successful.  Now match the rest with the rest
1102                    of the pattern.  */
1103                 && (FCT (p, rs, string_end,
1104                          rs == string
1105                          ? no_leading_period
1106                          : rs[-1] == '/' && NO_LEADING_PERIOD (flags) ? 1 : 0,
1107                          flags & FNM_FILE_NAME
1108                          ? flags : flags & ~FNM_PERIOD) == 0
1109                     /* This didn't work.  Try the whole pattern.  */
1110                     || (rs != string
1111                         && FCT (pattern - 1, rs, string_end,
1112                                 rs == string
1113                                 ? no_leading_period
1114                                 : (rs[-1] == '/' && NO_LEADING_PERIOD (flags)
1115                                    ? 1 : 0),
1116                                 flags & FNM_FILE_NAME
1117                                 ? flags : flags & ~FNM_PERIOD) == 0)))
1118               /* It worked.  Signal success.  */
1119               return 0;
1120         }
1121       while ((list = list->next) != NULL);
1122
1123       /* None of the patterns lead to a match.  */
1124       return FNM_NOMATCH;
1125
1126     case L('?'):
1127       if (FCT (p, string, string_end, no_leading_period, flags) == 0)
1128         return 0;
1129       /* FALLTHROUGH */
1130
1131     case L('@'):
1132       do
1133         /* I cannot believe it but `strcat' is actually acceptable
1134            here.  Match the entire string with the prefix from the
1135            pattern list and the rest of the pattern following the
1136            pattern list.  */
1137         if (FCT (STRCAT (list->str, p), string, string_end,
1138                  no_leading_period,
1139                  flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0)
1140           /* It worked.  Signal success.  */
1141           return 0;
1142       while ((list = list->next) != NULL);
1143
1144       /* None of the patterns lead to a match.  */
1145       return FNM_NOMATCH;
1146
1147     case L('!'):
1148       for (rs = string; rs <= string_end; ++rs)
1149         {
1150           struct patternlist *runp;
1151
1152           for (runp = list; runp != NULL; runp = runp->next)
1153             if (FCT (runp->str, string, rs,  no_leading_period,
1154                      flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0)
1155               break;
1156
1157           /* If none of the patterns matched see whether the rest does.  */
1158           if (runp == NULL
1159               && (FCT (p, rs, string_end,
1160                        rs == string
1161                        ? no_leading_period
1162                        : rs[-1] == '/' && NO_LEADING_PERIOD (flags) ? 1 : 0,
1163                        flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD)
1164                   == 0))
1165             /* This is successful.  */
1166             return 0;
1167         }
1168
1169       /* None of the patterns together with the rest of the pattern
1170          lead to a match.  */
1171       return FNM_NOMATCH;
1172
1173     default:
1174       assert (! "Invalid extended matching operator");
1175       break;
1176     }
1177
1178   return -1;
1179 }
1180
1181
/* Remove the macros that parameterize the code in this file (character
   types, function names, string helpers, literal prefix).  Presumably
   the including file defines them anew before including this file
   again, e.g. for the wide character variant -- confirm against the
   includer.  */
#undef FOLD
#undef CHAR
#undef UCHAR
#undef INT
#undef FCT
#undef EXT
#undef END
#undef MEMPCPY
#undef MEMCHR
#undef STRCOLL
#undef STRLEN
#undef STRCAT
#undef L
#undef BTOWC