posix/fnmatch_loop.c

   1 /* Copyright (C) 1991,1992,1993,1996,1997,1998,1999,2000,2001,2003,2004,2005,
   2    2007 Free Software Foundation, Inc.
   3    This file is part of the GNU C Library.
   4
   5    The GNU C Library is free software; you can redistribute it and/or
   6    modify it under the terms of the GNU Lesser General Public
   7    License as published by the Free Software Foundation; either
   8    version 2.1 of the License, or (at your option) any later version.
   9
  10    The GNU C Library is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13    Lesser General Public License for more details.
  14
  15    You should have received a copy of the GNU Lesser General Public
  16    License along with the GNU C Library; if not, write to the Free
  17    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
  18    02111-1307 USA.  */
  19
  20 struct STRUCT  /* Position saved by FCT when it is given a non-NULL ENDS and reaches a plain `*'.  */
  21 {
  22   const CHAR *pattern;  /* Pattern pointer at that `*' (FCT stores p - 1).  */
  23   const CHAR *string;  /* String pointer at the moment the `*' was reached.  */
  24   int no_leading_period;  /* Value of no_leading_period at that moment.  */
  25 };
  26
  27 /* Match STRING against the filename pattern PATTERN, returning zero if
  28    it matches, nonzero if not.  */
  29 static int FCT (const CHAR *pattern, const CHAR *string,
  30                 const CHAR *string_end, int no_leading_period, int flags,
  31                 struct STRUCT *ends)
  32      internal_function;  /* With non-NULL ENDS, a plain `*' makes FCT store its position in *ENDS and return 0.  */
  33 static int EXT (INT opt, const CHAR *pattern, const CHAR *string,
  34                 const CHAR *string_end, int no_leading_period, int flags)
  35      internal_function;  /* Called by FCT for `?(' and `*(' under FNM_EXTMATCH; any result other than -1 is returned by FCT as is.  */
  36 static const CHAR *END (const CHAR *patternp) internal_function;  /* FCT calls this at a `(' and skips to the result when it differs from PATTERNP.  */
  37
  38 static int
  39 internal_function
  40 FCT (pattern, string, string_end, no_leading_period, flags, ends)
  41      const CHAR *pattern;
  42      const CHAR *string;
  43      const CHAR *string_end;
  44      int no_leading_period;
  45      int flags;
  46      struct STRUCT *ends;
  47 {
  48   register const CHAR *p = pattern, *n = string;
  49   register UCHAR c;
  50 #ifdef _LIBC
  51 # if WIDE_CHAR_VERSION
  52   const char *collseq = (const char *)
  53     _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC);
  54 # else
  55   const UCHAR *collseq = (const UCHAR *)
  56     _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQMB);
  57 # endif
  58 #endif
  59
  60   while ((c = *p++) != L('\0'))
  61     {
  62       int new_no_leading_period = 0;
  63       c = FOLD (c);
  64
  65       switch (c)
  66         {
  67         case L('?'):
  68           if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
  69             {
  70               int res;
  71
  72               res = EXT (c, p, n, string_end, no_leading_period,
  73                          flags);
  74               if (res != -1)
  75                 return res;
  76             }
  77
  78           if (n == string_end)
  79             return FNM_NOMATCH;
  80           else if (*n == L('/') && (flags & FNM_FILE_NAME))
  81             return FNM_NOMATCH;
  82           else if (*n == L('.') && no_leading_period)
  83             return FNM_NOMATCH;
  84           break;
  85
  86         case L('\\'):
  87           if (!(flags & FNM_NOESCAPE))
  88             {
  89               c = *p++;
  90               if (c == L('\0'))
  91                 /* Trailing \ loses.  */
  92                 return FNM_NOMATCH;
  93               c = FOLD (c);
  94             }
  95           if (n == string_end || FOLD ((UCHAR) *n) != c)
  96             return FNM_NOMATCH;
  97           break;
  98
  99         case L('*'):
 100           if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
 101             {
 102               int res;
 103
 104               res = EXT (c, p, n, string_end, no_leading_period,
 105                          flags);
 106               if (res != -1)
 107                 return res;
 108             }
 109           else if (ends != NULL)
 110             {
 111               ends->pattern = p - 1;
 112               ends->string = n;
 113               ends->no_leading_period = no_leading_period;
 114               return 0;
 115             }
 116
 117           if (n != string_end && *n == L('.') && no_leading_period)
 118             return FNM_NOMATCH;
 119
 120           for (c = *p++; c == L('?') || c == L('*'); c = *p++)
 121             {
 122               if (*p == L('(') && (flags & FNM_EXTMATCH) != 0)
 123                 {
 124                   const CHAR *endp = END (p);
 125                   if (endp != p)
 126                     {
 127                       /* This is a pattern.  Skip over it.  */
 128                       p = endp;
 129                       continue;
 130                     }
 131                 }
 132
 133               if (c == L('?'))
 134                 {
 135                   /* A ? needs to match one character.  */
 136                   if (n == string_end)
 137                     /* There isn't another character; no match.  */
 138                     return FNM_NOMATCH;
 139                   else if (*n == L('/')
 140                            && __builtin_expect (flags & FNM_FILE_NAME, 0))
 141                     /* A slash does not match a wildcard under
 142                        FNM_FILE_NAME.  */
 143                     return FNM_NOMATCH;
 144                   else
 145                     /* One character of the string is consumed in matching
 146                        this ? wildcard, so *??? won't match if there are
 147                        less than three characters.  */
 148                     ++n;
 149                 }
 150             }
 151
 152           if (c == L('\0'))
 153             /* The wildcard(s) is/are the last element of the pattern.
 154                If the name is a file name and contains another slash
 155                this means it cannot match, unless the FNM_LEADING_DIR
 156                flag is set.  */
 157             {
 158               int result = (flags & FNM_FILE_NAME) == 0 ? 0 : FNM_NOMATCH;
 159
 160               if (flags & FNM_FILE_NAME)
 161                 {
 162                   if (flags & FNM_LEADING_DIR)
 163                     result = 0;
 164                   else
 165                     {
 166                       if (MEMCHR (n, L('/'), string_end - n) == NULL)
 167                         result = 0;
 168                     }
 169                 }
 170
 171               return result;
 172             }
 173           else
 174             {
 175               const CHAR *endp;
 176               struct STRUCT end;
 177
 178               end.pattern = NULL;
 179               endp = MEMCHR (n, (flags & FNM_FILE_NAME) ? L('/') : L('\0'),
 180                              string_end - n);
 181               if (endp == NULL)
 182                 endp = string_end;
 183
 184               if (c == L('[')
 185                   || (__builtin_expect (flags & FNM_EXTMATCH, 0) != 0
 186                       && (c == L('@') || c == L('+') || c == L('!'))
 187                       && *p == L('(')))
 188                 {
 189                   int flags2 = ((flags & FNM_FILE_NAME)
 190                                 ? flags : (flags & ~FNM_PERIOD));
 191
 192                   for (--p; n < endp; ++n, no_leading_period = 0)
 193                     if (FCT (p, n, string_end, no_leading_period, flags2,
 194                              &end) == 0)
 195                       goto found;
 196                 }
 197               else if (c == L('/') && (flags & FNM_FILE_NAME))
 198                 {
 199                   while (n < string_end && *n != L('/'))
 200                     ++n;
 201                   if (n < string_end && *n == L('/')
 202                       && (FCT (p, n + 1, string_end, flags & FNM_PERIOD, flags,
 203                                NULL) == 0))
 204                     return 0;
 205                 }
 206               else
 207                 {
 208                   int flags2 = ((flags & FNM_FILE_NAME)
 209                                 ? flags : (flags & ~FNM_PERIOD));
 210
 211                   if (c == L('\\') && !(flags & FNM_NOESCAPE))
 212                     c = *p;
 213                   c = FOLD (c);
 214                   for (--p; n < endp; ++n, no_leading_period = 0)
 215                     if (FOLD ((UCHAR) *n) == c
 216                         && (FCT (p, n, string_end, no_leading_period, flags2,
 217                                  &end) == 0))
 218                       {
 219                       found:
 220                         if (end.pattern == NULL)
 221                           return 0;
 222                         break;
 223                       }
 224                   if (end.pattern != NULL)
 225                     {
 226                       p = end.pattern;
 227                       n = end.string;
 228                       no_leading_period = end.no_leading_period;
 229                       continue;
 230                     }
 231                 }
 232             }
 233
 234           /* If we come here no match is possible with the wildcard.  */
 235           return FNM_NOMATCH;
 236
 237         case L('['):
 238           {
 239             /* Nonzero if the sense of the character class is inverted.  */
 240             register int not;
 241             CHAR cold;
 242             UCHAR fn;
 243
 244             if (posixly_correct == 0)
 245               posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
 246
 247             if (n == string_end)
 248               return FNM_NOMATCH;
 249
 250             if (*n == L('.') && no_leading_period)
 251               return FNM_NOMATCH;
 252
 253             if (*n == L('/') && (flags & FNM_FILE_NAME))
 254               /* `/' cannot be matched.  */
 255               return FNM_NOMATCH;
 256
 257             not = (*p == L('!') || (posixly_correct < 0 && *p == L('^')));
 258             if (not)
 259               ++p;
 260
 261             fn = FOLD ((UCHAR) *n);
 262
 263             c = *p++;
 264             for (;;)
 265               {
 266                 if (!(flags & FNM_NOESCAPE) && c == L('\\'))
 267                   {
 268                     if (*p == L('\0'))
 269                       return FNM_NOMATCH;
 270                     c = FOLD ((UCHAR) *p);
 271                     ++p;
 272
 273                     goto normal_bracket;
 274                   }
 275                 else if (c == L('[') && *p == L(':'))
 276                   {
 277                     /* Leave room for the null.  */
 278                     CHAR str[CHAR_CLASS_MAX_LENGTH + 1];
 279                     size_t c1 = 0;
 280 #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
 281                     wctype_t wt;
 282 #endif
 283                     const CHAR *startp = p;
 284
 285                     for (;;)
 286                       {
 287                         if (c1 == CHAR_CLASS_MAX_LENGTH)
 288                           /* The name is too long and therefore the pattern
 289                              is ill-formed.  */
 290                           return FNM_NOMATCH;
 291
 292                         c = *++p;
 293                         if (c == L(':') && p[1] == L(']'))
 294                           {
 295                             p += 2;
 296                             break;
 297                           }
 298                         if (c < L('a') || c >= L('z'))
 299                           {
 300                             /* This cannot possibly be a character class name.
 301                                Match it as a normal range.  */
 302                             p = startp;
 303                             c = L('[');
 304                             goto normal_bracket;
 305                           }
 306                         str[c1++] = c;
 307                       }
 308                     str[c1] = L('\0');
 309
 310 #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
 311                     wt = IS_CHAR_CLASS (str);
 312                     if (wt == 0)
 313                       /* Invalid character class name.  */
 314                       return FNM_NOMATCH;
 315
 316 # if defined _LIBC && ! WIDE_CHAR_VERSION
 317                     /* The following code is glibc specific but does
 318                        there a good job in speeding up the code since
 319                        we can avoid the btowc() call.  */
 320                     if (_ISCTYPE ((UCHAR) *n, wt))
 321                       goto matched;
 322 # else
 323                     if (ISWCTYPE (BTOWC ((UCHAR) *n), wt))
 324                       goto matched;
 325 # endif
 326 #else
 327                     if ((STREQ (str, L("alnum")) && ISALNUM ((UCHAR) *n))
 328                         || (STREQ (str, L("alpha")) && ISALPHA ((UCHAR) *n))
 329                         || (STREQ (str, L("blank")) && ISBLANK ((UCHAR) *n))
 330                         || (STREQ (str, L("cntrl")) && ISCNTRL ((UCHAR) *n))
 331                         || (STREQ (str, L("digit")) && ISDIGIT ((UCHAR) *n))
 332                         || (STREQ (str, L("graph")) && ISGRAPH ((UCHAR) *n))
 333                         || (STREQ (str, L("lower")) && ISLOWER ((UCHAR) *n))
 334                         || (STREQ (str, L("print")) && ISPRINT ((UCHAR) *n))
 335                         || (STREQ (str, L("punct")) && ISPUNCT ((UCHAR) *n))
 336                         || (STREQ (str, L("space")) && ISSPACE ((UCHAR) *n))
 337                         || (STREQ (str, L("upper")) && ISUPPER ((UCHAR) *n))
 338                         || (STREQ (str, L("xdigit")) && ISXDIGIT ((UCHAR) *n)))
 339                       goto matched;
 340 #endif
 341                     c = *p++;
 342                   }
 343 #ifdef _LIBC
 344                 else if (c == L('[') && *p == L('='))
 345                   {
 346                     UCHAR str[1];
 347                     uint32_t nrules =
 348                       _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
 349                     const CHAR *startp = p;
 350
 351                     c = *++p;
 352                     if (c == L('\0'))
 353                       {
 354                         p = startp;
 355                         c = L('[');
 356                         goto normal_bracket;
 357                       }
 358                     str[0] = c;
 359
 360                     c = *++p;
 361                     if (c != L('=') || p[1] != L(']'))
 362                       {
 363                         p = startp;
 364                         c = L('[');
 365                         goto normal_bracket;
 366                       }
 367                     p += 2;
 368
 369                     if (nrules == 0)
 370                       {
 371                         if ((UCHAR) *n == str[0])
 372                           goto matched;
 373                       }
 374                     else
 375                       {
 376                         const int32_t *table;
 377 # if WIDE_CHAR_VERSION
 378                         const int32_t *weights;
 379                         const int32_t *extra;
 380 # else
 381                         const unsigned char *weights;
 382                         const unsigned char *extra;
 383 # endif
 384                         const int32_t *indirect;
 385                         int32_t idx;
 386                         const UCHAR *cp = (const UCHAR *) str;
 387
 388                         /* This #include defines a local function!  */
 389 # if WIDE_CHAR_VERSION
 390 #  include <locale/weightwc.h>
 391 # else
 392 #  include <locale/weight.h>
 393 # endif
 394
 395 # if WIDE_CHAR_VERSION
 396                         table = (const int32_t *)
 397                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
 398                         weights = (const int32_t *)
 399                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
 400                         extra = (const int32_t *)
 401                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
 402                         indirect = (const int32_t *)
 403                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
 404 # else
 405                         table = (const int32_t *)
 406                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
 407                         weights = (const unsigned char *)
 408                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
 409                         extra = (const unsigned char *)
 410                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
 411                         indirect = (const int32_t *)
 412                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
 413 # endif
 414
 415                         idx = findidx (&cp);
 416                         if (idx != 0)
 417                           {
 418                             /* We found a table entry.  Now see whether the
 419                                character we are currently at has the same
 420                                equivalence class value.  */
 421                             int len = weights[idx];
 422                             int32_t idx2;
 423                             const UCHAR *np = (const UCHAR *) n;
 424
 425                             idx2 = findidx (&np);
 426                             if (idx2 != 0 && len == weights[idx2])
 427                               {
 428                                 int cnt = 0;
 429
 430                                 while (cnt < len
 431                                        && (weights[idx + 1 + cnt]
 432                                            == weights[idx2 + 1 + cnt]))
 433                                   ++cnt;
 434
 435                                 if (cnt == len)
 436                                   goto matched;
 437                               }
 438                           }
 439                       }
 440
 441                     c = *p++;
 442                   }
 443 #endif
 444                 else if (c == L('\0'))
 445                   /* [ (unterminated) loses.  */
 446                   return FNM_NOMATCH;
 447                 else
 448                   {
 449                     int is_range = 0;
 450
 451 #ifdef _LIBC
 452                     int is_seqval = 0;
 453
 454                     if (c == L('[') && *p == L('.'))
 455                       {
 456                         uint32_t nrules =
 457                           _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
 458                         const CHAR *startp = p;
 459                         size_t c1 = 0;
 460
 461                         while (1)
 462                           {
 463                             c = *++p;
 464                             if (c == L('.') && p[1] == L(']'))
 465                               {
 466                                 p += 2;
 467                                 break;
 468                               }
 469                             if (c == '\0')
 470                               return FNM_NOMATCH;
 471                             ++c1;
 472                           }
 473
 474                         /* We have to handle the symbols differently in
 475                            ranges since then the collation sequence is
 476                            important.  */
 477                         is_range = *p == L('-') && p[1] != L('\0');
 478
 479                         if (nrules == 0)
 480                           {
 481                             /* There are no names defined in the collation
 482                                data.  Therefore we only accept the trivial
 483                                names consisting of the character itself.  */
 484                             if (c1 != 1)
 485                               return FNM_NOMATCH;
 486
 487                             if (!is_range && *n == startp[1])
 488                               goto matched;
 489
 490                             cold = startp[1];
 491                             c = *p++;
 492                           }
 493                         else
 494                           {
 495                             int32_t table_size;
 496                             const int32_t *symb_table;
 497 # ifdef WIDE_CHAR_VERSION
 498                             char str[c1];
 499                             unsigned int strcnt;
 500 # else
 501 #  define str (startp + 1)
 502 # endif
 503                             const unsigned char *extra;
 504                             int32_t idx;
 505                             int32_t elem;
 506                             int32_t second;
 507                             int32_t hash;
 508
 509 # ifdef WIDE_CHAR_VERSION
 510                             /* We have to convert the name to a single-byte
 511                                string.  This is possible since the names
 512                                consist of ASCII characters and the internal
 513                                representation is UCS4.  */
 514                             for (strcnt = 0; strcnt < c1; ++strcnt)
 515                               str[strcnt] = startp[1 + strcnt];
 516 #endif
 517
 518                             table_size =
 519                               _NL_CURRENT_WORD (LC_COLLATE,
 520                                                 _NL_COLLATE_SYMB_HASH_SIZEMB);
 521                             symb_table = (const int32_t *)
 522                               _NL_CURRENT (LC_COLLATE,
 523                                            _NL_COLLATE_SYMB_TABLEMB);
 524                             extra = (const unsigned char *)
 525                               _NL_CURRENT (LC_COLLATE,
 526                                            _NL_COLLATE_SYMB_EXTRAMB);
 527
 528                             /* Locate the character in the hashing table.  */
 529                             hash = elem_hash (str, c1);
 530
 531                             idx = 0;
 532                             elem = hash % table_size;
 533                             if (symb_table[2 * elem] != 0)
 534                               {
 535                                 second = hash % (table_size - 2) + 1;
 536
 537                                 do
 538                                   {
 539                                     /* First compare the hashing value.  */
 540                                     if (symb_table[2 * elem] == hash
 541                                         && (c1
 542                                             == extra[symb_table[2 * elem + 1]])
 543                                         && memcmp (str,
 544                                                    &extra[symb_table[2 * elem
 545                                                                      + 1]
 546                                                           + 1], c1) == 0)
 547                                       {
 548                                         /* Yep, this is the entry.  */
 549                                         idx = symb_table[2 * elem + 1];
 550                                         idx += 1 + extra[idx];
 551                                         break;
 552                                       }
 553
 554                                     /* Next entry.  */
 555                                     elem += second;
 556                                   }
 557                                 while (symb_table[2 * elem] != 0);
 558                               }
 559
 560                             if (symb_table[2 * elem] != 0)
 561                               {
 562                                 /* Compare the byte sequence but only if
 563                                    this is not part of a range.  */
 564 # ifdef WIDE_CHAR_VERSION
 565                                 int32_t *wextra;
 566
 567                                 idx += 1 + extra[idx];
 568                                 /* Adjust for the alignment.  */
 569                                 idx = (idx + 3) & ~3;
 570
 571                                 wextra = (int32_t *) &extra[idx + 4];
 572 # endif
 573
 574                                 if (! is_range)
 575                                   {
 576 # ifdef WIDE_CHAR_VERSION
 577                                     for (c1 = 0;
 578                                          (int32_t) c1 < wextra[idx];
 579                                          ++c1)
 580                                       if (n[c1] != wextra[1 + c1])
 581                                         break;
 582
 583                                     if ((int32_t) c1 == wextra[idx])
 584                                       goto matched;
 585 # else
 586                                     for (c1 = 0; c1 < extra[idx]; ++c1)
 587                                       if (n[c1] != extra[1 + c1])
 588                                         break;
 589
 590                                     if (c1 == extra[idx])
 591                                       goto matched;
 592 # endif
 593                                   }
 594
 595                                 /* Get the collation sequence value.  */
 596                                 is_seqval = 1;
 597 # ifdef WIDE_CHAR_VERSION
 598                                 cold = wextra[1 + wextra[idx]];
 599 # else
 600                                 /* Adjust for the alignment.  */
 601                                 idx += 1 + extra[idx];
 602                                 idx = (idx + 3) & ~4;
 603                                 cold = *((int32_t *) &extra[idx]);
 604 # endif
 605
 606                                 c = *p++;
 607                               }
 608                             else if (c1 == 1)
 609                               {
 610                                 /* No valid character.  Match it as a
 611                                    single byte.  */
 612                                 if (!is_range && *n == str[0])
 613                                   goto matched;
 614
 615                                 cold = str[0];
 616                                 c = *p++;
 617                               }
 618                             else
 619                               return FNM_NOMATCH;
 620                           }
 621                       }
 622                     else
 623 # undef str
 624 #endif
 625                       {
 626                         c = FOLD (c);
 627                       normal_bracket:
 628
 629                         /* We have to handle the symbols differently in
 630                            ranges since then the collation sequence is
 631                            important.  */
 632                         is_range = (*p == L('-') && p[1] != L('\0')
 633                                     && p[1] != L(']'));
 634
 635                         if (!is_range && c == fn)
 636                           goto matched;
 637
 638                         /* This is needed if we goto normal_bracket; from
 639                            outside of is_seqval's scope.  */
 640                         is_seqval = 0;
 641                         cold = c;
 642                         c = *p++;
 643                       }
 644
 645                     if (c == L('-') && *p != L(']'))
 646                       {
 647 #if _LIBC
 648                         /* We have to find the collation sequence
 649                            value for C.  Collation sequence is nothing
 650                            we can regularly access.  The sequence
 651                            value is defined by the order in which the
 652                            definitions of the collation values for the
 653                            various characters appear in the source
 654                            file.  A strange concept, nowhere
 655                            documented.  */
 656                         uint32_t fcollseq;
 657                         uint32_t lcollseq;
 658                         UCHAR cend = *p++;
 659
 660 # ifdef WIDE_CHAR_VERSION
 661                         /* Search in the `names' array for the characters.  */
 662                         fcollseq = __collseq_table_lookup (collseq, fn);
 663                         if (fcollseq == ~((uint32_t) 0))
 664                           /* XXX We don't know anything about the character
 665                              we are supposed to match.  This means we are
 666                              failing.  */
 667                           goto range_not_matched;
 668
 669                         if (is_seqval)
 670                           lcollseq = cold;
 671                         else
 672                           lcollseq = __collseq_table_lookup (collseq, cold);
 673 # else
 674                         fcollseq = collseq[fn];
 675                         lcollseq = is_seqval ? cold : collseq[(UCHAR) cold];
 676 # endif
 677
 678                         is_seqval = 0;
 679                         if (cend == L('[') && *p == L('.'))
 680                           {
 681                             uint32_t nrules =
 682                               _NL_CURRENT_WORD (LC_COLLATE,
 683                                                 _NL_COLLATE_NRULES);
 684                             const CHAR *startp = p;
 685                             size_t c1 = 0;
 686
 687                             while (1)
 688                               {
 689                                 c = *++p;
 690                                 if (c == L('.') && p[1] == L(']'))
 691                                   {
 692                                     p += 2;
 693                                     break;
 694                                   }
 695                                 if (c == '\0')
 696                                   return FNM_NOMATCH;
 697                                 ++c1;
 698                               }
 699
 700                             if (nrules == 0)
 701                               {
 702                                 /* There are no names defined in the
 703                                    collation data.  Therefore we only
 704                                    accept the trivial names consisting
 705                                    of the character itself.  */
 706                                 if (c1 != 1)
 707                                   return FNM_NOMATCH;
 708
 709                                 cend = startp[1];
 710                               }
 711                             else
 712                               {
 713                                 int32_t table_size;
 714                                 const int32_t *symb_table;
 715 # ifdef WIDE_CHAR_VERSION
 716                                 char str[c1];
 717                                 unsigned int strcnt;
 718 # else
 719 #  define str (startp + 1)
 720 # endif
 721                                 const unsigned char *extra;
 722                                 int32_t idx;
 723                                 int32_t elem;
 724                                 int32_t second;
 725                                 int32_t hash;
 726
 727 # ifdef WIDE_CHAR_VERSION
 728                                 /* We have to convert the name to a single-byte
 729                                    string.  This is possible since the names
 730                                    consist of ASCII characters and the internal
 731                                    representation is UCS4.  */
 732                                 for (strcnt = 0; strcnt < c1; ++strcnt)
 733                                   str[strcnt] = startp[1 + strcnt];
 734 # endif
 735
 736                                 table_size =
 737                                   _NL_CURRENT_WORD (LC_COLLATE,
 738                                                     _NL_COLLATE_SYMB_HASH_SIZEMB);
 739                                 symb_table = (const int32_t *)
 740                                   _NL_CURRENT (LC_COLLATE,
 741                                                _NL_COLLATE_SYMB_TABLEMB);
 742                                 extra = (const unsigned char *)
 743                                   _NL_CURRENT (LC_COLLATE,
 744                                                _NL_COLLATE_SYMB_EXTRAMB);
 745
 746                                 /* Locate the character in the hashing
 747                                    table.  */
 748                                 hash = elem_hash (str, c1);
 749
 750                                 idx = 0;
 751                                 elem = hash % table_size;
 752                                 if (symb_table[2 * elem] != 0)
 753                                   {
 754                                     second = hash % (table_size - 2) + 1;
 755
 756                                     do
 757                                       {
 758                                         /* First compare the hashing value.  */
 759                                         if (symb_table[2 * elem] == hash
 760                                             && (c1
 761                                                 == extra[symb_table[2 * elem + 1]])
 762                                             && memcmp (str,
 763                                                        &extra[symb_table[2 * elem + 1]
 764                                                               + 1], c1) == 0)
 765                                           {
 766                                             /* Yep, this is the entry.  */
 767                                             idx = symb_table[2 * elem + 1];
 768                                             idx += 1 + extra[idx];
 769                                             break;
 770                                           }
 771
 772                                         /* Next entry.  */
 773                                         elem += second;
 774                                       }
 775                                     while (symb_table[2 * elem] != 0);
 776                                   }
 777
 778                                 if (symb_table[2 * elem] != 0)
 779                                   {
 780                                     /* Compare the byte sequence but only if
 781                                        this is not part of a range.  */
 782 # ifdef WIDE_CHAR_VERSION
 783                                     int32_t *wextra;
 784
 785                                     idx += 1 + extra[idx];
 786                                     /* Adjust for the alignment.  */
 787                                     idx = (idx + 3) & ~4;
 788
 789                                     wextra = (int32_t *) &extra[idx + 4];
 790 # endif
 791                                     /* Get the collation sequence value.  */
 792                                     is_seqval = 1;
 793 # ifdef WIDE_CHAR_VERSION
 794                                     cend = wextra[1 + wextra[idx]];
 795 # else
 796                                     /* Adjust for the alignment.  */
 797                                     idx += 1 + extra[idx];
 798                                     idx = (idx + 3) & ~4;
 799                                     cend = *((int32_t *) &extra[idx]);
 800 # endif
 801                                   }
 802                                 else if (symb_table[2 * elem] != 0 && c1 == 1)
 803                                   {
 804                                     cend = str[0];
 805                                     c = *p++;
 806                                   }
 807                                 else
 808                                   return FNM_NOMATCH;
 809                               }
 810 # undef str
 811                           }
 812                         else
 813                           {
 814                             if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
 815                               cend = *p++;
 816                             if (cend == L('\0'))
 817                               return FNM_NOMATCH;
 818                             cend = FOLD (cend);
 819                           }
 820
 821                         /* XXX It is not entirely clear to me how to handle
 822                            characters which are not mentioned in the
 823                            collation specification.  */
 824                         if (
 825 # ifdef WIDE_CHAR_VERSION
 826                             lcollseq == 0xffffffff ||
 827 # endif
 828                             lcollseq <= fcollseq)
 829                           {
 830                             /* We have to look at the upper bound.  */
 831                             uint32_t hcollseq;
 832
 833                             if (is_seqval)
 834                               hcollseq = cend;
 835                             else
 836                               {
 837 # ifdef WIDE_CHAR_VERSION
 838                                 hcollseq =
 839                                   __collseq_table_lookup (collseq, cend);
 840                                 if (hcollseq == ~((uint32_t) 0))
 841                                   {
 842                                     /* Hum, no information about the upper
 843                                        bound.  The matching succeeds if the
 844                                        lower bound is matched exactly.  */
 845                                     if (lcollseq != fcollseq)
 846                                       goto range_not_matched;
 847
 848                                     goto matched;
 849                                   }
 850 # else
 851                                 hcollseq = collseq[cend];
 852 # endif
 853                               }
 854
 855                             if (lcollseq <= hcollseq && fcollseq <= hcollseq)
 856                               goto matched;
 857                           }
 858 # ifdef WIDE_CHAR_VERSION
 859                       range_not_matched:
 860 # endif
 861 #else
 862                         /* We use a boring value comparison of the character
 863                            values.  This is better than comparing using
 864                            `strcoll' since the latter would have surprising
 865                            and sometimes fatal consequences.  */
 866                         UCHAR cend = *p++;
 867
 868                         if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
 869                           cend = *p++;
 870                         if (cend == L('\0'))
 871                           return FNM_NOMATCH;
 872
 873                         /* It is a range.  */
 874                         if (cold <= fn && fn <= cend)
 875                           goto matched;
 876 #endif
 877
 878                         c = *p++;
 879                       }
 880                   }
 881
 882                 if (c == L(']'))
 883                   break;
 884               }
 885
 886             if (!not)
 887               return FNM_NOMATCH;
 888             break;
 889
 890           matched:
 891             /* Skip the rest of the [...] that already matched.  */
 892             do
 893               {
 894               ignore_next:
 895                 c = *p++;
 896
 897                 if (c == L('\0'))
 898                   /* [... (unterminated) loses.  */
 899                   return FNM_NOMATCH;
 900
 901                 if (!(flags & FNM_NOESCAPE) && c == L('\\'))
 902                   {
 903                     if (*p == L('\0'))
 904                       return FNM_NOMATCH;
 905                     /* XXX 1003.2d11 is unclear if this is right.  */
 906                     ++p;
 907                   }
 908                 else if (c == L('[') && *p == L(':'))
 909                   {
 910                     int c1 = 0;
 911                     const CHAR *startp = p;
 912
 913                     while (1)
 914                       {
 915                         c = *++p;
 916                         if (++c1 == CHAR_CLASS_MAX_LENGTH)
 917                           return FNM_NOMATCH;
 918
 919                         if (*p == L(':') && p[1] == L(']'))
 920                           break;
 921
 922                         if (c < L('a') || c >= L('z'))
 923                           {
 924                             p = startp;
 925                             goto ignore_next;
 926                           }
 927                       }
 928                     p += 2;
 929                     c = *p++;
 930                   }
 931                 else if (c == L('[') && *p == L('='))
 932                   {
 933                     c = *++p;
 934                     if (c == L('\0'))
 935                       return FNM_NOMATCH;
 936                     c = *++p;
 937                     if (c != L('=') || p[1] != L(']'))
 938                       return FNM_NOMATCH;
 939                     p += 2;
 940                     c = *p++;
 941                   }
 942                 else if (c == L('[') && *p == L('.'))
 943                   {
 944                     ++p;
 945                     while (1)
 946                       {
 947                         c = *++p;
 948                         if (c == '\0')
 949                           return FNM_NOMATCH;
 950
 951                         if (*p == L('.') && p[1] == L(']'))
 952                           break;
 953                       }
 954                     p += 2;
 955                     c = *p++;
 956                   }
 957               }
 958             while (c != L(']'));
 959             if (not)
 960               return FNM_NOMATCH;
 961           }
 962           break;
 963
 964         case L('+'):
 965         case L('@'):
 966         case L('!'):
 967           if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
 968             {
 969               int res;
 970
 971               res = EXT (c, p, n, string_end, no_leading_period, flags);
 972               if (res != -1)
 973                 return res;
 974             }
 975           goto normal_match;
 976
 977         case L('/'):
 978           if (NO_LEADING_PERIOD (flags))
 979             {
 980               if (n == string_end || c != (UCHAR) *n)
 981                 return FNM_NOMATCH;
 982
 983               new_no_leading_period = 1;
 984               break;
 985             }
 986           /* FALLTHROUGH */
 987         default:
 988         normal_match:
 989           if (n == string_end || c != FOLD ((UCHAR) *n))
 990             return FNM_NOMATCH;
 991         }
 992
 993       no_leading_period = new_no_leading_period;
 994       ++n;
 995     }
 996
 997   if (n == string_end)
 998     return 0;
 999
1000   if ((flags & FNM_LEADING_DIR) && n != string_end && *n == L('/'))
1001     /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz".  */
1002     return 0;
1003
1004   return FNM_NOMATCH;
1005 }
1006
1007
1008 static const CHAR *
1009 internal_function
1010 END (const CHAR *pattern)
1011 {
1012   const CHAR *p = pattern;
1013
1014   while (1)
1015     if (*++p == L('\0'))
1016       /* This is an invalid pattern.  */
1017       return pattern;
1018     else if (*p == L('['))
1019       {
1020         /* Handle brackets special.  */
1021         if (posixly_correct == 0)
1022           posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
1023
1024         /* Skip the not sign.  We have to recognize it because of a possibly
1025            following ']'.  */
1026         if (*++p == L('!') || (posixly_correct < 0 && *p == L('^')))
1027           ++p;
1028         /* A leading ']' is recognized as such.  */
1029         if (*p == L(']'))
1030           ++p;
1031         /* Skip over all characters of the list.  */
1032         while (*p != L(']'))
1033           if (*p++ == L('\0'))
1034             /* This is no valid pattern.  */
1035             return pattern;
1036       }
1037     else if ((*p == L('?') || *p == L('*') || *p == L('+') || *p == L('@')
1038               || *p == L('!')) && p[1] == L('('))
1039       p = END (p + 1);
1040     else if (*p == L(')'))
1041       break;
1042
1043   return p + 1;
1044 }
1045
1046
1047 static int
1048 internal_function
1049 EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end,
1050      int no_leading_period, int flags)
1051 {
1052   const CHAR *startp;
1053   int level;
1054   struct patternlist
1055   {
1056     struct patternlist *next;
1057     CHAR str[0];
1058   } *list = NULL;
1059   struct patternlist **lastp = &list;
1060   size_t pattern_len = STRLEN (pattern);
1061   const CHAR *p;
1062   const CHAR *rs;
1063
1064   /* Parse the pattern.  Store the individual parts in the list.  */
1065   level = 0;
1066   for (startp = p = pattern + 1; level >= 0; ++p)
1067     if (*p == L('\0'))
1068       /* This is an invalid pattern.  */
1069       return -1;
1070     else if (*p == L('['))
1071       {
1072         /* Handle brackets special.  */
1073         if (posixly_correct == 0)
1074           posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
1075
1076         /* Skip the not sign.  We have to recognize it because of a possibly
1077            following ']'.  */
1078         if (*++p == L('!') || (posixly_correct < 0 && *p == L('^')))
1079           ++p;
1080         /* A leading ']' is recognized as such.  */
1081         if (*p == L(']'))
1082           ++p;
1083         /* Skip over all characters of the list.  */
1084         while (*p != L(']'))
1085           if (*p++ == L('\0'))
1086             /* This is no valid pattern.  */
1087             return -1;
1088       }
1089     else if ((*p == L('?') || *p == L('*') || *p == L('+') || *p == L('@')
1090               || *p == L('!')) && p[1] == L('('))
1091       /* Remember the nesting level.  */
1092       ++level;
1093     else if (*p == L(')'))
1094       {
1095         if (level-- == 0)
1096           {
1097             /* This means we found the end of the pattern.  */
1098 #define NEW_PATTERN \
1099             struct patternlist *newp;                                         \
1100                                                                               \
1101             if (opt == L('?') || opt == L('@'))                               \
1102               newp = alloca (sizeof (struct patternlist)                      \
1103                              + (pattern_len * sizeof (CHAR)));                \
1104             else                                                              \
1105               newp = alloca (sizeof (struct patternlist)                      \
1106                              + ((p - startp + 1) * sizeof (CHAR)));           \
1107             *((CHAR *) MEMPCPY (newp->str, startp, p - startp)) = L('\0');    \
1108             newp->next = NULL;                                                \
1109             *lastp = newp;                                                    \
1110             lastp = &newp->next
1111             NEW_PATTERN;
1112           }
1113       }
1114     else if (*p == L('|'))
1115       {
1116         if (level == 0)
1117           {
1118             NEW_PATTERN;
1119             startp = p + 1;
1120           }
1121       }
1122   assert (list != NULL);
1123   assert (p[-1] == L(')'));
1124 #undef NEW_PATTERN
1125
1126   switch (opt)
1127     {
1128     case L('*'):
1129       if (FCT (p, string, string_end, no_leading_period, flags, NULL) == 0)
1130         return 0;
1131       /* FALLTHROUGH */
1132
1133     case L('+'):
1134       do
1135         {
1136           for (rs = string; rs <= string_end; ++rs)
1137             /* First match the prefix with the current pattern with the
1138                current pattern.  */
1139             if (FCT (list->str, string, rs, no_leading_period,
1140                      flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1141                      NULL) == 0
1142                 /* This was successful.  Now match the rest with the rest
1143                    of the pattern.  */
1144                 && (FCT (p, rs, string_end,
1145                          rs == string
1146                          ? no_leading_period
1147                          : rs[-1] == '/' && NO_LEADING_PERIOD (flags) ? 1 : 0,
1148                          flags & FNM_FILE_NAME
1149                          ? flags : flags & ~FNM_PERIOD, NULL) == 0
1150                     /* This didn't work.  Try the whole pattern.  */
1151                     || (rs != string
1152                         && FCT (pattern - 1, rs, string_end,
1153                                 rs == string
1154                                 ? no_leading_period
1155                                 : (rs[-1] == '/' && NO_LEADING_PERIOD (flags)
1156                                    ? 1 : 0),
1157                                 flags & FNM_FILE_NAME
1158                                 ? flags : flags & ~FNM_PERIOD, NULL) == 0)))
1159               /* It worked.  Signal success.  */
1160               return 0;
1161         }
1162       while ((list = list->next) != NULL);
1163
1164       /* None of the patterns lead to a match.  */
1165       return FNM_NOMATCH;
1166
1167     case L('?'):
1168       if (FCT (p, string, string_end, no_leading_period, flags, NULL) == 0)
1169         return 0;
1170       /* FALLTHROUGH */
1171
1172     case L('@'):
1173       do
1174         /* I cannot believe it but `strcat' is actually acceptable
1175            here.  Match the entire string with the prefix from the
1176            pattern list and the rest of the pattern following the
1177            pattern list.  */
1178         if (FCT (STRCAT (list->str, p), string, string_end,
1179                  no_leading_period,
1180                  flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1181                  NULL) == 0)
1182           /* It worked.  Signal success.  */
1183           return 0;
1184       while ((list = list->next) != NULL);
1185
1186       /* None of the patterns lead to a match.  */
1187       return FNM_NOMATCH;
1188
1189     case L('!'):
1190       for (rs = string; rs <= string_end; ++rs)
1191         {
1192           struct patternlist *runp;
1193
1194           for (runp = list; runp != NULL; runp = runp->next)
1195             if (FCT (runp->str, string, rs,  no_leading_period,
1196                      flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1197                      NULL) == 0)
1198               break;
1199
1200           /* If none of the patterns matched see whether the rest does.  */
1201           if (runp == NULL
1202               && (FCT (p, rs, string_end,
1203                        rs == string
1204                        ? no_leading_period
1205                        : rs[-1] == '/' && NO_LEADING_PERIOD (flags) ? 1 : 0,
1206                        flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1207                        NULL) == 0))
1208             /* This is successful.  */
1209             return 0;
1210         }
1211
1212       /* None of the patterns together with the rest of the pattern
1213          lead to a match.  */
1214       return FNM_NOMATCH;
1215
1216     default:
1217       assert (! "Invalid extended matching operator");
1218       break;
1219     }
1220
1221   return -1;
1222 }
1223
1224
/* Remove the macros this file uses as parameters (character types,
   function names, string helpers).  Presumably this lets the including
   file define a different set and include this file again -- confirm
   against the includer.  */
1225 #undef FOLD
1226 #undef CHAR
1227 #undef UCHAR
1228 #undef INT
1229 #undef FCT
1230 #undef EXT
1231 #undef END
1232 #undef STRUCT
1233 #undef MEMPCPY
1234 #undef MEMCHR
1235 #undef STRCOLL
1236 #undef STRLEN
1237 #undef STRCAT
1238 #undef L
1239 #undef BTOWC