posix/fnmatch_loop.c

   1 /* Copyright (C) 1991-2014 Free Software Foundation, Inc.
   2    This file is part of the GNU C Library.
   3
   4    The GNU C Library is free software; you can redistribute it and/or
   5    modify it under the terms of the GNU Lesser General Public
   6    License as published by the Free Software Foundation; either
   7    version 2.1 of the License, or (at your option) any later version.
   8
   9    The GNU C Library is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  12    Lesser General Public License for more details.
  13
  14    You should have received a copy of the GNU Lesser General Public
  15    License along with the GNU C Library; if not, see
  16    <http://www.gnu.org/licenses/>.  */
  17
  18 #include <stdint.h>
  19
  20 struct STRUCT  /* Resume point FCT stores through ENDS when it stops at a `*'.  */
  21 {
  22   const CHAR *pattern;  /* Pattern position of that `*' (FCT stores p - 1).  */
  23   const CHAR *string;  /* String position reached at that point.  */
  24   int no_leading_period;  /* Value of no_leading_period at that point.  */
  25 };
  26
  27 /* Match STRING against the filename pattern PATTERN, returning zero if
  28    it matches, nonzero if not.  */
  29 static int FCT (const CHAR *pattern, const CHAR *string,
  30                 const CHAR *string_end, int no_leading_period, int flags,
  31                 struct STRUCT *ends, size_t alloca_used)
  32      internal_function;  /* Non-null ENDS: on a plain `*', FCT fills *ENDS and returns 0.  */
  33 static int EXT (INT opt, const CHAR *pattern, const CHAR *string,
  34                 const CHAR *string_end, int no_leading_period, int flags,
  35                 size_t alloca_used)  /* FCT calls this for `?(' and `*(' under FNM_EXTMATCH.  */
  36      internal_function;  /* FCT returns EXT's result unless it is -1.  */
  37 static const CHAR *END (const CHAR *patternp) internal_function;  /* FCT treats a result equal to PATTERNP as "no pattern here"; otherwise it resumes at the result.  */
  38
  39 static int
  40 internal_function
  41 FCT (pattern, string, string_end, no_leading_period, flags, ends, alloca_used)
  42      const CHAR *pattern;
  43      const CHAR *string;
  44      const CHAR *string_end;
  45      int no_leading_period;
  46      int flags;
  47      struct STRUCT *ends;
  48      size_t alloca_used;
  49 {
  50   const CHAR *p = pattern, *n = string;
  51   UCHAR c;
  52 #ifdef _LIBC
  53 # if WIDE_CHAR_VERSION
  54   const char *collseq = (const char *)
  55     _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC);
  56 # else
  57   const UCHAR *collseq = (const UCHAR *)
  58     _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQMB);
  59 # endif
  60 #endif
  61
  62   while ((c = *p++) != L('\0'))
  63     {
  64       int new_no_leading_period = 0;
  65       c = FOLD (c);
  66
  67       switch (c)
  68         {
  69         case L('?'):
  70           if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
  71             {
  72               int res = EXT (c, p, n, string_end, no_leading_period,
  73                              flags, alloca_used);
  74               if (res != -1)
  75                 return res;
  76             }
  77
  78           if (n == string_end)
  79             return FNM_NOMATCH;
  80           else if (*n == L('/') && (flags & FNM_FILE_NAME))
  81             return FNM_NOMATCH;
  82           else if (*n == L('.') && no_leading_period)
  83             return FNM_NOMATCH;
  84           break;
  85
  86         case L('\\'):
  87           if (!(flags & FNM_NOESCAPE))
  88             {
  89               c = *p++;
  90               if (c == L('\0'))
  91                 /* Trailing \ loses.  */
  92                 return FNM_NOMATCH;
  93               c = FOLD (c);
  94             }
  95           if (n == string_end || FOLD ((UCHAR) *n) != c)
  96             return FNM_NOMATCH;
  97           break;
  98
  99         case L('*'):
 100           if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
 101             {
 102               int res = EXT (c, p, n, string_end, no_leading_period,
 103                              flags, alloca_used);
 104               if (res != -1)
 105                 return res;
 106             }
 107           else if (ends != NULL)
 108             {
 109               ends->pattern = p - 1;
 110               ends->string = n;
 111               ends->no_leading_period = no_leading_period;
 112               return 0;
 113             }
 114
 115           if (n != string_end && *n == L('.') && no_leading_period)
 116             return FNM_NOMATCH;
 117
 118           for (c = *p++; c == L('?') || c == L('*'); c = *p++)
 119             {
 120               if (*p == L('(') && (flags & FNM_EXTMATCH) != 0)
 121                 {
 122                   const CHAR *endp = END (p);
 123                   if (endp != p)
 124                     {
 125                       /* This is a pattern.  Skip over it.  */
 126                       p = endp;
 127                       continue;
 128                     }
 129                 }
 130
 131               if (c == L('?'))
 132                 {
 133                   /* A ? needs to match one character.  */
 134                   if (n == string_end)
 135                     /* There isn't another character; no match.  */
 136                     return FNM_NOMATCH;
 137                   else if (*n == L('/')
 138                            && __builtin_expect (flags & FNM_FILE_NAME, 0))
 139                     /* A slash does not match a wildcard under
 140                        FNM_FILE_NAME.  */
 141                     return FNM_NOMATCH;
 142                   else
 143                     /* One character of the string is consumed in matching
 144                        this ? wildcard, so *??? won't match if there are
 145                        less than three characters.  */
 146                     ++n;
 147                 }
 148             }
 149
 150           if (c == L('\0'))
 151             /* The wildcard(s) is/are the last element of the pattern.
 152                If the name is a file name and contains another slash
 153                this means it cannot match, unless the FNM_LEADING_DIR
 154                flag is set.  */
 155             {
 156               int result = (flags & FNM_FILE_NAME) == 0 ? 0 : FNM_NOMATCH;
 157
 158               if (flags & FNM_FILE_NAME)
 159                 {
 160                   if (flags & FNM_LEADING_DIR)
 161                     result = 0;
 162                   else
 163                     {
 164                       if (MEMCHR (n, L('/'), string_end - n) == NULL)
 165                         result = 0;
 166                     }
 167                 }
 168
 169               return result;
 170             }
 171           else
 172             {
 173               const CHAR *endp;
 174               struct STRUCT end;
 175
 176               end.pattern = NULL;
 177               endp = MEMCHR (n, (flags & FNM_FILE_NAME) ? L('/') : L('\0'),
 178                              string_end - n);
 179               if (endp == NULL)
 180                 endp = string_end;
 181
 182               if (c == L('[')
 183                   || (__builtin_expect (flags & FNM_EXTMATCH, 0) != 0
 184                       && (c == L('@') || c == L('+') || c == L('!'))
 185                       && *p == L('(')))
 186                 {
 187                   int flags2 = ((flags & FNM_FILE_NAME)
 188                                 ? flags : (flags & ~FNM_PERIOD));
 189
 190                   for (--p; n < endp; ++n, no_leading_period = 0)
 191                     if (FCT (p, n, string_end, no_leading_period, flags2,
 192                              &end, alloca_used) == 0)
 193                       goto found;
 194                 }
 195               else if (c == L('/') && (flags & FNM_FILE_NAME))
 196                 {
 197                   while (n < string_end && *n != L('/'))
 198                     ++n;
 199                   if (n < string_end && *n == L('/')
 200                       && (FCT (p, n + 1, string_end, flags & FNM_PERIOD, flags,
 201                                NULL, alloca_used) == 0))
 202                     return 0;
 203                 }
 204               else
 205                 {
 206                   int flags2 = ((flags & FNM_FILE_NAME)
 207                                 ? flags : (flags & ~FNM_PERIOD));
 208
 209                   if (c == L('\\') && !(flags & FNM_NOESCAPE))
 210                     c = *p;
 211                   c = FOLD (c);
 212                   for (--p; n < endp; ++n, no_leading_period = 0)
 213                     if (FOLD ((UCHAR) *n) == c
 214                         && (FCT (p, n, string_end, no_leading_period, flags2,
 215                                  &end, alloca_used) == 0))
 216                       {
 217                       found:
 218                         if (end.pattern == NULL)
 219                           return 0;
 220                         break;
 221                       }
 222                   if (end.pattern != NULL)
 223                     {
 224                       p = end.pattern;
 225                       n = end.string;
 226                       no_leading_period = end.no_leading_period;
 227                       continue;
 228                     }
 229                 }
 230             }
 231
 232           /* If we come here no match is possible with the wildcard.  */
 233           return FNM_NOMATCH;
 234
 235         case L('['):
 236           {
 237             /* Nonzero if the sense of the character class is inverted.  */
 238             const CHAR *p_init = p;
 239             const CHAR *n_init = n;
 240             int not;
 241             CHAR cold;
 242             UCHAR fn;
 243
 244             if (posixly_correct == 0)
 245               posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
 246
 247             if (n == string_end)
 248               return FNM_NOMATCH;
 249
 250             if (*n == L('.') && no_leading_period)
 251               return FNM_NOMATCH;
 252
 253             if (*n == L('/') && (flags & FNM_FILE_NAME))
 254               /* `/' cannot be matched.  */
 255               return FNM_NOMATCH;
 256
 257             not = (*p == L('!') || (posixly_correct < 0 && *p == L('^')));
 258             if (not)
 259               ++p;
 260
 261             fn = FOLD ((UCHAR) *n);
 262
 263             c = *p++;
 264             for (;;)
 265               {
 266                 if (!(flags & FNM_NOESCAPE) && c == L('\\'))
 267                   {
 268                     if (*p == L('\0'))
 269                       return FNM_NOMATCH;
 270                     c = FOLD ((UCHAR) *p);
 271                     ++p;
 272
 273                     goto normal_bracket;
 274                   }
 275                 else if (c == L('[') && *p == L(':'))
 276                   {
 277                     /* Leave room for the null.  */
 278                     CHAR str[CHAR_CLASS_MAX_LENGTH + 1];
 279                     size_t c1 = 0;
 280 #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
 281                     wctype_t wt;
 282 #endif
 283                     const CHAR *startp = p;
 284
 285                     for (;;)
 286                       {
 287                         if (c1 == CHAR_CLASS_MAX_LENGTH)
 288                           /* The name is too long and therefore the pattern
 289                              is ill-formed.  */
 290                           return FNM_NOMATCH;
 291
 292                         c = *++p;
 293                         if (c == L(':') && p[1] == L(']'))
 294                           {
 295                             p += 2;
 296                             break;
 297                           }
 298                         if (c < L('a') || c >= L('z'))
 299                           {
 300                             /* This cannot possibly be a character class name.
 301                                Match it as a normal range.  */
 302                             p = startp;
 303                             c = L('[');
 304                             goto normal_bracket;
 305                           }
 306                         str[c1++] = c;
 307                       }
 308                     str[c1] = L('\0');
 309
 310 #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
 311                     wt = IS_CHAR_CLASS (str);
 312                     if (wt == 0)
 313                       /* Invalid character class name.  */
 314                       return FNM_NOMATCH;
 315
 316 # if defined _LIBC && ! WIDE_CHAR_VERSION
 317                     /* The following code is glibc specific, but it
 318                        speeds things up considerably there since
 319                        we can avoid the btowc() call.  */
 320                     if (_ISCTYPE ((UCHAR) *n, wt))
 321                       goto matched;
 322 # else
 323                     if (ISWCTYPE (BTOWC ((UCHAR) *n), wt))
 324                       goto matched;
 325 # endif
 326 #else
 327                     if ((STREQ (str, L("alnum")) && ISALNUM ((UCHAR) *n))
 328                         || (STREQ (str, L("alpha")) && ISALPHA ((UCHAR) *n))
 329                         || (STREQ (str, L("blank")) && ISBLANK ((UCHAR) *n))
 330                         || (STREQ (str, L("cntrl")) && ISCNTRL ((UCHAR) *n))
 331                         || (STREQ (str, L("digit")) && ISDIGIT ((UCHAR) *n))
 332                         || (STREQ (str, L("graph")) && ISGRAPH ((UCHAR) *n))
 333                         || (STREQ (str, L("lower")) && ISLOWER ((UCHAR) *n))
 334                         || (STREQ (str, L("print")) && ISPRINT ((UCHAR) *n))
 335                         || (STREQ (str, L("punct")) && ISPUNCT ((UCHAR) *n))
 336                         || (STREQ (str, L("space")) && ISSPACE ((UCHAR) *n))
 337                         || (STREQ (str, L("upper")) && ISUPPER ((UCHAR) *n))
 338                         || (STREQ (str, L("xdigit")) && ISXDIGIT ((UCHAR) *n)))
 339                       goto matched;
 340 #endif
 341                     c = *p++;
 342                   }
 343 #ifdef _LIBC
 344                 else if (c == L('[') && *p == L('='))
 345                   {
 346                     UCHAR str[1];
 347                     uint32_t nrules =
 348                       _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
 349                     const CHAR *startp = p;
 350
 351                     c = *++p;
 352                     if (c == L('\0'))
 353                       {
 354                         p = startp;
 355                         c = L('[');
 356                         goto normal_bracket;
 357                       }
 358                     str[0] = c;
 359
 360                     c = *++p;
 361                     if (c != L('=') || p[1] != L(']'))
 362                       {
 363                         p = startp;
 364                         c = L('[');
 365                         goto normal_bracket;
 366                       }
 367                     p += 2;
 368
 369                     if (nrules == 0)
 370                       {
 371                         if ((UCHAR) *n == str[0])
 372                           goto matched;
 373                       }
 374                     else
 375                       {
 376                         const int32_t *table;
 377 # if WIDE_CHAR_VERSION
 378                         const int32_t *weights;
 379                         const wint_t *extra;
 380 # else
 381                         const unsigned char *weights;
 382                         const unsigned char *extra;
 383 # endif
 384                         const int32_t *indirect;
 385                         int32_t idx;
 386                         const UCHAR *cp = (const UCHAR *) str;
 387
 388 # if WIDE_CHAR_VERSION
 389                         table = (const int32_t *)
 390                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
 391                         weights = (const int32_t *)
 392                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
 393                         extra = (const wint_t *)
 394                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
 395                         indirect = (const int32_t *)
 396                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
 397 # else
 398                         table = (const int32_t *)
 399                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
 400                         weights = (const unsigned char *)
 401                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
 402                         extra = (const unsigned char *)
 403                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
 404                         indirect = (const int32_t *)
 405                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
 406 # endif
 407
 408                         idx = FINDIDX (table, indirect, extra, &cp, 1);
 409                         if (idx != 0)
 410                           {
 411                             /* We found a table entry.  Now see whether the
 412                                character we are currently at has the same
 413                                equivalence class value.  */
 414                             int len = weights[idx & 0xffffff];
 415                             int32_t idx2;
 416                             const UCHAR *np = (const UCHAR *) n;
 417
 418                             idx2 = FINDIDX (table, indirect, extra,
 419                                             &np, string_end - n);
 420                             if (idx2 != 0
 421                                 && (idx >> 24) == (idx2 >> 24)
 422                                 && len == weights[idx2 & 0xffffff])
 423                               {
 424                                 int cnt = 0;
 425
 426                                 idx &= 0xffffff;
 427                                 idx2 &= 0xffffff;
 428
 429                                 while (cnt < len
 430                                        && (weights[idx + 1 + cnt]
 431                                            == weights[idx2 + 1 + cnt]))
 432                                   ++cnt;
 433
 434                                 if (cnt == len)
 435                                   goto matched;
 436                               }
 437                           }
 438                       }
 439
 440                     c = *p++;
 441                   }
 442 #endif
 443                 else if (c == L('\0'))
 444                   {
 445                     /* [ unterminated, treat as normal character.  */
 446                     p = p_init;
 447                     n = n_init;
 448                     c = L('[');
 449                     goto normal_match;
 450                   }
 451                 else
 452                   {
 453                     int is_range = 0;
 454
 455 #ifdef _LIBC
 456                     int is_seqval = 0;
 457
 458                     if (c == L('[') && *p == L('.'))
 459                       {
 460                         uint32_t nrules =
 461                           _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
 462                         const CHAR *startp = p;
 463                         size_t c1 = 0;
 464
 465                         while (1)
 466                           {
 467                             c = *++p;
 468                             if (c == L('.') && p[1] == L(']'))
 469                               {
 470                                 p += 2;
 471                                 break;
 472                               }
 473                             if (c == '\0')
 474                               return FNM_NOMATCH;
 475                             ++c1;
 476                           }
 477
 478                         /* We have to handle the symbols differently in
 479                            ranges since then the collation sequence is
 480                            important.  */
 481                         is_range = *p == L('-') && p[1] != L('\0');
 482
 483                         if (nrules == 0)
 484                           {
 485                             /* There are no names defined in the collation
 486                                data.  Therefore we only accept the trivial
 487                                names consisting of the character itself.  */
 488                             if (c1 != 1)
 489                               return FNM_NOMATCH;
 490
 491                             if (!is_range && *n == startp[1])
 492                               goto matched;
 493
 494                             cold = startp[1];
 495                             c = *p++;
 496                           }
 497                         else
 498                           {
 499                             int32_t table_size;
 500                             const int32_t *symb_table;
 501 # if WIDE_CHAR_VERSION
 502                             char str[c1];
 503                             unsigned int strcnt;
 504 # else
 505 #  define str (startp + 1)
 506 # endif
 507                             const unsigned char *extra;
 508                             int32_t idx;
 509                             int32_t elem;
 510                             int32_t second;
 511                             int32_t hash;
 512
 513 # if WIDE_CHAR_VERSION
 514                             /* We have to convert the name to a single-byte
 515                                string.  This is possible since the names
 516                                consist of ASCII characters and the internal
 517                                representation is UCS4.  */
 518                             for (strcnt = 0; strcnt < c1; ++strcnt)
 519                               str[strcnt] = startp[1 + strcnt];
 520 #endif
 521
 522                             table_size =
 523                               _NL_CURRENT_WORD (LC_COLLATE,
 524                                                 _NL_COLLATE_SYMB_HASH_SIZEMB);
 525                             symb_table = (const int32_t *)
 526                               _NL_CURRENT (LC_COLLATE,
 527                                            _NL_COLLATE_SYMB_TABLEMB);
 528                             extra = (const unsigned char *)
 529                               _NL_CURRENT (LC_COLLATE,
 530                                            _NL_COLLATE_SYMB_EXTRAMB);
 531
 532                             /* Locate the character in the hashing table.  */
 533                             hash = elem_hash (str, c1);
 534
 535                             idx = 0;
 536                             elem = hash % table_size;
 537                             if (symb_table[2 * elem] != 0)
 538                               {
 539                                 second = hash % (table_size - 2) + 1;
 540
 541                                 do
 542                                   {
 543                                     /* First compare the hashing value.  */
 544                                     if (symb_table[2 * elem] == hash
 545                                         && (c1
 546                                             == extra[symb_table[2 * elem + 1]])
 547                                         && memcmp (str,
 548                                                    &extra[symb_table[2 * elem
 549                                                                      + 1]
 550                                                           + 1], c1) == 0)
 551                                       {
 552                                         /* Yep, this is the entry.  */
 553                                         idx = symb_table[2 * elem + 1];
 554                                         idx += 1 + extra[idx];
 555                                         break;
 556                                       }
 557
 558                                     /* Next entry.  */
 559                                     elem += second;
 560                                   }
 561                                 while (symb_table[2 * elem] != 0);
 562                               }
 563
 564                             if (symb_table[2 * elem] != 0)
 565                               {
 566                                 /* Compare the byte sequence but only if
 567                                    this is not part of a range.  */
 568 # if WIDE_CHAR_VERSION
 569                                 int32_t *wextra;
 570
 571                                 idx += 1 + extra[idx];
 572                                 /* Adjust for the alignment.  */
 573                                 idx = (idx + 3) & ~3;
 574
 575                                 wextra = (int32_t *) &extra[idx + 4];
 576 # endif
 577
 578                                 if (! is_range)
 579                                   {
 580 # if WIDE_CHAR_VERSION
 581                                     for (c1 = 0;
 582                                          (int32_t) c1 < wextra[idx];
 583                                          ++c1)
 584                                       if (n[c1] != wextra[1 + c1])
 585                                         break;
 586
 587                                     if ((int32_t) c1 == wextra[idx])
 588                                       goto matched;
 589 # else
 590                                     for (c1 = 0; c1 < extra[idx]; ++c1)
 591                                       if (n[c1] != extra[1 + c1])
 592                                         break;
 593
 594                                     if (c1 == extra[idx])
 595                                       goto matched;
 596 # endif
 597                                   }
 598
 599                                 /* Get the collation sequence value.  */
 600                                 is_seqval = 1;
 601 # if WIDE_CHAR_VERSION
 602                                 cold = wextra[1 + wextra[idx]];
 603 # else
 604                                 /* Adjust for the alignment.  */
 605                                 idx += 1 + extra[idx];
 606                                 idx = (idx + 3) & ~4;
 607                                 cold = *((int32_t *) &extra[idx]);
 608 # endif
 609
 610                                 c = *p++;
 611                               }
 612                             else if (c1 == 1)
 613                               {
 614                                 /* No valid character.  Match it as a
 615                                    single byte.  */
 616                                 if (!is_range && *n == str[0])
 617                                   goto matched;
 618
 619                                 cold = str[0];
 620                                 c = *p++;
 621                               }
 622                             else
 623                               return FNM_NOMATCH;
 624                           }
 625                       }
 626                     else
 627 # undef str
 628 #endif
 629                       {
 630                         c = FOLD (c);
 631                       normal_bracket:
 632
 633                         /* We have to handle the symbols differently in
 634                            ranges since then the collation sequence is
 635                            important.  */
 636                         is_range = (*p == L('-') && p[1] != L('\0')
 637                                     && p[1] != L(']'));
 638
 639                         if (!is_range && c == fn)
 640                           goto matched;
 641
 642                         /* This is needed if we goto normal_bracket; from
 643                            outside of is_seqval's scope.  */
 644                         is_seqval = 0;
 645                         cold = c;
 646                         c = *p++;
 647                       }
 648
 649                     if (c == L('-') && *p != L(']'))
 650                       {
 651 #if _LIBC
 652                         /* We have to find the collation sequence
 653                            value for C.  Collation sequence is nothing
 654                            we can regularly access.  The sequence
 655                            value is defined by the order in which the
 656                            definitions of the collation values for the
 657                            various characters appear in the source
 658                            file.  A strange concept, nowhere
 659                            documented.  */
 660                         uint32_t fcollseq;
 661                         uint32_t lcollseq;
 662                         UCHAR cend = *p++;
 663
 664 # if WIDE_CHAR_VERSION
 665                         /* Search in the `names' array for the characters.  */
 666                         fcollseq = __collseq_table_lookup (collseq, fn);
 667                         if (fcollseq == ~((uint32_t) 0))
 668                           /* XXX We don't know anything about the character
 669                              we are supposed to match.  This means we are
 670                              failing.  */
 671                           goto range_not_matched;
 672
 673                         if (is_seqval)
 674                           lcollseq = cold;
 675                         else
 676                           lcollseq = __collseq_table_lookup (collseq, cold);
 677 # else
 678                         fcollseq = collseq[fn];
 679                         lcollseq = is_seqval ? cold : collseq[(UCHAR) cold];
 680 # endif
 681
 682                         is_seqval = 0;
 683                         if (cend == L('[') && *p == L('.'))
 684                           {
 685                             uint32_t nrules =
 686                               _NL_CURRENT_WORD (LC_COLLATE,
 687                                                 _NL_COLLATE_NRULES);
 688                             const CHAR *startp = p;
 689                             size_t c1 = 0;
 690
 691                             while (1)
 692                               {
 693                                 c = *++p;
 694                                 if (c == L('.') && p[1] == L(']'))
 695                                   {
 696                                     p += 2;
 697                                     break;
 698                                   }
 699                                 if (c == '\0')
 700                                   return FNM_NOMATCH;
 701                                 ++c1;
 702                               }
 703
 704                             if (nrules == 0)
 705                               {
 706                                 /* There are no names defined in the
 707                                    collation data.  Therefore we only
 708                                    accept the trivial names consisting
 709                                    of the character itself.  */
 710                                 if (c1 != 1)
 711                                   return FNM_NOMATCH;
 712
 713                                 cend = startp[1];
 714                               }
 715                             else
 716                               {
 717                                 int32_t table_size;
 718                                 const int32_t *symb_table;
 719 # if WIDE_CHAR_VERSION
 720                                 char str[c1];
 721                                 unsigned int strcnt;
 722 # else
 723 #  define str (startp + 1)
 724 # endif
 725                                 const unsigned char *extra;
 726                                 int32_t idx;
 727                                 int32_t elem;
 728                                 int32_t second;
 729                                 int32_t hash;
 730
 731 # if WIDE_CHAR_VERSION
 732                                 /* We have to convert the name to a single-byte
 733                                    string.  This is possible since the names
 734                                    consist of ASCII characters and the internal
 735                                    representation is UCS4.  */
 736                                 for (strcnt = 0; strcnt < c1; ++strcnt)
 737                                   str[strcnt] = startp[1 + strcnt];
 738 # endif
 739
 740                                 table_size =
 741                                   _NL_CURRENT_WORD (LC_COLLATE,
 742                                                     _NL_COLLATE_SYMB_HASH_SIZEMB);
 743                                 symb_table = (const int32_t *)
 744                                   _NL_CURRENT (LC_COLLATE,
 745                                                _NL_COLLATE_SYMB_TABLEMB);
 746                                 extra = (const unsigned char *)
 747                                   _NL_CURRENT (LC_COLLATE,
 748                                                _NL_COLLATE_SYMB_EXTRAMB);
 749
 750                                 /* Locate the character in the hashing
 751                                    table.  */
 752                                 hash = elem_hash (str, c1);
 753
 754                                 idx = 0;
 755                                 elem = hash % table_size;
 756                                 if (symb_table[2 * elem] != 0)
 757                                   {
 758                                     second = hash % (table_size - 2) + 1;
 759
 760                                     do
 761                                       {
 762                                         /* First compare the hashing value.  */
 763                                         if (symb_table[2 * elem] == hash
 764                                             && (c1
 765                                                 == extra[symb_table[2 * elem + 1]])
 766                                             && memcmp (str,
 767                                                        &extra[symb_table[2 * elem + 1]
 768                                                               + 1], c1) == 0)
 769                                           {
 770                                             /* Yep, this is the entry.  */
 771                                             idx = symb_table[2 * elem + 1];
 772                                             idx += 1 + extra[idx];
 773                                             break;
 774                                           }
 775
 776                                         /* Next entry.  */
 777                                         elem += second;
 778                                       }
 779                                     while (symb_table[2 * elem] != 0);
 780                                   }
 781
 782                                 if (symb_table[2 * elem] != 0)
 783                                   {
 784                                     /* Compare the byte sequence but only if
 785                                        this is not part of a range.  */
 786 # if WIDE_CHAR_VERSION
 787                                     int32_t *wextra;
 788
 789                                     idx += 1 + extra[idx];
 790                                     /* Adjust for the alignment.  */
 791                                     idx = (idx + 3) & ~4;
 792
 793                                     wextra = (int32_t *) &extra[idx + 4];
 794 # endif
 795                                     /* Get the collation sequence value.  */
 796                                     is_seqval = 1;
 797 # if WIDE_CHAR_VERSION
 798                                     cend = wextra[1 + wextra[idx]];
 799 # else
 800                                     /* Adjust for the alignment.  */
 801                                     idx += 1 + extra[idx];
 802                                     idx = (idx + 3) & ~4;
 803                                     cend = *((int32_t *) &extra[idx]);
 804 # endif
 805                                   }
 806                                 else if (symb_table[2 * elem] != 0 && c1 == 1)
 807                                   {
 808                                     cend = str[0];
 809                                     c = *p++;
 810                                   }
 811                                 else
 812                                   return FNM_NOMATCH;
 813                               }
 814 # undef str
 815                           }
 816                         else
 817                           {
 818                             if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
 819                               cend = *p++;
 820                             if (cend == L('\0'))
 821                               return FNM_NOMATCH;
 822                             cend = FOLD (cend);
 823                           }
 824
 825                         /* XXX It is not entirely clear to me how to handle
 826                            characters which are not mentioned in the
 827                            collation specification.  */
 828                         if (
 829 # if WIDE_CHAR_VERSION
 830                             lcollseq == 0xffffffff ||
 831 # endif
 832                             lcollseq <= fcollseq)
 833                           {
 834                             /* We have to look at the upper bound.  */
 835                             uint32_t hcollseq;
 836
 837                             if (is_seqval)
 838                               hcollseq = cend;
 839                             else
 840                               {
 841 # if WIDE_CHAR_VERSION
 842                                 hcollseq =
 843                                   __collseq_table_lookup (collseq, cend);
 844                                 if (hcollseq == ~((uint32_t) 0))
 845                                   {
 846                                     /* Hum, no information about the upper
 847                                        bound.  The matching succeeds if the
 848                                        lower bound is matched exactly.  */
 849                                     if (lcollseq != fcollseq)
 850                                       goto range_not_matched;
 851
 852                                     goto matched;
 853                                   }
 854 # else
 855                                 hcollseq = collseq[cend];
 856 # endif
 857                               }
 858
 859                             if (lcollseq <= hcollseq && fcollseq <= hcollseq)
 860                               goto matched;
 861                           }
 862 # if WIDE_CHAR_VERSION
 863                       range_not_matched:
 864 # endif
 865 #else
 866                         /* We use a boring value comparison of the character
 867                            values.  This is better than comparing using
 868                            `strcoll' since the latter would have surprising
 869                            and sometimes fatal consequences.  */
 870                         UCHAR cend = *p++;
 871
 872                         if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
 873                           cend = *p++;
 874                         if (cend == L('\0'))
 875                           return FNM_NOMATCH;
 876
 877                         /* It is a range.  */
 878                         if (cold <= fn && fn <= cend)
 879                           goto matched;
 880 #endif
 881
 882                         c = *p++;
 883                       }
 884                   }
 885
 886                 if (c == L(']'))
 887                   break;
 888               }
 889
 890             if (!not)
 891               return FNM_NOMATCH;
 892             break;
 893
 894           matched:
 895             /* Skip the rest of the [...] that already matched.  */
 896             while ((c = *p++) != L (']'))
 897               {
 898                 if (c == L('\0'))
 899                   /* [... (unterminated) loses.  */
 900                   return FNM_NOMATCH;
 901
 902                 if (!(flags & FNM_NOESCAPE) && c == L('\\'))
 903                   {
 904                     if (*p == L('\0'))
 905                       return FNM_NOMATCH;
 906                     /* XXX 1003.2d11 is unclear if this is right.  */
 907                     ++p;
 908                   }
 909                 else if (c == L('[') && *p == L(':'))
 910                   {
 911                     int c1 = 0;
 912                     const CHAR *startp = p;
 913
 914                     while (1)
 915                       {
 916                         c = *++p;
 917                         if (++c1 == CHAR_CLASS_MAX_LENGTH)
 918                           return FNM_NOMATCH;
 919
 920                         if (*p == L(':') && p[1] == L(']'))
 921                           break;
 922
 923                         if (c < L('a') || c >= L('z'))
 924                           {
 925                             p = startp - 2;
 926                             break;
 927                           }
 928                       }
 929                     p += 2;
 930                   }
 931                 else if (c == L('[') && *p == L('='))
 932                   {
 933                     c = *++p;
 934                     if (c == L('\0'))
 935                       return FNM_NOMATCH;
 936                     c = *++p;
 937                     if (c != L('=') || p[1] != L(']'))
 938                       return FNM_NOMATCH;
 939                     p += 2;
 940                   }
 941                 else if (c == L('[') && *p == L('.'))
 942                   {
 943                     ++p;
 944                     while (1)
 945                       {
 946                         c = *++p;
 947                         if (c == '\0')
 948                           return FNM_NOMATCH;
 949
 950                         if (*p == L('.') && p[1] == L(']'))
 951                           break;
 952                       }
 953                     p += 2;
 954                   }
 955               }
 956             if (not)
 957               return FNM_NOMATCH;
 958           }
 959           break;
 960
 961         case L('+'):
 962         case L('@'):
 963         case L('!'):
 964           if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
 965             {
 966               int res = EXT (c, p, n, string_end, no_leading_period, flags,
 967                              alloca_used);
 968               if (res != -1)
 969                 return res;
 970             }
 971           goto normal_match;
 972
 973         case L('/'):
 974           if (NO_LEADING_PERIOD (flags))
 975             {
 976               if (n == string_end || c != (UCHAR) *n)
 977                 return FNM_NOMATCH;
 978
 979               new_no_leading_period = 1;
 980               break;
 981             }
 982           /* FALLTHROUGH */
 983         default:
 984         normal_match:
 985           if (n == string_end || c != FOLD ((UCHAR) *n))
 986             return FNM_NOMATCH;
 987         }
 988
 989       no_leading_period = new_no_leading_period;
 990       ++n;
 991     }
 992
 993   if (n == string_end)
 994     return 0;
 995
 996   if ((flags & FNM_LEADING_DIR) && n != string_end && *n == L('/'))
 997     /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz".  */
 998     return 0;
 999
1000   return FNM_NOMATCH;
1001 }
1002
1003
1004 static const CHAR *
1005 internal_function
1006 END (const CHAR *pattern)
1007 {
1008   const CHAR *p = pattern;
1009
1010   while (1)
1011     if (*++p == L('\0'))
1012       /* This is an invalid pattern.  */
1013       return pattern;
1014     else if (*p == L('['))
1015       {
1016         /* Handle brackets special.  */
1017         if (posixly_correct == 0)
1018           posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
1019
1020         /* Skip the not sign.  We have to recognize it because of a possibly
1021            following ']'.  */
1022         if (*++p == L('!') || (posixly_correct < 0 && *p == L('^')))
1023           ++p;
1024         /* A leading ']' is recognized as such.  */
1025         if (*p == L(']'))
1026           ++p;
1027         /* Skip over all characters of the list.  */
1028         while (*p != L(']'))
1029           if (*p++ == L('\0'))
1030             /* This is no valid pattern.  */
1031             return pattern;
1032       }
1033     else if ((*p == L('?') || *p == L('*') || *p == L('+') || *p == L('@')
1034               || *p == L('!')) && p[1] == L('('))
1035       p = END (p + 1);
1036     else if (*p == L(')'))
1037       break;
1038
1039   return p + 1;
1040 }
1041
1042
1043 static int
1044 internal_function
1045 EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end,
1046      int no_leading_period, int flags, size_t alloca_used)
1047 {
1048   const CHAR *startp;
1049   int level;
1050   struct patternlist
1051   {
1052     struct patternlist *next;
1053     CHAR malloced;
1054     CHAR str[0];
1055   } *list = NULL;
1056   struct patternlist **lastp = &list;
1057   size_t pattern_len = STRLEN (pattern);
1058   int any_malloced = 0;
1059   const CHAR *p;
1060   const CHAR *rs;
1061   int retval = 0;
1062
1063   /* Parse the pattern.  Store the individual parts in the list.  */
1064   level = 0;
1065   for (startp = p = pattern + 1; level >= 0; ++p)
1066     if (*p == L('\0'))
1067       {
1068         /* This is an invalid pattern.  */
1069         retval = -1;
1070         goto out;
1071       }
1072     else if (*p == L('['))
1073       {
1074         /* Handle brackets special.  */
1075         if (posixly_correct == 0)
1076           posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
1077
1078         /* Skip the not sign.  We have to recognize it because of a possibly
1079            following ']'.  */
1080         if (*++p == L('!') || (posixly_correct < 0 && *p == L('^')))
1081           ++p;
1082         /* A leading ']' is recognized as such.  */
1083         if (*p == L(']'))
1084           ++p;
1085         /* Skip over all characters of the list.  */
1086         while (*p != L(']'))
1087           if (*p++ == L('\0'))
1088             {
1089               /* This is no valid pattern.  */
1090               retval = -1;
1091               goto out;
1092             }
1093       }
1094     else if ((*p == L('?') || *p == L('*') || *p == L('+') || *p == L('@')
1095               || *p == L('!')) && p[1] == L('('))
1096       /* Remember the nesting level.  */
1097       ++level;
1098     else if (*p == L(')'))
1099       {
1100         if (level-- == 0)
1101           {
1102             /* This means we found the end of the pattern.  */
1103 #define NEW_PATTERN \
1104             struct patternlist *newp;                                         \
1105             size_t slen = (opt == L('?') || opt == L('@')                     \
1106                            ? pattern_len : (p - startp + 1));                 \
1107             slen = sizeof (struct patternlist) + (slen * sizeof (CHAR));      \
1108             int malloced = ! __libc_use_alloca (alloca_used + slen);          \
1109             if (__builtin_expect (malloced, 0))                               \
1110               {                                                               \
1111                 newp = malloc (slen);                                         \
1112                 if (newp == NULL)                                             \
1113                   {                                                           \
1114                     retval = -2;                                              \
1115                     goto out;                                                 \
1116                   }                                                           \
1117                 any_malloced = 1;                                             \
1118               }                                                               \
1119             else                                                              \
1120               newp = alloca_account (slen, alloca_used);                      \
1121             newp->next = NULL;                                                \
1122             newp->malloced = malloced;                                        \
1123             *((CHAR *) MEMPCPY (newp->str, startp, p - startp)) = L('\0');    \
1124             *lastp = newp;                                                    \
1125             lastp = &newp->next
1126             NEW_PATTERN;
1127           }
1128       }
1129     else if (*p == L('|'))
1130       {
1131         if (level == 0)
1132           {
1133             NEW_PATTERN;
1134             startp = p + 1;
1135           }
1136       }
1137   assert (list != NULL);
1138   assert (p[-1] == L(')'));
1139 #undef NEW_PATTERN
1140
1141   switch (opt)
1142     {
1143     case L('*'):
1144       if (FCT (p, string, string_end, no_leading_period, flags, NULL,
1145                alloca_used) == 0)
1146         goto success;
1147       /* FALLTHROUGH */
1148
1149     case L('+'):
1150       do
1151         {
1152           for (rs = string; rs <= string_end; ++rs)
1153             /* First match the prefix with the current pattern with the
1154                current pattern.  */
1155             if (FCT (list->str, string, rs, no_leading_period,
1156                      flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1157                      NULL, alloca_used) == 0
1158                 /* This was successful.  Now match the rest with the rest
1159                    of the pattern.  */
1160                 && (FCT (p, rs, string_end,
1161                          rs == string
1162                          ? no_leading_period
1163                          : rs[-1] == '/' && NO_LEADING_PERIOD (flags) ? 1 : 0,
1164                          flags & FNM_FILE_NAME
1165                          ? flags : flags & ~FNM_PERIOD, NULL, alloca_used) == 0
1166                     /* This didn't work.  Try the whole pattern.  */
1167                     || (rs != string
1168                         && FCT (pattern - 1, rs, string_end,
1169                                 rs == string
1170                                 ? no_leading_period
1171                                 : (rs[-1] == '/' && NO_LEADING_PERIOD (flags)
1172                                    ? 1 : 0),
1173                                 flags & FNM_FILE_NAME
1174                                 ? flags : flags & ~FNM_PERIOD, NULL,
1175                                 alloca_used) == 0)))
1176               /* It worked.  Signal success.  */
1177               goto success;
1178         }
1179       while ((list = list->next) != NULL);
1180
1181       /* None of the patterns lead to a match.  */
1182       retval = FNM_NOMATCH;
1183       break;
1184
1185     case L('?'):
1186       if (FCT (p, string, string_end, no_leading_period, flags, NULL,
1187                alloca_used) == 0)
1188         goto success;
1189       /* FALLTHROUGH */
1190
1191     case L('@'):
1192       do
1193         /* I cannot believe it but `strcat' is actually acceptable
1194            here.  Match the entire string with the prefix from the
1195            pattern list and the rest of the pattern following the
1196            pattern list.  */
1197         if (FCT (STRCAT (list->str, p), string, string_end,
1198                  no_leading_period,
1199                  flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1200                  NULL, alloca_used) == 0)
1201           /* It worked.  Signal success.  */
1202           goto success;
1203       while ((list = list->next) != NULL);
1204
1205       /* None of the patterns lead to a match.  */
1206       retval = FNM_NOMATCH;
1207       break;
1208
1209     case L('!'):
1210       for (rs = string; rs <= string_end; ++rs)
1211         {
1212           struct patternlist *runp;
1213
1214           for (runp = list; runp != NULL; runp = runp->next)
1215             if (FCT (runp->str, string, rs,  no_leading_period,
1216                      flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1217                      NULL, alloca_used) == 0)
1218               break;
1219
1220           /* If none of the patterns matched see whether the rest does.  */
1221           if (runp == NULL
1222               && (FCT (p, rs, string_end,
1223                        rs == string
1224                        ? no_leading_period
1225                        : rs[-1] == '/' && NO_LEADING_PERIOD (flags) ? 1 : 0,
1226                        flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1227                        NULL, alloca_used) == 0))
1228             /* This is successful.  */
1229             goto success;
1230         }
1231
1232       /* None of the patterns together with the rest of the pattern
1233          lead to a match.  */
1234       retval = FNM_NOMATCH;
1235       break;
1236
1237     default:
1238       assert (! "Invalid extended matching operator");
1239       retval = -1;
1240       break;
1241     }
1242
1243  success:
1244  out:
1245   if (any_malloced)
1246     while (list != NULL)
1247       {
1248         struct patternlist *old = list;
1249         list = list->next;
1250         if (old->malloced)
1251           free (old);
1252       }
1253
1254   return retval;
1255 }
1256
1257
/* Remove the macros this file relies on.  The grouping below is by
   apparent role only; the order of the directives has no effect.
   NOTE(review): presumably this lets the including file define them anew
   for another instantiation -- confirm against the includer.  */

/* Character types, literal wrapper and per-character helpers.  */
#undef CHAR
#undef UCHAR
#undef INT
#undef L
#undef FOLD
#undef BTOWC

/* Names given to the functions and the structure defined above.  */
#undef FCT
#undef EXT
#undef END
#undef STRUCT

/* String and memory primitives.  */
#undef STRLEN
#undef STRCAT
#undef STRCOLL
#undef MEMCHR
#undef MEMPCPY

/* Remaining switches and helpers.  */
#undef WIDE_CHAR_VERSION
#undef FINDIDX