posix/fnmatch_loop.c

   1 /* Copyright (C) 1991-2015 Free Software Foundation, Inc.
   2    This file is part of the GNU C Library.
   3
   4    The GNU C Library is free software; you can redistribute it and/or
   5    modify it under the terms of the GNU Lesser General Public
   6    License as published by the Free Software Foundation; either
   7    version 2.1 of the License, or (at your option) any later version.
   8
   9    The GNU C Library is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  12    Lesser General Public License for more details.
  13
  14    You should have received a copy of the GNU Lesser General Public
  15    License along with the GNU C Library; if not, see
  16    <http://www.gnu.org/licenses/>.  */
  17
  18 #include <stdint.h>
  19
  20 struct STRUCT
  21 {
  22   const CHAR *pattern;
  23   const CHAR *string;
  24   int no_leading_period;
  25 };
  26
  27 /* Match STRING against the filename pattern PATTERN, returning zero if
  28    it matches, nonzero if not.  */
  29 static int FCT (const CHAR *pattern, const CHAR *string,
  30                 const CHAR *string_end, int no_leading_period, int flags,
  31                 struct STRUCT *ends, size_t alloca_used)
  32      internal_function;
  33 static int EXT (INT opt, const CHAR *pattern, const CHAR *string,
  34                 const CHAR *string_end, int no_leading_period, int flags,
  35                 size_t alloca_used)
  36      internal_function;
  37 static const CHAR *END (const CHAR *patternp) internal_function;
  38
  39 static int
  40 internal_function
  41 FCT (pattern, string, string_end, no_leading_period, flags, ends, alloca_used)
  42      const CHAR *pattern;
  43      const CHAR *string;
  44      const CHAR *string_end;
  45      int no_leading_period;
  46      int flags;
  47      struct STRUCT *ends;
  48      size_t alloca_used;
  49 {
  50   const CHAR *p = pattern, *n = string;
  51   UCHAR c;
  52 #ifdef _LIBC
  53 # if WIDE_CHAR_VERSION
  54   const char *collseq = (const char *)
  55     _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC);
  56 # else
  57   const UCHAR *collseq = (const UCHAR *)
  58     _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQMB);
  59 # endif
  60 #endif
  61
  62   while ((c = *p++) != L('\0'))
  63     {
  64       int new_no_leading_period = 0;
  65       c = FOLD (c);
  66
  67       switch (c)
  68         {
  69         case L('?'):
  70           if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
  71             {
  72               int res = EXT (c, p, n, string_end, no_leading_period,
  73                              flags, alloca_used);
  74               if (res != -1)
  75                 return res;
  76             }
  77
  78           if (n == string_end)
  79             return FNM_NOMATCH;
  80           else if (*n == L('/') && (flags & FNM_FILE_NAME))
  81             return FNM_NOMATCH;
  82           else if (*n == L('.') && no_leading_period)
  83             return FNM_NOMATCH;
  84           break;
  85
  86         case L('\\'):
  87           if (!(flags & FNM_NOESCAPE))
  88             {
  89               c = *p++;
  90               if (c == L('\0'))
  91                 /* Trailing \ loses.  */
  92                 return FNM_NOMATCH;
  93               c = FOLD (c);
  94             }
  95           if (n == string_end || FOLD ((UCHAR) *n) != c)
  96             return FNM_NOMATCH;
  97           break;
  98
  99         case L('*'):
 100           if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
 101             {
 102               int res = EXT (c, p, n, string_end, no_leading_period,
 103                              flags, alloca_used);
 104               if (res != -1)
 105                 return res;
 106             }
 107           else if (ends != NULL)
 108             {
 109               ends->pattern = p - 1;
 110               ends->string = n;
 111               ends->no_leading_period = no_leading_period;
 112               return 0;
 113             }
 114
 115           if (n != string_end && *n == L('.') && no_leading_period)
 116             return FNM_NOMATCH;
 117
 118           for (c = *p++; c == L('?') || c == L('*'); c = *p++)
 119             {
 120               if (*p == L('(') && (flags & FNM_EXTMATCH) != 0)
 121                 {
 122                   const CHAR *endp = END (p);
 123                   if (endp != p)
 124                     {
 125                       /* This is a pattern.  Skip over it.  */
 126                       p = endp;
 127                       continue;
 128                     }
 129                 }
 130
 131               if (c == L('?'))
 132                 {
 133                   /* A ? needs to match one character.  */
 134                   if (n == string_end)
 135                     /* There isn't another character; no match.  */
 136                     return FNM_NOMATCH;
 137                   else if (*n == L('/')
 138                            && __builtin_expect (flags & FNM_FILE_NAME, 0))
 139                     /* A slash does not match a wildcard under
 140                        FNM_FILE_NAME.  */
 141                     return FNM_NOMATCH;
 142                   else
 143                     /* One character of the string is consumed in matching
 144                        this ? wildcard, so *??? won't match if there are
 145                        less than three characters.  */
 146                     ++n;
 147                 }
 148             }
 149
 150           if (c == L('\0'))
 151             /* The wildcard(s) is/are the last element of the pattern.
 152                If the name is a file name and contains another slash
 153                this means it cannot match, unless the FNM_LEADING_DIR
 154                flag is set.  */
 155             {
 156               int result = (flags & FNM_FILE_NAME) == 0 ? 0 : FNM_NOMATCH;
 157
 158               if (flags & FNM_FILE_NAME)
 159                 {
 160                   if (flags & FNM_LEADING_DIR)
 161                     result = 0;
 162                   else
 163                     {
 164                       if (MEMCHR (n, L('/'), string_end - n) == NULL)
 165                         result = 0;
 166                     }
 167                 }
 168
 169               return result;
 170             }
 171           else
 172             {
 173               const CHAR *endp;
 174               struct STRUCT end;
 175
 176               end.pattern = NULL;
 177               endp = MEMCHR (n, (flags & FNM_FILE_NAME) ? L('/') : L('\0'),
 178                              string_end - n);
 179               if (endp == NULL)
 180                 endp = string_end;
 181
 182               if (c == L('[')
 183                   || (__builtin_expect (flags & FNM_EXTMATCH, 0) != 0
 184                       && (c == L('@') || c == L('+') || c == L('!'))
 185                       && *p == L('(')))
 186                 {
 187                   int flags2 = ((flags & FNM_FILE_NAME)
 188                                 ? flags : (flags & ~FNM_PERIOD));
 189
 190                   for (--p; n < endp; ++n, no_leading_period = 0)
 191                     if (FCT (p, n, string_end, no_leading_period, flags2,
 192                              &end, alloca_used) == 0)
 193                       goto found;
 194                 }
 195               else if (c == L('/') && (flags & FNM_FILE_NAME))
 196                 {
 197                   while (n < string_end && *n != L('/'))
 198                     ++n;
 199                   if (n < string_end && *n == L('/')
 200                       && (FCT (p, n + 1, string_end, flags & FNM_PERIOD, flags,
 201                                NULL, alloca_used) == 0))
 202                     return 0;
 203                 }
 204               else
 205                 {
 206                   int flags2 = ((flags & FNM_FILE_NAME)
 207                                 ? flags : (flags & ~FNM_PERIOD));
 208
 209                   if (c == L('\\') && !(flags & FNM_NOESCAPE))
 210                     c = *p;
 211                   c = FOLD (c);
 212                   for (--p; n < endp; ++n, no_leading_period = 0)
 213                     if (FOLD ((UCHAR) *n) == c
 214                         && (FCT (p, n, string_end, no_leading_period, flags2,
 215                                  &end, alloca_used) == 0))
 216                       {
 217                       found:
 218                         if (end.pattern == NULL)
 219                           return 0;
 220                         break;
 221                       }
 222                   if (end.pattern != NULL)
 223                     {
 224                       p = end.pattern;
 225                       n = end.string;
 226                       no_leading_period = end.no_leading_period;
 227                       continue;
 228                     }
 229                 }
 230             }
 231
 232           /* If we come here no match is possible with the wildcard.  */
 233           return FNM_NOMATCH;
 234
 235         case L('['):
 236           {
 237             /* Nonzero if the sense of the character class is inverted.  */
 238             const CHAR *p_init = p;
 239             const CHAR *n_init = n;
 240             int not;
 241             CHAR cold;
 242             UCHAR fn;
 243
 244             if (posixly_correct == 0)
 245               posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
 246
 247             if (n == string_end)
 248               return FNM_NOMATCH;
 249
 250             if (*n == L('.') && no_leading_period)
 251               return FNM_NOMATCH;
 252
 253             if (*n == L('/') && (flags & FNM_FILE_NAME))
 254               /* `/' cannot be matched.  */
 255               return FNM_NOMATCH;
 256
 257             not = (*p == L('!') || (posixly_correct < 0 && *p == L('^')));
 258             if (not)
 259               ++p;
 260
 261             fn = FOLD ((UCHAR) *n);
 262
 263             c = *p++;
 264             for (;;)
 265               {
 266                 if (!(flags & FNM_NOESCAPE) && c == L('\\'))
 267                   {
 268                     if (*p == L('\0'))
 269                       return FNM_NOMATCH;
 270                     c = FOLD ((UCHAR) *p);
 271                     ++p;
 272
 273                     goto normal_bracket;
 274                   }
 275                 else if (c == L('[') && *p == L(':'))
 276                   {
 277                     /* Leave room for the null.  */
 278                     CHAR str[CHAR_CLASS_MAX_LENGTH + 1];
 279                     size_t c1 = 0;
 280 #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
 281                     wctype_t wt;
 282 #endif
 283                     const CHAR *startp = p;
 284
 285                     for (;;)
 286                       {
 287                         if (c1 == CHAR_CLASS_MAX_LENGTH)
 288                           /* The name is too long and therefore the pattern
 289                              is ill-formed.  */
 290                           return FNM_NOMATCH;
 291
 292                         c = *++p;
 293                         if (c == L(':') && p[1] == L(']'))
 294                           {
 295                             p += 2;
 296                             break;
 297                           }
 298                         if (c < L('a') || c >= L('z'))
 299                           {
 300                             /* This cannot possibly be a character class name.
 301                                Match it as a normal range.  */
 302                             p = startp;
 303                             c = L('[');
 304                             goto normal_bracket;
 305                           }
 306                         str[c1++] = c;
 307                       }
 308                     str[c1] = L('\0');
 309
 310 #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
 311                     wt = IS_CHAR_CLASS (str);
 312                     if (wt == 0)
 313                       /* Invalid character class name.  */
 314                       return FNM_NOMATCH;
 315
 316 # if defined _LIBC && ! WIDE_CHAR_VERSION
 317                     /* The following code is glibc specific but does
 318                        there a good job in speeding up the code since
 319                        we can avoid the btowc() call.  */
 320                     if (_ISCTYPE ((UCHAR) *n, wt))
 321                       goto matched;
 322 # else
 323                     if (ISWCTYPE (BTOWC ((UCHAR) *n), wt))
 324                       goto matched;
 325 # endif
 326 #else
 327                     if ((STREQ (str, L("alnum")) && ISALNUM ((UCHAR) *n))
 328                         || (STREQ (str, L("alpha")) && ISALPHA ((UCHAR) *n))
 329                         || (STREQ (str, L("blank")) && ISBLANK ((UCHAR) *n))
 330                         || (STREQ (str, L("cntrl")) && ISCNTRL ((UCHAR) *n))
 331                         || (STREQ (str, L("digit")) && ISDIGIT ((UCHAR) *n))
 332                         || (STREQ (str, L("graph")) && ISGRAPH ((UCHAR) *n))
 333                         || (STREQ (str, L("lower")) && ISLOWER ((UCHAR) *n))
 334                         || (STREQ (str, L("print")) && ISPRINT ((UCHAR) *n))
 335                         || (STREQ (str, L("punct")) && ISPUNCT ((UCHAR) *n))
 336                         || (STREQ (str, L("space")) && ISSPACE ((UCHAR) *n))
 337                         || (STREQ (str, L("upper")) && ISUPPER ((UCHAR) *n))
 338                         || (STREQ (str, L("xdigit")) && ISXDIGIT ((UCHAR) *n)))
 339                       goto matched;
 340 #endif
 341                     c = *p++;
 342                   }
 343 #ifdef _LIBC
 344                 else if (c == L('[') && *p == L('='))
 345                   {
 346                     /* It's important that STR be a scalar variable rather
 347                        than a one-element array, because GCC (at least 4.9.2
 348                        -O2 on x86-64) can be confused by the array and
 349                        diagnose a "used initialized" in a dead branch in the
 350                        findidx function.  */
 351                     UCHAR str;
 352                     uint32_t nrules =
 353                       _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
 354                     const CHAR *startp = p;
 355
 356                     c = *++p;
 357                     if (c == L('\0'))
 358                       {
 359                         p = startp;
 360                         c = L('[');
 361                         goto normal_bracket;
 362                       }
 363                     str = c;
 364
 365                     c = *++p;
 366                     if (c != L('=') || p[1] != L(']'))
 367                       {
 368                         p = startp;
 369                         c = L('[');
 370                         goto normal_bracket;
 371                       }
 372                     p += 2;
 373
 374                     if (nrules == 0)
 375                       {
 376                         if ((UCHAR) *n == str)
 377                           goto matched;
 378                       }
 379                     else
 380                       {
 381                         const int32_t *table;
 382 # if WIDE_CHAR_VERSION
 383                         const int32_t *weights;
 384                         const wint_t *extra;
 385 # else
 386                         const unsigned char *weights;
 387                         const unsigned char *extra;
 388 # endif
 389                         const int32_t *indirect;
 390                         int32_t idx;
 391                         const UCHAR *cp = (const UCHAR *) &str;
 392
 393 # if WIDE_CHAR_VERSION
 394                         table = (const int32_t *)
 395                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
 396                         weights = (const int32_t *)
 397                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
 398                         extra = (const wint_t *)
 399                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
 400                         indirect = (const int32_t *)
 401                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
 402 # else
 403                         table = (const int32_t *)
 404                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
 405                         weights = (const unsigned char *)
 406                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
 407                         extra = (const unsigned char *)
 408                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
 409                         indirect = (const int32_t *)
 410                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
 411 # endif
 412
 413                         idx = FINDIDX (table, indirect, extra, &cp, 1);
 414                         if (idx != 0)
 415                           {
 416                             /* We found a table entry.  Now see whether the
 417                                character we are currently at has the same
 418                                equivalance class value.  */
 419                             int len = weights[idx & 0xffffff];
 420                             int32_t idx2;
 421                             const UCHAR *np = (const UCHAR *) n;
 422
 423                             idx2 = FINDIDX (table, indirect, extra,
 424                                             &np, string_end - n);
 425                             if (idx2 != 0
 426                                 && (idx >> 24) == (idx2 >> 24)
 427                                 && len == weights[idx2 & 0xffffff])
 428                               {
 429                                 int cnt = 0;
 430
 431                                 idx &= 0xffffff;
 432                                 idx2 &= 0xffffff;
 433
 434                                 while (cnt < len
 435                                        && (weights[idx + 1 + cnt]
 436                                            == weights[idx2 + 1 + cnt]))
 437                                   ++cnt;
 438
 439                                 if (cnt == len)
 440                                   goto matched;
 441                               }
 442                           }
 443                       }
 444
 445                     c = *p++;
 446                   }
 447 #endif
 448                 else if (c == L('\0'))
 449                   {
 450                     /* [ unterminated, treat as normal character.  */
 451                     p = p_init;
 452                     n = n_init;
 453                     c = L('[');
 454                     goto normal_match;
 455                   }
 456                 else
 457                   {
 458                     int is_range = 0;
 459
 460 #ifdef _LIBC
 461                     int is_seqval = 0;
 462
 463                     if (c == L('[') && *p == L('.'))
 464                       {
 465                         uint32_t nrules =
 466                           _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
 467                         const CHAR *startp = p;
 468                         size_t c1 = 0;
 469
 470                         while (1)
 471                           {
 472                             c = *++p;
 473                             if (c == L('.') && p[1] == L(']'))
 474                               {
 475                                 p += 2;
 476                                 break;
 477                               }
 478                             if (c == '\0')
 479                               return FNM_NOMATCH;
 480                             ++c1;
 481                           }
 482
 483                         /* We have to handling the symbols differently in
 484                            ranges since then the collation sequence is
 485                            important.  */
 486                         is_range = *p == L('-') && p[1] != L('\0');
 487
 488                         if (nrules == 0)
 489                           {
 490                             /* There are no names defined in the collation
 491                                data.  Therefore we only accept the trivial
 492                                names consisting of the character itself.  */
 493                             if (c1 != 1)
 494                               return FNM_NOMATCH;
 495
 496                             if (!is_range && *n == startp[1])
 497                               goto matched;
 498
 499                             cold = startp[1];
 500                             c = *p++;
 501                           }
 502                         else
 503                           {
 504                             int32_t table_size;
 505                             const int32_t *symb_table;
 506 # if WIDE_CHAR_VERSION
 507                             char str[c1];
 508                             unsigned int strcnt;
 509 # else
 510 #  define str (startp + 1)
 511 # endif
 512                             const unsigned char *extra;
 513                             int32_t idx;
 514                             int32_t elem;
 515                             int32_t second;
 516                             int32_t hash;
 517
 518 # if WIDE_CHAR_VERSION
 519                             /* We have to convert the name to a single-byte
 520                                string.  This is possible since the names
 521                                consist of ASCII characters and the internal
 522                                representation is UCS4.  */
 523                             for (strcnt = 0; strcnt < c1; ++strcnt)
 524                               str[strcnt] = startp[1 + strcnt];
 525 #endif
 526
 527                             table_size =
 528                               _NL_CURRENT_WORD (LC_COLLATE,
 529                                                 _NL_COLLATE_SYMB_HASH_SIZEMB);
 530                             symb_table = (const int32_t *)
 531                               _NL_CURRENT (LC_COLLATE,
 532                                            _NL_COLLATE_SYMB_TABLEMB);
 533                             extra = (const unsigned char *)
 534                               _NL_CURRENT (LC_COLLATE,
 535                                            _NL_COLLATE_SYMB_EXTRAMB);
 536
 537                             /* Locate the character in the hashing table.  */
 538                             hash = elem_hash (str, c1);
 539
 540                             idx = 0;
 541                             elem = hash % table_size;
 542                             if (symb_table[2 * elem] != 0)
 543                               {
 544                                 second = hash % (table_size - 2) + 1;
 545
 546                                 do
 547                                   {
 548                                     /* First compare the hashing value.  */
 549                                     if (symb_table[2 * elem] == hash
 550                                         && (c1
 551                                             == extra[symb_table[2 * elem + 1]])
 552                                         && memcmp (str,
 553                                                    &extra[symb_table[2 * elem
 554                                                                      + 1]
 555                                                           + 1], c1) == 0)
 556                                       {
 557                                         /* Yep, this is the entry.  */
 558                                         idx = symb_table[2 * elem + 1];
 559                                         idx += 1 + extra[idx];
 560                                         break;
 561                                       }
 562
 563                                     /* Next entry.  */
 564                                     elem += second;
 565                                   }
 566                                 while (symb_table[2 * elem] != 0);
 567                               }
 568
 569                             if (symb_table[2 * elem] != 0)
 570                               {
 571                                 /* Compare the byte sequence but only if
 572                                    this is not part of a range.  */
 573 # if WIDE_CHAR_VERSION
 574                                 int32_t *wextra;
 575
 576                                 idx += 1 + extra[idx];
 577                                 /* Adjust for the alignment.  */
 578                                 idx = (idx + 3) & ~3;
 579
 580                                 wextra = (int32_t *) &extra[idx + 4];
 581 # endif
 582
 583                                 if (! is_range)
 584                                   {
 585 # if WIDE_CHAR_VERSION
 586                                     for (c1 = 0;
 587                                          (int32_t) c1 < wextra[idx];
 588                                          ++c1)
 589                                       if (n[c1] != wextra[1 + c1])
 590                                         break;
 591
 592                                     if ((int32_t) c1 == wextra[idx])
 593                                       goto matched;
 594 # else
 595                                     for (c1 = 0; c1 < extra[idx]; ++c1)
 596                                       if (n[c1] != extra[1 + c1])
 597                                         break;
 598
 599                                     if (c1 == extra[idx])
 600                                       goto matched;
 601 # endif
 602                                   }
 603
 604                                 /* Get the collation sequence value.  */
 605                                 is_seqval = 1;
 606 # if WIDE_CHAR_VERSION
 607                                 cold = wextra[1 + wextra[idx]];
 608 # else
 609                                 /* Adjust for the alignment.  */
 610                                 idx += 1 + extra[idx];
 611                                 idx = (idx + 3) & ~4;
 612                                 cold = *((int32_t *) &extra[idx]);
 613 # endif
 614
 615                                 c = *p++;
 616                               }
 617                             else if (c1 == 1)
 618                               {
 619                                 /* No valid character.  Match it as a
 620                                    single byte.  */
 621                                 if (!is_range && *n == str[0])
 622                                   goto matched;
 623
 624                                 cold = str[0];
 625                                 c = *p++;
 626                               }
 627                             else
 628                               return FNM_NOMATCH;
 629                           }
 630                       }
 631                     else
 632 # undef str
 633 #endif
 634                       {
 635                         c = FOLD (c);
 636                       normal_bracket:
 637
 638                         /* We have to handling the symbols differently in
 639                            ranges since then the collation sequence is
 640                            important.  */
 641                         is_range = (*p == L('-') && p[1] != L('\0')
 642                                     && p[1] != L(']'));
 643
 644                         if (!is_range && c == fn)
 645                           goto matched;
 646
 647                         /* This is needed if we goto normal_bracket; from
 648                            outside of is_seqval's scope.  */
 649                         is_seqval = 0;
 650                         cold = c;
 651                         c = *p++;
 652                       }
 653
 654                     if (c == L('-') && *p != L(']'))
 655                       {
 656 #if _LIBC
 657                         /* We have to find the collation sequence
 658                            value for C.  Collation sequence is nothing
 659                            we can regularly access.  The sequence
 660                            value is defined by the order in which the
 661                            definitions of the collation values for the
 662                            various characters appear in the source
 663                            file.  A strange concept, nowhere
 664                            documented.  */
 665                         uint32_t fcollseq;
 666                         uint32_t lcollseq;
 667                         UCHAR cend = *p++;
 668
 669 # if WIDE_CHAR_VERSION
 670                         /* Search in the `names' array for the characters.  */
 671                         fcollseq = __collseq_table_lookup (collseq, fn);
 672                         if (fcollseq == ~((uint32_t) 0))
 673                           /* XXX We don't know anything about the character
 674                              we are supposed to match.  This means we are
 675                              failing.  */
 676                           goto range_not_matched;
 677
 678                         if (is_seqval)
 679                           lcollseq = cold;
 680                         else
 681                           lcollseq = __collseq_table_lookup (collseq, cold);
 682 # else
 683                         fcollseq = collseq[fn];
 684                         lcollseq = is_seqval ? cold : collseq[(UCHAR) cold];
 685 # endif
 686
 687                         is_seqval = 0;
 688                         if (cend == L('[') && *p == L('.'))
 689                           {
 690                             uint32_t nrules =
 691                               _NL_CURRENT_WORD (LC_COLLATE,
 692                                                 _NL_COLLATE_NRULES);
 693                             const CHAR *startp = p;
 694                             size_t c1 = 0;
 695
 696                             while (1)
 697                               {
 698                                 c = *++p;
 699                                 if (c == L('.') && p[1] == L(']'))
 700                                   {
 701                                     p += 2;
 702                                     break;
 703                                   }
 704                                 if (c == '\0')
 705                                   return FNM_NOMATCH;
 706                                 ++c1;
 707                               }
 708
 709                             if (nrules == 0)
 710                               {
 711                                 /* There are no names defined in the
 712                                    collation data.  Therefore we only
 713                                    accept the trivial names consisting
 714                                    of the character itself.  */
 715                                 if (c1 != 1)
 716                                   return FNM_NOMATCH;
 717
 718                                 cend = startp[1];
 719                               }
 720                             else
 721                               {
 722                                 int32_t table_size;
 723                                 const int32_t *symb_table;
 724 # if WIDE_CHAR_VERSION
 725                                 char str[c1];
 726                                 unsigned int strcnt;
 727 # else
 728 #  define str (startp + 1)
 729 # endif
 730                                 const unsigned char *extra;
 731                                 int32_t idx;
 732                                 int32_t elem;
 733                                 int32_t second;
 734                                 int32_t hash;
 735
 736 # if WIDE_CHAR_VERSION
 737                                 /* We have to convert the name to a single-byte
 738                                    string.  This is possible since the names
 739                                    consist of ASCII characters and the internal
 740                                    representation is UCS4.  */
 741                                 for (strcnt = 0; strcnt < c1; ++strcnt)
 742                                   str[strcnt] = startp[1 + strcnt];
 743 # endif
 744
 745                                 table_size =
 746                                   _NL_CURRENT_WORD (LC_COLLATE,
 747                                                     _NL_COLLATE_SYMB_HASH_SIZEMB);
 748                                 symb_table = (const int32_t *)
 749                                   _NL_CURRENT (LC_COLLATE,
 750                                                _NL_COLLATE_SYMB_TABLEMB);
 751                                 extra = (const unsigned char *)
 752                                   _NL_CURRENT (LC_COLLATE,
 753                                                _NL_COLLATE_SYMB_EXTRAMB);
 754
 755                                 /* Locate the character in the hashing
 756                                    table.  */
 757                                 hash = elem_hash (str, c1);
 758
 759                                 idx = 0;
 760                                 elem = hash % table_size;
 761                                 if (symb_table[2 * elem] != 0)
 762                                   {
 763                                     second = hash % (table_size - 2) + 1;
 764
 765                                     do
 766                                       {
 767                                         /* First compare the hashing value.  */
 768                                         if (symb_table[2 * elem] == hash
 769                                             && (c1
 770                                                 == extra[symb_table[2 * elem + 1]])
 771                                             && memcmp (str,
 772                                                        &extra[symb_table[2 * elem + 1]
 773                                                               + 1], c1) == 0)
 774                                           {
 775                                             /* Yep, this is the entry.  */
 776                                             idx = symb_table[2 * elem + 1];
 777                                             idx += 1 + extra[idx];
 778                                             break;
 779                                           }
 780
 781                                         /* Next entry.  */
 782                                         elem += second;
 783                                       }
 784                                     while (symb_table[2 * elem] != 0);
 785                                   }
 786
 787                                 if (symb_table[2 * elem] != 0)
 788                                   {
 789                                     /* Compare the byte sequence but only if
 790                                        this is not part of a range.  */
 791 # if WIDE_CHAR_VERSION
 792                                     int32_t *wextra;
 793
 794                                     idx += 1 + extra[idx];
 795                                     /* Adjust for the alignment.  */
 796                                     idx = (idx + 3) & ~4;
 797
 798                                     wextra = (int32_t *) &extra[idx + 4];
 799 # endif
 800                                     /* Get the collation sequence value.  */
 801                                     is_seqval = 1;
 802 # if WIDE_CHAR_VERSION
 803                                     cend = wextra[1 + wextra[idx]];
 804 # else
 805                                     /* Adjust for the alignment.  */
 806                                     idx += 1 + extra[idx];
 807                                     idx = (idx + 3) & ~4;
 808                                     cend = *((int32_t *) &extra[idx]);
 809 # endif
 810                                   }
 811                                 else if (symb_table[2 * elem] != 0 && c1 == 1)
 812                                   {
 813                                     cend = str[0];
 814                                     c = *p++;
 815                                   }
 816                                 else
 817                                   return FNM_NOMATCH;
 818                               }
 819 # undef str
 820                           }
 821                         else
 822                           {
 823                             if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
 824                               cend = *p++;
 825                             if (cend == L('\0'))
 826                               return FNM_NOMATCH;
 827                             cend = FOLD (cend);
 828                           }
 829
 830                         /* XXX It is not entirely clear to me how to handle
 831                            characters which are not mentioned in the
 832                            collation specification.  */
 833                         if (
 834 # if WIDE_CHAR_VERSION
 835                             lcollseq == 0xffffffff ||
 836 # endif
 837                             lcollseq <= fcollseq)
 838                           {
 839                             /* We have to look at the upper bound.  */
 840                             uint32_t hcollseq;
 841
 842                             if (is_seqval)
 843                               hcollseq = cend;
 844                             else
 845                               {
 846 # if WIDE_CHAR_VERSION
 847                                 hcollseq =
 848                                   __collseq_table_lookup (collseq, cend);
 849                                 if (hcollseq == ~((uint32_t) 0))
 850                                   {
 851                                     /* Hum, no information about the upper
 852                                        bound.  The matching succeeds if the
 853                                        lower bound is matched exactly.  */
 854                                     if (lcollseq != fcollseq)
 855                                       goto range_not_matched;
 856
 857                                     goto matched;
 858                                   }
 859 # else
 860                                 hcollseq = collseq[cend];
 861 # endif
 862                               }
 863
 864                             if (lcollseq <= hcollseq && fcollseq <= hcollseq)
 865                               goto matched;
 866                           }
 867 # if WIDE_CHAR_VERSION
 868                       range_not_matched:
 869 # endif
 870 #else
 871                         /* We use a boring value comparison of the character
 872                            values.  This is better than comparing using
 873                            `strcoll' since the latter would have surprising
 874                            and sometimes fatal consequences.  */
 875                         UCHAR cend = *p++;
 876
 877                         if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
 878                           cend = *p++;
 879                         if (cend == L('\0'))
 880                           return FNM_NOMATCH;
 881
 882                         /* It is a range.  */
 883                         if (cold <= fn && fn <= cend)
 884                           goto matched;
 885 #endif
 886
 887                         c = *p++;
 888                       }
 889                   }
 890
 891                 if (c == L(']'))
 892                   break;
 893               }
 894
 895             if (!not)
 896               return FNM_NOMATCH;
 897             break;
 898
 899           matched:
 900             /* Skip the rest of the [...] that already matched.  */
 901             while ((c = *p++) != L (']'))
 902               {
 903                 if (c == L('\0'))
 904                   /* [... (unterminated) loses.  */
 905                   return FNM_NOMATCH;
 906
 907                 if (!(flags & FNM_NOESCAPE) && c == L('\\'))
 908                   {
 909                     if (*p == L('\0'))
 910                       return FNM_NOMATCH;
 911                     /* XXX 1003.2d11 is unclear if this is right.  */
 912                     ++p;
 913                   }
 914                 else if (c == L('[') && *p == L(':'))
 915                   {
 916                     int c1 = 0;
 917                     const CHAR *startp = p;
 918
 919                     while (1)
 920                       {
 921                         c = *++p;
 922                         if (++c1 == CHAR_CLASS_MAX_LENGTH)
 923                           return FNM_NOMATCH;
 924
 925                         if (*p == L(':') && p[1] == L(']'))
 926                           break;
 927
 928                         if (c < L('a') || c >= L('z'))
 929                           {
 930                             p = startp - 2;
 931                             break;
 932                           }
 933                       }
 934                     p += 2;
 935                   }
 936                 else if (c == L('[') && *p == L('='))
 937                   {
 938                     c = *++p;
 939                     if (c == L('\0'))
 940                       return FNM_NOMATCH;
 941                     c = *++p;
 942                     if (c != L('=') || p[1] != L(']'))
 943                       return FNM_NOMATCH;
 944                     p += 2;
 945                   }
 946                 else if (c == L('[') && *p == L('.'))
 947                   {
 948                     while (1)
 949                       {
 950                         c = *++p;
 951                         if (c == L('\0'))
 952                           return FNM_NOMATCH;
 953
 954                         if (c == L('.') && p[1] == L(']'))
 955                           break;
 956                       }
 957                     p += 2;
 958                   }
 959               }
 960             if (not)
 961               return FNM_NOMATCH;
 962           }
 963           break;
 964
 965         case L('+'):
 966         case L('@'):
 967         case L('!'):
 968           if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
 969             {
 970               int res = EXT (c, p, n, string_end, no_leading_period, flags,
 971                              alloca_used);
 972               if (res != -1)
 973                 return res;
 974             }
 975           goto normal_match;
 976
 977         case L('/'):
 978           if (NO_LEADING_PERIOD (flags))
 979             {
 980               if (n == string_end || c != (UCHAR) *n)
 981                 return FNM_NOMATCH;
 982
 983               new_no_leading_period = 1;
 984               break;
 985             }
 986           /* FALLTHROUGH */
 987         default:
 988         normal_match:
 989           if (n == string_end || c != FOLD ((UCHAR) *n))
 990             return FNM_NOMATCH;
 991         }
 992
 993       no_leading_period = new_no_leading_period;
 994       ++n;
 995     }
 996
 997   if (n == string_end)
 998     return 0;
 999
1000   if ((flags & FNM_LEADING_DIR) && n != string_end && *n == L('/'))
1001     /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz".  */
1002     return 0;
1003
1004   return FNM_NOMATCH;
1005 }
1006
1007
1008 static const CHAR *
1009 internal_function
1010 END (const CHAR *pattern)
1011 {
1012   const CHAR *p = pattern;
1013
1014   while (1)
1015     if (*++p == L('\0'))
1016       /* This is an invalid pattern.  */
1017       return pattern;
1018     else if (*p == L('['))
1019       {
1020         /* Handle brackets special.  */
1021         if (posixly_correct == 0)
1022           posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
1023
1024         /* Skip the not sign.  We have to recognize it because of a possibly
1025            following ']'.  */
1026         if (*++p == L('!') || (posixly_correct < 0 && *p == L('^')))
1027           ++p;
1028         /* A leading ']' is recognized as such.  */
1029         if (*p == L(']'))
1030           ++p;
1031         /* Skip over all characters of the list.  */
1032         while (*p != L(']'))
1033           if (*p++ == L('\0'))
1034             /* This is no valid pattern.  */
1035             return pattern;
1036       }
1037     else if ((*p == L('?') || *p == L('*') || *p == L('+') || *p == L('@')
1038               || *p == L('!')) && p[1] == L('('))
1039       {
1040         p = END (p + 1);
1041         if (*p == L('\0'))
1042           /* This is an invalid pattern.  */
1043           return pattern;
1044       }
1045     else if (*p == L(')'))
1046       break;
1047
1048   return p + 1;
1049 }
1050
1051
1052 static int
1053 internal_function
1054 EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end,
1055      int no_leading_period, int flags, size_t alloca_used)
1056 {
1057   const CHAR *startp;
1058   int level;
1059   struct patternlist
1060   {
1061     struct patternlist *next;
1062     CHAR malloced;
1063     CHAR str[0];
1064   } *list = NULL;
1065   struct patternlist **lastp = &list;
1066   size_t pattern_len = STRLEN (pattern);
1067   int any_malloced = 0;
1068   const CHAR *p;
1069   const CHAR *rs;
1070   int retval = 0;
1071
1072   /* Parse the pattern.  Store the individual parts in the list.  */
1073   level = 0;
1074   for (startp = p = pattern + 1; level >= 0; ++p)
1075     if (*p == L('\0'))
1076       {
1077         /* This is an invalid pattern.  */
1078         retval = -1;
1079         goto out;
1080       }
1081     else if (*p == L('['))
1082       {
1083         /* Handle brackets special.  */
1084         if (posixly_correct == 0)
1085           posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
1086
1087         /* Skip the not sign.  We have to recognize it because of a possibly
1088            following ']'.  */
1089         if (*++p == L('!') || (posixly_correct < 0 && *p == L('^')))
1090           ++p;
1091         /* A leading ']' is recognized as such.  */
1092         if (*p == L(']'))
1093           ++p;
1094         /* Skip over all characters of the list.  */
1095         while (*p != L(']'))
1096           if (*p++ == L('\0'))
1097             {
1098               /* This is no valid pattern.  */
1099               retval = -1;
1100               goto out;
1101             }
1102       }
1103     else if ((*p == L('?') || *p == L('*') || *p == L('+') || *p == L('@')
1104               || *p == L('!')) && p[1] == L('('))
1105       /* Remember the nesting level.  */
1106       ++level;
1107     else if (*p == L(')'))
1108       {
1109         if (level-- == 0)
1110           {
1111             /* This means we found the end of the pattern.  */
1112 #define NEW_PATTERN \
1113             struct patternlist *newp;                                         \
1114             size_t slen = (opt == L('?') || opt == L('@')                     \
1115                            ? pattern_len : (p - startp + 1));                 \
1116             slen = sizeof (struct patternlist) + (slen * sizeof (CHAR));      \
1117             int malloced = ! __libc_use_alloca (alloca_used + slen);          \
1118             if (__builtin_expect (malloced, 0))                               \
1119               {                                                               \
1120                 newp = malloc (slen);                                         \
1121                 if (newp == NULL)                                             \
1122                   {                                                           \
1123                     retval = -2;                                              \
1124                     goto out;                                                 \
1125                   }                                                           \
1126                 any_malloced = 1;                                             \
1127               }                                                               \
1128             else                                                              \
1129               newp = alloca_account (slen, alloca_used);                      \
1130             newp->next = NULL;                                                \
1131             newp->malloced = malloced;                                        \
1132             *((CHAR *) MEMPCPY (newp->str, startp, p - startp)) = L('\0');    \
1133             *lastp = newp;                                                    \
1134             lastp = &newp->next
1135             NEW_PATTERN;
1136           }
1137       }
1138     else if (*p == L('|'))
1139       {
1140         if (level == 0)
1141           {
1142             NEW_PATTERN;
1143             startp = p + 1;
1144           }
1145       }
1146   assert (list != NULL);
1147   assert (p[-1] == L(')'));
1148 #undef NEW_PATTERN
1149
1150   switch (opt)
1151     {
1152     case L('*'):
1153       if (FCT (p, string, string_end, no_leading_period, flags, NULL,
1154                alloca_used) == 0)
1155         goto success;
1156       /* FALLTHROUGH */
1157
1158     case L('+'):
1159       do
1160         {
1161           for (rs = string; rs <= string_end; ++rs)
1162             /* First match the prefix with the current pattern with the
1163                current pattern.  */
1164             if (FCT (list->str, string, rs, no_leading_period,
1165                      flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1166                      NULL, alloca_used) == 0
1167                 /* This was successful.  Now match the rest with the rest
1168                    of the pattern.  */
1169                 && (FCT (p, rs, string_end,
1170                          rs == string
1171                          ? no_leading_period
1172                          : rs[-1] == '/' && NO_LEADING_PERIOD (flags) ? 1 : 0,
1173                          flags & FNM_FILE_NAME
1174                          ? flags : flags & ~FNM_PERIOD, NULL, alloca_used) == 0
1175                     /* This didn't work.  Try the whole pattern.  */
1176                     || (rs != string
1177                         && FCT (pattern - 1, rs, string_end,
1178                                 rs == string
1179                                 ? no_leading_period
1180                                 : (rs[-1] == '/' && NO_LEADING_PERIOD (flags)
1181                                    ? 1 : 0),
1182                                 flags & FNM_FILE_NAME
1183                                 ? flags : flags & ~FNM_PERIOD, NULL,
1184                                 alloca_used) == 0)))
1185               /* It worked.  Signal success.  */
1186               goto success;
1187         }
1188       while ((list = list->next) != NULL);
1189
1190       /* None of the patterns lead to a match.  */
1191       retval = FNM_NOMATCH;
1192       break;
1193
1194     case L('?'):
1195       if (FCT (p, string, string_end, no_leading_period, flags, NULL,
1196                alloca_used) == 0)
1197         goto success;
1198       /* FALLTHROUGH */
1199
1200     case L('@'):
1201       do
1202         /* I cannot believe it but `strcat' is actually acceptable
1203            here.  Match the entire string with the prefix from the
1204            pattern list and the rest of the pattern following the
1205            pattern list.  */
1206         if (FCT (STRCAT (list->str, p), string, string_end,
1207                  no_leading_period,
1208                  flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1209                  NULL, alloca_used) == 0)
1210           /* It worked.  Signal success.  */
1211           goto success;
1212       while ((list = list->next) != NULL);
1213
1214       /* None of the patterns lead to a match.  */
1215       retval = FNM_NOMATCH;
1216       break;
1217
1218     case L('!'):
1219       for (rs = string; rs <= string_end; ++rs)
1220         {
1221           struct patternlist *runp;
1222
1223           for (runp = list; runp != NULL; runp = runp->next)
1224             if (FCT (runp->str, string, rs,  no_leading_period,
1225                      flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1226                      NULL, alloca_used) == 0)
1227               break;
1228
1229           /* If none of the patterns matched see whether the rest does.  */
1230           if (runp == NULL
1231               && (FCT (p, rs, string_end,
1232                        rs == string
1233                        ? no_leading_period
1234                        : rs[-1] == '/' && NO_LEADING_PERIOD (flags) ? 1 : 0,
1235                        flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1236                        NULL, alloca_used) == 0))
1237             /* This is successful.  */
1238             goto success;
1239         }
1240
1241       /* None of the patterns together with the rest of the pattern
1242          lead to a match.  */
1243       retval = FNM_NOMATCH;
1244       break;
1245
1246     default:
1247       assert (! "Invalid extended matching operator");
1248       retval = -1;
1249       break;
1250     }
1251
1252  success:
1253  out:
1254   if (any_malloced)
1255     while (list != NULL)
1256       {
1257         struct patternlist *old = list;
1258         list = list->next;
1259         if (old->malloced)
1260           free (old);
1261       }
1262
1263   return retval;
1264 }
1265
1266
1267 #undef FOLD
1268 #undef CHAR
1269 #undef UCHAR
1270 #undef INT
1271 #undef FCT
1272 #undef EXT
1273 #undef END
1274 #undef STRUCT
1275 #undef MEMPCPY
1276 #undef MEMCHR
1277 #undef STRCOLL
1278 #undef STRLEN
1279 #undef STRCAT
1280 #undef L
1281 #undef BTOWC
1282 #undef WIDE_CHAR_VERSION
1283 #undef FINDIDX