posix/fnmatch_loop.c

   1 /* Copyright (C) 1991-2015 Free Software Foundation, Inc.
   2    This file is part of the GNU C Library.
   3
   4    The GNU C Library is free software; you can redistribute it and/or
   5    modify it under the terms of the GNU Lesser General Public
   6    License as published by the Free Software Foundation; either
   7    version 2.1 of the License, or (at your option) any later version.
   8
   9    The GNU C Library is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  12    Lesser General Public License for more details.
  13
  14    You should have received a copy of the GNU Lesser General Public
  15    License along with the GNU C Library; if not, see
  16    <http://www.gnu.org/licenses/>.  */
  17
  18 #include <stdint.h>
  19
  20 struct STRUCT  /* Resume point that FCT records when it stops at a `*'.  */
  21 {
  22   const CHAR *pattern;     /* Pattern position of the `*' that stopped FCT.  */
  23   const CHAR *string;      /* String position reached at that moment.  */
  24   int no_leading_period;   /* NO_LEADING_PERIOD value in effect there.  */
  25 };
  26
  27 /* Match STRING against the filename pattern PATTERN, returning zero if
  28    it matches, nonzero if not.  STRING_END marks the end of STRING.  If
        ENDS is non-null and FCT reaches a `*' that is not handled as an
        FNM_EXTMATCH `*(' group, it stores the current pattern and string
        positions in *ENDS and returns zero at once.  */
  29 static int FCT (const CHAR *pattern, const CHAR *string,
  30                 const CHAR *string_end, int no_leading_period, int flags,
  31                 struct STRUCT *ends, size_t alloca_used)
  32      internal_function;
     /* Helper for FNM_EXTMATCH groups (definition not in view).  FCT calls
        it for `?(' and `*(' with OPT set to the wildcard character and
        PATTERN pointing at the `('; FCT returns any result other than -1
        directly and otherwise carries on with ordinary wildcard handling.  */
  33 static int EXT (INT opt, const CHAR *pattern, const CHAR *string,
  34                 const CHAR *string_end, int no_leading_period, int flags,
  35                 size_t alloca_used)
  36      internal_function;
     /* FCT calls this with PATTERNP at the `(' following a wildcard; a
        result different from PATTERNP is taken as the place to resume
        scanning after the skipped group.  NOTE(review): the definition is
        not visible here -- confirm the exact return contract.  */
  37 static const CHAR *END (const CHAR *patternp) internal_function;
  38
  39 static int
  40 internal_function
  41 FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end,
  42      int no_leading_period, int flags, struct STRUCT *ends, size_t alloca_used)
  43 {
  44   const CHAR *p = pattern, *n = string;
  45   UCHAR c;
  46 #ifdef _LIBC
  47 # if WIDE_CHAR_VERSION
  48   const char *collseq = (const char *)
  49     _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC);
  50 # else
  51   const UCHAR *collseq = (const UCHAR *)
  52     _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQMB);
  53 # endif
  54 #endif
  55
  56   while ((c = *p++) != L('\0'))
  57     {
  58       int new_no_leading_period = 0;
  59       c = FOLD (c);
  60
  61       switch (c)
  62         {
  63         case L('?'):
  64           if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
  65             {
  66               int res = EXT (c, p, n, string_end, no_leading_period,
  67                              flags, alloca_used);
  68               if (res != -1)
  69                 return res;
  70             }
  71
  72           if (n == string_end)
  73             return FNM_NOMATCH;
  74           else if (*n == L('/') && (flags & FNM_FILE_NAME))
  75             return FNM_NOMATCH;
  76           else if (*n == L('.') && no_leading_period)
  77             return FNM_NOMATCH;
  78           break;
  79
  80         case L('\\'):
  81           if (!(flags & FNM_NOESCAPE))
  82             {
  83               c = *p++;
  84               if (c == L('\0'))
  85                 /* Trailing \ loses.  */
  86                 return FNM_NOMATCH;
  87               c = FOLD (c);
  88             }
  89           if (n == string_end || FOLD ((UCHAR) *n) != c)
  90             return FNM_NOMATCH;
  91           break;
  92
  93         case L('*'):
  94           if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
  95             {
  96               int res = EXT (c, p, n, string_end, no_leading_period,
  97                              flags, alloca_used);
  98               if (res != -1)
  99                 return res;
 100             }
 101           else if (ends != NULL)
 102             {
 103               ends->pattern = p - 1;
 104               ends->string = n;
 105               ends->no_leading_period = no_leading_period;
 106               return 0;
 107             }
 108
 109           if (n != string_end && *n == L('.') && no_leading_period)
 110             return FNM_NOMATCH;
 111
 112           for (c = *p++; c == L('?') || c == L('*'); c = *p++)
 113             {
 114               if (*p == L('(') && (flags & FNM_EXTMATCH) != 0)
 115                 {
 116                   const CHAR *endp = END (p);
 117                   if (endp != p)
 118                     {
 119                       /* This is a pattern.  Skip over it.  */
 120                       p = endp;
 121                       continue;
 122                     }
 123                 }
 124
 125               if (c == L('?'))
 126                 {
 127                   /* A ? needs to match one character.  */
 128                   if (n == string_end)
 129                     /* There isn't another character; no match.  */
 130                     return FNM_NOMATCH;
 131                   else if (*n == L('/')
 132                            && __builtin_expect (flags & FNM_FILE_NAME, 0))
 133                     /* A slash does not match a wildcard under
 134                        FNM_FILE_NAME.  */
 135                     return FNM_NOMATCH;
 136                   else
 137                     /* One character of the string is consumed in matching
 138                        this ? wildcard, so *??? won't match if there are
 139                        less than three characters.  */
 140                     ++n;
 141                 }
 142             }
 143
 144           if (c == L('\0'))
 145             /* The wildcard(s) is/are the last element of the pattern.
 146                If the name is a file name and contains another slash
 147                this means it cannot match, unless the FNM_LEADING_DIR
 148                flag is set.  */
 149             {
 150               int result = (flags & FNM_FILE_NAME) == 0 ? 0 : FNM_NOMATCH;
 151
 152               if (flags & FNM_FILE_NAME)
 153                 {
 154                   if (flags & FNM_LEADING_DIR)
 155                     result = 0;
 156                   else
 157                     {
 158                       if (MEMCHR (n, L('/'), string_end - n) == NULL)
 159                         result = 0;
 160                     }
 161                 }
 162
 163               return result;
 164             }
 165           else
 166             {
 167               const CHAR *endp;
 168               struct STRUCT end;
 169
 170               end.pattern = NULL;
 171               endp = MEMCHR (n, (flags & FNM_FILE_NAME) ? L('/') : L('\0'),
 172                              string_end - n);
 173               if (endp == NULL)
 174                 endp = string_end;
 175
 176               if (c == L('[')
 177                   || (__builtin_expect (flags & FNM_EXTMATCH, 0) != 0
 178                       && (c == L('@') || c == L('+') || c == L('!'))
 179                       && *p == L('(')))
 180                 {
 181                   int flags2 = ((flags & FNM_FILE_NAME)
 182                                 ? flags : (flags & ~FNM_PERIOD));
 183
 184                   for (--p; n < endp; ++n, no_leading_period = 0)
 185                     if (FCT (p, n, string_end, no_leading_period, flags2,
 186                              &end, alloca_used) == 0)
 187                       goto found;
 188                 }
 189               else if (c == L('/') && (flags & FNM_FILE_NAME))
 190                 {
 191                   while (n < string_end && *n != L('/'))
 192                     ++n;
 193                   if (n < string_end && *n == L('/')
 194                       && (FCT (p, n + 1, string_end, flags & FNM_PERIOD, flags,
 195                                NULL, alloca_used) == 0))
 196                     return 0;
 197                 }
 198               else
 199                 {
 200                   int flags2 = ((flags & FNM_FILE_NAME)
 201                                 ? flags : (flags & ~FNM_PERIOD));
 202
 203                   if (c == L('\\') && !(flags & FNM_NOESCAPE))
 204                     c = *p;
 205                   c = FOLD (c);
 206                   for (--p; n < endp; ++n, no_leading_period = 0)
 207                     if (FOLD ((UCHAR) *n) == c
 208                         && (FCT (p, n, string_end, no_leading_period, flags2,
 209                                  &end, alloca_used) == 0))
 210                       {
 211                       found:
 212                         if (end.pattern == NULL)
 213                           return 0;
 214                         break;
 215                       }
 216                   if (end.pattern != NULL)
 217                     {
 218                       p = end.pattern;
 219                       n = end.string;
 220                       no_leading_period = end.no_leading_period;
 221                       continue;
 222                     }
 223                 }
 224             }
 225
 226           /* If we come here no match is possible with the wildcard.  */
 227           return FNM_NOMATCH;
 228
 229         case L('['):
 230           {
 231             /* Nonzero if the sense of the character class is inverted.  */
 232             const CHAR *p_init = p;
 233             const CHAR *n_init = n;
 234             int not;
 235             CHAR cold;
 236             UCHAR fn;
 237
 238             if (posixly_correct == 0)
 239               posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
 240
 241             if (n == string_end)
 242               return FNM_NOMATCH;
 243
 244             if (*n == L('.') && no_leading_period)
 245               return FNM_NOMATCH;
 246
 247             if (*n == L('/') && (flags & FNM_FILE_NAME))
 248               /* `/' cannot be matched.  */
 249               return FNM_NOMATCH;
 250
 251             not = (*p == L('!') || (posixly_correct < 0 && *p == L('^')));
 252             if (not)
 253               ++p;
 254
 255             fn = FOLD ((UCHAR) *n);
 256
 257             c = *p++;
 258             for (;;)
 259               {
 260                 if (!(flags & FNM_NOESCAPE) && c == L('\\'))
 261                   {
 262                     if (*p == L('\0'))
 263                       return FNM_NOMATCH;
 264                     c = FOLD ((UCHAR) *p);
 265                     ++p;
 266
 267                     goto normal_bracket;
 268                   }
 269                 else if (c == L('[') && *p == L(':'))
 270                   {
 271                     /* Leave room for the null.  */
 272                     CHAR str[CHAR_CLASS_MAX_LENGTH + 1];
 273                     size_t c1 = 0;
 274 #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
 275                     wctype_t wt;
 276 #endif
 277                     const CHAR *startp = p;
 278
 279                     for (;;)
 280                       {
 281                         if (c1 == CHAR_CLASS_MAX_LENGTH)
 282                           /* The name is too long and therefore the pattern
 283                              is ill-formed.  */
 284                           return FNM_NOMATCH;
 285
 286                         c = *++p;
 287                         if (c == L(':') && p[1] == L(']'))
 288                           {
 289                             p += 2;
 290                             break;
 291                           }
 292                         if (c < L('a') || c >= L('z'))
 293                           {
 294                             /* This cannot possibly be a character class name.
 295                                Match it as a normal range.  */
 296                             p = startp;
 297                             c = L('[');
 298                             goto normal_bracket;
 299                           }
 300                         str[c1++] = c;
 301                       }
 302                     str[c1] = L('\0');
 303
 304 #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
 305                     wt = IS_CHAR_CLASS (str);
 306                     if (wt == 0)
 307                       /* Invalid character class name.  */
 308                       return FNM_NOMATCH;
 309
 310 # if defined _LIBC && ! WIDE_CHAR_VERSION
 311                     /* The following code is glibc specific but does
 312                        a good job there in speeding up the code since
 313                        we can avoid the btowc() call.  */
 314                     if (_ISCTYPE ((UCHAR) *n, wt))
 315                       goto matched;
 316 # else
 317                     if (ISWCTYPE (BTOWC ((UCHAR) *n), wt))
 318                       goto matched;
 319 # endif
 320 #else
 321                     if ((STREQ (str, L("alnum")) && ISALNUM ((UCHAR) *n))
 322                         || (STREQ (str, L("alpha")) && ISALPHA ((UCHAR) *n))
 323                         || (STREQ (str, L("blank")) && ISBLANK ((UCHAR) *n))
 324                         || (STREQ (str, L("cntrl")) && ISCNTRL ((UCHAR) *n))
 325                         || (STREQ (str, L("digit")) && ISDIGIT ((UCHAR) *n))
 326                         || (STREQ (str, L("graph")) && ISGRAPH ((UCHAR) *n))
 327                         || (STREQ (str, L("lower")) && ISLOWER ((UCHAR) *n))
 328                         || (STREQ (str, L("print")) && ISPRINT ((UCHAR) *n))
 329                         || (STREQ (str, L("punct")) && ISPUNCT ((UCHAR) *n))
 330                         || (STREQ (str, L("space")) && ISSPACE ((UCHAR) *n))
 331                         || (STREQ (str, L("upper")) && ISUPPER ((UCHAR) *n))
 332                         || (STREQ (str, L("xdigit")) && ISXDIGIT ((UCHAR) *n)))
 333                       goto matched;
 334 #endif
 335                     c = *p++;
 336                   }
 337 #ifdef _LIBC
 338                 else if (c == L('[') && *p == L('='))
 339                   {
 340                     /* It's important that STR be a scalar variable rather
 341                        than a one-element array, because GCC (at least 4.9.2
 342                        -O2 on x86-64) can be confused by the array and
 343                        diagnose a "used uninitialized" in a dead branch in the
 344                        findidx function.  */
 345                     UCHAR str;
 346                     uint32_t nrules =
 347                       _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
 348                     const CHAR *startp = p;
 349
 350                     c = *++p;
 351                     if (c == L('\0'))
 352                       {
 353                         p = startp;
 354                         c = L('[');
 355                         goto normal_bracket;
 356                       }
 357                     str = c;
 358
 359                     c = *++p;
 360                     if (c != L('=') || p[1] != L(']'))
 361                       {
 362                         p = startp;
 363                         c = L('[');
 364                         goto normal_bracket;
 365                       }
 366                     p += 2;
 367
 368                     if (nrules == 0)
 369                       {
 370                         if ((UCHAR) *n == str)
 371                           goto matched;
 372                       }
 373                     else
 374                       {
 375                         const int32_t *table;
 376 # if WIDE_CHAR_VERSION
 377                         const int32_t *weights;
 378                         const wint_t *extra;
 379 # else
 380                         const unsigned char *weights;
 381                         const unsigned char *extra;
 382 # endif
 383                         const int32_t *indirect;
 384                         int32_t idx;
 385                         const UCHAR *cp = (const UCHAR *) &str;
 386
 387 # if WIDE_CHAR_VERSION
 388                         table = (const int32_t *)
 389                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
 390                         weights = (const int32_t *)
 391                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
 392                         extra = (const wint_t *)
 393                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
 394                         indirect = (const int32_t *)
 395                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
 396 # else
 397                         table = (const int32_t *)
 398                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
 399                         weights = (const unsigned char *)
 400                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
 401                         extra = (const unsigned char *)
 402                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
 403                         indirect = (const int32_t *)
 404                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
 405 # endif
 406
 407                         idx = FINDIDX (table, indirect, extra, &cp, 1);
 408                         if (idx != 0)
 409                           {
 410                             /* We found a table entry.  Now see whether the
 411                                character we are currently at has the same
 412                                equivalence class value.  */
 413                             int len = weights[idx & 0xffffff];
 414                             int32_t idx2;
 415                             const UCHAR *np = (const UCHAR *) n;
 416
 417                             idx2 = FINDIDX (table, indirect, extra,
 418                                             &np, string_end - n);
 419                             if (idx2 != 0
 420                                 && (idx >> 24) == (idx2 >> 24)
 421                                 && len == weights[idx2 & 0xffffff])
 422                               {
 423                                 int cnt = 0;
 424
 425                                 idx &= 0xffffff;
 426                                 idx2 &= 0xffffff;
 427
 428                                 while (cnt < len
 429                                        && (weights[idx + 1 + cnt]
 430                                            == weights[idx2 + 1 + cnt]))
 431                                   ++cnt;
 432
 433                                 if (cnt == len)
 434                                   goto matched;
 435                               }
 436                           }
 437                       }
 438
 439                     c = *p++;
 440                   }
 441 #endif
 442                 else if (c == L('\0'))
 443                   {
 444                     /* [ unterminated, treat as normal character.  */
 445                     p = p_init;
 446                     n = n_init;
 447                     c = L('[');
 448                     goto normal_match;
 449                   }
 450                 else
 451                   {
 452                     int is_range = 0;
 453
 454 #ifdef _LIBC
 455                     int is_seqval = 0;
 456
 457                     if (c == L('[') && *p == L('.'))
 458                       {
 459                         uint32_t nrules =
 460                           _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
 461                         const CHAR *startp = p;
 462                         size_t c1 = 0;
 463
 464                         while (1)
 465                           {
 466                             c = *++p;
 467                             if (c == L('.') && p[1] == L(']'))
 468                               {
 469                                 p += 2;
 470                                 break;
 471                               }
 472                             if (c == '\0')
 473                               return FNM_NOMATCH;
 474                             ++c1;
 475                           }
 476
 477                         /* We have to handle the symbols differently in
 478                            ranges since then the collation sequence is
 479                            important.  */
 480                         is_range = *p == L('-') && p[1] != L('\0');
 481
 482                         if (nrules == 0)
 483                           {
 484                             /* There are no names defined in the collation
 485                                data.  Therefore we only accept the trivial
 486                                names consisting of the character itself.  */
 487                             if (c1 != 1)
 488                               return FNM_NOMATCH;
 489
 490                             if (!is_range && *n == startp[1])
 491                               goto matched;
 492
 493                             cold = startp[1];
 494                             c = *p++;
 495                           }
 496                         else
 497                           {
 498                             int32_t table_size;
 499                             const int32_t *symb_table;
 500 # if WIDE_CHAR_VERSION
 501                             char str[c1];
 502                             unsigned int strcnt;
 503 # else
 504 #  define str (startp + 1)
 505 # endif
 506                             const unsigned char *extra;
 507                             int32_t idx;
 508                             int32_t elem;
 509                             int32_t second;
 510                             int32_t hash;
 511
 512 # if WIDE_CHAR_VERSION
 513                             /* We have to convert the name to a single-byte
 514                                string.  This is possible since the names
 515                                consist of ASCII characters and the internal
 516                                representation is UCS4.  */
 517                             for (strcnt = 0; strcnt < c1; ++strcnt)
 518                               str[strcnt] = startp[1 + strcnt];
 519 #endif
 520
 521                             table_size =
 522                               _NL_CURRENT_WORD (LC_COLLATE,
 523                                                 _NL_COLLATE_SYMB_HASH_SIZEMB);
 524                             symb_table = (const int32_t *)
 525                               _NL_CURRENT (LC_COLLATE,
 526                                            _NL_COLLATE_SYMB_TABLEMB);
 527                             extra = (const unsigned char *)
 528                               _NL_CURRENT (LC_COLLATE,
 529                                            _NL_COLLATE_SYMB_EXTRAMB);
 530
 531                             /* Locate the character in the hashing table.  */
 532                             hash = elem_hash (str, c1);
 533
 534                             idx = 0;
 535                             elem = hash % table_size;
 536                             if (symb_table[2 * elem] != 0)
 537                               {
 538                                 second = hash % (table_size - 2) + 1;
 539
 540                                 do
 541                                   {
 542                                     /* First compare the hashing value.  */
 543                                     if (symb_table[2 * elem] == hash
 544                                         && (c1
 545                                             == extra[symb_table[2 * elem + 1]])
 546                                         && memcmp (str,
 547                                                    &extra[symb_table[2 * elem
 548                                                                      + 1]
 549                                                           + 1], c1) == 0)
 550                                       {
 551                                         /* Yep, this is the entry.  */
 552                                         idx = symb_table[2 * elem + 1];
 553                                         idx += 1 + extra[idx];
 554                                         break;
 555                                       }
 556
 557                                     /* Next entry.  */
 558                                     elem += second;
 559                                   }
 560                                 while (symb_table[2 * elem] != 0);
 561                               }
 562
 563                             if (symb_table[2 * elem] != 0)
 564                               {
 565                                 /* Compare the byte sequence but only if
 566                                    this is not part of a range.  */
 567 # if WIDE_CHAR_VERSION
 568                                 int32_t *wextra;
 569
 570                                 idx += 1 + extra[idx];
 571                                 /* Adjust for the alignment.  */
 572                                 idx = (idx + 3) & ~3;
 573
 574                                 wextra = (int32_t *) &extra[idx + 4];
 575 # endif
 576
 577                                 if (! is_range)
 578                                   {
 579 # if WIDE_CHAR_VERSION
 580                                     for (c1 = 0;
 581                                          (int32_t) c1 < wextra[idx];
 582                                          ++c1)
 583                                       if (n[c1] != wextra[1 + c1])
 584                                         break;
 585
 586                                     if ((int32_t) c1 == wextra[idx])
 587                                       goto matched;
 588 # else
 589                                     for (c1 = 0; c1 < extra[idx]; ++c1)
 590                                       if (n[c1] != extra[1 + c1])
 591                                         break;
 592
 593                                     if (c1 == extra[idx])
 594                                       goto matched;
 595 # endif
 596                                   }
 597
 598                                 /* Get the collation sequence value.  */
 599                                 is_seqval = 1;
 600 # if WIDE_CHAR_VERSION
 601                                 cold = wextra[1 + wextra[idx]];
 602 # else
 603                                 /* Adjust for the alignment.  */
 604                                 idx += 1 + extra[idx];
 605                                 idx = (idx + 3) & ~4;
 606                                 cold = *((int32_t *) &extra[idx]);
 607 # endif
 608
 609                                 c = *p++;
 610                               }
 611                             else if (c1 == 1)
 612                               {
 613                                 /* No valid character.  Match it as a
 614                                    single byte.  */
 615                                 if (!is_range && *n == str[0])
 616                                   goto matched;
 617
 618                                 cold = str[0];
 619                                 c = *p++;
 620                               }
 621                             else
 622                               return FNM_NOMATCH;
 623                           }
 624                       }
 625                     else
 626 # undef str
 627 #endif
 628                       {
 629                         c = FOLD (c);
 630                       normal_bracket:
 631
 632                         /* We have to handle the symbols differently in
 633                            ranges since then the collation sequence is
 634                            important.  */
 635                         is_range = (*p == L('-') && p[1] != L('\0')
 636                                     && p[1] != L(']'));
 637
 638                         if (!is_range && c == fn)
 639                           goto matched;
 640
 641                         /* This is needed if we goto normal_bracket; from
 642                            outside of is_seqval's scope.  */
 643                         is_seqval = 0;
 644                         cold = c;
 645                         c = *p++;
 646                       }
 647
 648                     if (c == L('-') && *p != L(']'))
 649                       {
 650 #if _LIBC
 651                         /* We have to find the collation sequence
 652                            value for C.  Collation sequence is nothing
 653                            we can regularly access.  The sequence
 654                            value is defined by the order in which the
 655                            definitions of the collation values for the
 656                            various characters appear in the source
 657                            file.  A strange concept, nowhere
 658                            documented.  */
 659                         uint32_t fcollseq;
 660                         uint32_t lcollseq;
 661                         UCHAR cend = *p++;
 662
 663 # if WIDE_CHAR_VERSION
 664                         /* Search in the `names' array for the characters.  */
 665                         fcollseq = __collseq_table_lookup (collseq, fn);
 666                         if (fcollseq == ~((uint32_t) 0))
 667                           /* XXX We don't know anything about the character
 668                              we are supposed to match.  This means we are
 669                              failing.  */
 670                           goto range_not_matched;
 671
 672                         if (is_seqval)
 673                           lcollseq = cold;
 674                         else
 675                           lcollseq = __collseq_table_lookup (collseq, cold);
 676 # else
 677                         fcollseq = collseq[fn];
 678                         lcollseq = is_seqval ? cold : collseq[(UCHAR) cold];
 679 # endif
 680
 681                         is_seqval = 0;
 682                         if (cend == L('[') && *p == L('.'))
 683                           {
 684                             uint32_t nrules =
 685                               _NL_CURRENT_WORD (LC_COLLATE,
 686                                                 _NL_COLLATE_NRULES);
 687                             const CHAR *startp = p;
 688                             size_t c1 = 0;
 689
 690                             while (1)
 691                               {
 692                                 c = *++p;
 693                                 if (c == L('.') && p[1] == L(']'))
 694                                   {
 695                                     p += 2;
 696                                     break;
 697                                   }
 698                                 if (c == '\0')
 699                                   return FNM_NOMATCH;
 700                                 ++c1;
 701                               }
 702
 703                             if (nrules == 0)
 704                               {
 705                                 /* There are no names defined in the
 706                                    collation data.  Therefore we only
 707                                    accept the trivial names consisting
 708                                    of the character itself.  */
 709                                 if (c1 != 1)
 710                                   return FNM_NOMATCH;
 711
 712                                 cend = startp[1];
 713                               }
 714                             else
 715                               {
 716                                 int32_t table_size;
 717                                 const int32_t *symb_table;
 718 # if WIDE_CHAR_VERSION
 719                                 char str[c1];
 720                                 unsigned int strcnt;
 721 # else
 722 #  define str (startp + 1)
 723 # endif
 724                                 const unsigned char *extra;
 725                                 int32_t idx;
 726                                 int32_t elem;
 727                                 int32_t second;
 728                                 int32_t hash;
 729
 730 # if WIDE_CHAR_VERSION
 731                                 /* We have to convert the name to a single-byte
 732                                    string.  This is possible since the names
 733                                    consist of ASCII characters and the internal
 734                                    representation is UCS4.  */
 735                                 for (strcnt = 0; strcnt < c1; ++strcnt)
 736                                   str[strcnt] = startp[1 + strcnt];
 737 # endif
 738
 739                                 table_size =
 740                                   _NL_CURRENT_WORD (LC_COLLATE,
 741                                                     _NL_COLLATE_SYMB_HASH_SIZEMB);
 742                                 symb_table = (const int32_t *)
 743                                   _NL_CURRENT (LC_COLLATE,
 744                                                _NL_COLLATE_SYMB_TABLEMB);
 745                                 extra = (const unsigned char *)
 746                                   _NL_CURRENT (LC_COLLATE,
 747                                                _NL_COLLATE_SYMB_EXTRAMB);
 748
 749                                 /* Locate the character in the hashing
 750                                    table.  */
 751                                 hash = elem_hash (str, c1);
 752
 753                                 idx = 0;
 754                                 elem = hash % table_size;
 755                                 if (symb_table[2 * elem] != 0)
 756                                   {
 757                                     second = hash % (table_size - 2) + 1;
 758
 759                                     do
 760                                       {
 761                                         /* First compare the hashing value.  */
 762                                         if (symb_table[2 * elem] == hash
 763                                             && (c1
 764                                                 == extra[symb_table[2 * elem + 1]])
 765                                             && memcmp (str,
 766                                                        &extra[symb_table[2 * elem + 1]
 767                                                               + 1], c1) == 0)
 768                                           {
 769                                             /* Yep, this is the entry.  */
 770                                             idx = symb_table[2 * elem + 1];
 771                                             idx += 1 + extra[idx];
 772                                             break;
 773                                           }
 774
 775                                         /* Next entry.  */
 776                                         elem += second;
 777                                       }
 778                                     while (symb_table[2 * elem] != 0);
 779                                   }
 780
 781                                 if (symb_table[2 * elem] != 0)
 782                                   {
 783                                     /* Compare the byte sequence but only if
 784                                        this is not part of a range.  */
 785 # if WIDE_CHAR_VERSION
 786                                     int32_t *wextra;
 787
 788                                     idx += 1 + extra[idx];
 789                                     /* Adjust for the alignment.  */
 790                                     idx = (idx + 3) & ~4;
 791
 792                                     wextra = (int32_t *) &extra[idx + 4];
 793 # endif
 794                                     /* Get the collation sequence value.  */
 795                                     is_seqval = 1;
 796 # if WIDE_CHAR_VERSION
 797                                     cend = wextra[1 + wextra[idx]];
 798 # else
 799                                     /* Adjust for the alignment.  */
 800                                     idx += 1 + extra[idx];
 801                                     idx = (idx + 3) & ~4;
 802                                     cend = *((int32_t *) &extra[idx]);
 803 # endif
 804                                   }
 805                                 else if (symb_table[2 * elem] != 0 && c1 == 1)
 806                                   {
 807                                     cend = str[0];
 808                                     c = *p++;
 809                                   }
 810                                 else
 811                                   return FNM_NOMATCH;
 812                               }
 813 # undef str
 814                           }
 815                         else
 816                           {
 817                             if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
 818                               cend = *p++;
 819                             if (cend == L('\0'))
 820                               return FNM_NOMATCH;
 821                             cend = FOLD (cend);
 822                           }
 823
 824                         /* XXX It is not entirely clear to me how to handle
 825                            characters which are not mentioned in the
 826                            collation specification.  */
 827                         if (
 828 # if WIDE_CHAR_VERSION
 829                             lcollseq == 0xffffffff ||
 830 # endif
 831                             lcollseq <= fcollseq)
 832                           {
 833                             /* We have to look at the upper bound.  */
 834                             uint32_t hcollseq;
 835
 836                             if (is_seqval)
 837                               hcollseq = cend;
 838                             else
 839                               {
 840 # if WIDE_CHAR_VERSION
 841                                 hcollseq =
 842                                   __collseq_table_lookup (collseq, cend);
 843                                 if (hcollseq == ~((uint32_t) 0))
 844                                   {
 845                                     /* Hum, no information about the upper
 846                                        bound.  The matching succeeds if the
 847                                        lower bound is matched exactly.  */
 848                                     if (lcollseq != fcollseq)
 849                                       goto range_not_matched;
 850
 851                                     goto matched;
 852                                   }
 853 # else
 854                                 hcollseq = collseq[cend];
 855 # endif
 856                               }
 857
 858                             if (lcollseq <= hcollseq && fcollseq <= hcollseq)
 859                               goto matched;
 860                           }
 861 # if WIDE_CHAR_VERSION
 862                       range_not_matched:
 863 # endif
 864 #else
 865                         /* We use a boring value comparison of the character
 866                            values.  This is better than comparing using
 867                            `strcoll' since the latter would have surprising
 868                            and sometimes fatal consequences.  */
 869                         UCHAR cend = *p++;
 870
 871                         if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
 872                           cend = *p++;
 873                         if (cend == L('\0'))
 874                           return FNM_NOMATCH;
 875
 876                         /* It is a range.  */
 877                         if (cold <= fn && fn <= cend)
 878                           goto matched;
 879 #endif
 880
 881                         c = *p++;
 882                       }
 883                   }
 884
 885                 if (c == L(']'))
 886                   break;
 887               }
 888
 889             if (!not)
 890               return FNM_NOMATCH;
 891             break;
 892
 893           matched:
 894             /* Skip the rest of the [...] that already matched.  */
 895             while ((c = *p++) != L (']'))
 896               {
 897                 if (c == L('\0'))
 898                   /* [... (unterminated) loses.  */
 899                   return FNM_NOMATCH;
 900
 901                 if (!(flags & FNM_NOESCAPE) && c == L('\\'))
 902                   {
 903                     if (*p == L('\0'))
 904                       return FNM_NOMATCH;
 905                     /* XXX 1003.2d11 is unclear if this is right.  */
 906                     ++p;
 907                   }
 908                 else if (c == L('[') && *p == L(':'))
 909                   {
 910                     int c1 = 0;
 911                     const CHAR *startp = p;
 912
 913                     while (1)
 914                       {
 915                         c = *++p;
 916                         if (++c1 == CHAR_CLASS_MAX_LENGTH)
 917                           return FNM_NOMATCH;
 918
 919                         if (*p == L(':') && p[1] == L(']'))
 920                           break;
 921
 922                         if (c < L('a') || c >= L('z'))
 923                           {
 924                             p = startp - 2;
 925                             break;
 926                           }
 927                       }
 928                     p += 2;
 929                   }
 930                 else if (c == L('[') && *p == L('='))
 931                   {
 932                     c = *++p;
 933                     if (c == L('\0'))
 934                       return FNM_NOMATCH;
 935                     c = *++p;
 936                     if (c != L('=') || p[1] != L(']'))
 937                       return FNM_NOMATCH;
 938                     p += 2;
 939                   }
 940                 else if (c == L('[') && *p == L('.'))
 941                   {
 942                     while (1)
 943                       {
 944                         c = *++p;
 945                         if (c == L('\0'))
 946                           return FNM_NOMATCH;
 947
 948                         if (c == L('.') && p[1] == L(']'))
 949                           break;
 950                       }
 951                     p += 2;
 952                   }
 953               }
 954             if (not)
 955               return FNM_NOMATCH;
 956           }
 957           break;
 958
 959         case L('+'):
 960         case L('@'):
 961         case L('!'):
 962           if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
 963             {
 964               int res = EXT (c, p, n, string_end, no_leading_period, flags,
 965                              alloca_used);
 966               if (res != -1)
 967                 return res;
 968             }
 969           goto normal_match;
 970
 971         case L('/'):
 972           if (NO_LEADING_PERIOD (flags))
 973             {
 974               if (n == string_end || c != (UCHAR) *n)
 975                 return FNM_NOMATCH;
 976
 977               new_no_leading_period = 1;
 978               break;
 979             }
 980           /* FALLTHROUGH */
 981         default:
 982         normal_match:
 983           if (n == string_end || c != FOLD ((UCHAR) *n))
 984             return FNM_NOMATCH;
 985         }
 986
 987       no_leading_period = new_no_leading_period;
 988       ++n;
 989     }
 990
 991   if (n == string_end)
 992     return 0;
 993
 994   if ((flags & FNM_LEADING_DIR) && n != string_end && *n == L('/'))
 995     /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz".  */
 996     return 0;
 997
 998   return FNM_NOMATCH;
 999 }
1000
1001
1002 static const CHAR *
1003 internal_function
1004 END (const CHAR *pattern)
1005 {
1006   const CHAR *p = pattern;
1007
1008   while (1)
1009     if (*++p == L('\0'))
1010       /* This is an invalid pattern.  */
1011       return pattern;
1012     else if (*p == L('['))
1013       {
1014         /* Handle brackets special.  */
1015         if (posixly_correct == 0)
1016           posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
1017
1018         /* Skip the not sign.  We have to recognize it because of a possibly
1019            following ']'.  */
1020         if (*++p == L('!') || (posixly_correct < 0 && *p == L('^')))
1021           ++p;
1022         /* A leading ']' is recognized as such.  */
1023         if (*p == L(']'))
1024           ++p;
1025         /* Skip over all characters of the list.  */
1026         while (*p != L(']'))
1027           if (*p++ == L('\0'))
1028             /* This is no valid pattern.  */
1029             return pattern;
1030       }
1031     else if ((*p == L('?') || *p == L('*') || *p == L('+') || *p == L('@')
1032               || *p == L('!')) && p[1] == L('('))
1033       {
1034         p = END (p + 1);
1035         if (*p == L('\0'))
1036           /* This is an invalid pattern.  */
1037           return pattern;
1038       }
1039     else if (*p == L(')'))
1040       break;
1041
1042   return p + 1;
1043 }
1044
1045
1046 static int
1047 internal_function
1048 EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end,
1049      int no_leading_period, int flags, size_t alloca_used)
1050 {
1051   const CHAR *startp;
1052   int level;
1053   struct patternlist
1054   {
1055     struct patternlist *next;
1056     CHAR malloced;
1057     CHAR str[0];
1058   } *list = NULL;
1059   struct patternlist **lastp = &list;
1060   size_t pattern_len = STRLEN (pattern);
1061   int any_malloced = 0;
1062   const CHAR *p;
1063   const CHAR *rs;
1064   int retval = 0;
1065
1066   /* Parse the pattern.  Store the individual parts in the list.  */
1067   level = 0;
1068   for (startp = p = pattern + 1; level >= 0; ++p)
1069     if (*p == L('\0'))
1070       {
1071         /* This is an invalid pattern.  */
1072         retval = -1;
1073         goto out;
1074       }
1075     else if (*p == L('['))
1076       {
1077         /* Handle brackets special.  */
1078         if (posixly_correct == 0)
1079           posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
1080
1081         /* Skip the not sign.  We have to recognize it because of a possibly
1082            following ']'.  */
1083         if (*++p == L('!') || (posixly_correct < 0 && *p == L('^')))
1084           ++p;
1085         /* A leading ']' is recognized as such.  */
1086         if (*p == L(']'))
1087           ++p;
1088         /* Skip over all characters of the list.  */
1089         while (*p != L(']'))
1090           if (*p++ == L('\0'))
1091             {
1092               /* This is no valid pattern.  */
1093               retval = -1;
1094               goto out;
1095             }
1096       }
1097     else if ((*p == L('?') || *p == L('*') || *p == L('+') || *p == L('@')
1098               || *p == L('!')) && p[1] == L('('))
1099       /* Remember the nesting level.  */
1100       ++level;
1101     else if (*p == L(')'))
1102       {
1103         if (level-- == 0)
1104           {
1105             /* This means we found the end of the pattern.  */
1106 #define NEW_PATTERN \
1107             struct patternlist *newp;                                         \
1108             size_t slen = (opt == L('?') || opt == L('@')                     \
1109                            ? pattern_len : (p - startp + 1));                 \
1110             slen = sizeof (struct patternlist) + (slen * sizeof (CHAR));      \
1111             int malloced = ! __libc_use_alloca (alloca_used + slen);          \
1112             if (__builtin_expect (malloced, 0))                               \
1113               {                                                               \
1114                 newp = malloc (slen);                                         \
1115                 if (newp == NULL)                                             \
1116                   {                                                           \
1117                     retval = -2;                                              \
1118                     goto out;                                                 \
1119                   }                                                           \
1120                 any_malloced = 1;                                             \
1121               }                                                               \
1122             else                                                              \
1123               newp = alloca_account (slen, alloca_used);                      \
1124             newp->next = NULL;                                                \
1125             newp->malloced = malloced;                                        \
1126             *((CHAR *) MEMPCPY (newp->str, startp, p - startp)) = L('\0');    \
1127             *lastp = newp;                                                    \
1128             lastp = &newp->next
1129             NEW_PATTERN;
1130           }
1131       }
1132     else if (*p == L('|'))
1133       {
1134         if (level == 0)
1135           {
1136             NEW_PATTERN;
1137             startp = p + 1;
1138           }
1139       }
1140   assert (list != NULL);
1141   assert (p[-1] == L(')'));
1142 #undef NEW_PATTERN
1143
1144   switch (opt)
1145     {
1146     case L('*'):
1147       if (FCT (p, string, string_end, no_leading_period, flags, NULL,
1148                alloca_used) == 0)
1149         goto success;
1150       /* FALLTHROUGH */
1151
1152     case L('+'):
1153       do
1154         {
1155           for (rs = string; rs <= string_end; ++rs)
1156             /* First match the prefix with the current pattern with the
1157                current pattern.  */
1158             if (FCT (list->str, string, rs, no_leading_period,
1159                      flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1160                      NULL, alloca_used) == 0
1161                 /* This was successful.  Now match the rest with the rest
1162                    of the pattern.  */
1163                 && (FCT (p, rs, string_end,
1164                          rs == string
1165                          ? no_leading_period
1166                          : rs[-1] == '/' && NO_LEADING_PERIOD (flags) ? 1 : 0,
1167                          flags & FNM_FILE_NAME
1168                          ? flags : flags & ~FNM_PERIOD, NULL, alloca_used) == 0
1169                     /* This didn't work.  Try the whole pattern.  */
1170                     || (rs != string
1171                         && FCT (pattern - 1, rs, string_end,
1172                                 rs == string
1173                                 ? no_leading_period
1174                                 : (rs[-1] == '/' && NO_LEADING_PERIOD (flags)
1175                                    ? 1 : 0),
1176                                 flags & FNM_FILE_NAME
1177                                 ? flags : flags & ~FNM_PERIOD, NULL,
1178                                 alloca_used) == 0)))
1179               /* It worked.  Signal success.  */
1180               goto success;
1181         }
1182       while ((list = list->next) != NULL);
1183
1184       /* None of the patterns lead to a match.  */
1185       retval = FNM_NOMATCH;
1186       break;
1187
1188     case L('?'):
1189       if (FCT (p, string, string_end, no_leading_period, flags, NULL,
1190                alloca_used) == 0)
1191         goto success;
1192       /* FALLTHROUGH */
1193
1194     case L('@'):
1195       do
1196         /* I cannot believe it but `strcat' is actually acceptable
1197            here.  Match the entire string with the prefix from the
1198            pattern list and the rest of the pattern following the
1199            pattern list.  */
1200         if (FCT (STRCAT (list->str, p), string, string_end,
1201                  no_leading_period,
1202                  flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1203                  NULL, alloca_used) == 0)
1204           /* It worked.  Signal success.  */
1205           goto success;
1206       while ((list = list->next) != NULL);
1207
1208       /* None of the patterns lead to a match.  */
1209       retval = FNM_NOMATCH;
1210       break;
1211
1212     case L('!'):
1213       for (rs = string; rs <= string_end; ++rs)
1214         {
1215           struct patternlist *runp;
1216
1217           for (runp = list; runp != NULL; runp = runp->next)
1218             if (FCT (runp->str, string, rs,  no_leading_period,
1219                      flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1220                      NULL, alloca_used) == 0)
1221               break;
1222
1223           /* If none of the patterns matched see whether the rest does.  */
1224           if (runp == NULL
1225               && (FCT (p, rs, string_end,
1226                        rs == string
1227                        ? no_leading_period
1228                        : rs[-1] == '/' && NO_LEADING_PERIOD (flags) ? 1 : 0,
1229                        flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1230                        NULL, alloca_used) == 0))
1231             /* This is successful.  */
1232             goto success;
1233         }
1234
1235       /* None of the patterns together with the rest of the pattern
1236          lead to a match.  */
1237       retval = FNM_NOMATCH;
1238       break;
1239
1240     default:
1241       assert (! "Invalid extended matching operator");
1242       retval = -1;
1243       break;
1244     }
1245
1246  success:
1247  out:
1248   if (any_malloced)
1249     while (list != NULL)
1250       {
1251         struct patternlist *old = list;
1252         list = list->next;
1253         if (old->malloced)
1254           free (old);
1255       }
1256
1257   return retval;
1258 }
1259
1260
/* Remove the macros that parameterize the code above (character types,
   function names, string helpers, ...).  Presumably this allows the
   file to be included again with a different set of definitions, e.g.
   another value of WIDE_CHAR_VERSION -- confirm against the including
   file.  */
#undef FOLD
#undef CHAR
#undef UCHAR
#undef INT
#undef FCT
#undef EXT
#undef END
#undef STRUCT
#undef MEMPCPY
#undef MEMCHR
#undef STRCOLL
#undef STRLEN
#undef STRCAT
#undef L
#undef BTOWC
#undef WIDE_CHAR_VERSION
#undef FINDIDX