posix/fnmatch_loop.c

   1 /* Copyright (C) 1991-2015 Free Software Foundation, Inc.
   2    This file is part of the GNU C Library.
   3
   4    The GNU C Library is free software; you can redistribute it and/or
   5    modify it under the terms of the GNU Lesser General Public
   6    License as published by the Free Software Foundation; either
   7    version 2.1 of the License, or (at your option) any later version.
   8
   9    The GNU C Library is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  12    Lesser General Public License for more details.
  13
  14    You should have received a copy of the GNU Lesser General Public
  15    License along with the GNU C Library; if not, see
  16    <http://www.gnu.org/licenses/>.  */
  17
  18 #include <stdint.h>
  19
   20 struct STRUCT
   21 {
        /* Resume point that FCT hands back through its ENDS argument: when
           ENDS is non-null and FCT reaches a `*' that does not start an
           extended pattern, it fills this in and returns 0, and the caller
           continues matching from the recorded state.  */

        /* Address of that `*' in the pattern (FCT stores p - 1).  The caller
           sets this to NULL before the recursive call; if it is still NULL
           after that call returned 0, the caller reports a match.  */
   22   const CHAR *pattern;
        /* Position in the string that had been reached when the `*' was
           seen.  */
   23   const CHAR *string;
        /* Value of FCT's no_leading_period state at that point.  */
   24   int no_leading_period;
   25 };
  26
   27 /* Match STRING against the filename pattern PATTERN, returning zero if
   28    it matches, nonzero if not.
         STRING_END is the end of STRING; the matcher compares its scan
         pointer against it.  A nonzero NO_LEADING_PERIOD means a `.' at the
         current string position may not be matched by `?', `*' or a bracket
         expression.  FLAGS holds the FNM_* bits.  If ENDS is non-null, FCT
         stops at the first `*' that does not start an extended pattern,
         records the resume point in *ENDS and returns 0.  ALLOCA_USED is
         forwarded unchanged to EXT and to recursive FCT calls.  */
   29 static int FCT (const CHAR *pattern, const CHAR *string,
   30                 const CHAR *string_end, int no_leading_period, int flags,
   31                 struct STRUCT *ends, size_t alloca_used)
   32      internal_function;
      /* Extended-pattern matcher.  FCT calls it when FNM_EXTMATCH is set and
         the current pattern character (passed as OPT) is followed by `('.
         A result of -1 makes FCT fall back to its ordinary handling of that
         character; any other result is returned to FCT's caller as is.  */
   33 static int EXT (INT opt, const CHAR *pattern, const CHAR *string,
   34                 const CHAR *string_end, int no_leading_period, int flags,
   35                 size_t alloca_used)
   36      internal_function;
      /* Called by FCT with PATTERNP pointing at a `(' while skipping
         wildcards under FNM_EXTMATCH.  If the result differs from PATTERNP,
         FCT treats the text up to the result as an extended pattern and
         continues from there.  NOTE(review): presumably returns PATTERNP
         itself when no well-formed pattern starts there; confirm against the
         definition.  */
   37 static const CHAR *END (const CHAR *patternp) internal_function;
  38
  39 static int
  40 internal_function
  41 FCT (pattern, string, string_end, no_leading_period, flags, ends, alloca_used)
  42      const CHAR *pattern;
  43      const CHAR *string;
  44      const CHAR *string_end;
  45      int no_leading_period;
  46      int flags;
  47      struct STRUCT *ends;
  48      size_t alloca_used;
  49 {
  50   const CHAR *p = pattern, *n = string;
  51   UCHAR c;
  52 #ifdef _LIBC
  53 # if WIDE_CHAR_VERSION
  54   const char *collseq = (const char *)
  55     _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC);
  56 # else
  57   const UCHAR *collseq = (const UCHAR *)
  58     _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQMB);
  59 # endif
  60 #endif
  61
  62   while ((c = *p++) != L('\0'))
  63     {
  64       int new_no_leading_period = 0;
  65       c = FOLD (c);
  66
  67       switch (c)
  68         {
  69         case L('?'):
  70           if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
  71             {
  72               int res = EXT (c, p, n, string_end, no_leading_period,
  73                              flags, alloca_used);
  74               if (res != -1)
  75                 return res;
  76             }
  77
  78           if (n == string_end)
  79             return FNM_NOMATCH;
  80           else if (*n == L('/') && (flags & FNM_FILE_NAME))
  81             return FNM_NOMATCH;
  82           else if (*n == L('.') && no_leading_period)
  83             return FNM_NOMATCH;
  84           break;
  85
  86         case L('\\'):
  87           if (!(flags & FNM_NOESCAPE))
  88             {
  89               c = *p++;
  90               if (c == L('\0'))
  91                 /* Trailing \ loses.  */
  92                 return FNM_NOMATCH;
  93               c = FOLD (c);
  94             }
  95           if (n == string_end || FOLD ((UCHAR) *n) != c)
  96             return FNM_NOMATCH;
  97           break;
  98
  99         case L('*'):
 100           if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
 101             {
 102               int res = EXT (c, p, n, string_end, no_leading_period,
 103                              flags, alloca_used);
 104               if (res != -1)
 105                 return res;
 106             }
 107           else if (ends != NULL)
 108             {
 109               ends->pattern = p - 1;
 110               ends->string = n;
 111               ends->no_leading_period = no_leading_period;
 112               return 0;
 113             }
 114
 115           if (n != string_end && *n == L('.') && no_leading_period)
 116             return FNM_NOMATCH;
 117
 118           for (c = *p++; c == L('?') || c == L('*'); c = *p++)
 119             {
 120               if (*p == L('(') && (flags & FNM_EXTMATCH) != 0)
 121                 {
 122                   const CHAR *endp = END (p);
 123                   if (endp != p)
 124                     {
 125                       /* This is a pattern.  Skip over it.  */
 126                       p = endp;
 127                       continue;
 128                     }
 129                 }
 130
 131               if (c == L('?'))
 132                 {
 133                   /* A ? needs to match one character.  */
 134                   if (n == string_end)
 135                     /* There isn't another character; no match.  */
 136                     return FNM_NOMATCH;
 137                   else if (*n == L('/')
 138                            && __builtin_expect (flags & FNM_FILE_NAME, 0))
 139                     /* A slash does not match a wildcard under
 140                        FNM_FILE_NAME.  */
 141                     return FNM_NOMATCH;
 142                   else
 143                     /* One character of the string is consumed in matching
 144                        this ? wildcard, so *??? won't match if there are
  145                        fewer than three characters.  */
 146                     ++n;
 147                 }
 148             }
 149
 150           if (c == L('\0'))
 151             /* The wildcard(s) is/are the last element of the pattern.
 152                If the name is a file name and contains another slash
 153                this means it cannot match, unless the FNM_LEADING_DIR
 154                flag is set.  */
 155             {
 156               int result = (flags & FNM_FILE_NAME) == 0 ? 0 : FNM_NOMATCH;
 157
 158               if (flags & FNM_FILE_NAME)
 159                 {
 160                   if (flags & FNM_LEADING_DIR)
 161                     result = 0;
 162                   else
 163                     {
 164                       if (MEMCHR (n, L('/'), string_end - n) == NULL)
 165                         result = 0;
 166                     }
 167                 }
 168
 169               return result;
 170             }
 171           else
 172             {
 173               const CHAR *endp;
 174               struct STRUCT end;
 175
 176               end.pattern = NULL;
 177               endp = MEMCHR (n, (flags & FNM_FILE_NAME) ? L('/') : L('\0'),
 178                              string_end - n);
 179               if (endp == NULL)
 180                 endp = string_end;
 181
 182               if (c == L('[')
 183                   || (__builtin_expect (flags & FNM_EXTMATCH, 0) != 0
 184                       && (c == L('@') || c == L('+') || c == L('!'))
 185                       && *p == L('(')))
 186                 {
 187                   int flags2 = ((flags & FNM_FILE_NAME)
 188                                 ? flags : (flags & ~FNM_PERIOD));
 189
 190                   for (--p; n < endp; ++n, no_leading_period = 0)
 191                     if (FCT (p, n, string_end, no_leading_period, flags2,
 192                              &end, alloca_used) == 0)
 193                       goto found;
 194                 }
 195               else if (c == L('/') && (flags & FNM_FILE_NAME))
 196                 {
 197                   while (n < string_end && *n != L('/'))
 198                     ++n;
 199                   if (n < string_end && *n == L('/')
 200                       && (FCT (p, n + 1, string_end, flags & FNM_PERIOD, flags,
 201                                NULL, alloca_used) == 0))
 202                     return 0;
 203                 }
 204               else
 205                 {
 206                   int flags2 = ((flags & FNM_FILE_NAME)
 207                                 ? flags : (flags & ~FNM_PERIOD));
 208
 209                   if (c == L('\\') && !(flags & FNM_NOESCAPE))
 210                     c = *p;
 211                   c = FOLD (c);
 212                   for (--p; n < endp; ++n, no_leading_period = 0)
 213                     if (FOLD ((UCHAR) *n) == c
 214                         && (FCT (p, n, string_end, no_leading_period, flags2,
 215                                  &end, alloca_used) == 0))
 216                       {
 217                       found:
 218                         if (end.pattern == NULL)
 219                           return 0;
 220                         break;
 221                       }
 222                   if (end.pattern != NULL)
 223                     {
 224                       p = end.pattern;
 225                       n = end.string;
 226                       no_leading_period = end.no_leading_period;
 227                       continue;
 228                     }
 229                 }
 230             }
 231
 232           /* If we come here no match is possible with the wildcard.  */
 233           return FNM_NOMATCH;
 234
 235         case L('['):
 236           {
 237             /* Nonzero if the sense of the character class is inverted.  */
 238             const CHAR *p_init = p;
 239             const CHAR *n_init = n;
 240             int not;
 241             CHAR cold;
 242             UCHAR fn;
 243
 244             if (posixly_correct == 0)
 245               posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
 246
 247             if (n == string_end)
 248               return FNM_NOMATCH;
 249
 250             if (*n == L('.') && no_leading_period)
 251               return FNM_NOMATCH;
 252
 253             if (*n == L('/') && (flags & FNM_FILE_NAME))
 254               /* `/' cannot be matched.  */
 255               return FNM_NOMATCH;
 256
 257             not = (*p == L('!') || (posixly_correct < 0 && *p == L('^')));
 258             if (not)
 259               ++p;
 260
 261             fn = FOLD ((UCHAR) *n);
 262
 263             c = *p++;
 264             for (;;)
 265               {
 266                 if (!(flags & FNM_NOESCAPE) && c == L('\\'))
 267                   {
 268                     if (*p == L('\0'))
 269                       return FNM_NOMATCH;
 270                     c = FOLD ((UCHAR) *p);
 271                     ++p;
 272
 273                     goto normal_bracket;
 274                   }
 275                 else if (c == L('[') && *p == L(':'))
 276                   {
 277                     /* Leave room for the null.  */
 278                     CHAR str[CHAR_CLASS_MAX_LENGTH + 1];
 279                     size_t c1 = 0;
 280 #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
 281                     wctype_t wt;
 282 #endif
 283                     const CHAR *startp = p;
 284
 285                     for (;;)
 286                       {
 287                         if (c1 == CHAR_CLASS_MAX_LENGTH)
 288                           /* The name is too long and therefore the pattern
 289                              is ill-formed.  */
 290                           return FNM_NOMATCH;
 291
 292                         c = *++p;
 293                         if (c == L(':') && p[1] == L(']'))
 294                           {
 295                             p += 2;
 296                             break;
 297                           }
 298                         if (c < L('a') || c >= L('z'))
 299                           {
 300                             /* This cannot possibly be a character class name.
 301                                Match it as a normal range.  */
 302                             p = startp;
 303                             c = L('[');
 304                             goto normal_bracket;
 305                           }
 306                         str[c1++] = c;
 307                       }
 308                     str[c1] = L('\0');
 309
 310 #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
 311                     wt = IS_CHAR_CLASS (str);
 312                     if (wt == 0)
 313                       /* Invalid character class name.  */
 314                       return FNM_NOMATCH;
 315
 316 # if defined _LIBC && ! WIDE_CHAR_VERSION
  317                     /* The following code is glibc specific, but it does
  318                        a good job of speeding things up there since
  319                        we can avoid the btowc() call.  */
 320                     if (_ISCTYPE ((UCHAR) *n, wt))
 321                       goto matched;
 322 # else
 323                     if (ISWCTYPE (BTOWC ((UCHAR) *n), wt))
 324                       goto matched;
 325 # endif
 326 #else
 327                     if ((STREQ (str, L("alnum")) && ISALNUM ((UCHAR) *n))
 328                         || (STREQ (str, L("alpha")) && ISALPHA ((UCHAR) *n))
 329                         || (STREQ (str, L("blank")) && ISBLANK ((UCHAR) *n))
 330                         || (STREQ (str, L("cntrl")) && ISCNTRL ((UCHAR) *n))
 331                         || (STREQ (str, L("digit")) && ISDIGIT ((UCHAR) *n))
 332                         || (STREQ (str, L("graph")) && ISGRAPH ((UCHAR) *n))
 333                         || (STREQ (str, L("lower")) && ISLOWER ((UCHAR) *n))
 334                         || (STREQ (str, L("print")) && ISPRINT ((UCHAR) *n))
 335                         || (STREQ (str, L("punct")) && ISPUNCT ((UCHAR) *n))
 336                         || (STREQ (str, L("space")) && ISSPACE ((UCHAR) *n))
 337                         || (STREQ (str, L("upper")) && ISUPPER ((UCHAR) *n))
 338                         || (STREQ (str, L("xdigit")) && ISXDIGIT ((UCHAR) *n)))
 339                       goto matched;
 340 #endif
 341                     c = *p++;
 342                   }
 343 #ifdef _LIBC
 344                 else if (c == L('[') && *p == L('='))
 345                   {
 346                     /* It's important that STR be a scalar variable rather
 347                        than a one-element array, because GCC (at least 4.9.2
 348                        -O2 on x86-64) can be confused by the array and
  349                        diagnose a "used uninitialized" in a dead branch in the
 350                        findidx function.  */
 351                     UCHAR str;
 352                     uint32_t nrules =
 353                       _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
 354                     const CHAR *startp = p;
 355
 356                     c = *++p;
 357                     if (c == L('\0'))
 358                       {
 359                         p = startp;
 360                         c = L('[');
 361                         goto normal_bracket;
 362                       }
 363                     str = c;
 364
 365                     c = *++p;
 366                     if (c != L('=') || p[1] != L(']'))
 367                       {
 368                         p = startp;
 369                         c = L('[');
 370                         goto normal_bracket;
 371                       }
 372                     p += 2;
 373
 374                     if (nrules == 0)
 375                       {
 376                         if ((UCHAR) *n == str)
 377                           goto matched;
 378                       }
 379                     else
 380                       {
 381                         const int32_t *table;
 382 # if WIDE_CHAR_VERSION
 383                         const int32_t *weights;
 384                         const wint_t *extra;
 385 # else
 386                         const unsigned char *weights;
 387                         const unsigned char *extra;
 388 # endif
 389                         const int32_t *indirect;
 390                         int32_t idx;
 391                         const UCHAR *cp = (const UCHAR *) &str;
 392
 393 # if WIDE_CHAR_VERSION
 394                         table = (const int32_t *)
 395                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
 396                         weights = (const int32_t *)
 397                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
 398                         extra = (const wint_t *)
 399                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
 400                         indirect = (const int32_t *)
 401                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
 402 # else
 403                         table = (const int32_t *)
 404                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
 405                         weights = (const unsigned char *)
 406                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
 407                         extra = (const unsigned char *)
 408                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
 409                         indirect = (const int32_t *)
 410                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
 411 # endif
 412
 413                         idx = FINDIDX (table, indirect, extra, &cp, 1);
 414                         if (idx != 0)
 415                           {
 416                             /* We found a table entry.  Now see whether the
 417                                character we are currently at has the same
  418                                equivalence class value.  */
 419                             int len = weights[idx & 0xffffff];
 420                             int32_t idx2;
 421                             const UCHAR *np = (const UCHAR *) n;
 422
 423                             idx2 = FINDIDX (table, indirect, extra,
 424                                             &np, string_end - n);
 425                             if (idx2 != 0
 426                                 && (idx >> 24) == (idx2 >> 24)
 427                                 && len == weights[idx2 & 0xffffff])
 428                               {
 429                                 int cnt = 0;
 430
 431                                 idx &= 0xffffff;
 432                                 idx2 &= 0xffffff;
 433
 434                                 while (cnt < len
 435                                        && (weights[idx + 1 + cnt]
 436                                            == weights[idx2 + 1 + cnt]))
 437                                   ++cnt;
 438
 439                                 if (cnt == len)
 440                                   goto matched;
 441                               }
 442                           }
 443                       }
 444
 445                     c = *p++;
 446                   }
 447 #endif
 448                 else if (c == L('\0'))
 449                   {
 450                     /* [ unterminated, treat as normal character.  */
 451                     p = p_init;
 452                     n = n_init;
 453                     c = L('[');
 454                     goto normal_match;
 455                   }
 456                 else
 457                   {
 458                     int is_range = 0;
 459
 460 #ifdef _LIBC
 461                     int is_seqval = 0;
 462
 463                     if (c == L('[') && *p == L('.'))
 464                       {
 465                         uint32_t nrules =
 466                           _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
 467                         const CHAR *startp = p;
 468                         size_t c1 = 0;
 469
 470                         while (1)
 471                           {
 472                             c = *++p;
 473                             if (c == L('.') && p[1] == L(']'))
 474                               {
 475                                 p += 2;
 476                                 break;
 477                               }
 478                             if (c == '\0')
 479                               return FNM_NOMATCH;
 480                             ++c1;
 481                           }
 482
  483                         /* We have to handle the symbols differently in
 484                            ranges since then the collation sequence is
 485                            important.  */
 486                         is_range = *p == L('-') && p[1] != L('\0');
 487
 488                         if (nrules == 0)
 489                           {
 490                             /* There are no names defined in the collation
 491                                data.  Therefore we only accept the trivial
 492                                names consisting of the character itself.  */
 493                             if (c1 != 1)
 494                               return FNM_NOMATCH;
 495
 496                             if (!is_range && *n == startp[1])
 497                               goto matched;
 498
 499                             cold = startp[1];
 500                             c = *p++;
 501                           }
 502                         else
 503                           {
 504                             int32_t table_size;
 505                             const int32_t *symb_table;
 506 # if WIDE_CHAR_VERSION
 507                             char str[c1];
 508                             unsigned int strcnt;
 509 # else
 510 #  define str (startp + 1)
 511 # endif
 512                             const unsigned char *extra;
 513                             int32_t idx;
 514                             int32_t elem;
 515                             int32_t second;
 516                             int32_t hash;
 517
 518 # if WIDE_CHAR_VERSION
 519                             /* We have to convert the name to a single-byte
 520                                string.  This is possible since the names
 521                                consist of ASCII characters and the internal
 522                                representation is UCS4.  */
 523                             for (strcnt = 0; strcnt < c1; ++strcnt)
 524                               str[strcnt] = startp[1 + strcnt];
 525 #endif
 526
 527                             table_size =
 528                               _NL_CURRENT_WORD (LC_COLLATE,
 529                                                 _NL_COLLATE_SYMB_HASH_SIZEMB);
 530                             symb_table = (const int32_t *)
 531                               _NL_CURRENT (LC_COLLATE,
 532                                            _NL_COLLATE_SYMB_TABLEMB);
 533                             extra = (const unsigned char *)
 534                               _NL_CURRENT (LC_COLLATE,
 535                                            _NL_COLLATE_SYMB_EXTRAMB);
 536
 537                             /* Locate the character in the hashing table.  */
 538                             hash = elem_hash (str, c1);
 539
 540                             idx = 0;
 541                             elem = hash % table_size;
 542                             if (symb_table[2 * elem] != 0)
 543                               {
 544                                 second = hash % (table_size - 2) + 1;
 545
 546                                 do
 547                                   {
 548                                     /* First compare the hashing value.  */
 549                                     if (symb_table[2 * elem] == hash
 550                                         && (c1
 551                                             == extra[symb_table[2 * elem + 1]])
 552                                         && memcmp (str,
 553                                                    &extra[symb_table[2 * elem
 554                                                                      + 1]
 555                                                           + 1], c1) == 0)
 556                                       {
 557                                         /* Yep, this is the entry.  */
 558                                         idx = symb_table[2 * elem + 1];
 559                                         idx += 1 + extra[idx];
 560                                         break;
 561                                       }
 562
 563                                     /* Next entry.  */
 564                                     elem += second;
 565                                   }
 566                                 while (symb_table[2 * elem] != 0);
 567                               }
 568
 569                             if (symb_table[2 * elem] != 0)
 570                               {
 571                                 /* Compare the byte sequence but only if
 572                                    this is not part of a range.  */
 573 # if WIDE_CHAR_VERSION
 574                                 int32_t *wextra;
 575
 576                                 idx += 1 + extra[idx];
 577                                 /* Adjust for the alignment.  */
 578                                 idx = (idx + 3) & ~3;
 579
 580                                 wextra = (int32_t *) &extra[idx + 4];
 581 # endif
 582
 583                                 if (! is_range)
 584                                   {
 585 # if WIDE_CHAR_VERSION
 586                                     for (c1 = 0;
 587                                          (int32_t) c1 < wextra[idx];
 588                                          ++c1)
 589                                       if (n[c1] != wextra[1 + c1])
 590                                         break;
 591
 592                                     if ((int32_t) c1 == wextra[idx])
 593                                       goto matched;
 594 # else
 595                                     for (c1 = 0; c1 < extra[idx]; ++c1)
 596                                       if (n[c1] != extra[1 + c1])
 597                                         break;
 598
 599                                     if (c1 == extra[idx])
 600                                       goto matched;
 601 # endif
 602                                   }
 603
 604                                 /* Get the collation sequence value.  */
 605                                 is_seqval = 1;
 606 # if WIDE_CHAR_VERSION
 607                                 cold = wextra[1 + wextra[idx]];
 608 # else
 609                                 /* Adjust for the alignment.  */
 610                                 idx += 1 + extra[idx];
 611                                 idx = (idx + 3) & ~4;
 612                                 cold = *((int32_t *) &extra[idx]);
 613 # endif
 614
 615                                 c = *p++;
 616                               }
 617                             else if (c1 == 1)
 618                               {
 619                                 /* No valid character.  Match it as a
 620                                    single byte.  */
 621                                 if (!is_range && *n == str[0])
 622                                   goto matched;
 623
 624                                 cold = str[0];
 625                                 c = *p++;
 626                               }
 627                             else
 628                               return FNM_NOMATCH;
 629                           }
 630                       }
 631                     else
 632 # undef str
 633 #endif
 634                       {
 635                         c = FOLD (c);
 636                       normal_bracket:
 637
  638                         /* We have to handle the symbols differently in
 639                            ranges since then the collation sequence is
 640                            important.  */
 641                         is_range = (*p == L('-') && p[1] != L('\0')
 642                                     && p[1] != L(']'));
 643
 644                         if (!is_range && c == fn)
 645                           goto matched;
 646
 647                         /* This is needed if we goto normal_bracket; from
 648                            outside of is_seqval's scope.  */
 649                         is_seqval = 0;
 650                         cold = c;
 651                         c = *p++;
 652                       }
 653
 654                     if (c == L('-') && *p != L(']'))
 655                       {
 656 #if _LIBC
 657                         /* We have to find the collation sequence
 658                            value for C.  Collation sequence is nothing
 659                            we can regularly access.  The sequence
 660                            value is defined by the order in which the
 661                            definitions of the collation values for the
 662                            various characters appear in the source
 663                            file.  A strange concept, nowhere
 664                            documented.  */
 665                         uint32_t fcollseq;
 666                         uint32_t lcollseq;
 667                         UCHAR cend = *p++;
 668
 669 # if WIDE_CHAR_VERSION
 670                         /* Search in the `names' array for the characters.  */
 671                         fcollseq = __collseq_table_lookup (collseq, fn);
 672                         if (fcollseq == ~((uint32_t) 0))
 673                           /* XXX We don't know anything about the character
 674                              we are supposed to match.  This means we are
 675                              failing.  */
 676                           goto range_not_matched;
 677
 678                         if (is_seqval)
 679                           lcollseq = cold;
 680                         else
 681                           lcollseq = __collseq_table_lookup (collseq, cold);
 682 # else
 683                         fcollseq = collseq[fn];
 684                         lcollseq = is_seqval ? cold : collseq[(UCHAR) cold];
 685 # endif
 686
 687                         is_seqval = 0;
 688                         if (cend == L('[') && *p == L('.'))
 689                           {
 690                             uint32_t nrules =
 691                               _NL_CURRENT_WORD (LC_COLLATE,
 692                                                 _NL_COLLATE_NRULES);
 693                             const CHAR *startp = p;
 694                             size_t c1 = 0;
 695
 696                             while (1)
 697                               {
 698                                 c = *++p;
 699                                 if (c == L('.') && p[1] == L(']'))
 700                                   {
 701                                     p += 2;
 702                                     break;
 703                                   }
 704                                 if (c == '\0')
 705                                   return FNM_NOMATCH;
 706                                 ++c1;
 707                               }
 708
 709                             if (nrules == 0)
 710                               {
 711                                 /* There are no names defined in the
 712                                    collation data.  Therefore we only
 713                                    accept the trivial names consisting
 714                                    of the character itself.  */
 715                                 if (c1 != 1)
 716                                   return FNM_NOMATCH;
 717
 718                                 cend = startp[1];
 719                               }
 720                             else
 721                               {
 722                                 int32_t table_size;
 723                                 const int32_t *symb_table;
 724 # if WIDE_CHAR_VERSION
 725                                 char str[c1];
 726                                 unsigned int strcnt;
 727 # else
 728 #  define str (startp + 1)
 729 # endif
 730                                 const unsigned char *extra;
 731                                 int32_t idx;
 732                                 int32_t elem;
 733                                 int32_t second;
 734                                 int32_t hash;
 735
 736 # if WIDE_CHAR_VERSION
 737                                 /* We have to convert the name to a single-byte
 738                                    string.  This is possible since the names
 739                                    consist of ASCII characters and the internal
 740                                    representation is UCS4.  */
 741                                 for (strcnt = 0; strcnt < c1; ++strcnt)
 742                                   str[strcnt] = startp[1 + strcnt];
 743 # endif
 744
 745                                 table_size =
 746                                   _NL_CURRENT_WORD (LC_COLLATE,
 747                                                     _NL_COLLATE_SYMB_HASH_SIZEMB);
 748                                 symb_table = (const int32_t *)
 749                                   _NL_CURRENT (LC_COLLATE,
 750                                                _NL_COLLATE_SYMB_TABLEMB);
 751                                 extra = (const unsigned char *)
 752                                   _NL_CURRENT (LC_COLLATE,
 753                                                _NL_COLLATE_SYMB_EXTRAMB);
 754
 755                                 /* Locate the character in the hashing
 756                                    table.  */
 757                                 hash = elem_hash (str, c1);
 758
 759                                 idx = 0;
 760                                 elem = hash % table_size;
 761                                 if (symb_table[2 * elem] != 0)
 762                                   {
 763                                     second = hash % (table_size - 2) + 1;
 764
 765                                     do
 766                                       {
 767                                         /* First compare the hashing value.  */
 768                                         if (symb_table[2 * elem] == hash
 769                                             && (c1
 770                                                 == extra[symb_table[2 * elem + 1]])
 771                                             && memcmp (str,
 772                                                        &extra[symb_table[2 * elem + 1]
 773                                                               + 1], c1) == 0)
 774                                           {
 775                                             /* Yep, this is the entry.  */
 776                                             idx = symb_table[2 * elem + 1];
 777                                             idx += 1 + extra[idx];
 778                                             break;
 779                                           }
 780
 781                                         /* Next entry.  */
 782                                         elem += second;
 783                                       }
 784                                     while (symb_table[2 * elem] != 0);
 785                                   }
 786
 787                                 if (symb_table[2 * elem] != 0)
 788                                   {
 789                                     /* Compare the byte sequence but only if
 790                                        this is not part of a range.  */
 791 # if WIDE_CHAR_VERSION
 792                                     int32_t *wextra;
 793
 794                                     idx += 1 + extra[idx];
 795                                     /* Adjust for the alignment.  */
 796                                     idx = (idx + 3) & ~4;
 797
 798                                     wextra = (int32_t *) &extra[idx + 4];
 799 # endif
 800                                     /* Get the collation sequence value.  */
 801                                     is_seqval = 1;
 802 # if WIDE_CHAR_VERSION
 803                                     cend = wextra[1 + wextra[idx]];
 804 # else
 805                                     /* Adjust for the alignment.  */
 806                                     idx += 1 + extra[idx];
 807                                     idx = (idx + 3) & ~4;
 808                                     cend = *((int32_t *) &extra[idx]);
 809 # endif
 810                                   }
 811                                 else if (symb_table[2 * elem] != 0 && c1 == 1)
 812                                   {
 813                                     cend = str[0];
 814                                     c = *p++;
 815                                   }
 816                                 else
 817                                   return FNM_NOMATCH;
 818                               }
 819 # undef str
 820                           }
 821                         else
 822                           {
 823                             if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
 824                               cend = *p++;
 825                             if (cend == L('\0'))
 826                               return FNM_NOMATCH;
 827                             cend = FOLD (cend);
 828                           }
 829
 830                         /* XXX It is not entirely clear to me how to handle
 831                            characters which are not mentioned in the
 832                            collation specification.  */
 833                         if (
 834 # if WIDE_CHAR_VERSION
 835                             lcollseq == 0xffffffff ||
 836 # endif
 837                             lcollseq <= fcollseq)
 838                           {
 839                             /* We have to look at the upper bound.  */
 840                             uint32_t hcollseq;
 841
 842                             if (is_seqval)
 843                               hcollseq = cend;
 844                             else
 845                               {
 846 # if WIDE_CHAR_VERSION
 847                                 hcollseq =
 848                                   __collseq_table_lookup (collseq, cend);
 849                                 if (hcollseq == ~((uint32_t) 0))
 850                                   {
 851                                     /* Hum, no information about the upper
 852                                        bound.  The matching succeeds if the
 853                                        lower bound is matched exactly.  */
 854                                     if (lcollseq != fcollseq)
 855                                       goto range_not_matched;
 856
 857                                     goto matched;
 858                                   }
 859 # else
 860                                 hcollseq = collseq[cend];
 861 # endif
 862                               }
 863
 864                             if (lcollseq <= hcollseq && fcollseq <= hcollseq)
 865                               goto matched;
 866                           }
 867 # if WIDE_CHAR_VERSION
 868                       range_not_matched:
 869 # endif
 870 #else
 871                         /* We use a boring value comparison of the character
 872                            values.  This is better than comparing using
 873                            `strcoll' since the latter would have surprising
 874                            and sometimes fatal consequences.  */
 875                         UCHAR cend = *p++;
 876
 877                         if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
 878                           cend = *p++;
 879                         if (cend == L('\0'))
 880                           return FNM_NOMATCH;
 881
 882                         /* It is a range.  */
 883                         if (cold <= fn && fn <= cend)
 884                           goto matched;
 885 #endif
 886
 887                         c = *p++;
 888                       }
 889                   }
 890
 891                 if (c == L(']'))
 892                   break;
 893               }
 894
 895             if (!not)
 896               return FNM_NOMATCH;
 897             break;
 898
 899           matched:
 900             /* Skip the rest of the [...] that already matched.  */
 901             while ((c = *p++) != L (']'))
 902               {
 903                 if (c == L('\0'))
 904                   /* [... (unterminated) loses.  */
 905                   return FNM_NOMATCH;
 906
 907                 if (!(flags & FNM_NOESCAPE) && c == L('\\'))
 908                   {
 909                     if (*p == L('\0'))
 910                       return FNM_NOMATCH;
 911                     /* XXX 1003.2d11 is unclear if this is right.  */
 912                     ++p;
 913                   }
 914                 else if (c == L('[') && *p == L(':'))
 915                   {
 916                     int c1 = 0;
 917                     const CHAR *startp = p;
 918
 919                     while (1)
 920                       {
 921                         c = *++p;
 922                         if (++c1 == CHAR_CLASS_MAX_LENGTH)
 923                           return FNM_NOMATCH;
 924
 925                         if (*p == L(':') && p[1] == L(']'))
 926                           break;
 927
 928                         if (c < L('a') || c >= L('z'))
 929                           {
 930                             p = startp - 2;
 931                             break;
 932                           }
 933                       }
 934                     p += 2;
 935                   }
 936                 else if (c == L('[') && *p == L('='))
 937                   {
 938                     c = *++p;
 939                     if (c == L('\0'))
 940                       return FNM_NOMATCH;
 941                     c = *++p;
 942                     if (c != L('=') || p[1] != L(']'))
 943                       return FNM_NOMATCH;
 944                     p += 2;
 945                   }
 946                 else if (c == L('[') && *p == L('.'))
 947                   {
 948                     ++p;
 949                     while (1)
 950                       {
 951                         c = *++p;
 952                         if (c == '\0')
 953                           return FNM_NOMATCH;
 954
 955                         if (*p == L('.') && p[1] == L(']'))
 956                           break;
 957                       }
 958                     p += 2;
 959                   }
 960               }
 961             if (not)
 962               return FNM_NOMATCH;
 963           }
 964           break;
 965
 966         case L('+'):
 967         case L('@'):
 968         case L('!'):
 969           if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
 970             {
 971               int res = EXT (c, p, n, string_end, no_leading_period, flags,
 972                              alloca_used);
 973               if (res != -1)
 974                 return res;
 975             }
 976           goto normal_match;
 977
 978         case L('/'):
 979           if (NO_LEADING_PERIOD (flags))
 980             {
 981               if (n == string_end || c != (UCHAR) *n)
 982                 return FNM_NOMATCH;
 983
 984               new_no_leading_period = 1;
 985               break;
 986             }
 987           /* FALLTHROUGH */
 988         default:
 989         normal_match:
 990           if (n == string_end || c != FOLD ((UCHAR) *n))
 991             return FNM_NOMATCH;
 992         }
 993
 994       no_leading_period = new_no_leading_period;
 995       ++n;
 996     }
 997
 998   if (n == string_end)
 999     return 0;
1000
1001   if ((flags & FNM_LEADING_DIR) && n != string_end && *n == L('/'))
1002     /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz".  */
1003     return 0;
1004
1005   return FNM_NOMATCH;
1006 }
1007
1008
1009 static const CHAR *
1010 internal_function
1011 END (const CHAR *pattern)
1012 {
1013   const CHAR *p = pattern;
1014
1015   while (1)
1016     if (*++p == L('\0'))
1017       /* This is an invalid pattern.  */
1018       return pattern;
1019     else if (*p == L('['))
1020       {
1021         /* Handle brackets special.  */
1022         if (posixly_correct == 0)
1023           posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
1024
1025         /* Skip the not sign.  We have to recognize it because of a possibly
1026            following ']'.  */
1027         if (*++p == L('!') || (posixly_correct < 0 && *p == L('^')))
1028           ++p;
1029         /* A leading ']' is recognized as such.  */
1030         if (*p == L(']'))
1031           ++p;
1032         /* Skip over all characters of the list.  */
1033         while (*p != L(']'))
1034           if (*p++ == L('\0'))
1035             /* This is no valid pattern.  */
1036             return pattern;
1037       }
1038     else if ((*p == L('?') || *p == L('*') || *p == L('+') || *p == L('@')
1039               || *p == L('!')) && p[1] == L('('))
1040       p = END (p + 1);
1041     else if (*p == L(')'))
1042       break;
1043
1044   return p + 1;
1045 }
1046
1047
1048 static int
1049 internal_function
1050 EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end,
1051      int no_leading_period, int flags, size_t alloca_used)
1052 {
1053   const CHAR *startp;
1054   int level;
1055   struct patternlist
1056   {
1057     struct patternlist *next;
1058     CHAR malloced;
1059     CHAR str[0];
1060   } *list = NULL;
1061   struct patternlist **lastp = &list;
1062   size_t pattern_len = STRLEN (pattern);
1063   int any_malloced = 0;
1064   const CHAR *p;
1065   const CHAR *rs;
1066   int retval = 0;
1067
1068   /* Parse the pattern.  Store the individual parts in the list.  */
1069   level = 0;
1070   for (startp = p = pattern + 1; level >= 0; ++p)
1071     if (*p == L('\0'))
1072       {
1073         /* This is an invalid pattern.  */
1074         retval = -1;
1075         goto out;
1076       }
1077     else if (*p == L('['))
1078       {
1079         /* Handle brackets special.  */
1080         if (posixly_correct == 0)
1081           posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
1082
1083         /* Skip the not sign.  We have to recognize it because of a possibly
1084            following ']'.  */
1085         if (*++p == L('!') || (posixly_correct < 0 && *p == L('^')))
1086           ++p;
1087         /* A leading ']' is recognized as such.  */
1088         if (*p == L(']'))
1089           ++p;
1090         /* Skip over all characters of the list.  */
1091         while (*p != L(']'))
1092           if (*p++ == L('\0'))
1093             {
1094               /* This is no valid pattern.  */
1095               retval = -1;
1096               goto out;
1097             }
1098       }
1099     else if ((*p == L('?') || *p == L('*') || *p == L('+') || *p == L('@')
1100               || *p == L('!')) && p[1] == L('('))
1101       /* Remember the nesting level.  */
1102       ++level;
1103     else if (*p == L(')'))
1104       {
1105         if (level-- == 0)
1106           {
1107             /* This means we found the end of the pattern.  */
1108 #define NEW_PATTERN \
1109             struct patternlist *newp;                                         \
1110             size_t slen = (opt == L('?') || opt == L('@')                     \
1111                            ? pattern_len : (p - startp + 1));                 \
1112             slen = sizeof (struct patternlist) + (slen * sizeof (CHAR));      \
1113             int malloced = ! __libc_use_alloca (alloca_used + slen);          \
1114             if (__builtin_expect (malloced, 0))                               \
1115               {                                                               \
1116                 newp = malloc (slen);                                         \
1117                 if (newp == NULL)                                             \
1118                   {                                                           \
1119                     retval = -2;                                              \
1120                     goto out;                                                 \
1121                   }                                                           \
1122                 any_malloced = 1;                                             \
1123               }                                                               \
1124             else                                                              \
1125               newp = alloca_account (slen, alloca_used);                      \
1126             newp->next = NULL;                                                \
1127             newp->malloced = malloced;                                        \
1128             *((CHAR *) MEMPCPY (newp->str, startp, p - startp)) = L('\0');    \
1129             *lastp = newp;                                                    \
1130             lastp = &newp->next
1131             NEW_PATTERN;
1132           }
1133       }
1134     else if (*p == L('|'))
1135       {
1136         if (level == 0)
1137           {
1138             NEW_PATTERN;
1139             startp = p + 1;
1140           }
1141       }
1142   assert (list != NULL);
1143   assert (p[-1] == L(')'));
1144 #undef NEW_PATTERN
1145
1146   switch (opt)
1147     {
1148     case L('*'):
1149       if (FCT (p, string, string_end, no_leading_period, flags, NULL,
1150                alloca_used) == 0)
1151         goto success;
1152       /* FALLTHROUGH */
1153
1154     case L('+'):
1155       do
1156         {
1157           for (rs = string; rs <= string_end; ++rs)
1158             /* First match the prefix with the current pattern with the
1159                current pattern.  */
1160             if (FCT (list->str, string, rs, no_leading_period,
1161                      flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1162                      NULL, alloca_used) == 0
1163                 /* This was successful.  Now match the rest with the rest
1164                    of the pattern.  */
1165                 && (FCT (p, rs, string_end,
1166                          rs == string
1167                          ? no_leading_period
1168                          : rs[-1] == '/' && NO_LEADING_PERIOD (flags) ? 1 : 0,
1169                          flags & FNM_FILE_NAME
1170                          ? flags : flags & ~FNM_PERIOD, NULL, alloca_used) == 0
1171                     /* This didn't work.  Try the whole pattern.  */
1172                     || (rs != string
1173                         && FCT (pattern - 1, rs, string_end,
1174                                 rs == string
1175                                 ? no_leading_period
1176                                 : (rs[-1] == '/' && NO_LEADING_PERIOD (flags)
1177                                    ? 1 : 0),
1178                                 flags & FNM_FILE_NAME
1179                                 ? flags : flags & ~FNM_PERIOD, NULL,
1180                                 alloca_used) == 0)))
1181               /* It worked.  Signal success.  */
1182               goto success;
1183         }
1184       while ((list = list->next) != NULL);
1185
1186       /* None of the patterns lead to a match.  */
1187       retval = FNM_NOMATCH;
1188       break;
1189
1190     case L('?'):
1191       if (FCT (p, string, string_end, no_leading_period, flags, NULL,
1192                alloca_used) == 0)
1193         goto success;
1194       /* FALLTHROUGH */
1195
1196     case L('@'):
1197       do
1198         /* I cannot believe it but `strcat' is actually acceptable
1199            here.  Match the entire string with the prefix from the
1200            pattern list and the rest of the pattern following the
1201            pattern list.  */
1202         if (FCT (STRCAT (list->str, p), string, string_end,
1203                  no_leading_period,
1204                  flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1205                  NULL, alloca_used) == 0)
1206           /* It worked.  Signal success.  */
1207           goto success;
1208       while ((list = list->next) != NULL);
1209
1210       /* None of the patterns lead to a match.  */
1211       retval = FNM_NOMATCH;
1212       break;
1213
1214     case L('!'):
1215       for (rs = string; rs <= string_end; ++rs)
1216         {
1217           struct patternlist *runp;
1218
1219           for (runp = list; runp != NULL; runp = runp->next)
1220             if (FCT (runp->str, string, rs,  no_leading_period,
1221                      flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1222                      NULL, alloca_used) == 0)
1223               break;
1224
1225           /* If none of the patterns matched see whether the rest does.  */
1226           if (runp == NULL
1227               && (FCT (p, rs, string_end,
1228                        rs == string
1229                        ? no_leading_period
1230                        : rs[-1] == '/' && NO_LEADING_PERIOD (flags) ? 1 : 0,
1231                        flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1232                        NULL, alloca_used) == 0))
1233             /* This is successful.  */
1234             goto success;
1235         }
1236
1237       /* None of the patterns together with the rest of the pattern
1238          lead to a match.  */
1239       retval = FNM_NOMATCH;
1240       break;
1241
1242     default:
1243       assert (! "Invalid extended matching operator");
1244       retval = -1;
1245       break;
1246     }
1247
1248  success:
1249  out:
1250   if (any_malloced)
1251     while (list != NULL)
1252       {
1253         struct patternlist *old = list;
1254         list = list->next;
1255         if (old->malloced)
1256           free (old);
1257       }
1258
1259   return retval;
1260 }
1261
1262
 /* Remove the macros this file is written in terms of.  Names such as
    CHAR, L, FCT, EXT, END and STRUCT are used throughout the code above
    but are not defined in it, so presumably the file that includes this
    one defines them first; undefining them here would allow a further
    inclusion with a different set of definitions (see the
    WIDE_CHAR_VERSION conditionals) -- confirm against the includer.  */
 1263 #undef FOLD
 1264 #undef CHAR
 1265 #undef UCHAR
 1266 #undef INT
 1267 #undef FCT
 1268 #undef EXT
 1269 #undef END
 1270 #undef STRUCT
 1271 #undef MEMPCPY
 1272 #undef MEMCHR
 1273 #undef STRCOLL
 1274 #undef STRLEN
 1275 #undef STRCAT
 1276 #undef L
 1277 #undef BTOWC
 1278 #undef WIDE_CHAR_VERSION
 1279 #undef FINDIDX