posix/fnmatch_loop.c

   1 /* Copyright (C) 1991-2022 Free Software Foundation, Inc.
   2    This file is part of the GNU C Library.
   3
   4    The GNU C Library is free software; you can redistribute it and/or
   5    modify it under the terms of the GNU Lesser General Public
   6    License as published by the Free Software Foundation; either
   7    version 2.1 of the License, or (at your option) any later version.
   8
   9    The GNU C Library is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  12    Lesser General Public License for more details.
  13
  14    You should have received a copy of the GNU Lesser General Public
  15    License along with the GNU C Library; if not, see
  16    <https://www.gnu.org/licenses/>.  */
  17
  18 #ifdef _LIBC
  19 # include <stdint.h>
  20 #endif
  21
  22 struct STRUCT  /* Resume point that FCT fills in when it stops at a '*' because ENDS is non-null.  */
  23 {
  24   const CHAR *pattern;  /* The '*' in the pattern where FCT stopped; callers preset this to NULL to tell whether anything was recorded.  */
  25   const CHAR *string;  /* Position reached in the string at that moment.  */
  26   bool no_leading_period;  /* Value of FCT's no_leading_period at that moment.  */
  27 };
  28
  29 /* Match STRING (which ends at STRING_END) against the file name pattern
  30    PATTERN, returning zero if it matches, nonzero if not.  */
  31 static int FCT (const CHAR *pattern, const CHAR *string,
  32                 const CHAR *string_end, bool no_leading_period, int flags,
  33                 struct STRUCT *ends);  /* Non-null ENDS: at a '*' not taken as an extended pattern, record the current position in *ENDS and return 0.  */
  34 static int EXT (INT opt, const CHAR *pattern, const CHAR *string,
  35                 const CHAR *string_end, bool no_leading_period, int flags);  /* Called by FCT when FNM_EXTMATCH is set and '(' follows; FCT returns the result unchanged unless it is -1.  */
  36 static const CHAR *END (const CHAR *patternp);  /* FCT treats a result that differs from PATTERNP as the place in the pattern to continue from.  */
  37
  38 static int
  39 FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end,
  40      bool no_leading_period, int flags, struct STRUCT *ends)
  41 {
  42   const CHAR *p = pattern, *n = string;
  43   UCHAR c;
  44 #ifdef _LIBC
  45 # if WIDE_CHAR_VERSION
  46   const char *collseq = (const char *)
  47     _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC);
  48 # else
  49   const UCHAR *collseq = (const UCHAR *)
  50     _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQMB);
  51 # endif
  52 #endif
  53
  54   while ((c = *p++) != L_('\0'))
  55     {
  56       bool new_no_leading_period = false;
  57       c = FOLD (c);
  58
  59       switch (c)
  60         {
  61         case L_('?'):
  62           if (__glibc_unlikely (flags & FNM_EXTMATCH) && *p == '(')
  63             {
  64               int res = EXT (c, p, n, string_end, no_leading_period, flags);
  65               if (res != -1)
  66                 return res;
  67             }
  68
  69           if (n == string_end)
  70             return FNM_NOMATCH;
  71           else if (*n == L_('/') && (flags & FNM_FILE_NAME))
  72             return FNM_NOMATCH;
  73           else if (*n == L_('.') && no_leading_period)
  74             return FNM_NOMATCH;
  75           break;
  76
  77         case L_('\\'):
  78           if (!(flags & FNM_NOESCAPE))
  79             {
  80               c = *p++;
  81               if (c == L_('\0'))
  82                 /* Trailing \ loses.  */
  83                 return FNM_NOMATCH;
  84               c = FOLD (c);
  85             }
  86           if (n == string_end || FOLD ((UCHAR) *n) != c)
  87             return FNM_NOMATCH;
  88           break;
  89
  90         case L_('*'):
  91           if (__glibc_unlikely (flags & FNM_EXTMATCH) && *p == '(')
  92             {
  93               int res = EXT (c, p, n, string_end, no_leading_period, flags);
  94               if (res != -1)
  95                 return res;
  96             }
  97           else if (ends != NULL)
  98             {
  99               ends->pattern = p - 1;
 100               ends->string = n;
 101               ends->no_leading_period = no_leading_period;
 102               return 0;
 103             }
 104
 105           if (n != string_end && *n == L_('.') && no_leading_period)
 106             return FNM_NOMATCH;
 107
 108           for (c = *p++; c == L_('?') || c == L_('*'); c = *p++)
 109             {
 110               if (*p == L_('(') && (flags & FNM_EXTMATCH) != 0)
 111                 {
 112                   const CHAR *endp = END (p);
 113                   if (endp != p)
 114                     {
 115                       /* This is a pattern.  Skip over it.  */
 116                       p = endp;
 117                       continue;
 118                     }
 119                 }
 120
 121               if (c == L_('?'))
 122                 {
 123                   /* A ? needs to match one character.  */
 124                   if (n == string_end)
 125                     /* There isn't another character; no match.  */
 126                     return FNM_NOMATCH;
 127                   else if (*n == L_('/')
 128                            && __glibc_unlikely (flags & FNM_FILE_NAME))
 129                     /* A slash does not match a wildcard under
 130                        FNM_FILE_NAME.  */
 131                     return FNM_NOMATCH;
 132                   else
 133                     /* One character of the string is consumed in matching
 134                        this ? wildcard, so *??? won't match if there are
 135                        less than three characters.  */
 136                     ++n;
 137                 }
 138             }
 139
 140           if (c == L_('\0'))
 141             /* The wildcard(s) is/are the last element of the pattern.
 142                If the name is a file name and contains another slash
 143                this means it cannot match, unless the FNM_LEADING_DIR
 144                flag is set.  */
 145             {
 146               int result = (flags & FNM_FILE_NAME) == 0 ? 0 : FNM_NOMATCH;
 147
 148               if (flags & FNM_FILE_NAME)
 149                 {
 150                   if (flags & FNM_LEADING_DIR)
 151                     result = 0;
 152                   else
 153                     {
 154                       if (MEMCHR (n, L_('/'), string_end - n) == NULL)
 155                         result = 0;
 156                     }
 157                 }
 158
 159               return result;
 160             }
 161           else
 162             {
 163               const CHAR *endp;
 164               struct STRUCT end;
 165
 166               end.pattern = NULL;
 167               endp = MEMCHR (n, (flags & FNM_FILE_NAME) ? L_('/') : L_('\0'),
 168                              string_end - n);
 169               if (endp == NULL)
 170                 endp = string_end;
 171
 172               if (c == L_('[')
 173                   || (__glibc_unlikely (flags & FNM_EXTMATCH)
 174                       && (c == L_('@') || c == L_('+') || c == L_('!'))
 175                       && *p == L_('(')))
 176                 {
 177                   int flags2 = ((flags & FNM_FILE_NAME)
 178                                 ? flags : (flags & ~FNM_PERIOD));
 179
 180                   for (--p; n < endp; ++n, no_leading_period = false)
 181                     if (FCT (p, n, string_end, no_leading_period, flags2,
 182                              &end) == 0)
 183                       goto found;
 184                 }
 185               else if (c == L_('/') && (flags & FNM_FILE_NAME))
 186                 {
 187                   while (n < string_end && *n != L_('/'))
 188                     ++n;
 189                   if (n < string_end && *n == L_('/')
 190                       && (FCT (p, n + 1, string_end, flags & FNM_PERIOD, flags,
 191                                NULL) == 0))
 192                     return 0;
 193                 }
 194               else
 195                 {
 196                   int flags2 = ((flags & FNM_FILE_NAME)
 197                                 ? flags : (flags & ~FNM_PERIOD));
 198
 199                   if (c == L_('\\') && !(flags & FNM_NOESCAPE))
 200                     c = *p;
 201                   c = FOLD (c);
 202                   for (--p; n < endp; ++n, no_leading_period = false)
 203                     if (FOLD ((UCHAR) *n) == c
 204                         && (FCT (p, n, string_end, no_leading_period, flags2,
 205                                  &end) == 0))
 206                       {
 207                       found:
 208                         if (end.pattern == NULL)
 209                           return 0;
 210                         break;
 211                       }
 212                   if (end.pattern != NULL)
 213                     {
 214                       p = end.pattern;
 215                       n = end.string;
 216                       no_leading_period = end.no_leading_period;
 217                       continue;
 218                     }
 219                 }
 220             }
 221
 222           /* If we come here no match is possible with the wildcard.  */
 223           return FNM_NOMATCH;
 224
 225         case L_('['):
 226           {
 227             /* Nonzero if the sense of the character class is inverted.  */
 228             const CHAR *p_init = p;
 229             const CHAR *n_init = n;
 230             bool not;
 231             CHAR cold;
 232             UCHAR fn;
 233
 234             if (posixly_correct == 0)
 235               posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
 236
 237             if (n == string_end)
 238               return FNM_NOMATCH;
 239
 240             if (*n == L_('.') && no_leading_period)
 241               return FNM_NOMATCH;
 242
 243             if (*n == L_('/') && (flags & FNM_FILE_NAME))
 244               /* '/' cannot be matched.  */
 245               return FNM_NOMATCH;
 246
 247             not = (*p == L_('!') || (posixly_correct < 0 && *p == L_('^')));
 248             if (not)
 249               ++p;
 250
 251             fn = FOLD ((UCHAR) *n);
 252
 253             c = *p++;
 254             for (;;)
 255               {
 256                 if (!(flags & FNM_NOESCAPE) && c == L_('\\'))
 257                   {
 258                     if (*p == L_('\0'))
 259                       return FNM_NOMATCH;
 260                     c = FOLD ((UCHAR) *p);
 261                     ++p;
 262
 263                     goto normal_bracket;
 264                   }
 265                 else if (c == L_('[') && *p == L_(':'))
 266                   {
 267                     /* Leave room for the null.  */
 268                     CHAR str[CHAR_CLASS_MAX_LENGTH + 1];
 269                     size_t c1 = 0;
 270                     wctype_t wt;
 271                     const CHAR *startp = p;
 272
 273                     for (;;)
 274                       {
 275                         if (c1 == CHAR_CLASS_MAX_LENGTH)
 276                           /* The name is too long and therefore the pattern
 277                              is ill-formed.  */
 278                           return FNM_NOMATCH;
 279
 280                         c = *++p;
 281                         if (c == L_(':') && p[1] == L_(']'))
 282                           {
 283                             p += 2;
 284                             break;
 285                           }
 286                         if (c < L_('a') || c >= L_('z'))
 287                           {
 288                             /* This cannot possibly be a character class name.
 289                                Match it as a normal range.  */
 290                             p = startp;
 291                             c = L_('[');
 292                             goto normal_bracket;
 293                           }
 294                         str[c1++] = c;
 295                       }
 296                     str[c1] = L_('\0');
 297
 298                     wt = IS_CHAR_CLASS (str);
 299                     if (wt == 0)
 300                       /* Invalid character class name.  */
 301                       return FNM_NOMATCH;
 302
 303 #if defined _LIBC && ! WIDE_CHAR_VERSION
 304                     /* The following code is glibc specific but does
 305                        a good job there in speeding up the code since
 306                        we can avoid the btowc() call.  */
 307                     if (_ISCTYPE ((UCHAR) *n, wt))
 308                       goto matched;
 309 #else
 310                     if (iswctype (BTOWC ((UCHAR) *n), wt))
 311                       goto matched;
 312 #endif
 313                     c = *p++;
 314                   }
 315 #ifdef _LIBC
 316                 else if (c == L_('[') && *p == L_('='))
 317                   {
 318                     /* It's important that STR be a scalar variable rather
 319                        than a one-element array, because GCC (at least 4.9.2
 320                        -O2 on x86-64) can be confused by the array and
 321                        diagnose a "used uninitialized" in a dead branch in the
 322                        findidx function.  */
 323                     UCHAR str;
 324                     uint32_t nrules =
 325                       _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
 326                     const CHAR *startp = p;
 327
 328                     c = *++p;
 329                     if (c == L_('\0'))
 330                       {
 331                         p = startp;
 332                         c = L_('[');
 333                         goto normal_bracket;
 334                       }
 335                     str = c;
 336
 337                     c = *++p;
 338                     if (c != L_('=') || p[1] != L_(']'))
 339                       {
 340                         p = startp;
 341                         c = L_('[');
 342                         goto normal_bracket;
 343                       }
 344                     p += 2;
 345
 346                     if (nrules == 0)
 347                       {
 348                         if ((UCHAR) *n == str)
 349                           goto matched;
 350                       }
 351                     else
 352                       {
 353                         const int32_t *table;
 354 # if WIDE_CHAR_VERSION
 355                         const int32_t *weights;
 356                         const wint_t *extra;
 357 # else
 358                         const unsigned char *weights;
 359                         const unsigned char *extra;
 360 # endif
 361                         const int32_t *indirect;
 362                         int32_t idx;
 363                         const UCHAR *cp = (const UCHAR *) &str;
 364
 365 # if WIDE_CHAR_VERSION
 366                         table = (const int32_t *)
 367                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
 368                         weights = (const int32_t *)
 369                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
 370                         extra = (const wint_t *)
 371                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
 372                         indirect = (const int32_t *)
 373                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
 374 # else
 375                         table = (const int32_t *)
 376                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
 377                         weights = (const unsigned char *)
 378                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
 379                         extra = (const unsigned char *)
 380                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
 381                         indirect = (const int32_t *)
 382                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
 383 # endif
 384
 385                         idx = FINDIDX (table, indirect, extra, &cp, 1);
 386                         if (idx != 0)
 387                           {
 388                             /* We found a table entry.  Now see whether the
 389                                character we are currently at has the same
 390                                equivalence class value.  */
 391                             int len = weights[idx & 0xffffff];
 392                             int32_t idx2;
 393                             const UCHAR *np = (const UCHAR *) n;
 394
 395                             idx2 = FINDIDX (table, indirect, extra,
 396                                             &np, string_end - n);
 397                             if (idx2 != 0
 398                                 && (idx >> 24) == (idx2 >> 24)
 399                                 && len == weights[idx2 & 0xffffff])
 400                               {
 401                                 int cnt = 0;
 402
 403                                 idx &= 0xffffff;
 404                                 idx2 &= 0xffffff;
 405
 406                                 while (cnt < len
 407                                        && (weights[idx + 1 + cnt]
 408                                            == weights[idx2 + 1 + cnt]))
 409                                   ++cnt;
 410
 411                                 if (cnt == len)
 412                                   goto matched;
 413                               }
 414                           }
 415                       }
 416
 417                     c = *p++;
 418                   }
 419 #endif
 420                 else if (c == L_('\0'))
 421                   {
 422                     /* [ unterminated, treat as normal character.  */
 423                     p = p_init;
 424                     n = n_init;
 425                     c = L_('[');
 426                     goto normal_match;
 427                   }
 428                 else
 429                   {
 430                     bool is_range = false;
 431
 432 #ifdef _LIBC
 433                     bool is_seqval = false;
 434
 435                     if (c == L_('[') && *p == L_('.'))
 436                       {
 437                         uint32_t nrules =
 438                           _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
 439                         const CHAR *startp = p;
 440                         size_t c1 = 0;
 441
 442                         while (1)
 443                           {
 444                             c = *++p;
 445                             if (c == L_('.') && p[1] == L_(']'))
 446                               {
 447                                 p += 2;
 448                                 break;
 449                               }
 450                             if (c == '\0')
 451                               return FNM_NOMATCH;
 452                             ++c1;
 453                           }
 454
 455                         /* We have to handle the symbols differently in
 456                            ranges since then the collation sequence is
 457                            important.  */
 458                         is_range = *p == L_('-') && p[1] != L_('\0');
 459
 460                         if (nrules == 0)
 461                           {
 462                             /* There are no names defined in the collation
 463                                data.  Therefore we only accept the trivial
 464                                names consisting of the character itself.  */
 465                             if (c1 != 1)
 466                               return FNM_NOMATCH;
 467
 468                             if (!is_range && *n == startp[1])
 469                               goto matched;
 470
 471                             cold = startp[1];
 472                             c = *p++;
 473                           }
 474                         else
 475                           {
 476                             int32_t table_size;
 477                             const int32_t *symb_table;
 478                             const unsigned char *extra;
 479                             int32_t idx;
 480                             int32_t elem;
 481 # if WIDE_CHAR_VERSION
 482                             CHAR *wextra;
 483 # endif
 484
 485                             table_size =
 486                               _NL_CURRENT_WORD (LC_COLLATE,
 487                                                 _NL_COLLATE_SYMB_HASH_SIZEMB);
 488                             symb_table = (const int32_t *)
 489                               _NL_CURRENT (LC_COLLATE,
 490                                            _NL_COLLATE_SYMB_TABLEMB);
 491                             extra = (const unsigned char *)
 492                               _NL_CURRENT (LC_COLLATE,
 493                                            _NL_COLLATE_SYMB_EXTRAMB);
 494
 495                             for (elem = 0; elem < table_size; elem++)
 496                               if (symb_table[2 * elem] != 0)
 497                                 {
 498                                   idx = symb_table[2 * elem + 1];
 499                                   /* Skip the name of collating element.  */
 500                                   idx += 1 + extra[idx];
 501 # if WIDE_CHAR_VERSION
 502                                   /* Skip the byte sequence of the
 503                                      collating element.  */
 504                                   idx += 1 + extra[idx];
 505                                   /* Adjust for the alignment.  */
 506                                   idx = (idx + 3) & ~3;
 507
 508                                   wextra = (CHAR *) &extra[idx + 4];
 509
 510                                   if (/* Compare the length of the sequence.  */
 511                                       c1 == wextra[0]
 512                                       /* Compare the wide char sequence.  */
 513                                       && (__wmemcmp (startp + 1, &wextra[1],
 514                                                      c1)
 515                                           == 0))
 516                                     /* Yep, this is the entry.  */
 517                                     break;
 518 # else
 519                                   if (/* Compare the length of the sequence.  */
 520                                       c1 == extra[idx]
 521                                       /* Compare the byte sequence.  */
 522                                       && memcmp (startp + 1,
 523                                                  &extra[idx + 1], c1) == 0)
 524                                     /* Yep, this is the entry.  */
 525                                     break;
 526 # endif
 527                                 }
 528
 529                             if (elem < table_size)
 530                               {
 531                                 /* Compare the byte sequence but only if
 532                                    this is not part of a range.  */
 533                                 if (! is_range
 534
 535 # if WIDE_CHAR_VERSION
 536                                     && __wmemcmp (n, &wextra[1], c1) == 0
 537 # else
 538                                     && memcmp (n, &extra[idx + 1], c1) == 0
 539 # endif
 540                                     )
 541                                   {
 542                                     n += c1 - 1;
 543                                     goto matched;
 544                                   }
 545
 546                                 /* Get the collation sequence value.  */
 547                                 is_seqval = true;
 548 # if WIDE_CHAR_VERSION
 549                                 cold = wextra[1 + wextra[0]];
 550 # else
 551                                 idx += 1 + extra[idx];
 552                                 /* Adjust for the alignment.  */
 553                                 idx = (idx + 3) & ~3;
 554                                 cold = *((int32_t *) &extra[idx]);
 555 # endif
 556
 557                                 c = *p++;
 558                               }
 559                             else if (c1 == 1)
 560                               {
 561                                 /* No valid character.  Match it as a
 562                                    single byte.  */
 563                                 if (!is_range && *n == startp[1])
 564                                   goto matched;
 565
 566                                 cold = startp[1];
 567                                 c = *p++;
 568                               }
 569                             else
 570                               return FNM_NOMATCH;
 571                           }
 572                       }
 573                     else
 574 #endif
 575                       {
 576                         c = FOLD (c);
 577                       normal_bracket:
 578
 579                         /* We have to handle the symbols differently in
 580                            ranges since then the collation sequence is
 581                            important.  */
 582                         is_range = (*p == L_('-') && p[1] != L_('\0')
 583                                     && p[1] != L_(']'));
 584
 585                         if (!is_range && c == fn)
 586                           goto matched;
 587
 588 #if _LIBC
 589                         /* This is needed if we goto normal_bracket; from
 590                            outside of is_seqval's scope.  */
 591                         is_seqval = false;
 592 #endif
 593                         cold = c;
 594                         c = *p++;
 595                       }
 596
 597                     if (c == L_('-') && *p != L_(']'))
 598                       {
 599 #if _LIBC
 600                         /* We have to find the collation sequence
 601                            value for C.  Collation sequence is nothing
 602                            we can regularly access.  The sequence
 603                            value is defined by the order in which the
 604                            definitions of the collation values for the
 605                            various characters appear in the source
 606                            file.  A strange concept, nowhere
 607                            documented.  */
 608                         uint32_t fcollseq;
 609                         uint32_t lcollseq;
 610                         UCHAR cend = *p++;
 611
 612 # if WIDE_CHAR_VERSION
 613                         /* Search in the 'names' array for the characters.  */
 614                         fcollseq = __collseq_table_lookup (collseq, fn);
 615                         if (fcollseq == ~((uint32_t) 0))
 616                           /* XXX We don't know anything about the character
 617                              we are supposed to match.  This means we are
 618                              failing.  */
 619                           goto range_not_matched;
 620
 621                         if (is_seqval)
 622                           lcollseq = cold;
 623                         else
 624                           lcollseq = __collseq_table_lookup (collseq, cold);
 625 # else
 626                         fcollseq = collseq[fn];
 627                         lcollseq = is_seqval ? cold : collseq[(UCHAR) cold];
 628 # endif
 629
 630                         is_seqval = false;
 631                         if (cend == L_('[') && *p == L_('.'))
 632                           {
 633                             uint32_t nrules =
 634                               _NL_CURRENT_WORD (LC_COLLATE,
 635                                                 _NL_COLLATE_NRULES);
 636                             const CHAR *startp = p;
 637                             size_t c1 = 0;
 638
 639                             while (1)
 640                               {
 641                                 c = *++p;
 642                                 if (c == L_('.') && p[1] == L_(']'))
 643                                   {
 644                                     p += 2;
 645                                     break;
 646                                   }
 647                                 if (c == '\0')
 648                                   return FNM_NOMATCH;
 649                                 ++c1;
 650                               }
 651
 652                             if (nrules == 0)
 653                               {
 654                                 /* There are no names defined in the
 655                                    collation data.  Therefore we only
 656                                    accept the trivial names consisting
 657                                    of the character itself.  */
 658                                 if (c1 != 1)
 659                                   return FNM_NOMATCH;
 660
 661                                 cend = startp[1];
 662                               }
 663                             else
 664                               {
 665                                 int32_t table_size;
 666                                 const int32_t *symb_table;
 667                                 const unsigned char *extra;
 668                                 int32_t idx;
 669                                 int32_t elem;
 670 # if WIDE_CHAR_VERSION
 671                                 CHAR *wextra;
 672 # endif
 673
 674                                 table_size =
 675                                   _NL_CURRENT_WORD (LC_COLLATE,
 676                                                     _NL_COLLATE_SYMB_HASH_SIZEMB);
 677                                 symb_table = (const int32_t *)
 678                                   _NL_CURRENT (LC_COLLATE,
 679                                                _NL_COLLATE_SYMB_TABLEMB);
 680                                 extra = (const unsigned char *)
 681                                   _NL_CURRENT (LC_COLLATE,
 682                                                _NL_COLLATE_SYMB_EXTRAMB);
 683
 684                                 for (elem = 0; elem < table_size; elem++)
 685                                   if (symb_table[2 * elem] != 0)
 686                                     {
 687                                       idx = symb_table[2 * elem + 1];
 688                                       /* Skip the name of collating
 689                                          element.  */
 690                                       idx += 1 + extra[idx];
 691 # if WIDE_CHAR_VERSION
 692                                       /* Skip the byte sequence of the
 693                                          collating element.  */
 694                                       idx += 1 + extra[idx];
 695                                       /* Adjust for the alignment.  */
 696                                       idx = (idx + 3) & ~3;
 697
 698                                       wextra = (CHAR *) &extra[idx + 4];
 699
 700                                       if (/* Compare the length of the
 701                                              sequence.  */
 702                                           c1 == wextra[0]
 703                                           /* Compare the wide char sequence.  */
 704                                           && (__wmemcmp (startp + 1,
 705                                                          &wextra[1], c1)
 706                                               == 0))
 707                                         /* Yep, this is the entry.  */
 708                                         break;
 709 # else
 710                                       if (/* Compare the length of the
 711                                              sequence.  */
 712                                           c1 == extra[idx]
 713                                           /* Compare the byte sequence.  */
 714                                           && memcmp (startp + 1,
 715                                                      &extra[idx + 1], c1) == 0)
 716                                         /* Yep, this is the entry.  */
 717                                         break;
 718 # endif
 719                                     }
 720
 721                                 if (elem < table_size)
 722                                   {
 723                                     /* Get the collation sequence value.  */
 724                                     is_seqval = true;
 725 # if WIDE_CHAR_VERSION
 726                                     cend = wextra[1 + wextra[0]];
 727 # else
 728                                     idx += 1 + extra[idx];
 729                                     /* Adjust for the alignment.  */
 730                                     idx = (idx + 3) & ~3;
 731                                     cend = *((int32_t *) &extra[idx]);
 732 # endif
 733                                   }
 734                                 else if (c1 == 1)
 735                                   {
 736                                     cend = startp[1];
 737                                     c = *p++;
 738                                   }
 739                                 else
 740                                   return FNM_NOMATCH;
 741                               }
 742                           }
 743                         else
 744                           {
 745                             if (!(flags & FNM_NOESCAPE) && cend == L_('\\'))
 746                               cend = *p++;
 747                             if (cend == L_('\0'))
 748                               return FNM_NOMATCH;
 749                             cend = FOLD (cend);
 750                           }
 751
 752                         /* XXX It is not entirely clear to me how to handle
 753                            characters which are not mentioned in the
 754                            collation specification.  */
 755                         if (
 756 # if WIDE_CHAR_VERSION
 757                             lcollseq == 0xffffffff ||
 758 # endif
 759                             lcollseq <= fcollseq)
 760                           {
 761                             /* We have to look at the upper bound.  */
 762                             uint32_t hcollseq;
 763
 764                             if (is_seqval)
 765                               hcollseq = cend;
 766                             else
 767                               {
 768 # if WIDE_CHAR_VERSION
 769                                 hcollseq =
 770                                   __collseq_table_lookup (collseq, cend);
 771                                 if (hcollseq == ~((uint32_t) 0))
 772                                   {
 773                                     /* Hum, no information about the upper
 774                                        bound.  The matching succeeds if the
 775                                        lower bound is matched exactly.  */
 776                                     if (lcollseq != fcollseq)
 777                                       goto range_not_matched;
 778
 779                                     goto matched;
 780                                   }
 781 # else
 782                                 hcollseq = collseq[cend];
 783 # endif
 784                               }
 785
 786                             if (lcollseq <= hcollseq && fcollseq <= hcollseq)
 787                               goto matched;
 788                           }
 789 # if WIDE_CHAR_VERSION
 790                       range_not_matched:
 791 # endif
 792 #else
 793                         /* We use a boring value comparison of the character
 794                            values.  This is better than comparing using
 795                            'strcoll' since the latter would have surprising
 796                            and sometimes fatal consequences.  */
 797                         UCHAR cend = *p++;
 798
 799                         if (!(flags & FNM_NOESCAPE) && cend == L_('\\'))
 800                           cend = *p++;
 801                         if (cend == L_('\0'))
 802                           return FNM_NOMATCH;
 803
 804                         /* It is a range.  */
 805                         if ((UCHAR) cold <= fn && fn <= cend)
 806                           goto matched;
 807 #endif
 808
 809                         c = *p++;
 810                       }
 811                   }
 812
 813                 if (c == L_(']'))
 814                   break;
 815               }
 816
 817             if (!not)
 818               return FNM_NOMATCH;
 819             break;
 820
 821           matched:
 822             /* Skip the rest of the [...] that already matched.  */
 823             while ((c = *p++) != L_(']'))
 824               {
 825                 if (c == L_('\0'))
 826                   {
 827                     /* [ unterminated, treat as normal character.  */
 828                     p = p_init;
 829                     n = n_init;
 830                     c = L_('[');
 831                     goto normal_match;
 832                   }
 833
 834                 if (!(flags & FNM_NOESCAPE) && c == L_('\\'))
 835                   {
 836                     if (*p == L_('\0'))
 837                       return FNM_NOMATCH;
 838                     /* XXX 1003.2d11 is unclear if this is right.  */
 839                     ++p;
 840                   }
 841                 else if (c == L_('[') && *p == L_(':'))
 842                   {
 843                     int c1 = 0;
 844                     const CHAR *startp = p;
 845
 846                     while (1)
 847                       {
 848                         c = *++p;
 849                         if (++c1 == CHAR_CLASS_MAX_LENGTH)
 850                           return FNM_NOMATCH;
 851
 852                         if (*p == L_(':') && p[1] == L_(']'))
 853                           break;
 854
 855                         if (c < L_('a') || c >= L_('z'))
 856                           {
 857                             p = startp - 2;
 858                             break;
 859                           }
 860                       }
 861                     p += 2;
 862                   }
 863                 else if (c == L_('[') && *p == L_('='))
 864                   {
 865                     c = *++p;
 866                     if (c == L_('\0'))
 867                       return FNM_NOMATCH;
 868                     c = *++p;
 869                     if (c != L_('=') || p[1] != L_(']'))
 870                       return FNM_NOMATCH;
 871                     p += 2;
 872                   }
 873                 else if (c == L_('[') && *p == L_('.'))
 874                   {
 875                     while (1)
 876                       {
 877                         c = *++p;
 878                         if (c == L_('\0'))
 879                           return FNM_NOMATCH;
 880
 881                         if (c == L_('.') && p[1] == L_(']'))
 882                           break;
 883                       }
 884                     p += 2;
 885                   }
 886               }
 887             if (not)
 888               return FNM_NOMATCH;
 889           }
 890           break;
 891
 892         case L_('+'):
 893         case L_('@'):
 894         case L_('!'):
 895           if (__glibc_unlikely (flags & FNM_EXTMATCH) && *p == '(')
 896             {
 897               int res = EXT (c, p, n, string_end, no_leading_period, flags);
 898               if (res != -1)
 899                 return res;
 900             }
 901           goto normal_match;
 902
 903         case L_('/'):
 904           if (NO_LEADING_PERIOD (flags))
 905             {
 906               if (n == string_end || c != (UCHAR) *n)
 907                 return FNM_NOMATCH;
 908
 909               new_no_leading_period = true;
 910               break;
 911             }
 912           FALLTHROUGH;
 913         default:
 914         normal_match:
 915           if (n == string_end || c != FOLD ((UCHAR) *n))
 916             return FNM_NOMATCH;
 917         }
 918
 919       no_leading_period = new_no_leading_period;
 920       ++n;
 921     }
 922
 923   if (n == string_end)
 924     return 0;
 925
 926   if ((flags & FNM_LEADING_DIR) && n != string_end && *n == L_('/'))
 927     /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz".  */
 928     return 0;
 929
 930   return FNM_NOMATCH;
 931 }
 932
 933
 934 static const CHAR *
 935 END (const CHAR *pattern)
 936 {
 937   const CHAR *p = pattern;
 938
 939   while (1)
 940     if (*++p == L_('\0'))
 941       /* This is an invalid pattern.  */
 942       return pattern;
 943     else if (*p == L_('['))
 944       {
 945         /* Handle brackets special.  */
 946         if (posixly_correct == 0)
 947           posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
 948
 949         /* Skip the not sign.  We have to recognize it because of a possibly
 950            following ']'.  */
 951         if (*++p == L_('!') || (posixly_correct < 0 && *p == L_('^')))
 952           ++p;
 953         /* A leading ']' is recognized as such.  */
 954         if (*p == L_(']'))
 955           ++p;
 956         /* Skip over all characters of the list.  */
 957         while (*p != L_(']'))
 958           if (*p++ == L_('\0'))
 959             /* This is no valid pattern.  */
 960             return pattern;
 961       }
 962     else if ((*p == L_('?') || *p == L_('*') || *p == L_('+') || *p == L_('@')
 963               || *p == L_('!')) && p[1] == L_('('))
 964       {
 965         p = END (p + 1);
 966         if (*p == L_('\0'))
 967           /* This is an invalid pattern.  */
 968           return pattern;
 969       }
 970     else if (*p == L_(')'))
 971       break;
 972
 973   return p + 1;
 974 }
 975
 976 #if WIDE_CHAR_VERSION
 977 # define PATTERN_PREFIX pattern_list
 978 #else
 979 # define PATTERN_PREFIX wpattern_list
 980 #endif
 981
 982 #define PASTE(a,b)                 PASTE1(a,b)
 983 #define PASTE1(a,b)                a##b
 984
 985 #define DYNARRAY_STRUCT            PATTERN_PREFIX
 986 #define DYNARRAY_ELEMENT_FREE(ptr) free (*ptr)
 987 #define DYNARRAY_ELEMENT           CHAR *
 988 #define DYNARRAY_PREFIX            PASTE(PATTERN_PREFIX,_)
 989 #define DYNARRAY_INITIAL_SIZE      8
 990 #include <malloc/dynarray-skeleton.c>
 991
 992 static int
 993 EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end,
 994      bool no_leading_period, int flags)
 995 {
 996   const CHAR *startp;
 997   ptrdiff_t level;
 998   struct PATTERN_PREFIX list;
 999   size_t pattern_len = STRLEN (pattern);
1000   size_t pattern_i = 0;
1001   const CHAR *p;
1002   const CHAR *rs;
1003   int retval = 0;
1004
1005   PASTE (PATTERN_PREFIX, _init) (&list);
1006
1007   /* Parse the pattern.  Store the individual parts in the list.  */
1008   level = 0;
1009   for (startp = p = pattern + 1; level >= 0; ++p)
1010     if (*p == L_('\0'))
1011       {
1012         /* This is an invalid pattern.  */
1013         retval = -1;
1014         goto out;
1015       }
1016     else if (*p == L_('['))
1017       {
1018         /* Handle brackets special.  */
1019         if (posixly_correct == 0)
1020           posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
1021
1022         /* Skip the not sign.  We have to recognize it because of a possibly
1023            following ']'.  */
1024         if (*++p == L_('!') || (posixly_correct < 0 && *p == L_('^')))
1025           ++p;
1026         /* A leading ']' is recognized as such.  */
1027         if (*p == L_(']'))
1028           ++p;
1029         /* Skip over all characters of the list.  */
1030         while (*p != L_(']'))
1031           if (*p++ == L_('\0'))
1032             {
1033               /* This is no valid pattern.  */
1034               retval = -1;
1035               goto out;
1036             }
1037       }
1038     else if ((*p == L_('?') || *p == L_('*') || *p == L_('+') || *p == L_('@')
1039               || *p == L_('!')) && p[1] == L_('('))
1040       /* Remember the nesting level.  */
1041       ++level;
1042     else if (*p == L_(')') || *p == L_('|'))
1043       {
1044         if (level == 0)
1045           {
1046             size_t slen = opt == L_('?') || opt == L_('@')
1047                           ? pattern_len : p - startp + 1;
1048             CHAR *newp = malloc (slen * sizeof (CHAR));
1049             if (newp != NULL)
1050               {
1051                 *((CHAR *) MEMPCPY (newp, startp, p - startp)) = L_('\0');
1052                 PASTE (PATTERN_PREFIX,_add) (&list, newp);
1053               }
1054             if (newp == NULL || PASTE (PATTERN_PREFIX, _has_failed) (&list))
1055               {
1056                 retval = -2;
1057                 goto out;
1058               }
1059
1060             if (*p == L_('|'))
1061               startp = p + 1;
1062           }
1063         if (*p == L_(')'))
1064           level--;
1065       }
1066   assert (p[-1] == L_(')'));
1067
1068   switch (opt)
1069     {
1070     case L_('*'):
1071       if (FCT (p, string, string_end, no_leading_period, flags, NULL) == 0)
1072         goto success;
1073       FALLTHROUGH;
1074     case L_('+'):
1075       for (; pattern_i < PASTE (PATTERN_PREFIX, _size)(&list); pattern_i++)
1076         {
1077           for (rs = string; rs <= string_end; ++rs)
1078             /* First match the prefix with the current pattern with the
1079                current pattern.  */
1080             if (FCT (*PASTE (PATTERN_PREFIX, _at) (&list, pattern_i), string,
1081                      rs, no_leading_period,
1082                      flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1083                      NULL) == 0
1084                 /* This was successful.  Now match the rest with the rest
1085                    of the pattern.  */
1086                 && (FCT (p, rs, string_end,
1087                          rs == string
1088                          ? no_leading_period
1089                          : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1090                          flags & FNM_FILE_NAME
1091                          ? flags : flags & ~FNM_PERIOD, NULL) == 0
1092                     /* This didn't work.  Try the whole pattern.  */
1093                     || (rs != string
1094                         && FCT (pattern - 1, rs, string_end,
1095                                 rs == string
1096                                 ? no_leading_period
1097                                 : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1098                                 flags & FNM_FILE_NAME
1099                                 ? flags : flags & ~FNM_PERIOD, NULL) == 0)))
1100               /* It worked.  Signal success.  */
1101               goto success;
1102         }
1103
1104       /* None of the patterns lead to a match.  */
1105       retval = FNM_NOMATCH;
1106       break;
1107
1108     case L_('?'):
1109       if (FCT (p, string, string_end, no_leading_period, flags, NULL) == 0)
1110         goto success;
1111       FALLTHROUGH;
1112     case L_('@'):
1113       for (; pattern_i < PASTE (PATTERN_PREFIX, _size) (&list); pattern_i++)
1114         {
1115           /* I cannot believe it but `strcat' is actually acceptable
1116              here.  Match the entire string with the prefix from the
1117              pattern list and the rest of the pattern following the
1118              pattern list.  */
1119           if (FCT (STRCAT (*PASTE (PATTERN_PREFIX, _at) (&list, pattern_i), p),
1120                    string, string_end, no_leading_period,
1121                    flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1122                    NULL) == 0)
1123             /* It worked.  Signal success.  */
1124             goto success;
1125         }
1126
1127       /* None of the patterns lead to a match.  */
1128       retval = FNM_NOMATCH;
1129       break;
1130
1131     case L_('!'):
1132       for (rs = string; rs <= string_end; ++rs)
1133         {
1134           size_t runp_i;
1135
1136           for (runp_i = pattern_i;
1137                runp_i != PASTE (PATTERN_PREFIX, _size) (&list);
1138                runp_i++)
1139             {
1140               if (FCT (*PASTE (PATTERN_PREFIX, _at) (&list, runp_i), string, rs,
1141                        no_leading_period,
1142                        flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1143                        NULL) == 0)
1144               break;
1145             }
1146
1147           /* If none of the patterns matched see whether the rest does.  */
1148           if (runp_i == PASTE (PATTERN_PREFIX, _size) (&list)
1149               && (FCT (p, rs, string_end,
1150                        rs == string
1151                        ? no_leading_period
1152                        : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1153                        flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1154                        NULL) == 0))
1155             /* This is successful.  */
1156             goto success;
1157         }
1158
1159       /* None of the patterns together with the rest of the pattern
1160          lead to a match.  */
1161       retval = FNM_NOMATCH;
1162       break;
1163
1164     default:
1165       assert (! "Invalid extended matching operator");
1166       retval = -1;
1167       break;
1168     }
1169
1170  success:
1171  out:
1172   PASTE (PATTERN_PREFIX, _free) (&list);
1173
1174   return retval;
1175 }
1176
/* Remove every macro that parameterizes this file, as well as the
   helpers defined above.  */

/* Character types and literal helpers.  */
#undef CHAR
#undef UCHAR
#undef INT
#undef L_
#undef FOLD
#undef BTOWC
#undef WIDE_CHAR_VERSION

/* Names of the generated functions and of the helper struct.  */
#undef FCT
#undef EXT
#undef END
#undef STRUCT

/* String and lookup primitives.  */
#undef MEMPCPY
#undef MEMCHR
#undef STRLEN
#undef STRCAT
#undef FINDIDX

/* Helpers for the sub-pattern list.  */
#undef PATTERN_PREFIX
#undef PASTE
#undef PASTE1