posix/fnmatch_loop.c

   1 /* Copyright (C) 1991-2024 Free Software Foundation, Inc.
   2    This file is part of the GNU C Library.
   3
   4    The GNU C Library is free software; you can redistribute it and/or
   5    modify it under the terms of the GNU Lesser General Public
   6    License as published by the Free Software Foundation; either
   7    version 2.1 of the License, or (at your option) any later version.
   8
   9    The GNU C Library is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  12    Lesser General Public License for more details.
  13
  14    You should have received a copy of the GNU Lesser General Public
  15    License along with the GNU C Library; if not, see
  16    <https://www.gnu.org/licenses/>.  */
  17
  18 #ifdef _LIBC
  19 # include <stdint.h>
  20 #endif
  21
  22 struct STRUCT /* Position record that FCT fills in through its ENDS argument.  */
  23 {             /* Filled in when ENDS is non-null and FCT reaches a '*'.  */
  24   const CHAR *pattern;     /* Points at the '*' in the pattern where FCT stopped.  */
  25   const CHAR *string;      /* String position when that '*' was reached.  */
  26   bool no_leading_period;  /* FCT's no_leading_period value at that point.  */
  27 }; /* FCT's '*' handling presets pattern to NULL and tests it after the nested call.  */
  28
  29 /* Match STRING against the file name pattern PATTERN, returning zero if
  30    it matches, nonzero if not.  STRING_END marks the end of STRING.  */
  31 static int FCT (const CHAR *pattern, const CHAR *string,
  32                 const CHAR *string_end, bool no_leading_period, int flags,
  33                 struct STRUCT *ends); /* ENDS may be NULL; if not, FCT records where it met a '*' and returns 0.  */
  34 static int EXT (INT opt, const CHAR *pattern, const CHAR *string,
  35                 const CHAR *string_end, bool no_leading_period, int flags); /* FCT returns any result other than -1 at once; on -1 it keeps matching.  */
  36 static const CHAR *END (const CHAR *patternp); /* FCT skips ahead to the result when it differs from PATTERNP.  */
  37
  38 static int
  39 FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end,
  40      bool no_leading_period, int flags, struct STRUCT *ends)
  41 {
  42   const CHAR *p = pattern, *n = string;
  43   UCHAR c;
  44 #ifdef _LIBC
  45 # if WIDE_CHAR_VERSION
  46   const char *collseq = (const char *)
  47     _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC);
  48 # else
  49   const UCHAR *collseq = (const UCHAR *)
  50     _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQMB);
  51 # endif
  52 #endif
  53
  54   while ((c = *p++) != L_('\0'))
  55     {
  56       bool new_no_leading_period = false;
  57       c = FOLD (c);
  58
  59       switch (c)
  60         {
  61         case L_('?'):
  62           if (__glibc_unlikely (flags & FNM_EXTMATCH) && *p == '(')
  63             {
  64               int res = EXT (c, p, n, string_end, no_leading_period, flags);
  65               if (res != -1)
  66                 return res;
  67             }
  68
  69           if (n == string_end)
  70             return FNM_NOMATCH;
  71           else if (*n == L_('/') && (flags & FNM_FILE_NAME))
  72             return FNM_NOMATCH;
  73           else if (*n == L_('.') && no_leading_period)
  74             return FNM_NOMATCH;
  75           break;
  76
  77         case L_('\\'):
  78           if (!(flags & FNM_NOESCAPE))
  79             {
  80               c = *p++;
  81               if (c == L_('\0'))
  82                 /* Trailing \ loses.  */
  83                 return FNM_NOMATCH;
  84               c = FOLD (c);
  85             }
  86           if (n == string_end || FOLD ((UCHAR) *n) != c)
  87             return FNM_NOMATCH;
  88           break;
  89
  90         case L_('*'):
  91           if (__glibc_unlikely (flags & FNM_EXTMATCH) && *p == '(')
  92             {
  93               int res = EXT (c, p, n, string_end, no_leading_period, flags);
  94               if (res != -1)
  95                 return res;
  96             }
  97           else if (ends != NULL)
  98             {
  99               ends->pattern = p - 1;
 100               ends->string = n;
 101               ends->no_leading_period = no_leading_period;
 102               return 0;
 103             }
 104
 105           if (n != string_end && *n == L_('.') && no_leading_period)
 106             return FNM_NOMATCH;
 107
 108           for (c = *p++; c == L_('?') || c == L_('*'); c = *p++)
 109             {
 110               if (*p == L_('(') && (flags & FNM_EXTMATCH) != 0)
 111                 {
 112                   const CHAR *endp = END (p);
 113                   if (endp != p)
 114                     {
 115                       /* This is a pattern.  Skip over it.  */
 116                       p = endp;
 117                       continue;
 118                     }
 119                 }
 120
 121               if (c == L_('?'))
 122                 {
 123                   /* A ? needs to match one character.  */
 124                   if (n == string_end)
 125                     /* There isn't another character; no match.  */
 126                     return FNM_NOMATCH;
 127                   else if (*n == L_('/')
 128                            && __glibc_unlikely (flags & FNM_FILE_NAME))
 129                     /* A slash does not match a wildcard under
 130                        FNM_FILE_NAME.  */
 131                     return FNM_NOMATCH;
 132                   else
 133                     /* One character of the string is consumed in matching
 134                        this ? wildcard, so *??? won't match if there are
 135                        less than three characters.  */
 136                     ++n;
 137                 }
 138             }
 139
 140           if (c == L_('\0'))
 141             /* The wildcard(s) is/are the last element of the pattern.
 142                If the name is a file name and contains another slash
 143                this means it cannot match, unless the FNM_LEADING_DIR
 144                flag is set.  */
 145             {
 146               int result = (flags & FNM_FILE_NAME) == 0 ? 0 : FNM_NOMATCH;
 147
 148               if (flags & FNM_FILE_NAME)
 149                 {
 150                   if (flags & FNM_LEADING_DIR)
 151                     result = 0;
 152                   else
 153                     {
 154                       if (MEMCHR (n, L_('/'), string_end - n) == NULL)
 155                         result = 0;
 156                     }
 157                 }
 158
 159               return result;
 160             }
 161           else
 162             {
 163               const CHAR *endp;
 164               struct STRUCT end;
 165
 166               end.pattern = NULL;
 167               endp = MEMCHR (n, (flags & FNM_FILE_NAME) ? L_('/') : L_('\0'),
 168                              string_end - n);
 169               if (endp == NULL)
 170                 endp = string_end;
 171
 172               if (c == L_('[')
 173                   || (__glibc_unlikely (flags & FNM_EXTMATCH)
 174                       && (c == L_('@') || c == L_('+') || c == L_('!'))
 175                       && *p == L_('(')))
 176                 {
 177                   int flags2 = ((flags & FNM_FILE_NAME)
 178                                 ? flags : (flags & ~FNM_PERIOD));
 179
 180                   for (--p; n < endp; ++n, no_leading_period = false)
 181                     if (FCT (p, n, string_end, no_leading_period, flags2,
 182                              &end) == 0)
 183                       goto found;
 184                 }
 185               else if (c == L_('/') && (flags & FNM_FILE_NAME))
 186                 {
 187                   while (n < string_end && *n != L_('/'))
 188                     ++n;
 189                   if (n < string_end && *n == L_('/')
 190                       && (FCT (p, n + 1, string_end, flags & FNM_PERIOD, flags,
 191                                NULL) == 0))
 192                     return 0;
 193                 }
 194               else
 195                 {
 196                   int flags2 = ((flags & FNM_FILE_NAME)
 197                                 ? flags : (flags & ~FNM_PERIOD));
 198
 199                   if (c == L_('\\') && !(flags & FNM_NOESCAPE))
 200                     c = *p;
 201                   c = FOLD (c);
 202                   for (--p; n < endp; ++n, no_leading_period = false)
 203                     if (FOLD ((UCHAR) *n) == c
 204                         && (FCT (p, n, string_end, no_leading_period, flags2,
 205                                  &end) == 0))
 206                       {
 207                       found:
 208                         if (end.pattern == NULL)
 209                           return 0;
 210                         break;
 211                       }
 212                   if (end.pattern != NULL)
 213                     {
 214                       p = end.pattern;
 215                       n = end.string;
 216                       no_leading_period = end.no_leading_period;
 217                       continue;
 218                     }
 219                 }
 220             }
 221
 222           /* If we come here no match is possible with the wildcard.  */
 223           return FNM_NOMATCH;
 224
 225         case L_('['):
 226           {
 227             /* Nonzero if the sense of the character class is inverted.  */
 228             const CHAR *p_init = p;
 229             const CHAR *n_init = n;
 230             bool not;
 231             CHAR cold;
 232             UCHAR fn;
 233
 234             if (posixly_correct == 0)
 235               posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
 236
 237             if (n == string_end)
 238               return FNM_NOMATCH;
 239
 240             if (*n == L_('.') && no_leading_period)
 241               return FNM_NOMATCH;
 242
 243             if (*n == L_('/') && (flags & FNM_FILE_NAME))
 244               /* '/' cannot be matched.  */
 245               return FNM_NOMATCH;
 246
 247             not = (*p == L_('!') || (posixly_correct < 0 && *p == L_('^')));
 248             if (not)
 249               ++p;
 250
 251             fn = FOLD ((UCHAR) *n);
 252
 253             c = *p++;
 254             for (;;)
 255               {
 256                 if (!(flags & FNM_NOESCAPE) && c == L_('\\'))
 257                   {
 258                     if (*p == L_('\0'))
 259                       return FNM_NOMATCH;
 260                     c = FOLD ((UCHAR) *p);
 261                     ++p;
 262
 263                     goto normal_bracket;
 264                   }
 265                 else if (c == L_('[') && *p == L_(':'))
 266                   {
 267                     /* Leave room for the null.  */
 268                     CHAR str[CHAR_CLASS_MAX_LENGTH + 1];
 269                     size_t c1 = 0;
 270                     wctype_t wt;
 271                     const CHAR *startp = p;
 272
 273                     for (;;)
 274                       {
 275                         if (c1 == CHAR_CLASS_MAX_LENGTH)
 276                           /* The name is too long and therefore the pattern
 277                              is ill-formed.  */
 278                           return FNM_NOMATCH;
 279
 280                         c = *++p;
 281                         if (c == L_(':') && p[1] == L_(']'))
 282                           {
 283                             p += 2;
 284                             break;
 285                           }
 286                         if (c < L_('a') || c >= L_('z'))
 287                           {
 288                             /* This cannot possibly be a character class name.
 289                                Match it as a normal range.  */
 290                             p = startp;
 291                             c = L_('[');
 292                             goto normal_bracket;
 293                           }
 294                         str[c1++] = c;
 295                       }
 296                     str[c1] = L_('\0');
 297
 298                     wt = IS_CHAR_CLASS (str);
 299                     if (wt == 0)
 300                       /* Invalid character class name.  */
 301                       return FNM_NOMATCH;
 302
 303 #if defined _LIBC && ! WIDE_CHAR_VERSION
 304                     /* The following code is glibc specific, but it does
 305                        a good job of speeding up the code since we can
 306                        avoid the btowc() call.  */
 307                     if (_ISCTYPE ((UCHAR) *n, wt))
 308                       goto matched;
 309 #else
 310                     if (iswctype (BTOWC ((UCHAR) *n), wt))
 311                       goto matched;
 312 #endif
 313                     c = *p++;
 314                   }
 315 #ifdef _LIBC
 316                 else if (c == L_('[') && *p == L_('='))
 317                   {
 318                     /* It's important that STR be a scalar variable rather
 319                        than a one-element array, because GCC (at least 4.9.2
 320                        -O2 on x86-64) can be confused by the array and
 321                        diagnose a "used uninitialized" in a dead branch in the
 322                        findidx function.  */
 323                     UCHAR str;
 324                     uint32_t nrules =
 325                       _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
 326                     const CHAR *startp = p;
 327
 328                     c = *++p;
 329                     if (c == L_('\0'))
 330                       {
 331                         p = startp;
 332                         c = L_('[');
 333                         goto normal_bracket;
 334                       }
 335                     str = c;
 336
 337                     c = *++p;
 338                     if (c != L_('=') || p[1] != L_(']'))
 339                       {
 340                         p = startp;
 341                         c = L_('[');
 342                         goto normal_bracket;
 343                       }
 344                     p += 2;
 345
 346                     if (nrules == 0)
 347                       {
 348                         if ((UCHAR) *n == str)
 349                           goto matched;
 350                       }
 351                     else
 352                       {
 353                         const int32_t *table;
 354 # if WIDE_CHAR_VERSION
 355                         const int32_t *weights;
 356                         const wint_t *extra;
 357 # else
 358                         const unsigned char *weights;
 359                         const unsigned char *extra;
 360 # endif
 361                         const int32_t *indirect;
 362                         int32_t idx;
 363                         const UCHAR *cp = (const UCHAR *) &str;
 364
 365 # if WIDE_CHAR_VERSION
 366                         table = (const int32_t *)
 367                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
 368                         weights = (const int32_t *)
 369                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
 370                         extra = (const wint_t *)
 371                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
 372                         indirect = (const int32_t *)
 373                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
 374 # else
 375                         table = (const int32_t *)
 376                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
 377                         weights = (const unsigned char *)
 378                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
 379                         extra = (const unsigned char *)
 380                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
 381                         indirect = (const int32_t *)
 382                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
 383 # endif
 384
 385                         idx = FINDIDX (table, indirect, extra, &cp, 1);
 386                         if (idx != 0)
 387                           {
 388                             /* We found a table entry.  Now see whether the
 389                                character we are currently at has the same
 390                                equivalence class value.  */
 391                             int len = weights[idx & 0xffffff];
 392                             int32_t idx2;
 393                             const UCHAR *np = (const UCHAR *) n;
 394
 395                             idx2 = FINDIDX (table, indirect, extra,
 396                                             &np, string_end - n);
 397                             if (idx2 != 0
 398                                 && (idx >> 24) == (idx2 >> 24)
 399                                 && len == weights[idx2 & 0xffffff])
 400                               {
 401                                 int cnt = 0;
 402
 403                                 idx &= 0xffffff;
 404                                 idx2 &= 0xffffff;
 405
 406                                 while (cnt < len
 407                                        && (weights[idx + 1 + cnt]
 408                                            == weights[idx2 + 1 + cnt]))
 409                                   ++cnt;
 410
 411                                 if (cnt == len)
 412                                   goto matched;
 413                               }
 414                           }
 415                       }
 416
 417                     c = *p++;
 418                   }
 419 #endif
 420                 else if (c == L_('\0'))
 421                   {
 422                     /* [ unterminated, treat as normal character.  */
 423                     p = p_init;
 424                     n = n_init;
 425                     c = L_('[');
 426                     goto normal_match;
 427                   }
 428                 else
 429                   {
 430                     bool is_range = false;
 431
 432 #ifdef _LIBC
 433                     bool is_seqval = false;
 434
 435                     if (c == L_('[') && *p == L_('.'))
 436                       {
 437                         uint32_t nrules =
 438                           _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
 439                         const CHAR *startp = p;
 440                         size_t c1 = 0;
 441
 442                         while (1)
 443                           {
 444                             c = *++p;
 445                             if (c == L_('.') && p[1] == L_(']'))
 446                               {
 447                                 p += 2;
 448                                 break;
 449                               }
 450                             if (c == '\0')
 451                               return FNM_NOMATCH;
 452                             ++c1;
 453                           }
 454
 455                         /* We have to handle the symbols differently in
 456                            ranges since then the collation sequence is
 457                            important.  */
 458                         is_range = *p == L_('-') && p[1] != L_('\0');
 459
 460                         if (nrules == 0)
 461                           {
 462                             /* There are no names defined in the collation
 463                                data.  Therefore we only accept the trivial
 464                                names consisting of the character itself.  */
 465                             if (c1 != 1)
 466                               return FNM_NOMATCH;
 467
 468                             if (!is_range && *n == startp[1])
 469                               goto matched;
 470
 471                             cold = startp[1];
 472                             c = *p++;
 473                           }
 474                         else
 475                           {
 476                             int32_t table_size;
 477                             const int32_t *symb_table;
 478                             const unsigned char *extra;
 479                             int32_t idx;
 480                             int32_t elem;
 481 # if WIDE_CHAR_VERSION
 482                             CHAR *wextra;
 483 # endif
 484
 485                             table_size =
 486                               _NL_CURRENT_WORD (LC_COLLATE,
 487                                                 _NL_COLLATE_SYMB_HASH_SIZEMB);
 488                             symb_table = (const int32_t *)
 489                               _NL_CURRENT (LC_COLLATE,
 490                                            _NL_COLLATE_SYMB_TABLEMB);
 491                             extra = (const unsigned char *)
 492                               _NL_CURRENT (LC_COLLATE,
 493                                            _NL_COLLATE_SYMB_EXTRAMB);
 494
 495                             for (elem = 0; elem < table_size; elem++)
 496                               if (symb_table[2 * elem] != 0)
 497                                 {
 498                                   idx = symb_table[2 * elem + 1];
 499                                   /* Skip the name of collating element.  */
 500                                   idx += 1 + extra[idx];
 501 # if WIDE_CHAR_VERSION
 502                                   /* Skip the byte sequence of the
 503                                      collating element.  */
 504                                   idx += 1 + extra[idx];
 505                                   /* Adjust for the alignment.  */
 506                                   idx = (idx + 3) & ~3;
 507
 508                                   wextra = (CHAR *) &extra[idx + 4];
 509
 510                                   if (/* Compare the length of the sequence.  */
 511                                       c1 == wextra[0]
 512                                       /* Compare the wide char sequence.  */
 513                                       && (__wmemcmp (startp + 1, &wextra[1],
 514                                                      c1)
 515                                           == 0))
 516                                     /* Yep, this is the entry.  */
 517                                     break;
 518 # else
 519                                   if (/* Compare the length of the sequence.  */
 520                                       c1 == extra[idx]
 521                                       /* Compare the byte sequence.  */
 522                                       && memcmp (startp + 1,
 523                                                  &extra[idx + 1], c1) == 0)
 524                                     /* Yep, this is the entry.  */
 525                                     break;
 526 # endif
 527                                 }
 528
 529                             if (elem < table_size)
 530                               {
 531                                 /* Compare the byte sequence but only if
 532                                    this is not part of a range.  */
 533
 534                                 /* The compiler might warn that idx may be
 535                                    used uninitialized, however it will be
 536                                    reached iff elem < table_size which means
 537                                    that it was properly set in the loop
 538                                    above.   */
 539                                 DIAG_PUSH_NEEDS_COMMENT;
 540                                 DIAG_IGNORE_Os_NEEDS_COMMENT (8, "-Wmaybe-uninitialized");
 541                                 if (! is_range
 542
 543 # if WIDE_CHAR_VERSION
 544                                     && __wmemcmp (n, &wextra[1], c1) == 0
 545 # else
 546                                     && memcmp (n, &extra[idx + 1], c1) == 0
 547 # endif
 548                                     )
 549                                   {
 550                                     n += c1 - 1;
 551                                     goto matched;
 552                                   }
 553                                 DIAG_POP_NEEDS_COMMENT;
 554
 555                                 /* Get the collation sequence value.  */
 556                                 is_seqval = true;
 557 # if WIDE_CHAR_VERSION
 558                                 /* The compiler might warn that wextra may be
 559                                    used uninitialized and similar to 'idx'
 560                                    above it will be properly set by the loop.
 561                                    */
 562                                 DIAG_PUSH_NEEDS_COMMENT;
 563                                 DIAG_IGNORE_Os_NEEDS_COMMENT (8, "-Wmaybe-uninitialized");
 564                                 cold = wextra[1 + wextra[0]];
 565                                 DIAG_POP_NEEDS_COMMENT;
 566 # else
 567                                 idx += 1 + extra[idx];
 568                                 /* Adjust for the alignment.  */
 569                                 idx = (idx + 3) & ~3;
 570                                 cold = *((int32_t *) &extra[idx]);
 571 # endif
 572
 573                                 c = *p++;
 574                               }
 575                             else if (c1 == 1)
 576                               {
 577                                 /* No valid character.  Match it as a
 578                                    single byte.  */
 579                                 if (!is_range && *n == startp[1])
 580                                   goto matched;
 581
 582                                 cold = startp[1];
 583                                 c = *p++;
 584                               }
 585                             else
 586                               return FNM_NOMATCH;
 587                           }
 588                       }
 589                     else
 590 #endif
 591                       {
 592                         c = FOLD (c);
 593                       normal_bracket:
 594
 595                         /* We have to handle the symbols differently in
 596                            ranges since then the collation sequence is
 597                            important.  */
 598                         is_range = (*p == L_('-') && p[1] != L_('\0')
 599                                     && p[1] != L_(']'));
 600
 601                         if (!is_range && c == fn)
 602                           goto matched;
 603
 604 #if _LIBC
 605                         /* This is needed if we goto normal_bracket; from
 606                            outside of is_seqval's scope.  */
 607                         is_seqval = false;
 608 #endif
 609                         cold = c;
 610                         c = *p++;
 611                       }
 612
 613                     if (c == L_('-') && *p != L_(']'))
 614                       {
 615 #if _LIBC
 616                         /* We have to find the collation sequence
 617                            value for C.  Collation sequence is nothing
 618                            we can regularly access.  The sequence
 619                            value is defined by the order in which the
 620                            definitions of the collation values for the
 621                            various characters appear in the source
 622                            file.  A strange concept, nowhere
 623                            documented.  */
 624                         uint32_t fcollseq;
 625                         uint32_t lcollseq;
 626                         UCHAR cend = *p++;
 627
 628 # if WIDE_CHAR_VERSION
 629                         /* Search in the 'names' array for the characters.  */
 630                         fcollseq = __collseq_table_lookup (collseq, fn);
 631                         if (fcollseq == ~((uint32_t) 0))
 632                           /* XXX We don't know anything about the character
 633                              we are supposed to match.  This means we are
 634                              failing.  */
 635                           goto range_not_matched;
 636
 637                         if (is_seqval)
 638                           lcollseq = cold;
 639                         else
 640                           lcollseq = __collseq_table_lookup (collseq, cold);
 641 # else
 642                         fcollseq = collseq[fn];
 643                         lcollseq = is_seqval ? cold : collseq[(UCHAR) cold];
 644 # endif
 645
 646                         is_seqval = false;
 647                         if (cend == L_('[') && *p == L_('.'))
 648                           {
 649                             uint32_t nrules =
 650                               _NL_CURRENT_WORD (LC_COLLATE,
 651                                                 _NL_COLLATE_NRULES);
 652                             const CHAR *startp = p;
 653                             size_t c1 = 0;
 654
 655                             while (1)
 656                               {
 657                                 c = *++p;
 658                                 if (c == L_('.') && p[1] == L_(']'))
 659                                   {
 660                                     p += 2;
 661                                     break;
 662                                   }
 663                                 if (c == '\0')
 664                                   return FNM_NOMATCH;
 665                                 ++c1;
 666                               }
 667
 668                             if (nrules == 0)
 669                               {
 670                                 /* There are no names defined in the
 671                                    collation data.  Therefore we only
 672                                    accept the trivial names consisting
 673                                    of the character itself.  */
 674                                 if (c1 != 1)
 675                                   return FNM_NOMATCH;
 676
 677                                 cend = startp[1];
 678                               }
 679                             else
 680                               {
 681                                 int32_t table_size;
 682                                 const int32_t *symb_table;
 683                                 const unsigned char *extra;
 684                                 int32_t idx;
 685                                 int32_t elem;
 686 # if WIDE_CHAR_VERSION
 687                                 CHAR *wextra;
 688 # endif
 689
 690                                 table_size =
 691                                   _NL_CURRENT_WORD (LC_COLLATE,
 692                                                     _NL_COLLATE_SYMB_HASH_SIZEMB);
 693                                 symb_table = (const int32_t *)
 694                                   _NL_CURRENT (LC_COLLATE,
 695                                                _NL_COLLATE_SYMB_TABLEMB);
 696                                 extra = (const unsigned char *)
 697                                   _NL_CURRENT (LC_COLLATE,
 698                                                _NL_COLLATE_SYMB_EXTRAMB);
 699
 700                                 for (elem = 0; elem < table_size; elem++)
 701                                   if (symb_table[2 * elem] != 0)
 702                                     {
 703                                       idx = symb_table[2 * elem + 1];
 704                                       /* Skip the name of collating
 705                                          element.  */
 706                                       idx += 1 + extra[idx];
 707 # if WIDE_CHAR_VERSION
 708                                       /* Skip the byte sequence of the
 709                                          collating element.  */
 710                                       idx += 1 + extra[idx];
 711                                       /* Adjust for the alignment.  */
 712                                       idx = (idx + 3) & ~3;
 713
 714                                       wextra = (CHAR *) &extra[idx + 4];
 715
 716                                       if (/* Compare the length of the
 717                                              sequence.  */
 718                                           c1 == wextra[0]
 719                                           /* Compare the wide char sequence.  */
 720                                           && (__wmemcmp (startp + 1,
 721                                                          &wextra[1], c1)
 722                                               == 0))
 723                                         /* Yep, this is the entry.  */
 724                                         break;
 725 # else
 726                                       if (/* Compare the length of the
 727                                              sequence.  */
 728                                           c1 == extra[idx]
 729                                           /* Compare the byte sequence.  */
 730                                           && memcmp (startp + 1,
 731                                                      &extra[idx + 1], c1) == 0)
 732                                         /* Yep, this is the entry.  */
 733                                         break;
 734 # endif
 735                                     }
 736
 737                                 if (elem < table_size)
 738                                   {
 739                                     /* Get the collation sequence value.  */
 740                                     is_seqval = true;
 741 # if WIDE_CHAR_VERSION
 742                                     /* The compiler might warn that wextra may
 743                                        be used uninitialized, however it will
 744                                        be reached iff elem < table_size which
 745                                        means that it was properly set in the
 746                                        loop above.   */
 747                                     DIAG_PUSH_NEEDS_COMMENT;
 748                                     DIAG_IGNORE_Os_NEEDS_COMMENT (8, "-Wmaybe-uninitialized");
 749                                     cend = wextra[1 + wextra[0]];
 750                                     DIAG_POP_NEEDS_COMMENT;
 751 # else
  752                                     /* The compiler might warn that idx may
  753                                        be used uninitialized; as with wextra
  754                                        above, it will have been properly set
  755                                        by the loop.   */
 756                                     DIAG_PUSH_NEEDS_COMMENT;
 757                                     DIAG_IGNORE_Os_NEEDS_COMMENT (8, "-Wmaybe-uninitialized");
 758                                     idx += 1 + extra[idx];
 759                                     DIAG_POP_NEEDS_COMMENT;
 760                                     /* Adjust for the alignment.  */
 761                                     idx = (idx + 3) & ~3;
 762                                     cend = *((int32_t *) &extra[idx]);
 763 # endif
 764                                   }
 765                                 else if (c1 == 1)
 766                                   {
 767                                     cend = startp[1];
 768                                     c = *p++;
 769                                   }
 770                                 else
 771                                   return FNM_NOMATCH;
 772                               }
 773                           }
 774                         else
 775                           {
 776                             if (!(flags & FNM_NOESCAPE) && cend == L_('\\'))
 777                               cend = *p++;
 778                             if (cend == L_('\0'))
 779                               return FNM_NOMATCH;
 780                             cend = FOLD (cend);
 781                           }
 782
 783                         /* XXX It is not entirely clear to me how to handle
 784                            characters which are not mentioned in the
 785                            collation specification.  */
 786                         if (
 787 # if WIDE_CHAR_VERSION
 788                             lcollseq == 0xffffffff ||
 789 # endif
 790                             lcollseq <= fcollseq)
 791                           {
 792                             /* We have to look at the upper bound.  */
 793                             uint32_t hcollseq;
 794
 795                             if (is_seqval)
 796                               hcollseq = cend;
 797                             else
 798                               {
 799 # if WIDE_CHAR_VERSION
 800                                 hcollseq =
 801                                   __collseq_table_lookup (collseq, cend);
 802                                 if (hcollseq == ~((uint32_t) 0))
 803                                   {
 804                                     /* Hum, no information about the upper
 805                                        bound.  The matching succeeds if the
 806                                        lower bound is matched exactly.  */
 807                                     if (lcollseq != fcollseq)
 808                                       goto range_not_matched;
 809
 810                                     goto matched;
 811                                   }
 812 # else
 813                                 hcollseq = collseq[cend];
 814 # endif
 815                               }
 816
 817                             if (lcollseq <= hcollseq && fcollseq <= hcollseq)
 818                               goto matched;
 819                           }
 820 # if WIDE_CHAR_VERSION
 821                       range_not_matched:
 822 # endif
 823 #else
 824                         /* We use a boring value comparison of the character
 825                            values.  This is better than comparing using
 826                            'strcoll' since the latter would have surprising
 827                            and sometimes fatal consequences.  */
 828                         UCHAR cend = *p++;
 829
 830                         if (!(flags & FNM_NOESCAPE) && cend == L_('\\'))
 831                           cend = *p++;
 832                         if (cend == L_('\0'))
 833                           return FNM_NOMATCH;
 834
 835                         /* It is a range.  */
 836                         if ((UCHAR) cold <= fn && fn <= cend)
 837                           goto matched;
 838 #endif
 839
 840                         c = *p++;
 841                       }
 842                   }
 843
 844                 if (c == L_(']'))
 845                   break;
 846               }
 847
 848             if (!not)
 849               return FNM_NOMATCH;
 850             break;
 851
 852           matched:
 853             /* Skip the rest of the [...] that already matched.  */
 854             while ((c = *p++) != L_(']'))
 855               {
 856                 if (c == L_('\0'))
 857                   {
 858                     /* [ unterminated, treat as normal character.  */
 859                     p = p_init;
 860                     n = n_init;
 861                     c = L_('[');
 862                     goto normal_match;
 863                   }
 864
 865                 if (!(flags & FNM_NOESCAPE) && c == L_('\\'))
 866                   {
 867                     if (*p == L_('\0'))
 868                       return FNM_NOMATCH;
 869                     /* XXX 1003.2d11 is unclear if this is right.  */
 870                     ++p;
 871                   }
 872                 else if (c == L_('[') && *p == L_(':'))
 873                   {
 874                     int c1 = 0;
 875                     const CHAR *startp = p;
 876
 877                     while (1)
 878                       {
 879                         c = *++p;
 880                         if (++c1 == CHAR_CLASS_MAX_LENGTH)
 881                           return FNM_NOMATCH;
 882
 883                         if (*p == L_(':') && p[1] == L_(']'))
 884                           break;
 885
 886                         if (c < L_('a') || c >= L_('z'))
 887                           {
 888                             p = startp - 2;
 889                             break;
 890                           }
 891                       }
 892                     p += 2;
 893                   }
 894                 else if (c == L_('[') && *p == L_('='))
 895                   {
 896                     c = *++p;
 897                     if (c == L_('\0'))
 898                       return FNM_NOMATCH;
 899                     c = *++p;
 900                     if (c != L_('=') || p[1] != L_(']'))
 901                       return FNM_NOMATCH;
 902                     p += 2;
 903                   }
 904                 else if (c == L_('[') && *p == L_('.'))
 905                   {
 906                     while (1)
 907                       {
 908                         c = *++p;
 909                         if (c == L_('\0'))
 910                           return FNM_NOMATCH;
 911
 912                         if (c == L_('.') && p[1] == L_(']'))
 913                           break;
 914                       }
 915                     p += 2;
 916                   }
 917               }
 918             if (not)
 919               return FNM_NOMATCH;
 920           }
 921           break;
 922
 923         case L_('+'):
 924         case L_('@'):
 925         case L_('!'):
 926           if (__glibc_unlikely (flags & FNM_EXTMATCH) && *p == '(')
 927             {
 928               int res = EXT (c, p, n, string_end, no_leading_period, flags);
 929               if (res != -1)
 930                 return res;
 931             }
 932           goto normal_match;
 933
 934         case L_('/'):
 935           if (NO_LEADING_PERIOD (flags))
 936             {
 937               if (n == string_end || c != (UCHAR) *n)
 938                 return FNM_NOMATCH;
 939
 940               new_no_leading_period = true;
 941               break;
 942             }
 943           FALLTHROUGH;
 944         default:
 945         normal_match:
 946           if (n == string_end || c != FOLD ((UCHAR) *n))
 947             return FNM_NOMATCH;
 948         }
 949
 950       no_leading_period = new_no_leading_period;
 951       ++n;
 952     }
 953
 954   if (n == string_end)
 955     return 0;
 956
 957   if ((flags & FNM_LEADING_DIR) && n != string_end && *n == L_('/'))
 958     /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz".  */
 959     return 0;
 960
 961   return FNM_NOMATCH;
 962 }
 963
 964
 965 static const CHAR *
 966 END (const CHAR *pattern)
 967 {
 968   const CHAR *p = pattern;
 969
 970   while (1)
 971     if (*++p == L_('\0'))
 972       /* This is an invalid pattern.  */
 973       return pattern;
 974     else if (*p == L_('['))
 975       {
 976         /* Handle brackets special.  */
 977         if (posixly_correct == 0)
 978           posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
 979
 980         /* Skip the not sign.  We have to recognize it because of a possibly
 981            following ']'.  */
 982         if (*++p == L_('!') || (posixly_correct < 0 && *p == L_('^')))
 983           ++p;
 984         /* A leading ']' is recognized as such.  */
 985         if (*p == L_(']'))
 986           ++p;
 987         /* Skip over all characters of the list.  */
 988         while (*p != L_(']'))
 989           if (*p++ == L_('\0'))
 990             /* This is no valid pattern.  */
 991             return pattern;
 992       }
 993     else if ((*p == L_('?') || *p == L_('*') || *p == L_('+') || *p == L_('@')
 994               || *p == L_('!')) && p[1] == L_('('))
 995       {
 996         p = END (p + 1);
 997         if (*p == L_('\0'))
 998           /* This is an invalid pattern.  */
 999           return pattern;
1000       }
1001     else if (*p == L_(')'))
1002       break;
1003
1004   return p + 1;
1005 }
1006
1007 #if WIDE_CHAR_VERSION
     /* PATTERN_PREFIX is the struct tag and function-name prefix of the
        list type instantiated below.
        NOTE(review): the wide build gets the un-prefixed name and the
        narrow build the "w" one, which looks swapped.  Presumably only
        distinctness of the two names matters -- confirm before renaming.  */
1008 # define PATTERN_PREFIX pattern_list
1009 #else
1010 # define PATTERN_PREFIX wpattern_list
1011 #endif
1012
     /* Two-step paste: going through PASTE1 lets an argument such as
        PATTERN_PREFIX be macro-expanded before ## joins the tokens.  */
1013 #define PASTE(a,b)                 PASTE1(a,b)
1014 #define PASTE1(a,b)                a##b
1015
     /* Settings for the <malloc/dynarray-skeleton.c> include that follows
        (presumably consumed and interpreted by that file): the elements
        are CHAR * values and an element is released with free.  EXT
        below reaches the resulting functions through
        PASTE (PATTERN_PREFIX, _init), _add, _has_failed, _size, _at
        and _free.  */
1016 #define DYNARRAY_STRUCT            PATTERN_PREFIX
1017 #define DYNARRAY_ELEMENT_FREE(ptr) free (*ptr)
1018 #define DYNARRAY_ELEMENT           CHAR *
1019 #define DYNARRAY_PREFIX            PASTE(PATTERN_PREFIX,_)
1020 #define DYNARRAY_INITIAL_SIZE      8
1021 #include <malloc/dynarray-skeleton.c>
1022
1023 static int
1024 EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end,
1025      bool no_leading_period, int flags)
1026 {
1027   const CHAR *startp;
1028   ptrdiff_t level;
1029   struct PATTERN_PREFIX list;
1030   size_t pattern_len = STRLEN (pattern);
1031   size_t pattern_i = 0;
1032   const CHAR *p;
1033   const CHAR *rs;
1034   int retval = 0;
1035
1036   PASTE (PATTERN_PREFIX, _init) (&list);
1037
1038   /* Parse the pattern.  Store the individual parts in the list.  */
1039   level = 0;
1040   for (startp = p = pattern + 1; level >= 0; ++p)
1041     if (*p == L_('\0'))
1042       {
1043         /* This is an invalid pattern.  */
1044         retval = -1;
1045         goto out;
1046       }
1047     else if (*p == L_('['))
1048       {
1049         /* Handle brackets special.  */
1050         if (posixly_correct == 0)
1051           posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
1052
1053         /* Skip the not sign.  We have to recognize it because of a possibly
1054            following ']'.  */
1055         if (*++p == L_('!') || (posixly_correct < 0 && *p == L_('^')))
1056           ++p;
1057         /* A leading ']' is recognized as such.  */
1058         if (*p == L_(']'))
1059           ++p;
1060         /* Skip over all characters of the list.  */
1061         while (*p != L_(']'))
1062           if (*p++ == L_('\0'))
1063             {
1064               /* This is no valid pattern.  */
1065               retval = -1;
1066               goto out;
1067             }
1068       }
1069     else if ((*p == L_('?') || *p == L_('*') || *p == L_('+') || *p == L_('@')
1070               || *p == L_('!')) && p[1] == L_('('))
1071       /* Remember the nesting level.  */
1072       ++level;
1073     else if (*p == L_(')') || *p == L_('|'))
1074       {
1075         if (level == 0)
1076           {
1077             size_t slen = opt == L_('?') || opt == L_('@')
1078                           ? pattern_len : p - startp + 1;
1079             CHAR *newp = malloc (slen * sizeof (CHAR));
1080             if (newp != NULL)
1081               {
1082                 *((CHAR *) MEMPCPY (newp, startp, p - startp)) = L_('\0');
1083                 PASTE (PATTERN_PREFIX,_add) (&list, newp);
1084               }
1085             if (newp == NULL || PASTE (PATTERN_PREFIX, _has_failed) (&list))
1086               {
1087                 retval = -2;
1088                 goto out;
1089               }
1090
1091             if (*p == L_('|'))
1092               startp = p + 1;
1093           }
1094         if (*p == L_(')'))
1095           level--;
1096       }
1097   assert (p[-1] == L_(')'));
1098
1099   switch (opt)
1100     {
1101     case L_('*'):
1102       if (FCT (p, string, string_end, no_leading_period, flags, NULL) == 0)
1103         goto success;
1104       FALLTHROUGH;
1105     case L_('+'):
1106       for (; pattern_i < PASTE (PATTERN_PREFIX, _size)(&list); pattern_i++)
1107         {
1108           for (rs = string; rs <= string_end; ++rs)
1109             /* First match the prefix with the current pattern with the
1110                current pattern.  */
1111             if (FCT (*PASTE (PATTERN_PREFIX, _at) (&list, pattern_i), string,
1112                      rs, no_leading_period,
1113                      flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1114                      NULL) == 0
1115                 /* This was successful.  Now match the rest with the rest
1116                    of the pattern.  */
1117                 && (FCT (p, rs, string_end,
1118                          rs == string
1119                          ? no_leading_period
1120                          : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1121                          flags & FNM_FILE_NAME
1122                          ? flags : flags & ~FNM_PERIOD, NULL) == 0
1123                     /* This didn't work.  Try the whole pattern.  */
1124                     || (rs != string
1125                         && FCT (pattern - 1, rs, string_end,
1126                                 rs == string
1127                                 ? no_leading_period
1128                                 : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1129                                 flags & FNM_FILE_NAME
1130                                 ? flags : flags & ~FNM_PERIOD, NULL) == 0)))
1131               /* It worked.  Signal success.  */
1132               goto success;
1133         }
1134
1135       /* None of the patterns lead to a match.  */
1136       retval = FNM_NOMATCH;
1137       break;
1138
1139     case L_('?'):
1140       if (FCT (p, string, string_end, no_leading_period, flags, NULL) == 0)
1141         goto success;
1142       FALLTHROUGH;
1143     case L_('@'):
1144       for (; pattern_i < PASTE (PATTERN_PREFIX, _size) (&list); pattern_i++)
1145         {
1146           /* I cannot believe it but `strcat' is actually acceptable
1147              here.  Match the entire string with the prefix from the
1148              pattern list and the rest of the pattern following the
1149              pattern list.  */
1150           if (FCT (STRCAT (*PASTE (PATTERN_PREFIX, _at) (&list, pattern_i), p),
1151                    string, string_end, no_leading_period,
1152                    flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1153                    NULL) == 0)
1154             /* It worked.  Signal success.  */
1155             goto success;
1156         }
1157
1158       /* None of the patterns lead to a match.  */
1159       retval = FNM_NOMATCH;
1160       break;
1161
1162     case L_('!'):
1163       for (rs = string; rs <= string_end; ++rs)
1164         {
1165           size_t runp_i;
1166
1167           for (runp_i = pattern_i;
1168                runp_i != PASTE (PATTERN_PREFIX, _size) (&list);
1169                runp_i++)
1170             {
1171               if (FCT (*PASTE (PATTERN_PREFIX, _at) (&list, runp_i), string, rs,
1172                        no_leading_period,
1173                        flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1174                        NULL) == 0)
1175               break;
1176             }
1177
1178           /* If none of the patterns matched see whether the rest does.  */
1179           if (runp_i == PASTE (PATTERN_PREFIX, _size) (&list)
1180               && (FCT (p, rs, string_end,
1181                        rs == string
1182                        ? no_leading_period
1183                        : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1184                        flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1185                        NULL) == 0))
1186             /* This is successful.  */
1187             goto success;
1188         }
1189
1190       /* None of the patterns together with the rest of the pattern
1191          lead to a match.  */
1192       retval = FNM_NOMATCH;
1193       break;
1194
1195     default:
1196       assert (! "Invalid extended matching operator");
1197       retval = -1;
1198       break;
1199     }
1200
1201  success:
1202  out:
1203   PASTE (PATTERN_PREFIX, _free) (&list);
1204
1205   return retval;
1206 }
1207
1208 #undef PATTERN_PREFIX  /* Drop the list-naming helpers defined above for EXT.  */
1209 #undef PASTE
1210 #undef PASTE1
1211
     /* Drop the remaining macros this file relies on.
        NOTE(review): presumably the including file defines them before
        the #include so that this file can be included once per character
        type -- confirm against the includer.  */
1212 #undef FOLD
1213 #undef CHAR
1214 #undef UCHAR
1215 #undef INT
1216 #undef FCT
1217 #undef EXT
1218 #undef END
1219 #undef STRUCT
1220 #undef MEMPCPY
1221 #undef MEMCHR
1222 #undef STRLEN
1223 #undef STRCAT
1224 #undef L_
1225 #undef BTOWC
1226 #undef WIDE_CHAR_VERSION
1227 #undef FINDIDX