35799: with NO_EXEC, parse parameter subscript expressions
[zsh/mirror.git] / Src / pattern.c
blob8fa1a727f804a098c3f3f078f556d4c6ce63310f
1 /*
2 * pattern.c - pattern matching
4 * This file is part of zsh, the Z shell.
6 * Copyright (c) 1999 Peter Stephenson
7 * All rights reserved.
9 * Permission is hereby granted, without written agreement and without
10 * license or royalty fees, to use, copy, modify, and distribute this
11 * software and to distribute modified versions of this software for any
12 * purpose, provided that the above copyright notice and the following
13 * two paragraphs appear in all copies of this software.
15 * In no event shall Peter Stephenson or the Zsh Development Group be liable
16 * to any party for direct, indirect, special, incidental, or consequential
17 * damages arising out of the use of this software and its documentation,
18 * even if Peter Stephenson and the Zsh Development Group have been advised of
19 * the possibility of such damage.
21 * Peter Stephenson and the Zsh Development Group specifically disclaim any
22 * warranties, including, but not limited to, the implied warranties of
23 * merchantability and fitness for a particular purpose. The software
24 * provided hereunder is on an "as is" basis, and Peter Stephenson and the
25 * Zsh Development Group have no obligation to provide maintenance,
26 * support, updates, enhancements, or modifications.
28 * Pattern matching code derived from the regexp library by Henry
29 * Spencer, which has the following copyright.
31 * Copyright (c) 1986 by University of Toronto.
32 * Written by Henry Spencer. Not derived from licensed software.
34 * Permission is granted to anyone to use this software for any
35 * purpose on any computer system, and to redistribute it freely,
36 * subject to the following restrictions:
38 * 1. The author is not responsible for the consequences of use of
39 * this software, no matter how awful, even if they arise
40 * from defects in it.
42 * 2. The origin of this software must not be misrepresented, either
43 * by explicit claim or by omission.
45 * 3. Altered versions must be plainly marked as such, and must not
46 * be misrepresented as being the original software.
48 * Eagle-eyed readers will notice this is an altered version. Incredibly
49 * sharp-eyed readers might even find bits that weren't altered.
52 * And I experienced a sense that, like certain regular
53 * expressions, seemed to match the day from beginning to end, so
54 * that I did not need to identify the parenthesised subexpression
55 * that told of dawn, nor the group of characters that indicated
56 * the moment when my grandfather returned home with news of
57 * Swann's departure for Paris; and the whole length of the month
58 * of May, as if matched by a closure, fitted into the buffer of my
59 * life with no sign of overflowing, turning the days, like a
60 * procession of insects that could consist of this or that
61 * species, into a random and unstructured repetition of different
62 * sequences, anchored from the first day of the month to the last
63 * in the same fashion as the weeks when I knew I would not see
64 * Gilberte and would search in vain for any occurrences of the
65 * string in the avenue of hawthorns by Tansonville, without my
66 * having to delimit explicitly the start or finish of the pattern.
68 * M. Proust, "In Search of Lost Files",
69 * bk I, "The Walk by Bourne's Place".
72 #include "zsh.mdh"
75 * The following union is used mostly for alignment purposes.
76 * Normal nodes are longs, while certain nodes take a char * as an argument;
77 * here we make sure that they both work out to the same length.
78 * The compiled regexp we construct consists of upats stuck together;
79 * anything else to be added (strings, numbers) is stuck after and
80 * then aligned to a whole number of upat units.
82 * Note also that offsets are in terms of the sizes of these things.
/*
 * Basic unit of a compiled pattern.  Both members occupy the same
 * storage so that a node word and a pointer argument take up the same
 * amount of space in the program.
 */
union upat {
    long l;			/* ordinary node word */
    unsigned char *p;		/* pointer-valued argument */
};

/* Pointer to one unit of the compiled program. */
typedef union upat *Upat;
91 #include "pattern.pro"
93 /* Number of active parenthesized expressions allowed in backreferencing */
94 #define NSUBEXP 9
96 /* definition number opnd? meaning */
97 #define P_END 0x00 /* no End of program. */
98 #define P_EXCSYNC 0x01 /* no Test if following exclude already failed */
99 #define P_EXCEND 0x02 /* no Test if exclude matched orig branch */
100 #define P_BACK 0x03 /* no Match "", "next" ptr points backward. */
101 #define P_EXACTLY 0x04 /* lstr Match this string. */
102 #define P_NOTHING 0x05 /* no Match empty string. */
103 #define P_ONEHASH 0x06 /* node Match this (simple) thing 0 or more times. */
104 #define P_TWOHASH 0x07 /* node Match this (simple) thing 1 or more times. */
105 #define P_GFLAGS 0x08 /* long Match nothing and set globbing flags */
106 #define P_ISSTART 0x09 /* no Match start of string. */
107 #define P_ISEND 0x0a /* no Match end of string. */
108 #define P_COUNTSTART 0x0b /* no Initialise P_COUNT */
109 #define P_COUNT 0x0c /* 3*long uc* node Match a number of repetitions */
110 /* numbered so we can test bit 5 for a branch */
111 #define P_BRANCH 0x20 /* node Match this alternative, or the next... */
112 #define P_WBRANCH 0x21 /* uc* node P_BRANCH, but match at least 1 char */
113 /* excludes are also branches, but have bit 4 set, too */
114 #define P_EXCLUDE 0x30 /* uc* node Exclude this from previous branch */
115 #define P_EXCLUDP 0x31 /* uc* node Exclude, using full file path so far */
116 /* numbered so we can test bit 6 so as not to match initial '.' */
117 #define P_ANY 0x40 /* no Match any one character. */
118 #define P_ANYOF 0x41 /* str Match any character in this string. */
119 #define P_ANYBUT 0x42 /* str Match any character not in this string. */
120 #define P_STAR 0x43 /* no Match any set of characters. */
121 #define P_NUMRNG 0x44 /* zr, zr Match a numeric range. */
122 #define P_NUMFROM 0x45 /* zr Match a number >= X */
123 #define P_NUMTO 0x46 /* zr Match a number <= X */
124 #define P_NUMANY 0x47 /* no Match any set of decimal digits */
125 /* spaces left for P_OPEN+n,... for backreferences */
126 #define P_OPEN 0x80 /* no Mark this point in input as start of n. */
127 #define P_CLOSE 0x90 /* no Analogous to OPEN. */
129 * no no argument
130 * zr the range type zrange_t: may be zlong or unsigned long
131 * char a single char
132 * uc* a pointer to unsigned char, used at run time and initialised
133 * to NULL.
134 * str null-terminated, metafied string
135 * lstr length as long then string, not null-terminated, unmetafied.
139 * Notes on usage:
140 * P_WBRANCH: This works like a branch and is used in complex closures,
141 * to ensure we don't succeed on a zero-length match of the pattern,
142 * since that would cause an infinite loop. We do this by recording
143 * the positions where we have already tried to match. See the
144 * P_WBRANCH test in patmatch().
146 * P_ANYOF, P_ANYBUT: the operand is a null terminated
147 * string. Normal characters match as expected. Characters
148 * in the range Meta+PP_ALPHA..Meta+PP_UNKNWN do the appropriate
149 * Posix range tests. This relies on imeta returning true for these
150 * characters. We treat unknown POSIX ranges as never matching.
151 * PP_RANGE means the next two (possibly metafied) characters form
152 * the limits of a range to test; it's too much like hard work to
153 * expand the range.
155 * P_EXCLUDE, P_EXCSYNC, P_EXCEND: P_EXCLUDE appears in the pattern like
156 * P_BRANCH, but applies to the immediately preceding branch. The code in
157 * the corresponding branch is followed by a P_EXCSYNC, which simply
158 * acts as a marker that a P_EXCLUDE comes next. The P_EXCLUDE
159 * has a pointer to char embedded in it, which works
160 * like P_WBRANCH: if we get to the P_EXCSYNC, and we already matched
161 * up to the same position, fail. Thus we are forced to backtrack
162 * on closures in the P_BRANCH if the first attempt was excluded.
163 * Corresponding to P_EXCSYNC in the original branch, there is a
164 * P_EXCEND in the exclusion. If we get to this point, and we did
165 * *not* match in the original branch, the exclusion itself fails,
166 * otherwise it succeeds since we know the tail already matches,
167 * so P_EXCEND is the end of the exclusion test.
168 * The whole sorry mess looks like this, where the upper lines
169 * show the linkage of the branches, and the lower shows the linkage
170 * of their pattern arguments.
172 * --------------------- ----------------------
173 * ^ v ^ v
174 * ( <BRANCH>:apat-><EXCSYNC> <EXCLUDE>:excpat-><EXCEND> ) tail
176 * | |
177 * --------------------------------------
179 * P_EXCLUDP: this behaves exactly like P_EXCLUDE, with the sole exception
180 * that we prepend the path so far to the exclude pattern. This is
181 * for top level file globs, e.g. ** / *.c~*foo.c
182 * ^ I had to leave this space
183 * P_NUM*: zr is a zlong if that is 64-bit, else an unsigned long.
185 * P_COUNTSTART, P_COUNT: a P_COUNTSTART flags the start of a quantified
186 * closure (#cN,M) and is used to initialise the count. Executing
187 * the pattern leads back to the P_COUNT, while the next links of the
188 * P_COUNTSTART and P_COUNT lead to the tail of the pattern:
190 * ----------------
191 * v ^
192 * <COUNTSTART><COUNT>pattern<BACK> tail
193 * v v ^
194 * ------------------------
197 #define P_OP(p) ((p)->l & 0xff)
198 #define P_NEXT(p) ((p)->l >> 8)
199 #define P_OPERAND(p) ((p) + 1)
200 #define P_ISBRANCH(p) ((p)->l & 0x20)
201 #define P_ISEXCLUDE(p) (((p)->l & 0x30) == 0x30)
202 #define P_NOTDOT(p) ((p)->l & 0x40)
204 /* Specific to lstr type, i.e. P_EXACTLY. */
205 #define P_LS_LEN(p) ((p)[1].l) /* can be used as lvalue */
206 #define P_LS_STR(p) ((char *)((p) + 2))
208 /* Specific to P_COUNT: arguments as offset in nodes from operator */
209 #define P_CT_CURRENT (1) /* Current count */
210 #define P_CT_MIN (2) /* Minimum count */
211 #define P_CT_MAX (3) /* Maximum count, -1 for none */
212 #define P_CT_PTR (4) /* Pointer to last match start */
213 #define P_CT_OPERAND (5) /* Operand of P_COUNT */
215 /* Flags needed when pattern is executed */
216 #define P_SIMPLE 0x01 /* Simple enough to be #/## operand. */
217 #define P_HSTART 0x02 /* Starts with # or ##'d pattern. */
218 #define P_PURESTR 0x04 /* Can be matched with a strcmp */
220 #if defined(ZSH_64_BIT_TYPE) || defined(LONG_IS_64_BIT)
221 typedef zlong zrange_t;
222 #define ZRANGE_T_IS_SIGNED (1)
223 #else
224 typedef unsigned long zrange_t;
225 #endif
228 * Array of characters corresponding to zpc_chars enum, which it must match.
230 static const char zpc_chars[ZPC_COUNT] = {
231 '/', '\0', Bar, Outpar, Tilde, Inpar, Quest, Star, Inbrack, Inang,
232 Hat, Pound, Bnullkeep, Quest, Star, '+', '!', '@'
236 * Corresponding strings used in enable/disable -p.
237 * NULL means no way of turning this on or off.
239 /**/
240 mod_export const char *zpc_strings[ZPC_COUNT] = {
241 NULL, NULL, "|", NULL, "~", "(", "?", "*", "[", "<",
242 "^", "#", NULL, "?(", "*(", "+(", "!(", "@("
246 * Corresponding array of pattern disables as set by the user
247 * using "disable -p".
249 /**/
250 mod_export char zpc_disables[ZPC_COUNT];
253 * Stack of saved (compressed) zpc_disables for function scope.
256 static struct zpc_disables_save *zpc_disables_stack;
259 * Characters which terminate a simple string (ZPC_COUNT) or
260 * an entire pattern segment (the first ZPC_SEG_COUNT).
261 * Each entry is either the corresponding character in zpc_chars
262 * or Marker which is guaranteed not to match a character in a
263 * pattern we are compiling.
265 * The complete list indicates characters that are special, so e.g.
266 * (testchar == special[ZPC_TILDE]) succeeds only if testchar is a Tilde
267 * *and* Tilde is currently special.
270 /**/
271 char zpc_special[ZPC_COUNT];
273 /* Default size for pattern buffer */
274 #define P_DEF_ALLOC 256
276 /* Flags used in compilation */
277 static char *patstart, *patparse; /* input pointers */
278 static int patnpar; /* () count */
279 static char *patcode; /* point of code emission */
280 static long patsize; /* size of code */
281 static char *patout; /* start of code emission string */
282 static long patalloc; /* size allocated for same */
284 /* Flags used in both compilation and execution */
285 static int patflags; /* flags passed down to patcompile */
286 static int patglobflags; /* globbing flags & approx */
289 * Increment pointer to metafied multibyte string.
291 #ifdef MULTIBYTE_SUPPORT
292 typedef wint_t patint_t;
294 #define PEOF WEOF
296 #define METACHARINC(x) ((void)metacharinc(&x))
299 * TODO: the shiftstate isn't well handled; we don't guarantee
300 * to maintain it properly between characters. If we don't
301 * need it we should use mbtowc() instead.
303 static mbstate_t shiftstate;
306 * Multibyte version: it's (almost) as easy to return the
307 * value as not, so do so since we sometimes need it..
309 static wchar_t
310 metacharinc(char **x)
312 char *inptr = *x;
313 char inchar;
314 size_t ret = MB_INVALID;
315 wchar_t wc;
318 * Cheat if the top bit isn't set. This is second-guessing
319 * the library, but we know for sure that if the character
320 * set doesn't have the property that all bytes with the 8th
321 * bit clear are single characters then we are stuffed.
323 if (!(patglobflags & GF_MULTIBYTE) || !(STOUC(*inptr) & 0x80))
325 if (itok(*inptr))
326 inchar = ztokens[*inptr++ - Pound];
327 else if (*inptr == Meta) {
328 inptr++;
329 inchar = *inptr++ ^ 32;
330 } else {
331 inchar = *inptr++;
333 *x = inptr;
334 return (wchar_t)STOUC(inchar);
337 while (*inptr) {
338 if (itok(*inptr))
339 inchar = ztokens[*inptr++ - Pound];
340 else if (*inptr == Meta) {
341 inptr++;
342 inchar = *inptr++ ^ 32;
343 } else {
344 inchar = *inptr++;
346 ret = mbrtowc(&wc, &inchar, 1, &shiftstate);
348 if (ret == MB_INVALID)
349 break;
350 if (ret == MB_INCOMPLETE)
351 continue;
352 *x = inptr;
353 return wc;
356 /* Error. Treat as single byte. */
357 /* Reset the shift state for next time. */
358 memset(&shiftstate, 0, sizeof(shiftstate));
359 return (wchar_t) STOUC(*(*x)++);
362 #else
363 typedef int patint_t;
365 #define PEOF EOF
367 #define METACHARINC(x) ((void)((x) += (*(x) == Meta) ? 2 : 1))
368 #endif
371 * Return unmetafied char from string (x is any char *).
372 * Used with MULTIBYTE_SUPPORT if the GF_MULTIBYTE is not
373 * in effect.
375 #define UNMETA(x) (*(x) == Meta ? (x)[1] ^ 32 : *(x))
377 /* Add n more characters, ensuring there is enough space. */
/* Flags for the paflags argument of patadd(). */
enum {
    PA_NOALIGN = 1,	/* do not round the size up to whole upat units */
    PA_UNMETA  = 2	/* unmetafy and untokenize the text being added */
};
384 /**/
385 static void
386 patadd(char *add, int ch, long n, int paflags)
388 /* Make sure everything gets aligned unless we get PA_NOALIGN. */
389 long newpatsize = patsize + n;
390 if (!(paflags & PA_NOALIGN))
391 newpatsize = (newpatsize + sizeof(union upat) - 1) &
392 ~(sizeof(union upat) - 1);
393 if (patalloc < newpatsize) {
394 long newpatalloc =
395 2*(newpatsize > patalloc ? newpatsize : patalloc);
396 patout = (char *)zrealloc((char *)patout, newpatalloc);
397 patcode = patout + patsize;
398 patalloc = newpatalloc;
400 patsize = newpatsize;
401 if (add) {
402 if (paflags & PA_UNMETA) {
404 * Unmetafy and untokenize the string as we go.
405 * The Meta characters in add aren't counted in n.
407 while (n--) {
408 if (itok(*add))
409 *patcode++ = ztokens[*add++ - Pound];
410 else if (*add == Meta) {
411 add++;
412 *patcode++ = *add++ ^ 32;
413 } else {
414 *patcode++ = *add++;
417 } else {
418 while (n--)
419 *patcode++ = *add++;
421 } else
422 *patcode++ = ch;
423 patcode = patout + patsize;
426 static long rn_offs;
427 /* operates on pointers to union upat, returns a pointer */
428 #define PATNEXT(p) ((rn_offs = P_NEXT(p)) ? \
429 (P_OP(p) == P_BACK) ? \
430 ((p)-rn_offs) : ((p)+rn_offs) : NULL)
433 * Set up zpc_special with characters that end a string segment.
434 * "Marker" cannot occur in the pattern we are compiling so
435 * is used to mark "invalid".
437 static void
438 patcompcharsset(void)
440 char *spp, *disp;
441 int i;
443 /* Initialise enabled special characters */
444 memcpy(zpc_special, zpc_chars, ZPC_COUNT);
445 /* Apply user disables from disable -p */
446 for (i = 0, spp = zpc_special, disp = zpc_disables;
447 i < ZPC_COUNT;
448 i++, spp++, disp++) {
449 if (*disp)
450 *spp = Marker;
453 if (!isset(EXTENDEDGLOB)) {
454 /* Extended glob characters are not active */
455 zpc_special[ZPC_TILDE] = zpc_special[ZPC_HAT] =
456 zpc_special[ZPC_HASH] = Marker;
458 if (!isset(KSHGLOB)) {
460 * Ksh glob characters are not active.
461 * * and ? are shared with normal globbing, but for their
462 * use here we are looking for a following Inpar.
464 zpc_special[ZPC_KSH_QUEST] = zpc_special[ZPC_KSH_STAR] =
465 zpc_special[ZPC_KSH_PLUS] = zpc_special[ZPC_KSH_BANG] =
466 zpc_special[ZPC_KSH_AT] = Marker;
469 * Note that if we are using KSHGLOB, then we test for a following
470 * Inpar, not zpc_special[ZPC_INPAR]: the latter makes an Inpar on
471 * its own active. The zpc_special[ZPC_KSH_*] followed by any old Inpar
472 * discriminate ksh globbing.
474 if (isset(SHGLOB)) {
476 * Grouping and numeric ranges are not valid.
477 * We do allow alternation, however; it's needed for
478 * "case". This may not be entirely consistent.
480 * Don't disable Outpar: we may need to match the end of KSHGLOB
481 * parentheses and it would be difficult to tell them apart.
483 zpc_special[ZPC_INPAR] = zpc_special[ZPC_INANG] = Marker;
487 /* Called before parsing a set of file matches to initialize flags */
489 /**/
490 void
491 patcompstart(void)
493 patcompcharsset();
494 if (isset(CASEGLOB))
495 patglobflags = 0;
496 else
497 patglobflags = GF_IGNCASE;
498 if (isset(MULTIBYTE))
499 patglobflags |= GF_MULTIBYTE;
503 * Top level pattern compilation subroutine
504 * exp is a null-terminated, metafied string.
505 * inflags is an or of some PAT_* flags.
506 * endexp, if non-null, is set to a pointer to the end of the
507 * part of exp which was compiled. This is used when
508 * compiling patterns for directories which must be
509 * matched recursively.
512 /**/
513 mod_export Patprog
514 patcompile(char *exp, int inflags, char **endexp)
516 int flags = 0;
517 long len = 0;
518 long startoff;
519 Upat pscan;
520 char *lng, *strp = NULL;
521 Patprog p;
523 startoff = sizeof(struct patprog);
524 /* Ensure alignment of start of program string */
525 startoff = (startoff + sizeof(union upat) - 1) & ~(sizeof(union upat) - 1);
527 /* Allocate reasonable sized chunk if none, reduce size if too big */
528 if (patalloc != P_DEF_ALLOC)
529 patout = (char *)zrealloc(patout, patalloc = P_DEF_ALLOC);
530 patcode = patout + startoff;
531 patsize = patcode - patout;
532 patstart = patparse = exp;
534 * Note global patnpar numbers parentheses 1..9, while patnpar
535 * in struct is actual count of parentheses.
537 patnpar = 1;
538 patflags = inflags & ~(PAT_PURES|PAT_HAS_EXCLUDP);
540 if (!(patflags & PAT_FILE)) {
541 patcompcharsset();
542 zpc_special[ZPC_SLASH] = Marker;
543 remnulargs(patparse);
544 if (isset(MULTIBYTE))
545 patglobflags = GF_MULTIBYTE;
546 else
547 patglobflags = 0;
549 if (patflags & PAT_LCMATCHUC)
550 patglobflags |= GF_LCMATCHUC;
552 * Have to be set now, since they get updated during compilation.
554 ((Patprog)patout)->globflags = patglobflags;
556 if (!(patflags & PAT_ANY)) {
557 /* Look for a really pure string, with no tokens at all. */
558 if (!(patglobflags & ~GF_MULTIBYTE)
559 #ifdef __CYGWIN__
561 * If the OS treats files case-insensitively and we
562 * are looking at files, we don't need to use pattern
563 * matching to find the file.
565 || (!(patglobflags & ~GF_IGNCASE) && (patflags & PAT_FILE))
566 #endif
570 * Waah! I wish I understood this.
571 * Empty metafied strings have an initial Nularg.
572 * This never corresponds to a real character in
573 * a glob pattern or string, so skip it.
575 if (*exp == Nularg)
576 exp++;
577 for (strp = exp; *strp &&
578 (!(patflags & PAT_FILE) || *strp != '/') && !itok(*strp);
579 strp++)
582 if (!strp || (*strp && *strp != '/')) {
583 /* No, do normal compilation. */
584 strp = NULL;
585 if (patcompswitch(0, &flags) == 0)
586 return NULL;
587 } else {
589 * Yes, copy the string, and skip compilation altogether.
590 * Null terminate for the benefit of globbing.
591 * Leave metafied both for globbing and for our own
592 * efficiency.
594 patparse = strp;
595 len = strp - exp;
596 patadd(exp, 0, len + 1, 0);
597 patout[startoff + len] = '\0';
598 patflags |= PAT_PURES;
602 /* end of compilation: safe to use pointers */
603 p = (Patprog)patout;
604 p->startoff = startoff;
605 p->patstartch = '\0';
606 p->globend = patglobflags;
607 p->flags = patflags;
608 p->mustoff = 0;
609 p->size = patsize;
610 p->patmlen = len;
611 p->patnpar = patnpar-1;
613 if (!strp) {
614 pscan = (Upat)(patout + startoff);
616 if (!(patflags & PAT_ANY) && P_OP(PATNEXT(pscan)) == P_END) {
617 /* only one top level choice */
618 pscan = P_OPERAND(pscan);
620 if (flags & P_PURESTR) {
622 * The pattern can be matched with a simple strncmp/strcmp.
623 * Careful in case we've overwritten the node for the next ptr.
625 char *dst = patout + startoff;
626 Upat next;
627 p->flags |= PAT_PURES;
628 for (; pscan; pscan = next) {
629 next = PATNEXT(pscan);
630 if (P_OP(pscan) == P_EXACTLY) {
631 char *opnd = P_LS_STR(pscan), *mtest;
632 long oplen = P_LS_LEN(pscan), ilen;
633 int nmeta = 0;
635 * Unfortunately we unmetafied the string
636 * and we need to put any metacharacters
637 * back now we know it's a pure string.
638 * This shouldn't happen too often, it's
639 * just that there are some cases such
640 * as . and .. in files where we really
641 * need a pure string even if there are
642 * pattern characters flying around.
644 for (mtest = opnd, ilen = oplen; ilen;
645 mtest++, ilen--)
646 if (imeta(*mtest))
647 nmeta++;
648 if (nmeta) {
649 char *oldpatout = patout;
650 patadd(NULL, 0, nmeta, 0);
652 * Yuk.
654 p = (Patprog)patout;
655 opnd = patout + (opnd - oldpatout);
656 dst = patout + startoff;
659 while (oplen--) {
660 if (imeta(*opnd)) {
661 *dst++ = Meta;
662 *dst++ = *opnd++ ^ 32;
663 } else {
664 *dst++ = *opnd++;
669 p->size = dst - patout;
670 /* patmlen is really strlen. We don't need a null. */
671 p->patmlen = p->size - startoff;
672 } else {
673 /* starting point info */
674 if (P_OP(pscan) == P_EXACTLY && !p->globflags &&
675 P_LS_LEN(pscan))
676 p->patstartch = *P_LS_STR(pscan);
678 * Find the longest literal string in something expensive.
679 * This is itself not all that cheap if we have
680 * case-insensitive matching or approximation, so don't.
682 if ((flags & P_HSTART) && !p->globflags) {
683 lng = NULL;
684 len = 0;
685 for (; pscan; pscan = PATNEXT(pscan))
686 if (P_OP(pscan) == P_EXACTLY &&
687 P_LS_LEN(pscan) >= len) {
688 lng = P_LS_STR(pscan);
689 len = P_LS_LEN(pscan);
691 if (lng) {
692 p->mustoff = lng - patout;
693 p->patmlen = len;
701 * The pattern was compiled in a fixed buffer: unless told otherwise,
702 * we stick the compiled pattern on the heap. This is necessary
703 * for files where we will often be compiling multiple segments at once.
704 * But if we get the ZDUP flag we always put it in zalloc()ed memory.
706 if (patflags & PAT_ZDUP) {
707 Patprog newp = (Patprog)zalloc(patsize);
708 memcpy((char *)newp, (char *)p, patsize);
709 p = newp;
710 } else if (!(patflags & PAT_STATIC)) {
711 Patprog newp = (Patprog)zhalloc(patsize);
712 memcpy((char *)newp, (char *)p, patsize);
713 p = newp;
716 if (endexp)
717 *endexp = patparse;
718 return p;
722 * Main body or parenthesized subexpression in pattern
723 * Parenthesis (and any ksh_glob gubbins) will have been removed.
726 /**/
/*
 * patcompswitch(paren, flagp): compile a list of alternatives, i.e. the
 * whole pattern (paren == 0) or the inside of a parenthesised group
 * (paren != 0).
 *
 * The first branch is compiled with patcompbranch().  Further branches
 * are then compiled for as long as the input continues with a bar
 * (alternative, P_BRANCH) or an active tilde (exclusion, P_EXCLUDE or
 * P_EXCLUDP).
 *
 * For exclusions:
 *  - a single P_EXCSYNC node is attached to the preceding ordinary
 *    branch, and each exclusion is closed with P_EXCEND;
 *  - the low eight bits of patglobflags are cleared first;
 *  - at top level zpc_special[ZPC_SLASH] is temporarily set to Marker
 *    while the exclusion is compiled;
 *  - P_EXCLUDP (and PAT_HAS_EXCLUDP in patflags) is used only at top
 *    level when PAT_FILET is set.
 *
 * For ordinary alternatives, at top level patglobflags is reset to zero
 * and a P_GFLAGS node is emitted if the program header has non-zero
 * globflags.  Inside parentheses the flags saved on entry are restored.
 *
 * The list is closed with P_END at top level, P_CLOSE+n for a numbered
 * group (GF_BACKREF set and patnpar <= NSUBEXP on entry, in which case
 * it was opened with P_OPEN+n), or P_NOTHING for any other group.  All
 * branches except exclusions are linked to the closing node.  If the
 * glob flags changed inside a group, a trailing P_GFLAGS node carrying
 * the flags saved on entry is appended.
 *
 * *flagp receives the P_HSTART and P_PURESTR bits of the first branch.
 * P_PURESTR is dropped as soon as a second branch is seen, and P_HSTART
 * is or'ed in from later branches.
 *
 * Returns the position of the first node, measured in union upat units
 * from patout, or 0 on failure.  Failure means a branch failed to
 * compile, or the list was not followed by Outpar (inside parentheses)
 * or by the end of the string or a '/' with PAT_FILE (top level).
 */
727 static long
728 patcompswitch(int paren, int *flagp)
730 long starter, br, ender, excsync = 0;
731 int parno = 0;
732 int flags, gfchanged = 0;
733 long savglobflags = (long)patglobflags;
734 Upat ptr;
736 *flagp = 0;
738 if (paren && (patglobflags & GF_BACKREF) && patnpar <= NSUBEXP) {
740 * parenthesized: make an open node.
741 * We can only refer to the first nine parentheses.
742 * For any others, we just use P_OPEN on its own; there's
743 * no gain in arbitrarily limiting the number of parentheses.
745 parno = patnpar++;
746 starter = patnode(P_OPEN + parno);
747 } else
748 starter = 0;
750 br = patnode(P_BRANCH);
751 if (!patcompbranch(&flags, paren))
752 return 0;
753 if (patglobflags != (int)savglobflags)
754 gfchanged++;
755 if (starter)
756 pattail(starter, br);
757 else
758 starter = br;
760 *flagp |= flags & (P_HSTART|P_PURESTR);
762 while (*patparse == zpc_chars[ZPC_BAR] ||
763 (*patparse == zpc_special[ZPC_TILDE] &&
764 (patparse[1] == '/' ||
765 !memchr(zpc_special, patparse[1], ZPC_SEG_COUNT)))) {
766 int tilde = *patparse++ == zpc_special[ZPC_TILDE];
767 long gfnode = 0, newbr;
769 *flagp &= ~P_PURESTR;
771 if (tilde) {
772 union upat up;
773 /* excsync remembers the P_EXCSYNC node before a chain of
774 * exclusions: all point back to this. only the
775 * original (non-excluded) branch gets a trailing P_EXCSYNC.
777 if (!excsync) {
778 excsync = patnode(P_EXCSYNC);
779 patoptail(br, excsync);
782 * By default, approximations are turned off in exclusions:
783 * we need to do this here as otherwise the code compiling
784 * the exclusion doesn't know if the flags have really
785 * changed if the error count gets restored.
787 patglobflags &= ~0xff;
788 if (!(patflags & PAT_FILET) || paren) {
789 br = patnode(P_EXCLUDE);
790 } else {
792 * At top level (paren == 0) in a file glob !(patflags
793 * &PAT_FILET) do the exclusion prepending the file path
794 * so far. We need to flag this to avoid unnecessarily
795 * copying the path.
797 br = patnode(P_EXCLUDP);
798 patflags |= PAT_HAS_EXCLUDP;
800 up.p = NULL;
801 patadd((char *)&up, 0, sizeof(up), 0);
802 /* / is not treated as special if we are at top level */
803 if (!paren && zpc_special[ZPC_SLASH] == '/') {
804 tilde++;
805 zpc_special[ZPC_SLASH] = Marker;
807 } else {
808 excsync = 0;
809 br = patnode(P_BRANCH);
811 * The position of the following statements means globflags
812 * set in the main branch carry over to the exclusion.
814 if (!paren) {
815 patglobflags = 0;
816 if (((Patprog)patout)->globflags) {
818 * If at top level, we need to reinitialize flags to zero,
819 * since (#i)foo|bar only applies to foo and we stuck
820 * the #i into the global flags.
821 * We could have done it so that they only got set in the
822 * first branch, but it's quite convenient having any
823 * global flags set in the header and not buried in the
824 * pattern. (Or maybe it isn't and we should
825 * forget this bit and always stick in an explicit GFLAGS
826 * statement instead of using the header.)
827 * Also, this can't happen for file globs where there are
828 * no top-level |'s.
830 * No gfchanged, as nothing to follow branch at top
831 * level.
833 union upat up;
834 gfnode = patnode(P_GFLAGS);
835 up.l = patglobflags;
836 patadd((char *)&up, 0, sizeof(union upat), 0);
838 } else {
839 patglobflags = (int)savglobflags;
842 newbr = patcompbranch(&flags, paren);
843 if (tilde == 2) {
844 /* restore special treatment of / */
845 zpc_special[ZPC_SLASH] = '/';
847 if (!newbr)
848 return 0;
849 if (gfnode)
850 pattail(gfnode, newbr);
851 if (!tilde && patglobflags != (int)savglobflags)
852 gfchanged++;
853 pattail(starter, br);
854 if (excsync)
855 patoptail(br, patnode(P_EXCEND));
856 *flagp |= flags & P_HSTART;
860 * Make a closing node, hooking it to the end.
861 * Note that we can't optimize P_NOTHING out here, since another
862 * branch at that point would indicate the current choices continue,
863 * which they don't.
865 ender = patnode(paren ? parno ? P_CLOSE+parno : P_NOTHING : P_END);
866 pattail(starter, ender);
869 * Hook the tails of the branches to the closing node,
870 * except for exclusions which terminate where they are.
872 for (ptr = (Upat)patout + starter; ptr; ptr = PATNEXT(ptr))
873 if (!P_ISEXCLUDE(ptr))
874 patoptail(ptr-(Upat)patout, ender);
876 /* check for proper termination */
877 if ((paren && *patparse++ != Outpar) ||
878 (!paren && *patparse &&
879 !((patflags & PAT_FILE) && *patparse == '/')))
880 return 0;
882 if (paren && gfchanged) {
884 * Restore old values of flags when leaving parentheses.
885 * gfchanged detects a change in any branch (except exclusions
886 * which are separate), since we need to emit this even if
887 * a later branch happened to put the flags back.
889 pattail(ender, patnode(P_GFLAGS));
890 patglobflags = (int)savglobflags;
891 patadd((char *)&savglobflags, 0, sizeof(long), 0);
894 return starter;
898 * Compile something ended by Bar, Outpar, Tilde, or end of string.
899 * Note the BRANCH or EXCLUDE tag must already have been emitted:
900 * this returns the position of the operand of that.
903 /**/
/*
 * patcompbranch(flagp, paren): compile one branch, i.e. a sequence of
 * pieces chained together with pattail().
 *
 * Compilation goes on until *patparse is one of the first ZPC_SEG_COUNT
 * entries of zpc_special.  An active tilde does not end the branch when
 * it is followed by one of those terminators other than '/'.  Each
 * round handles one of:
 *
 *  - globbing flags, "(#...)" or "@(#...)", read by patgetglobflags().
 *    Flag groups reported as ignorable emit nothing, and end the branch
 *    if they are last in the string.  A start/end assertion becomes a
 *    node of its own.  Flags right at the start of the pattern are
 *    stored in the program header.  Flags at the very end of the string
 *    are left in patglobflags and end the branch.  Anywhere else a
 *    P_GFLAGS node carrying the new flags is emitted, but only if the
 *    flags changed.
 *  - an active hat, handed to patcompnot().
 *  - anything else, handed to patcomppiece() along with paren.
 *
 * *flagp starts as P_PURESTR, which is dropped as soon as any piece
 * lacks it.  P_HSTART is taken from the first piece only.
 *
 * Returns the position of the first node of the branch, or 0 on
 * failure (bad glob flags, or a piece that failed to compile).  A
 * branch that emitted no node yields a single P_NOTHING node.
 */
904 static long
905 patcompbranch(int *flagp, int paren)
907 long chain, latest = 0, starter;
908 int flags = 0;
910 *flagp = P_PURESTR;
912 starter = chain = 0;
913 while (!memchr(zpc_special, *patparse, ZPC_SEG_COUNT) ||
914 (*patparse == zpc_special[ZPC_TILDE] && patparse[1] != '/' &&
915 memchr(zpc_special, patparse[1], ZPC_SEG_COUNT))) {
916 if ((*patparse == zpc_special[ZPC_INPAR] &&
917 patparse[1] == zpc_special[ZPC_HASH]) ||
918 (*patparse == zpc_special[ZPC_KSH_AT] && patparse[1] == Inpar &&
919 patparse[2] == zpc_special[ZPC_HASH])) {
920 /* Globbing flags. */
921 char *pp1 = patparse;
922 int oldglobflags = patglobflags, ignore;
923 long assert;
924 patparse += (*patparse == '@') ? 3 : 2;
925 if (!patgetglobflags(&patparse, &assert, &ignore))
926 return 0;
927 if (!ignore) {
928 if (assert) {
930 * Start/end assertion looking like flags, but
931 * actually handled as a normal node
933 latest = patnode(assert);
934 flags = 0;
935 } else {
936 if (pp1 == patstart) {
937 /* Right at start of pattern, the simplest case.
938 * Put them into the flags and don't emit anything.
940 ((Patprog)patout)->globflags = patglobflags;
941 continue;
942 } else if (!*patparse) {
943 /* Right at the end, so just leave the flags for
944 * the next Patprog in the chain to pick up.
946 break;
949 * Otherwise, we have to stick them in as a pattern
950 * matching nothing.
952 if (oldglobflags != patglobflags) {
953 /* Flags changed */
954 union upat up;
955 latest = patnode(P_GFLAGS);
956 up.l = patglobflags;
957 patadd((char *)&up, 0, sizeof(union upat), 0);
958 } else {
959 /* No effect. */
960 continue;
963 } else if (!*patparse)
964 break;
965 else
966 continue;
967 } else if (*patparse == zpc_special[ZPC_HAT]) {
969 * ^pat: anything but pat. For proper backtracking,
970 * etc., we turn this into (*~pat), except without the
971 * parentheses.
973 patparse++;
974 latest = patcompnot(0, &flags);
975 } else
976 latest = patcomppiece(&flags, paren);
977 if (!latest)
978 return 0;
979 if (!starter)
980 starter = latest;
981 if (!(flags & P_PURESTR))
982 *flagp &= ~P_PURESTR;
983 if (!chain)
984 *flagp |= flags & P_HSTART;
985 else
986 pattail(chain, latest);
987 chain = latest;
989 /* check if there was nothing in the loop, i.e. () */
990 if (!chain)
991 starter = patnode(P_NOTHING);
993 return starter;
996 /* get glob flags, return 1 for success, 0 for failure */
998 /**/
1000 patgetglobflags(char **strp, long *assertp, int *ignore)
1002 char *nptr, *ptr = *strp;
1003 zlong ret;
1005 *assertp = 0;
1006 *ignore = 1;
1007 /* (#X): assumes we are still positioned on the first X */
1008 for (; *ptr && *ptr != Outpar; ptr++) {
1009 if (*ptr == 'q') {
1010 /* Glob qualifiers, ignored in pattern code */
1011 while (*ptr && *ptr != Outpar)
1012 ptr++;
1013 break;
1014 } else {
1015 *ignore = 0;
1016 switch (*ptr) {
1017 case 'a':
1018 /* Approximate matching, max no. of errors follows */
1019 ret = zstrtol(++ptr, &nptr, 10);
1021 * We can't have more than 254, because we need 255 to
1022 * mark 254 errors in wbranch and exclude sync strings
1023 * (hypothetically --- hope no-one tries it).
1025 if (ret < 0 || ret > 254 || ptr == nptr)
1026 return 0;
1027 patglobflags = (patglobflags & ~0xff) | (ret & 0xff);
1028 ptr = nptr-1;
1029 break;
1031 case 'l':
1032 /* Lowercase in pattern matches lower or upper in target */
1033 patglobflags = (patglobflags & ~GF_IGNCASE) | GF_LCMATCHUC;
1034 break;
1036 case 'i':
1037 /* Fully case insensitive */
1038 patglobflags = (patglobflags & ~GF_LCMATCHUC) | GF_IGNCASE;
1039 break;
1041 case 'I':
1042 /* Restore case sensitivity */
1043 patglobflags &= ~(GF_LCMATCHUC|GF_IGNCASE);
1044 break;
1046 case 'b':
1047 /* Make backreferences */
1048 patglobflags |= GF_BACKREF;
1049 break;
1051 case 'B':
1052 /* Don't make backreferences */
1053 patglobflags &= ~GF_BACKREF;
1054 break;
1056 case 'm':
1057 /* Make references to complete match */
1058 patglobflags |= GF_MATCHREF;
1059 break;
1061 case 'M':
1062 /* Don't */
1063 patglobflags &= ~GF_MATCHREF;
1064 break;
1066 case 's':
1067 *assertp = P_ISSTART;
1068 break;
1070 case 'e':
1071 *assertp = P_ISEND;
1072 break;
1074 case 'u':
1075 patglobflags |= GF_MULTIBYTE;
1076 break;
1078 case 'U':
1079 patglobflags &= ~GF_MULTIBYTE;
1080 break;
1082 default:
1083 return 0;
1087 if (*ptr != Outpar)
1088 return 0;
1089 /* Start/end assertions must appear on their own. */
1090 if (*assertp && (*strp)[1] != Outpar)
1091 return 0;
1092 *strp = ptr + 1;
1093 return 1;
/*
 * Names recognised between [: and :] inside a bracket expression.
 * The position of a name in this table is significant: range_type()
 * returns the index plus PP_FIRST, and pattern_range_to_string() maps
 * the code back by indexing this array.  The list is NULL-terminated.
 */
static const char *colon_stuffs[] = {
    "alpha",
    "alnum",
    "ascii",
    "blank",
    "cntrl",
    "digit",
    "graph",
    "lower",
    "print",
    "punct",
    "space",
    "upper",
    "xdigit",
    "IDENT",
    "IFS",
    "IFSSPACE",
    "WORD",
    NULL
};
1104 * Handle the guts of a [:stuff:] character class element.
1105 * start is the beginning of "stuff" and len is its length.
1106 * This code is exported for the benefit of completion matching.
1109 /**/
1110 mod_export int
1111 range_type(char *start, int len)
1113 const char **csp;
1115 for (csp = colon_stuffs; *csp; csp++) {
1116 if (strlen(*csp) == len && !strncmp(start, *csp, len))
1117 return (csp - colon_stuffs) + PP_FIRST;
1120 return PP_UNKWN;
1125 * Convert the contents of a [...] or [^...] expression (just the
1126 * ... part) back into a string. This is used by compfiles -p/-P
1127 * for some reason. The compiled form (a metafied string) is
1128 * passed in rangestr.
1130 * If outstr is non-NULL the compiled form is placed there. It
1131 * must be sufficiently long. A terminating NULL is appended.
1133 * Return the length required, not including the terminating NULL.
1135 * TODO: this is non-multibyte for now. It will need to be defined
1136 * appropriately with MULTIBYTE_SUPPORT when the completion matching
1137 * code catches up.
1140 /**/
1141 mod_export int
1142 pattern_range_to_string(char *rangestr, char *outstr)
1144 int len = 0;
1146 while (*rangestr) {
1147 if (imeta(STOUC(*rangestr))) {
1148 int swtype = STOUC(*rangestr) - STOUC(Meta);
1150 if (swtype == 0) {
1151 /* Ordindary metafied character */
1152 if (outstr)
1154 *outstr++ = Meta;
1155 *outstr++ = rangestr[1] ^ 32;
1157 len += 2;
1158 rangestr += 2;
1159 } else if (swtype == PP_RANGE) {
1160 /* X-Y range */
1161 int i;
1163 for (i = 0; i < 2; i++) {
1164 if (*rangestr == Meta) {
1165 if (outstr) {
1166 *outstr++ = Meta;
1167 *outstr++ = rangestr[1];
1169 len += 2;
1170 rangestr += 2;
1171 } else {
1172 if (outstr)
1173 *outstr++ = *rangestr;
1174 len++;
1175 rangestr++;
1178 if (i == 0) {
1179 if (outstr)
1180 *outstr++ = '-';
1181 len++;
1184 } else if (swtype >= PP_FIRST && swtype <= PP_LAST) {
1185 /* [:stuff:]; we need to output [: and :] */
1186 const char *found = colon_stuffs[swtype - PP_FIRST];
1187 int newlen = strlen(found);
1188 if (outstr) {
1189 strcpy(outstr, "[:");
1190 outstr += 2;
1191 memcpy(outstr, found, newlen);
1192 outstr += newlen;
1193 strcpy(outstr, ":]");
1194 outstr += 2;
1196 len += newlen + 4;
1197 rangestr++;
1198 } else {
1199 /* shouldn't happen */
1200 DPUTS(1, "BUG: unknown PP_ code in pattern range");
1201 rangestr++;
1203 } else {
1204 /* ordinary character, guaranteed no Meta handling needed */
1205 if (outstr)
1206 *outstr++ = *rangestr;
1207 len++;
1208 rangestr++;
1212 if (outstr)
1213 *outstr = '\0';
1214 return len;
/*
 * patcomppiece(flagp, paren): compile one piece of the pattern found at
 * patparse.  A piece is either a run of ordinary characters, emitted as
 * a P_EXACTLY node, or one special construct (?, *, [...], (...), <m-n>),
 * optionally followed by a closure: #, ##, (#cN,M), or introduced by a
 * ksh-style X( prefix.
 *
 * flagp receives P_ flags describing the compiled piece.
 * paren is tested only when '(' is disabled as a pattern character: if it
 * is zero, a ')' is then accepted as an ordinary string character.
 *
 * Returns the offset of the first node of the piece as handed back by
 * patnode(), or 0 if the pattern could not be parsed.
 */
1218 * compile a chunk such as a literal string or a [...] followed
1219 * by a possible hash operator
1222 /**/
1223 static long
1224 patcomppiece(int *flagp, int paren)
1226 long starter = 0, next, op, opnd;
1227 int flags, flags2, kshchar, len, ch, patch, nmeta;
1228 int hash, count;
1229 union upat up;
1230 char *nptr, *str0, *ptr, *patprev;
1231 zrange_t from, to;
1232 char *charstart;
1234 flags = 0;
1235 str0 = patprev = patparse;
/* Scan over ordinary characters until a special one or a ksh X( is met. */
1236 for (;;) {
1238 * Check if we have a string. First, we need to make sure
1239 * the string doesn't introduce a ksh-like parenthesized expression.
1241 kshchar = '\0';
1242 if (*patparse && patparse[1] == Inpar) {
1243 if (*patparse == zpc_special[ZPC_KSH_PLUS])
1244 kshchar = STOUC('+');
1245 else if (*patparse == zpc_special[ZPC_KSH_BANG])
1246 kshchar = STOUC('!');
1247 else if (*patparse == zpc_special[ZPC_KSH_AT])
1248 kshchar = STOUC('@');
1249 else if (*patparse == zpc_special[ZPC_KSH_STAR])
1250 kshchar = STOUC('*');
1251 else if (*patparse == zpc_special[ZPC_KSH_QUEST])
1252 kshchar = STOUC('?');
1256 * If '(' is disabled as a pattern char, allow ')' as
1257 * an ordinary string character if there are no parentheses to
1258 * close. Don't allow it otherwise, it changes the syntax.
1260 if (zpc_special[ZPC_INPAR] != Marker || *patparse != Outpar ||
1261 paren) {
1263 * End of string (or no string at all) if ksh-type parentheses,
1264 * or special character, unless that character is a tilde and
1265 * the character following is an end-of-segment character. Thus
1266 * tildes are not special if there is nothing following to
1267 * be excluded.
1269 * Don't look for X()-style kshglobs at this point; we've
1270 * checked above for the case with parentheses and we don't
1271 * want to match without parentheses.
1273 if (kshchar ||
1274 (memchr(zpc_special, *patparse, ZPC_NO_KSH_GLOB) &&
1275 (*patparse != zpc_special[ZPC_TILDE] ||
1276 patparse[1] == '/' ||
1277 !memchr(zpc_special, patparse[1], ZPC_SEG_COUNT)))) {
1278 break;
1282 /* Remember the previous character for backtracking */
1283 patprev = patparse;
1284 METACHARINC(patparse);
/* First case: at least one ordinary character was scanned. */
1287 if (patparse > str0) {
1288 long slen = patparse - str0;
1289 int morelen;
1291 /* Ordinary string: cancel kshchar lookahead */
1292 kshchar = '\0';
1294 * Assume it matches a simple string until we find otherwise.
1296 flags |= P_PURESTR;
1297 DPUTS(patparse == str0, "BUG: matched nothing in patcomppiece.");
1298 /* more than one character matched? */
1299 morelen = (patprev > str0);
1301 * If we have more than one character, a following hash
1302 * or (#c...) only applies to the last, so backtrack one character.
1304 if ((*patparse == zpc_special[ZPC_HASH] ||
1305 (*patparse == zpc_special[ZPC_INPAR] &&
1306 patparse[1] == zpc_special[ZPC_HASH] &&
1307 patparse[2] == 'c') ||
1308 (*patparse == zpc_special[ZPC_KSH_AT] &&
1309 patparse[1] == Inpar &&
1310 patparse[2] == zpc_special[ZPC_HASH] &&
1311 patparse[3] == 'c')) && morelen)
1312 patparse = patprev;
1314 * If len is 1, we can't have an active # following, so doesn't
1315 * matter that we don't make X in `XX#' simple.
1317 if (!morelen)
1318 flags |= P_SIMPLE;
1319 starter = patnode(P_EXACTLY);
1321 /* Get length of string without metafication. */
1322 nmeta = 0;
1323 /* inherited from domatch, but why, exactly? */
1324 if (*str0 == Nularg)
1325 str0++;
1326 for (ptr = str0; ptr < patparse; ptr++) {
1327 if (*ptr == Meta) {
1328 nmeta++;
1329 ptr++;
1332 slen = (patparse - str0) - nmeta;
1333 /* First add length, which is a long */
1334 patadd((char *)&slen, 0, sizeof(long), 0);
1336 * Then the string, not null terminated.
1337 * Unmetafy and untokenize; pass the final length,
1338 * which is what we need to allocate, i.e. not including
1339 * a count for each Meta in the string.
1341 patadd(str0, 0, slen, PA_UNMETA);
1342 nptr = P_LS_STR((Upat)patout + starter);
1344 * It's much simpler to turn off pure string mode for
1345 * any case-insensitive or approximate matching; usually,
1346 * that is correct, or they wouldn't have been turned on.
1347 * However, we need to make sure we match a "." or ".."
1348 * in a file name as a pure string. There's a minor bug
1349 * that this will also apply to something like
1350 * ..(#a1).. (i.e. the (#a1) has no effect), but if you're
1351 * going to write funny patterns, you get no sympathy from me.
1353 if (patglobflags &
1354 #ifdef __CYGWIN__
1356 * As above: don't use pattern matching for files
1357 * just because of case insensitivity if file system
1358 * is known to be case insensitive.
1360 * This is known to be necessary in at least one case:
1361 * if "mount -c /" is in effect, so that drives appear
1362 * directly under / instead of the usual /cygdrive, they
1363 * aren't shown by readdir(). So it's vital we don't use
1364 * globbing to find "/c", since that'll fail.
1366 ((patflags & PAT_FILE) ?
1367 (0xFF|GF_LCMATCHUC) :
1368 (0xFF|GF_LCMATCHUC|GF_IGNCASE))
1369 #else
1370 (0xFF|GF_LCMATCHUC|GF_IGNCASE)
1371 #endif
1373 if (!(patflags & PAT_FILE))
1374 flags &= ~P_PURESTR;
1375 else if (!(nptr[0] == '.' &&
1376 (slen == 1 || (nptr[1] == '.' && slen == 2))))
1377 flags &= ~P_PURESTR;
/* Second case: no ordinary characters, so compile one special construct. */
1379 } else {
1380 if (kshchar)
1381 patparse++;
1383 patch = *patparse;
1384 METACHARINC(patparse);
1385 switch(patch) {
1386 case Quest:
1387 DPUTS(zpc_special[ZPC_QUEST] == Marker,
1388 "Treating '?' as pattern character although disabled");
1389 flags |= P_SIMPLE;
1390 starter = patnode(P_ANY);
1391 break;
1392 case Star:
1393 DPUTS(zpc_special[ZPC_STAR] == Marker,
1394 "Treating '*' as pattern character although disabled");
1395 /* kshchar is used as a sign that we can't have #'s. */
1396 kshchar = -1;
1397 starter = patnode(P_STAR);
1398 break;
1399 case Inbrack:
1400 DPUTS(zpc_special[ZPC_INBRACK] == Marker,
1401 "Treating '[' as pattern character although disabled");
1402 flags |= P_SIMPLE;
1403 if (*patparse == Hat || *patparse == '^' || *patparse == '!') {
1404 patparse++;
1405 starter = patnode(P_ANYBUT);
1406 } else
1407 starter = patnode(P_ANYOF);
1409 * []...] means match a "]" or other included characters.
1410 * However, to be a bit helpful and for compatibility
1411 * with other shells, don't take in that sense if
1412 * there's no further "]". That's still imperfect,
1413 * but it's all we can do --- we're required to
1414 * treat [$var]*[$var]with empty var as [ ... ]
1415 * containing "]*[".
1417 if (*patparse == Outbrack && strchr(patparse+1, Outbrack)) {
1418 patparse++;
1419 patadd(NULL, ']', 1, PA_NOALIGN);
1421 while (*patparse && *patparse != Outbrack) {
1422 /* Meta is not a token */
1423 if (*patparse == Inbrack && patparse[1] == ':' &&
1424 (nptr = strchr(patparse+2, ':')) &&
1425 nptr[1] == Outbrack) {
1426 /* Posix range. */
1427 patparse += 2;
1428 len = nptr - patparse;
1429 ch = range_type(patparse, len);
1430 patparse = nptr + 2;
1431 if (ch != PP_UNKWN)
1432 patadd(NULL, STOUC(Meta) + ch, 1, PA_NOALIGN);
1433 continue;
1435 charstart = patparse;
1436 METACHARINC(patparse);
/* X-Y range: emit the Meta+PP_RANGE marker, then both end points. */
1438 if (*patparse == '-' && patparse[1] &&
1439 patparse[1] != Outbrack) {
1440 patadd(NULL, STOUC(Meta)+PP_RANGE, 1, PA_NOALIGN);
1441 if (itok(*charstart)) {
1442 patadd(0, STOUC(ztokens[*charstart - Pound]), 1,
1443 PA_NOALIGN);
1444 } else {
1445 patadd(charstart, 0, patparse-charstart, PA_NOALIGN);
1447 charstart = ++patparse; /* skip ASCII '-' */
1448 METACHARINC(patparse);
1450 if (itok(*charstart)) {
1451 patadd(0, STOUC(ztokens[*charstart - Pound]), 1,
1452 PA_NOALIGN);
1453 } else {
1454 patadd(charstart, 0, patparse-charstart, PA_NOALIGN);
1457 if (*patparse != Outbrack)
1458 return 0;
1459 patparse++;
1460 /* terminate null string and fix alignment */
1461 patadd(NULL, 0, 1, 0);
1462 break;
1463 case Inpar:
1464 DPUTS(!kshchar && zpc_special[ZPC_INPAR] == Marker,
1465 "Treating '(' as pattern character although disabled");
1466 DPUTS(isset(SHGLOB) && !kshchar,
1467 "Treating bare '(' as pattern character with SHGLOB");
1468 if (kshchar == '!') {
1469 /* This is nasty, we should really either handle all
1470 * kshglobbing below or here. But most of the
1471 * others look like non-ksh patterns, while this one
1472 * doesn't, so we handle it here and leave the rest.
1473 * We treat it like an extendedglob ^, except that
1474 * it goes into parentheses.
1476 * If we did do kshglob here, we could support
1477 * the old behaviour that things like !(foo)##
1478 * work, but it makes the code more complicated at
1479 * the expense of allowing the user to do things
1480 * they shouldn't.
1482 if (!(starter = patcompnot(1, &flags2)))
1483 return 0;
1484 } else if (!(starter = patcompswitch(1, &flags2)))
1485 return 0;
1486 flags |= flags2 & P_HSTART;
1487 break;
1488 case Inang:
1489 /* Numeric glob */
1490 DPUTS(zpc_special[ZPC_INANG] == Marker,
1491 "Treating '<' as pattern character although disabled");
1492 DPUTS(isset(SHGLOB), "Treating <..> as numeric range with SHGLOB");
1493 len = 0; /* beginning present 1, end present 2 */
1494 if (idigit(*patparse)) {
1495 from = (zrange_t) zstrtol((char *)patparse,
1496 (char **)&nptr, 10);
1497 patparse = nptr;
1498 len |= 1;
1500 DPUTS(*patparse != '-', "BUG: - missing from numeric glob");
1501 patparse++;
1502 if (idigit(*patparse)) {
1503 to = (zrange_t) zstrtol((char *)patparse,
1504 (char **)&nptr, 10);
1505 patparse = nptr;
1506 len |= 2;
1508 if (*patparse != Outang)
1509 return 0;
1510 patparse++;
/* Only the limits flagged in len were parsed, so only those are emitted. */
1511 switch(len) {
1512 case 3:
1513 starter = patnode(P_NUMRNG);
1514 patadd((char *)&from, 0, sizeof(from), 0);
1515 patadd((char *)&to, 0, sizeof(to), 0);
1516 break;
1517 case 2:
1518 starter = patnode(P_NUMTO);
1519 patadd((char *)&to, 0, sizeof(to), 0);
1520 break;
1521 case 1:
1522 starter = patnode(P_NUMFROM);
1523 patadd((char *)&from, 0, sizeof(from), 0);
1524 break;
1525 case 0:
1526 starter = patnode(P_NUMANY);
1527 break;
1529 /* This can't be simple, because it isn't.
1530 * Mention in manual that matching digits with [...]
1531 * is more efficient.
1533 break;
1534 case Pound:
1535 DPUTS(zpc_special[ZPC_HASH] == Marker,
1536 "Treating '#' as pattern character although disabled");
1537 DPUTS(!isset(EXTENDEDGLOB), "BUG: # not treated as string");
1539 * A hash here is an error; it should follow something
1540 * repeatable.
1542 return 0;
1543 break;
1544 case Bnullkeep:
1546 * Marker for restoring a backslash in output:
1547 * does not match a character.
1549 next = patcomppiece(flagp, paren);
1551 * Can't match a pure string since we need to do this
1552 * as multiple chunks.
1554 *flagp &= ~P_PURESTR;
1555 return next;
1556 break;
1557 #ifdef DEBUG
1558 default:
1559 dputs("BUG: character not handled in patcomppiece");
1560 return 0;
1561 break;
1562 #endif
/* See whether a closure follows; if none applies, the piece is finished. */
1566 count = 0;
1567 if (!(hash = (*patparse == zpc_special[ZPC_HASH])) &&
1568 !(count = ((*patparse == zpc_special[ZPC_INPAR] &&
1569 patparse[1] == zpc_special[ZPC_HASH] &&
1570 patparse[2] == 'c') ||
1571 (*patparse == zpc_special[ZPC_KSH_AT] &&
1572 patparse[1] == Inpar &&
1573 patparse[2] == zpc_special[ZPC_HASH] &&
1574 patparse[3] == 'c'))) &&
1575 (kshchar <= 0 || kshchar == '@' || kshchar == '!')) {
1576 *flagp = flags;
1577 return starter;
1580 /* too much at once doesn't currently work */
1581 if (kshchar && (hash || count))
1582 return 0;
/* Pick the closure operator; op stays 0 for the ksh ?(...) form. */
1584 if (kshchar == '*') {
1585 op = P_ONEHASH;
1586 *flagp = P_HSTART;
1587 } else if (kshchar == '+') {
1588 op = P_TWOHASH;
1589 *flagp = P_HSTART;
1590 } else if (kshchar == '?') {
1591 op = 0;
1592 *flagp = 0;
1593 } else if (count) {
1594 op = P_COUNT;
1595 patparse += 3;
1596 *flagp = P_HSTART;
1597 } else if (*++patparse == zpc_special[ZPC_HASH]) {
1598 op = P_TWOHASH;
1599 patparse++;
1600 *flagp = P_HSTART;
1601 } else {
1602 op = P_ONEHASH;
1603 *flagp = P_HSTART;
1607 * Note optimizations with pointers into P_NOTHING branches: some
1608 * should logically point to next node after current piece.
1610 * Backtracking is also encoded in a slightly obscure way: the
1611 * code emitted ensures we test the non-empty branch of complex
1612 * patterns before the empty branch on each repetition. Hence
1613 * each time we fail on a non-empty branch, we try the empty branch,
1614 * which is equivalent to backtracking.
1616 if (op == P_COUNT) {
1617 /* (#cN,M) */
1618 union upat countargs[P_CT_OPERAND];
1619 char *opp = patparse;
1621 countargs[0].l = P_COUNT;
1622 countargs[P_CT_CURRENT].l = 0L;
1623 countargs[P_CT_MIN].l = (long)zstrtol(patparse, &patparse, 10);
1624 if (patparse == opp) {
1625 /* missing number treated as zero */
1626 countargs[P_CT_MIN].l = 0L;
1628 if (*patparse != ',' && *patparse != Comma) {
1629 /* either max = min or error */
1630 if (*patparse != Outpar)
1631 return 0;
1632 countargs[P_CT_MAX].l = countargs[P_CT_MIN].l;
1633 } else {
1634 opp = ++patparse;
1635 countargs[P_CT_MAX].l = (long)zstrtol(patparse, &patparse, 10);
1636 if (*patparse != Outpar)
1637 return 0;
1638 if (patparse == opp) {
1639 /* missing number treated as infinity: record as -1 */
1640 countargs[P_CT_MAX].l = -1L;
1643 patparse++;
1644 countargs[P_CT_PTR].p = NULL;
1645 /* Mark this chain as a min/max count... */
1646 patinsert(P_COUNTSTART, starter, (char *)countargs, sizeof(countargs));
1648 * The next of the operand is a loop back to the P_COUNT. This is
1649 * how we get recursion for the count. We don't loop back to
1650 * the P_COUNTSTART; that's used for initialising the count
1651 * and saving and restoring the count for any enclosing use
1652 * of the match.
1654 opnd = P_OPERAND(starter) + P_CT_OPERAND;
1655 pattail(opnd, patnode(P_BACK));
1656 pattail(opnd, P_OPERAND(starter));
1658 * The next of the counter operators is what follows the
1659 * closure.
1660 * This handles matching of the tail.
1662 next = patnode(P_NOTHING);
1663 pattail(starter, next);
1664 pattail(P_OPERAND(starter), next);
1665 } else if ((flags & P_SIMPLE) && (op == P_ONEHASH || op == P_TWOHASH) &&
1666 P_OP((Upat)patout+starter) == P_ANY) {
1667 /* Optimize ?# to *. Silly thing to do, since who would use
1668 * use ?# ? But it makes the later code shorter.
1670 Upat uptr = (Upat)patout + starter;
1671 if (op == P_TWOHASH) {
1672 /* ?## becomes ?* */
1673 uptr->l = (uptr->l & ~0xff) | P_ANY;
1674 pattail(starter, patnode(P_STAR));
1675 } else {
1676 uptr->l = (uptr->l & ~0xff) | P_STAR;
1678 } else if ((flags & P_SIMPLE) && op && !(patglobflags & 0xff)) {
1679 /* Simplify, but not if we need to look for approximations. */
1680 patinsert(op, starter, NULL, 0);
1681 } else if (op == P_ONEHASH) {
1682 /* Emit x# as (x&|), where & means "self". */
1683 up.p = NULL;
1684 patinsert(P_WBRANCH, starter, (char *)&up, sizeof(up));
1685 /* Either x */
1686 patoptail(starter, patnode(P_BACK)); /* and loop */
1687 patoptail(starter, starter); /* back */
1688 pattail(starter, patnode(P_BRANCH)); /* or */
1689 pattail(starter, patnode(P_NOTHING)); /* null. */
1690 } else if (op == P_TWOHASH) {
1691 /* Emit x## as x(&|) where & means "self". */
1692 next = patnode(P_WBRANCH); /* Either */
1693 up.p = NULL;
1694 patadd((char *)&up, 0, sizeof(up), 0);
1695 pattail(starter, next);
1696 pattail(patnode(P_BACK), starter); /* loop back */
1697 pattail(next, patnode(P_BRANCH)); /* or */
1698 pattail(starter, patnode(P_NOTHING)); /* null. */
1699 } else if (kshchar == '?') {
1700 /* Emit ?(x) as (x|) */
1701 patinsert(P_BRANCH, starter, NULL, 0); /* Either x */
1702 pattail(starter, patnode(P_BRANCH)); /* or */
1703 next = patnode(P_NOTHING); /* null */
1704 pattail(starter, next);
1705 patoptail(starter, next);
/* Yet another # directly after a closure is rejected. */
1707 if (*patparse == zpc_special[ZPC_HASH])
1708 return 0;
1710 return starter;
1714 * Turn a ^foo (paren = 0) or !(foo) (paren = 1) into *~foo with
1715 * parentheses if necessary. As you see, that's really quite easy.
1718 /**/
1719 static long
1720 patcompnot(int paren, int *flagsp)
1722 union upat up;
1723 long excsync, br, excl, n, starter;
1724 int dummy;
1726 /* Here, we're matching a star at the start. */
1727 *flagsp = P_HSTART;
1729 starter = patnode(P_BRANCH);
1730 br = patnode(P_STAR);
1731 excsync = patnode(P_EXCSYNC);
1732 pattail(br, excsync);
1733 pattail(starter, excl = patnode(P_EXCLUDE));
1734 up.p = NULL;
1735 patadd((char *)&up, 0, sizeof(up), 0);
1736 if (!(br = (paren ? patcompswitch(1, &dummy) : patcompbranch(&dummy, 0))))
1737 return 0;
1738 pattail(br, patnode(P_EXCEND));
1739 n = patnode(P_NOTHING); /* just so much easier */
1740 pattail(excsync, n);
1741 pattail(excl, n);
1743 return starter;
1746 /* Emit a node */
1748 /**/
1749 static long
1750 patnode(long op)
1752 long starter = (Upat)patcode - (Upat)patout;
1753 union upat up;
1755 up.l = op;
1756 patadd((char *)&up, 0, sizeof(union upat), 0);
1757 return starter;
1761 * insert an operator in front of an already emitted operand:
1762 * we relocate the operand. there had better be nothing else after.
1765 /**/
1766 static void
1767 patinsert(long op, int opnd, char *xtra, int sz)
1769 char *src, *dst, *opdst;
1770 union upat buf, *lptr;
1772 buf.l = 0;
1773 patadd((char *)&buf, 0, sizeof(buf), 0);
1774 if (sz)
1775 patadd(xtra, 0, sz, 0);
1776 src = patcode - sizeof(union upat) - sz;
1777 dst = patcode;
1778 opdst = patout + opnd * sizeof(union upat);
1779 while (src > opdst)
1780 *--dst = *--src;
1782 /* A cast can't be an lvalue */
1783 lptr = (Upat)opdst;
1784 lptr->l = op;
1785 opdst += sizeof(union upat);
1786 while (sz--)
1787 *opdst++ = *xtra++;
1790 /* set the 'next' pointer at the end of a node chain */
1792 /**/
1793 static void
1794 pattail(long p, long val)
1796 Upat scan, temp;
1797 long offset;
1799 scan = (Upat)patout + p;
1800 for (;;) {
1801 if (!(temp = PATNEXT(scan)))
1802 break;
1803 scan = temp;
1806 offset = (P_OP(scan) == P_BACK)
1807 ? (scan - (Upat)patout) - val : val - (scan - (Upat)patout);
1809 scan->l |= offset << 8;
1812 /* do pattail, but on operand of first argument; nop if operandless */
1814 /**/
1815 static void patoptail(long p, long val)
1817 Upat ptr = (Upat)patout + p;
1818 int op = P_OP(ptr);
1819 if (!p || !P_ISBRANCH(ptr))
1820 return;
1821 if (op == P_BRANCH)
1822 pattail(P_OPERAND(p), val);
1823 else
1824 pattail(P_OPERAND(p) + 1, val);
1829 * Run a pattern.
1831 static char *patinstart; /* Start of input string */
1832 static char *patinend; /* End of input string */
1833 static char *patinput; /* String input pointer */
1834 static char *patinpath; /* Full path for use with ~ exclusions */
1835 static int patinlen; /* Length of last successful match.
1836 * Includes count of Meta characters.
1839 static char *patbeginp[NSUBEXP]; /* Pointer to backref beginnings */
1840 static char *patendp[NSUBEXP]; /* Pointer to backref ends */
1841 static int parsfound; /* parentheses (with backrefs) found */
1843 static int globdots; /* Glob initial dots? */
1846 * Character functions operating on unmetafied strings.
1848 #ifdef MULTIBYTE_SUPPORT
1850 /* Get a character from the start point in a string */
1851 #define CHARREF(x, y) charref((x), (y))
1852 static wchar_t
1853 charref(char *x, char *y)
1855 wchar_t wc;
1856 size_t ret;
1858 if (!(patglobflags & GF_MULTIBYTE) || !(STOUC(*x) & 0x80))
1859 return (wchar_t) STOUC(*x);
1861 ret = mbrtowc(&wc, x, y-x, &shiftstate);
1863 if (ret == MB_INVALID || ret == MB_INCOMPLETE) {
1864 /* Error. Treat as single byte. */
1865 /* Reset the shift state for next time. */
1866 memset(&shiftstate, 0, sizeof(shiftstate));
1867 return (wchar_t) STOUC(*x);
1870 return wc;
1873 /* Get a pointer to the next character */
1874 #define CHARNEXT(x, y) charnext((x), (y))
1875 static char *
1876 charnext(char *x, char *y)
1878 wchar_t wc;
1879 size_t ret;
1881 if (!(patglobflags & GF_MULTIBYTE) || !(STOUC(*x) & 0x80))
1882 return x + 1;
1884 ret = mbrtowc(&wc, x, y-x, &shiftstate);
1886 if (ret == MB_INVALID || ret == MB_INCOMPLETE) {
1887 /* Error. Treat as single byte. */
1888 /* Reset the shift state for next time. */
1889 memset(&shiftstate, 0, sizeof(shiftstate));
1890 return x + 1;
1893 /* Nulls here are normal characters */
1894 return x + (ret ? ret : 1);
1897 /* Increment a pointer past the current character. */
1898 #define CHARINC(x, y) ((x) = charnext((x), (y)))
1901 /* Get a character and increment */
1902 #define CHARREFINC(x, y, z) charrefinc(&(x), (y), (z))
1903 static wchar_t
1904 charrefinc(char **x, char *y, int *z)
1906 wchar_t wc;
1907 size_t ret;
1909 if (!(patglobflags & GF_MULTIBYTE) || !(STOUC(**x) & 0x80))
1910 return (wchar_t) STOUC(*(*x)++);
1912 ret = mbrtowc(&wc, *x, y-*x, &shiftstate);
1914 if (ret == MB_INVALID || ret == MB_INCOMPLETE) {
1915 /* Error. Treat as single byte, but flag. */
1916 *z = 1;
1917 /* Reset the shift state for next time. */
1918 memset(&shiftstate, 0, sizeof(shiftstate));
1919 return (wchar_t) STOUC(*(*x)++);
1922 /* Nulls here are normal characters */
1923 *x += ret ? ret : 1;
1925 return wc;
1930 * Counter the number of characters between two pointers, smaller first
1932 * This is used when setting values in parameters, so we obey
1933 * the MULTIBYTE option (even if it's been overridden locally).
1935 #define CHARSUB(x,y) charsub(x, y)
1936 static ptrdiff_t
1937 charsub(char *x, char *y)
1939 ptrdiff_t res = 0;
1940 size_t ret;
1941 wchar_t wc;
1943 if (!isset(MULTIBYTE))
1944 return y - x;
1946 while (x < y) {
1947 ret = mbrtowc(&wc, x, y-x, &shiftstate);
1949 if (ret == MB_INVALID || ret == MB_INCOMPLETE) {
1950 /* Error. Treat remainder as single characters */
1951 return res + (y - x);
1954 /* Treat nulls as normal characters */
1955 if (!ret)
1956 ret = 1;
1957 res++;
1958 x += ret;
1961 return res;
1964 #else /* no MULTIBYTE_SUPPORT */
/*
 * Single-byte versions of the character macros: every character is
 * exactly one byte, so the end-of-string argument y (and the error
 * flag z) are not used.
 */

/* Get a character from the start point in a string */
#define CHARREF(x, y)   (STOUC(*(x)))
/* Get a pointer to the next character */
#define CHARNEXT(x, y)  ((x)+1)
/* Increment a pointer past the current character. */
#define CHARINC(x, y)   ((x)++)
/* Get a character and increment */
#define CHARREFINC(x, y, z)     (STOUC(*(x)++))
/* Count the number of characters between two pointers, smaller first */
#define CHARSUB(x,y)    ((y) - (x))
1977 #endif /* MULTIBYTE_SUPPORT */
/*
 * The following need to be accessed in the globbing scanner for
 * a multi-component file path.  See horror story in glob.c.
 */

/**/
int errsfound;                          /* Total error count so far */

/**/
int forceerrs;                          /* Forced maximum error count */

/*
 * Reset the approximate-matching counters before a fresh match:
 * no errors found yet, and no forced error count (-1).
 */

/**/
void
pattrystart(void)
{
    errsfound = 0;
    forceerrs = -1;
}
1998 * Test prog against null-terminated, metafied string.
2001 /**/
2002 mod_export int
2003 pattry(Patprog prog, char *string)
2005 return pattryrefs(prog, string, -1, -1, 0, NULL, NULL, NULL);
2009 * Test prog against string of given length, no null termination
2010 * but still metafied at this point. offset gives an offset
2011 * to include in reported match indices
2014 /**/
2015 mod_export int
2016 pattrylen(Patprog prog, char *string, int len, int unmetalen, int offset)
2018 return pattryrefs(prog, string, len, unmetalen, offset, NULL, NULL, NULL);
2022 * Test prog against string with given lengths. The input
2023 * string is metafied; stringlen is the raw string length, and
2024 * unmetalen the number of characters in the original string (some
2025 * of which may now be metafied). Either value may be -1
2026 * to indicate a null-terminated string which will be counted. Note
2027 * there may be a severe penalty for this if a lot of matching is done
2028 * on one string.
2030 * offset is the position in the original string (not seen by
2031 * the pattern module) at which we are trying to match.
2032 * This is added in to the positions recorded in patbeginp and patendp
2033 * when we are looking for substrings. Currently this only happens
2034 * in the parameter substitution code.
2036 * Note this is a character offset, i.e. a metafied character
2037 * counts as 1.
2039 * The last three arguments are used to report the positions for the
2040 * backreferences. On entry, *nump should contain the maximum number
2041 * of positions to report. In this case the match, mbegin, mend
2042 * arrays are not altered.
2044 * If nump is NULL but endp is not NULL, then *endp is set to the
2045 * end position of the match, taking into account patinstart.
2048 /**/
2049 mod_export int
2050 pattryrefs(Patprog prog, char *string, int stringlen, int unmetalen,
2051 int patoffset,
2052 int *nump, int *begp, int *endp)
2054 int i, maxnpos = 0, ret, needfullpath, unmetalenp;
2055 int origlen;
2056 char **sp, **ep, *tryalloced, *ptr;
2057 char *progstr = (char *)prog + prog->startoff;
2059 if (nump) {
2060 maxnpos = *nump;
2061 *nump = 0;
2063 /* inherited from domatch, but why, exactly? */
2064 if (*string == Nularg) {
2065 string++;
2066 unmetalen--;
2069 if (stringlen < 0)
2070 stringlen = strlen(string);
2071 origlen = stringlen;
2073 patflags = prog->flags;
2075 * For a top-level ~-exclusion, we will need the full
2076 * path to exclude, so copy the path so far and append the
2077 * current test string.
2079 needfullpath = (patflags & PAT_HAS_EXCLUDP) && pathpos;
2081 /* Get the length of the full string when unmetafied. */
2082 if (unmetalen < 0)
2083 unmetalen = ztrsub(string + stringlen, string);
2084 if (needfullpath)
2085 unmetalenp = ztrsub(pathbuf + pathpos, pathbuf);
2086 else
2087 unmetalenp = 0;
2089 DPUTS(needfullpath && (patflags & (PAT_PURES|PAT_ANY)),
2090 "rum sort of file exclusion");
2092 * Partly for efficiency, and partly for the convenience of
2093 * globbing, we don't unmetafy pure string patterns, and
2094 * there's no reason to if the pattern is just a *.
2096 if (!(patflags & (PAT_PURES|PAT_ANY))
2097 && (needfullpath || unmetalen != stringlen)) {
2099 * We need to copy if we need to prepend the path so far
2100 * (in which case we copy both chunks), or if we have
2101 * Meta characters.
2103 char *dst;
2104 int icopy, ncopy;
2106 dst = tryalloced = zalloc(unmetalen + unmetalenp);
2108 if (needfullpath) {
2109 /* loop twice, copy path buffer first time */
2110 ptr = pathbuf;
2111 ncopy = unmetalenp;
2112 } else {
2113 /* just loop once, copy string with unmetafication */
2114 ptr = string;
2115 ncopy = unmetalen;
2117 for (icopy = 0; icopy < 2; icopy++) {
2118 for (i = 0; i < ncopy; i++) {
2119 if (*ptr == Meta) {
2120 ptr++;
2121 *dst++ = *ptr++ ^ 32;
2122 } else {
2123 *dst++ = *ptr++;
2126 if (!needfullpath)
2127 break;
2128 /* next time append test string to path so far */
2129 ptr = string;
2130 ncopy = unmetalen;
2133 if (needfullpath) {
2134 patinstart = tryalloced + unmetalenp;
2135 patinpath = tryalloced;
2136 } else {
2137 patinstart = tryalloced;
2138 patinpath = NULL;
2140 stringlen = unmetalen;
2141 } else {
2142 patinstart = string;
2143 tryalloced = patinpath = NULL;
2146 patinend = patinstart + stringlen;
2148 * From now on we do not require NULL termination of
2149 * the test string. There should also be no more references
2150 * to the variable string.
2153 if (prog->flags & (PAT_PURES|PAT_ANY)) {
2155 * Either we are testing against a pure string,
2156 * or we can match anything at all.
2158 int ret;
2159 if (prog->flags & PAT_ANY) {
2161 * Optimisation for a single "*": always matches
2162 * (except for no_glob_dots, see below).
2164 ret = 1;
2165 } else {
2167 * Testing a pure string. See if initial
2168 * components match.
2170 int lendiff = stringlen - prog->patmlen;
2171 if (lendiff < 0) {
2172 /* No, the pattern string is too long. */
2173 ret = 0;
2174 } else if (!memcmp(progstr, patinstart, prog->patmlen)) {
2176 * Initial component matches. Matches either
2177 * if lengths are the same or we are not anchored
2178 * to the end of the string.
2180 ret = !lendiff || (prog->flags & PAT_NOANCH);
2181 } else {
2182 /* No match. */
2183 ret = 0;
2186 if (ret) {
2188 * For files, we won't match initial "."s unless
2189 * glob_dots is set.
2191 if ((prog->flags & PAT_NOGLD) && *patinstart == '.') {
2192 ret = 0;
2193 } else {
2195 * Remember the length in case used for ${..#..} etc.
2196 * In this case, we didn't unmetafy the string.
2198 patinlen = (int)prog->patmlen;
2199 /* if matching files, must update globbing flags */
2200 patglobflags = prog->globend;
2202 if ((patglobflags & GF_MATCHREF) &&
2203 !(patflags & PAT_FILE)) {
2204 char *str = ztrduppfx(patinstart, patinlen);
2205 int mlen;
2208 * Count the characters. We're not using CHARSUB()
2209 * because the string is still metafied.
2211 MB_METACHARINIT();
2212 mlen = MB_METASTRLEN2END(patinstart, 0,
2213 patinstart + patinlen);
2215 setsparam("MATCH", str);
2216 setiparam("MBEGIN",
2217 (zlong)(patoffset + !isset(KSHARRAYS)));
2218 setiparam("MEND",
2219 (zlong)(mlen + patoffset +
2220 !isset(KSHARRAYS) - 1));
2225 if (tryalloced)
2226 zfree(tryalloced, unmetalen + unmetalenp);
2228 return ret;
2229 } else {
2230 int q = queue_signal_level();
2233 * Test for a `must match' string, unless we're scanning for a match
2234 * in which case we don't need to do this each time.
2236 ret = 1;
2237 if (!(prog->flags & PAT_SCAN) && prog->mustoff)
2239 char *testptr; /* start pointer into test string */
2240 char *teststop; /* last point from which we can match */
2241 char *patptr = (char *)prog + prog->mustoff;
2242 int patlen = prog->patmlen;
2243 int found = 0;
2245 if (patlen > stringlen) {
2246 /* Too long, can't match. */
2247 ret = 0;
2248 } else {
2249 teststop = patinend - patlen;
2251 for (testptr = patinstart; testptr <= teststop; testptr++)
2253 if (!memcmp(testptr, patptr, patlen)) {
2254 found = 1;
2255 break;
2259 if (!found)
2260 ret = 0;
2263 if (!ret) {
2264 if (tryalloced)
2265 zfree(tryalloced, unmetalen + unmetalenp);
2266 return 0;
2269 patglobflags = prog->globflags;
2270 if (!(patflags & PAT_FILE)) {
2271 forceerrs = -1;
2272 errsfound = 0;
2274 globdots = !(patflags & PAT_NOGLD);
2275 parsfound = 0;
2277 patinput = patinstart;
2279 dont_queue_signals();
2281 if (patmatch((Upat)progstr)) {
2283 * we were lazy and didn't save the globflags if an exclusion
2284 * failed, so set it now
2286 patglobflags = prog->globend;
2289 * Record length of successful match, including Meta
2290 * characters. Do it here so that patmatchlen() can return
2291 * it even if we delete the pattern strings.
2293 patinlen = patinput - patinstart;
2295 * Optimization: if we didn't find any Meta characters
2296 * to begin with, we don't need to look for them now.
2298 if (unmetalen != origlen) {
2299 for (ptr = patinstart; ptr < patinput; ptr++)
2300 if (imeta(*ptr))
2301 patinlen++;
2305 * Should we clear backreferences and matches on a failed
2306 * match?
2308 if ((patglobflags & GF_MATCHREF) && !(patflags & PAT_FILE)) {
2310 * m flag: for global match. This carries no overhead
2311 * in the pattern matching part.
2313 * Remember the test pattern is already unmetafied.
2315 char *str;
2316 int mlen = CHARSUB(patinstart, patinput);
2318 str = metafy(patinstart, patinput - patinstart, META_DUP);
2319 setsparam("MATCH", str);
2320 setiparam("MBEGIN", (zlong)(patoffset + !isset(KSHARRAYS)));
2321 setiparam("MEND",
2322 (zlong)(mlen + patoffset +
2323 !isset(KSHARRAYS) - 1));
2325 if (prog->patnpar && nump) {
2327 * b flag: for backreferences using parentheses. Reported
2328 * directly.
2330 *nump = prog->patnpar;
2332 sp = patbeginp;
2333 ep = patendp;
2335 for (i = 0; i < prog->patnpar && i < maxnpos; i++) {
2336 if (parsfound & (1 << i)) {
2337 if (begp)
2338 *begp++ = CHARSUB(patinstart, *sp) + patoffset;
2339 if (endp)
2340 *endp++ = CHARSUB(patinstart, *ep) + patoffset
2341 - 1;
2342 } else {
2343 if (begp)
2344 *begp++ = -1;
2345 if (endp)
2346 *endp++ = -1;
2349 sp++;
2350 ep++;
2352 } else if (prog->patnpar && !(patflags & PAT_FILE)) {
2354 * b flag: for backreferences using parentheses.
2356 int palen = prog->patnpar+1;
2357 char **matcharr, **mbeginarr, **mendarr;
2358 char numbuf[DIGBUFSIZE];
2360 matcharr = zshcalloc(palen*sizeof(char *));
2361 mbeginarr = zshcalloc(palen*sizeof(char *));
2362 mendarr = zshcalloc(palen*sizeof(char *));
2364 sp = patbeginp;
2365 ep = patendp;
2367 for (i = 0; i < prog->patnpar; i++) {
2368 if (parsfound & (1 << i)) {
2369 matcharr[i] = metafy(*sp, *ep - *sp, META_DUP);
2371 * mbegin and mend give indexes into the string
2372 * in the standard notation, i.e. respecting
2373 * KSHARRAYS, and with the end index giving
2374 * the last character, not one beyond.
2375 * For example, foo=foo; [[ $foo = (f)oo ]] gives
2376 * (without KSHARRAYS) indexes 1 and 1, which
2377 * corresponds to indexing as ${foo[1,1]}.
2379 sprintf(numbuf, "%ld",
2380 (long)(CHARSUB(patinstart, *sp) +
2381 patoffset +
2382 !isset(KSHARRAYS)));
2383 mbeginarr[i] = ztrdup(numbuf);
2384 sprintf(numbuf, "%ld",
2385 (long)(CHARSUB(patinstart, *ep) +
2386 patoffset +
2387 !isset(KSHARRAYS) - 1));
2388 mendarr[i] = ztrdup(numbuf);
2389 } else {
2390 /* Pattern wasn't set: either it was in an
2391 * unmatched branch, or a hashed parenthesis
2392 * that didn't match at all.
2394 matcharr[i] = ztrdup("");
2395 mbeginarr[i] = ztrdup("-1");
2396 mendarr[i] = ztrdup("-1");
2398 sp++;
2399 ep++;
2401 setaparam("match", matcharr);
2402 setaparam("mbegin", mbeginarr);
2403 setaparam("mend", mendarr);
2406 if (!nump && endp) {
2408 * We just need the overall end position.
2410 *endp = CHARSUB(patinstart, patinput) + patoffset;
2413 ret = 1;
2414 } else
2415 ret = 0;
2417 restore_queue_signals(q);
2419 if (tryalloced)
2420 zfree(tryalloced, unmetalen + unmetalenp);
2422 return ret;
2427 * Return length of previous succesful match. This is
2428 * in metafied bytes, i.e. includes a count of Meta characters.
2429 * Unusual and futile attempt at modular encapsulation.
2432 /**/
2434 patmatchlen(void)
2436 return patinlen;
/*
 * Match literal characters with case insensitivity test: the first
 * comes from the input string, the second the current pattern.
 *
 * The character classification helpers map onto the wide-character
 * functions when MULTIBYTE_SUPPORT is defined and onto the single-byte
 * ones otherwise.
 *
 * CHARMATCH() consults patglobflags:
 *  - GF_IGNCASE:   upper case on either side is folded to lower case
 *                  before comparing;
 *  - GF_LCMATCHUC: a lower case pattern character also matches the
 *                  corresponding upper case input character.
 *
 * Both arguments are evaluated more than once, so they must be free of
 * side effects; use CHARMATCH_EXPR() when the input side is an
 * expression that should only be evaluated once.
 */
#ifdef MULTIBYTE_SUPPORT
#define ISUPPER(x)	iswupper(x)
#define ISLOWER(x)	iswlower(x)
#define TOUPPER(x)	towupper(x)
#define TOLOWER(x)	towlower(x)
#define ISDIGIT(x)	iswdigit(x)
#else
#define ISUPPER(x)	isupper(x)
#define ISLOWER(x)	islower(x)
#define TOUPPER(x)	toupper(x)
#define TOLOWER(x)	tolower(x)
#define ISDIGIT(x)	idigit(x)
#endif
#define CHARMATCH(chin, chpa) ((chin) == (chpa) || \
	((patglobflags & GF_IGNCASE) ? \
	 ((ISUPPER(chin) ? TOLOWER(chin) : (chin)) == \
	  (ISUPPER(chpa) ? TOLOWER(chpa) : (chpa))) : \
	 (patglobflags & GF_LCMATCHUC) ? \
	 (ISLOWER(chpa) && TOUPPER(chpa) == (chin)) : 0))

/*
 * The same but caching an expression from the first argument,
 * Requires local charmatch_cache definition.
 */
#define CHARMATCH_EXPR(expr, chpa) \
	(charmatch_cache = (expr), CHARMATCH(charmatch_cache, chpa))
/*
 * exactpos is used to remember how far down an exact string we have
 * matched, if we are doing approximation and can therefore redo from
 * the same point; we never need to otherwise.
 *
 * exactend is a pointer to the end of the string, which isn't
 * null-terminated.
 */
static char *exactpos, *exactend;
2481 * Main matching routine.
2483 * Testing the tail end of a match is usually done by recursion, but
2484 * we try to eliminate that in favour of looping for simple cases.
2487 /**/
2488 static int
2489 patmatch(Upat prog)
2491 /* Current and next nodes */
2492 Upat scan = prog, next, opnd;
2493 char *start, *save, *chrop, *chrend, *compend;
2494 int savglobflags, op, no, min, fail = 0, saverrsfound;
2495 zrange_t from, to, comp;
2496 patint_t nextch;
2498 while (scan && !errflag) {
2499 next = PATNEXT(scan);
2501 if (!globdots && P_NOTDOT(scan) && patinput == patinstart &&
2502 patinput < patinend && *patinput == '.')
2503 return 0;
2505 switch (P_OP(scan)) {
2506 case P_ANY:
2507 if (patinput == patinend)
2508 fail = 1;
2509 else
2510 CHARINC(patinput, patinend);
2511 break;
2512 case P_EXACTLY:
2514 * acts as nothing if *chrop is null: this is used by
2515 * approx code.
2517 if (exactpos) {
2518 chrop = exactpos;
2519 chrend = exactend;
2520 } else {
2521 chrop = P_LS_STR(scan);
2522 chrend = chrop + P_LS_LEN(scan);
2524 exactpos = NULL;
2525 while (chrop < chrend && patinput < patinend) {
2526 char *savpatinput = patinput;
2527 char *savchrop = chrop;
2528 int badin = 0, badpa = 0;
2530 * Care with character matching:
2531 * We do need to convert the character to wide
2532 * representation if possible, because we may need
2533 * to do case transformation. However, we should
2534 * be careful in case one, but not the other, wasn't
2535 * representable in the current locale---in that
2536 * case they don't match even if the returned
2537 * values (one properly converted, one raw) are
2538 * the same.
2540 patint_t chin = CHARREFINC(patinput, patinend, &badin);
2541 patint_t chpa = CHARREFINC(chrop, chrend, &badpa);
2542 if (!CHARMATCH(chin, chpa) || badin != badpa) {
2543 fail = 1;
2544 patinput = savpatinput;
2545 chrop = savchrop;
2546 break;
2549 if (chrop < chrend) {
2550 exactpos = chrop;
2551 exactend = chrend;
2552 fail = 1;
2554 break;
2555 case P_ANYOF:
2556 case P_ANYBUT:
2557 if (patinput == patinend)
2558 fail = 1;
2559 else {
2560 #ifdef MULTIBYTE_SUPPORT
2561 wchar_t cr = CHARREF(patinput, patinend);
2562 char *scanop = (char *)P_OPERAND(scan);
2563 if (patglobflags & GF_MULTIBYTE) {
2564 if (mb_patmatchrange(scanop, cr, NULL, NULL) ^
2565 (P_OP(scan) == P_ANYOF))
2566 fail = 1;
2567 else
2568 CHARINC(patinput, patinend);
2569 } else if (patmatchrange(scanop, (int)cr, NULL, NULL) ^
2570 (P_OP(scan) == P_ANYOF))
2571 fail = 1;
2572 else
2573 CHARINC(patinput, patinend);
2574 #else
2575 if (patmatchrange((char *)P_OPERAND(scan),
2576 CHARREF(patinput, patinend), NULL, NULL) ^
2577 (P_OP(scan) == P_ANYOF))
2578 fail = 1;
2579 else
2580 CHARINC(patinput, patinend);
2581 #endif
2583 break;
2584 case P_NUMRNG:
2585 case P_NUMFROM:
2586 case P_NUMTO:
2588 * To do this properly, we really have to treat numbers as
2589 * closures: that's so things like <1-1000>33 will
2590 * match 633 (they didn't up to 3.1.6). To avoid making this
2591 * too inefficient, we see if there's an exact match next:
2592 * if there is, and it's not a digit, we return 1 after
2593 * the first attempt.
2595 op = P_OP(scan);
2596 start = (char *)P_OPERAND(scan);
2597 from = to = 0;
2598 if (op != P_NUMTO) {
2599 #ifdef ZSH_64_BIT_TYPE
2600 /* We can't rely on pointer alignment being good enough. */
2601 memcpy((char *)&from, start, sizeof(zrange_t));
2602 #else
2603 from = *((zrange_t *) start);
2604 #endif
2605 start += sizeof(zrange_t);
2607 if (op != P_NUMFROM) {
2608 #ifdef ZSH_64_BIT_TYPE
2609 memcpy((char *)&to, start, sizeof(zrange_t));
2610 #else
2611 to = *((zrange_t *) start);
2612 #endif
2614 start = compend = patinput;
2615 comp = 0;
2616 while (patinput < patinend && idigit(*patinput)) {
2617 if (comp)
2618 comp *= 10;
2619 comp += *patinput - '0';
2620 patinput++;
2621 compend++;
2623 if (comp & ((zrange_t)1 << (sizeof(comp)*8 -
2624 #ifdef ZRANGE_T_IS_SIGNED
2626 #else
2628 #endif
2629 ))) {
2631 * Out of range (allowing for signedness, which
2632 * we need if we are using zlongs).
2633 * This is as far as we can go.
2634 * If we're doing a range "from", skip all the
2635 * remaining numbers. Otherwise, we can't
2636 * match beyond the previous point anyway.
2637 * Leave the pointer to the last calculated
2638 * position (compend) where it was before.
2640 if (op == P_NUMFROM) {
2641 while (patinput < patinend && idigit(*patinput))
2642 patinput++;
2646 save = patinput;
2647 no = 0;
2648 while (patinput > start) {
2649 /* if already too small, no power on earth can save it */
2650 if (comp < from && patinput <= compend)
2651 break;
2652 if ((op == P_NUMFROM || comp <= to) && patmatch(next)) {
2653 return 1;
2655 if (!no && P_OP(next) == P_EXACTLY &&
2656 (!P_LS_LEN(next) ||
2657 !idigit(STOUC(*P_LS_STR(next)))) &&
2658 !(patglobflags & 0xff))
2659 return 0;
2660 patinput = --save;
2661 no++;
2663 * With a range start and an unrepresentable test
2664 * number, we just back down the test string without
2665 * changing the number until we get to a representable
2666 * one.
2668 if (patinput < compend)
2669 comp /= 10;
2671 patinput = start;
2672 fail = 1;
2673 break;
2674 case P_NUMANY:
2675 /* This is <->: any old set of digits, don't bother comparing */
2676 start = patinput;
2677 while (patinput < patinend && idigit(*patinput))
2678 patinput++;
2679 save = patinput;
2680 no = 0;
2681 while (patinput > start) {
2682 if (patmatch(next))
2683 return 1;
2684 if (!no && P_OP(next) == P_EXACTLY &&
2685 (!P_LS_LEN(next) ||
2686 !idigit(*P_LS_STR(next))) &&
2687 !(patglobflags & 0xff))
2688 return 0;
2689 patinput = --save;
2690 no++;
2692 patinput = start;
2693 fail = 1;
2694 break;
2695 case P_NOTHING:
2696 break;
2697 case P_BACK:
2698 break;
2699 case P_GFLAGS:
2700 patglobflags = P_OPERAND(scan)->l;
2701 break;
2702 case P_OPEN:
2703 case P_OPEN+1:
2704 case P_OPEN+2:
2705 case P_OPEN+3:
2706 case P_OPEN+4:
2707 case P_OPEN+5:
2708 case P_OPEN+6:
2709 case P_OPEN+7:
2710 case P_OPEN+8:
2711 case P_OPEN+9:
2712 no = P_OP(scan) - P_OPEN;
2713 save = patinput;
2715 if (patmatch(next)) {
2717 * Don't set patbeginp if some later invocation of
2718 * the same parentheses already has.
2720 if (no && !(parsfound & (1 << (no - 1)))) {
2721 patbeginp[no-1] = save;
2722 parsfound |= 1 << (no - 1);
2724 return 1;
2725 } else
2726 return 0;
2727 break;
2728 case P_CLOSE:
2729 case P_CLOSE+1:
2730 case P_CLOSE+2:
2731 case P_CLOSE+3:
2732 case P_CLOSE+4:
2733 case P_CLOSE+5:
2734 case P_CLOSE+6:
2735 case P_CLOSE+7:
2736 case P_CLOSE+8:
2737 case P_CLOSE+9:
2738 no = P_OP(scan) - P_CLOSE;
2739 save = patinput;
2741 if (patmatch(next)) {
2742 if (no && !(parsfound & (1 << (no + 15)))) {
2743 patendp[no-1] = save;
2744 parsfound |= 1 << (no + 15);
2746 return 1;
2747 } else
2748 return 0;
2749 break;
2750 case P_EXCSYNC:
2751 /* See the P_EXCLUDE code below for where syncptr comes from */
2753 unsigned char *syncptr;
2754 Upat after;
2755 after = P_OPERAND(scan);
2756 DPUTS(!P_ISEXCLUDE(after),
2757 "BUG: EXCSYNC not followed by EXCLUDE.");
2758 DPUTS(!P_OPERAND(after)->p,
2759 "BUG: EXCSYNC not handled by EXCLUDE");
2760 syncptr = P_OPERAND(after)->p + (patinput - patinstart);
2762 * If we already matched from here, this time we fail.
2763 * See WBRANCH code for story about error count.
2765 if (*syncptr && errsfound + 1 >= *syncptr)
2766 return 0;
2768 * Else record that we (possibly) matched this time.
2769 * No harm if we don't: then the previous test will just
2770 * short cut the attempted match that is bound to fail.
2771 * We never try to exclude something that has already
2772 * failed anyway.
2774 *syncptr = errsfound + 1;
2776 break;
2777 case P_EXCEND:
2779 * This is followed by a P_EXCSYNC, but only in the P_EXCLUDE
2780 * branch. Actually, we don't bother following it: all we
2781 * need to know is that we successfully matched so far up
2782 * to the end of the asserted pattern; the endpoint
2783 * in the target string is nulled out.
2785 if (!(fail = (patinput < patinend)))
2786 return 1;
2787 break;
2788 case P_BRANCH:
2789 case P_WBRANCH:
2790 /* P_EXCLUDE shouldn't occur without a P_BRANCH */
2791 if (!P_ISBRANCH(next)) {
2792 /* no choice, avoid recursion */
2793 DPUTS(P_OP(scan) == P_WBRANCH,
2794 "BUG: WBRANCH with no alternative.");
2795 next = P_OPERAND(scan);
2796 } else {
2797 do {
2798 save = patinput;
2799 savglobflags = patglobflags;
2800 saverrsfound = errsfound;
2801 if (P_ISEXCLUDE(next)) {
2803 * The strategy is to test the asserted pattern,
2804 * recording via P_EXCSYNC how far the part to
2805 * be excluded matched. We then set the
2806 * length of the test string to that
2807 * point and see if the exclusion as far as
2808 * P_EXCEND also matches that string.
2809 * We need to keep testing the asserted pattern
2810 * by backtracking, since the first attempt
2811 * may be excluded while a later attempt may not.
2812 * For this we keep a pointer just after
2813 * the P_EXCLUDE which is tested by the P_EXCSYNC
2814 * to see if we matched there last time, in which
2815 * case we fail. If there is nothing to backtrack
2816 * over, that doesn't matter: we should fail anyway.
2817 * The pointer also tells us where the asserted
2818 * pattern matched for use by the exclusion.
2820 * It's hard to allocate space for this
2821 * beforehand since we may need to do it
2822 * recursively.
2824 * P.S. in case you were wondering, this code
2825 * is horrible.
2827 Upat syncstrp;
2828 char *origpatinend;
2829 unsigned char *oldsyncstr;
2830 char *matchpt = NULL;
2831 int ret, savglobdots, matchederrs = 0;
2832 int savparsfound = parsfound;
2833 DPUTS(P_OP(scan) == P_WBRANCH,
2834 "BUG: excluded WBRANCH");
2835 syncstrp = P_OPERAND(next);
2837 * Unlike WBRANCH, each test at the same exclude
2838 * sync point (due to an external loop) is separate,
2839 * i.e testing (foo~bar)# is no different from
2840 * (foo~bar)(foo~bar)... from the exclusion point
2841 * of view, so we use a different sync string.
2843 oldsyncstr = syncstrp->p;
2844 syncstrp->p = (unsigned char *)
2845 zshcalloc((patinend - patinstart) + 1);
2846 origpatinend = patinend;
2847 while ((ret = patmatch(P_OPERAND(scan)))) {
2848 unsigned char *syncpt;
2849 char *savpatinstart;
2850 int savforce = forceerrs;
2851 int savpatflags = patflags, synclen;
2852 forceerrs = -1;
2853 savglobdots = globdots;
2854 matchederrs = errsfound;
2855 matchpt = patinput; /* may not be end */
2856 globdots = 1; /* OK to match . first */
2857 /* Find the point where the scan
2858 * matched the part to be excluded: because
2859 * of backtracking, the one
2860 * most recently matched will be the first.
2861 * (Luckily, backtracking is done after all
2862 * possibilities for approximation have been
2863 * checked.)
2865 for (syncpt = syncstrp->p; !*syncpt; syncpt++)
2867 synclen = syncpt - syncstrp->p;
2868 if (patinstart + synclen != patinend) {
2870 * Temporarily mark the string as
2871 * ending at this point.
2873 DPUTS(patinstart + synclen > matchpt,
2874 "BUG: EXCSYNC failed");
2876 patinend = patinstart + synclen;
2878 * If this isn't really the end of the string,
2879 * remember this for the (#e) assertion.
2881 patflags |= PAT_NOTEND;
2883 savpatinstart = patinstart;
2884 next = PATNEXT(scan);
2885 while (next && P_ISEXCLUDE(next)) {
2886 patinput = save;
2888 * turn off approximations in exclusions:
2889 * note we keep remaining patglobflags
2890 * set by asserted branch (or previous
2891 * excluded branches, for consistency).
2893 patglobflags &= ~0xff;
2894 errsfound = 0;
2895 opnd = P_OPERAND(next) + 1;
2896 if (P_OP(next) == P_EXCLUDP && patinpath) {
2898 * Top level exclusion with a file,
2899 * applies to whole path so add the
2900 * segments already matched.
2901 * We copied these in front of the
2902 * test pattern, so patinend doesn't
2903 * need moving.
2905 DPUTS(patinput != patinstart,
2906 "BUG: not at start excluding path");
2907 patinput = patinstart = patinpath;
2909 if (patmatch(opnd)) {
2910 ret = 0;
2912 * Another subtlety: if we exclude the
2913 * match, any parentheses just found
2914 * become invalidated.
2916 parsfound = savparsfound;
2918 if (patinpath) {
2919 patinput = savpatinstart +
2920 (patinput - patinstart);
2921 patinstart = savpatinstart;
2923 if (!ret)
2924 break;
2925 next = PATNEXT(next);
2928 * Restore original end position.
2930 patinend = origpatinend;
2931 patflags = savpatflags;
2932 globdots = savglobdots;
2933 forceerrs = savforce;
2934 if (ret)
2935 break;
2936 patinput = save;
2937 patglobflags = savglobflags;
2938 errsfound = saverrsfound;
2940 zfree((char *)syncstrp->p,
2941 (patinend - patinstart) + 1);
2942 syncstrp->p = oldsyncstr;
2943 if (ret) {
2944 patinput = matchpt;
2945 errsfound = matchederrs;
2946 return 1;
2948 while ((scan = PATNEXT(scan)) &&
2949 P_ISEXCLUDE(scan))
2951 } else {
2952 int ret = 1, pfree = 0;
2953 Upat ptrp = NULL;
2954 unsigned char *ptr;
2955 if (P_OP(scan) == P_WBRANCH) {
2957 * This is where we make sure that we are not
2958 * repeatedly matching zero-length strings in
2959 * a closure, which would cause an infinite loop,
2960 * and also remove exponential behaviour in
2961 * backtracking nested closures.
2962 * The P_WBRANCH operator leaves a space for a
2963 * uchar *, initialized to NULL, which is
2964 * turned into a string the same length as the
2965 * target string. Every time we match from a
2966 * particular point in the target string, we
2967 * stick a 1 at the corresponding point here.
2968 * If we come round to the same branch again, and
2969 * there is already a 1, then the test fails.
2971 opnd = P_OPERAND(scan);
2972 ptrp = opnd++;
2973 if (!ptrp->p) {
2974 ptrp->p = (unsigned char *)
2975 zshcalloc((patinend - patinstart) + 1);
2976 pfree = 1;
2978 ptr = ptrp->p + (patinput - patinstart);
2981 * Without approximation, this is just a
2982 * single bit test. With approximation, we
2983 * need to know how many errors there were
2984 * last time we made the test. If errsfound
2985 * is now smaller than it was, hence we can
2986 * make more approximations in the remaining
2987 * code, we continue with the test.
2988 * (This is why the max number of errors is
2989 * 254, not 255.)
2991 if (*ptr && errsfound + 1 >= *ptr)
2992 ret = 0;
2993 *ptr = errsfound + 1;
2994 } else
2995 opnd = P_OPERAND(scan);
2996 if (ret)
2997 ret = patmatch(opnd);
2998 if (pfree) {
2999 zfree((char *)ptrp->p,
3000 (patinend - patinstart) + 1);
3001 ptrp->p = NULL;
3003 if (ret)
3004 return 1;
3005 scan = PATNEXT(scan);
3007 patinput = save;
3008 patglobflags = savglobflags;
3009 errsfound = saverrsfound;
3010 DPUTS(P_OP(scan) == P_WBRANCH,
3011 "BUG: WBRANCH not first choice.");
3012 next = PATNEXT(scan);
3013 } while (scan && P_ISBRANCH(scan));
3014 return 0;
3016 break;
3017 case P_STAR:
3018 /* Handle specially for speed, although really P_ONEHASH+P_ANY */
3019 while (P_OP(next) == P_STAR) {
3021 * If there's another * following we can optimise it
3022 * out. Chains of *'s can give pathologically bad
3023 * performance.
3025 scan = next;
3026 next = PATNEXT(scan);
3028 /*FALLTHROUGH*/
3029 case P_ONEHASH:
3030 case P_TWOHASH:
3032 * This is just simple cases, matching one character.
3033 * With approximations, we still handle * this way, since
3034 * no approximation is ever necessary, but other closures
3035 * are handled by the more complicated branching method
3037 op = P_OP(scan);
3038 /* Note that no counts possibly metafied characters */
3039 start = patinput;
3041 char *lastcharstart;
3043 * Array to record the start of characters for
3044 * backtracking.
3046 VARARR(char, charstart, patinend-patinput);
3047 memset(charstart, 0, patinend-patinput);
3049 if (op == P_STAR) {
3050 for (no = 0; patinput < patinend;
3051 CHARINC(patinput, patinend))
3053 charstart[patinput-start] = 1;
3054 no++;
3056 /* simple optimization for reasonably common case */
3057 if (P_OP(next) == P_END)
3058 return 1;
3059 } else {
3060 DPUTS(patglobflags & 0xff,
3061 "BUG: wrong backtracking with approximation.");
3062 if (!globdots && P_NOTDOT(P_OPERAND(scan)) &&
3063 patinput == patinstart && patinput < patinend &&
3064 CHARREF(patinput, patinend) == ZWC('.'))
3065 return 0;
3066 no = patrepeat(P_OPERAND(scan), charstart);
3068 min = (op == P_TWOHASH) ? 1 : 0;
3070 * Lookahead to avoid useless matches. This is not possible
3071 * with approximation.
3073 if (P_OP(next) == P_EXACTLY && P_LS_LEN(next) &&
3074 !(patglobflags & 0xff)) {
3075 char *nextop = P_LS_STR(next);
3076 #ifdef MULTIBYTE_SUPPORT
3077 /* else second argument of CHARREF isn't used */
3078 int nextlen = P_LS_LEN(next);
3079 #endif
3081 * If that P_EXACTLY is last (common in simple patterns,
3082 * such as *.c), then it can be only be matched at one
3083 * point in the test string, so record that.
3085 if (P_OP(PATNEXT(next)) == P_END &&
3086 !(patflags & PAT_NOANCH)) {
3087 int ptlen = patinend - patinput;
3088 int lenmatch = patinend -
3089 (min ? CHARNEXT(start, patinend) : start);
3090 /* Are we in the right range? */
3091 if (P_LS_LEN(next) > lenmatch ||
3092 P_LS_LEN(next) < ptlen)
3093 return 0;
3094 /* Yes, just position appropriately and test. */
3095 patinput += ptlen - P_LS_LEN(next);
3097 * Here we will need to be careful that patinput is not
3098 * in the middle of a multibyte character.
3100 /* Continue loop with P_EXACTLY test. */
3101 break;
3103 nextch = CHARREF(nextop, nextop + nextlen);
3104 } else
3105 nextch = PEOF;
3106 savglobflags = patglobflags;
3107 saverrsfound = errsfound;
3108 lastcharstart = charstart + (patinput - start);
3109 if (no >= min) {
3110 for (;;) {
3111 patint_t charmatch_cache;
3112 if (nextch == PEOF ||
3113 (patinput < patinend &&
3114 CHARMATCH_EXPR(CHARREF(patinput, patinend),
3115 nextch))) {
3116 if (patmatch(next))
3117 return 1;
3119 if (--no < min)
3120 break;
3121 /* find start of previous full character */
3122 while (!*--lastcharstart)
3123 DPUTS(lastcharstart < charstart,
3124 "lastcharstart invalid");
3125 patinput = start + (lastcharstart-charstart);
3126 patglobflags = savglobflags;
3127 errsfound = saverrsfound;
3132 * As with branches, the patmatch(next) stuff for *
3133 * handles approximation, so we don't need to try
3134 * anything here.
3136 return 0;
3137 case P_ISSTART:
3138 if (patinput != patinstart || (patflags & PAT_NOTSTART))
3139 fail = 1;
3140 break;
3141 case P_ISEND:
3142 if (patinput < patinend || (patflags & PAT_NOTEND))
3143 fail = 1;
3144 break;
3145 case P_COUNTSTART:
3148 * Save and restore the current count and the
3149 * start pointer in case the pattern has been
3150 * executed by a previous repetition of a
3151 * closure.
3153 long *curptr = &P_OPERAND(scan)[P_CT_CURRENT].l;
3154 long savecount = *curptr;
3155 unsigned char *saveptr = scan[P_CT_PTR].p;
3156 int ret;
3158 *curptr = 0L;
3159 ret = patmatch(P_OPERAND(scan));
3160 *curptr = savecount;
3161 scan[P_CT_PTR].p = saveptr;
3162 return ret;
3164 case P_COUNT:
3166 /* (#cN,M): execution is relatively straightforward */
3167 long cur = scan[P_CT_CURRENT].l;
3168 long min = scan[P_CT_MIN].l;
3169 long max = scan[P_CT_MAX].l;
3171 if (cur && cur >= min &&
3172 (unsigned char *)patinput == scan[P_CT_PTR].p) {
3174 * Not at the first attempt to match so
3175 * the previous attempt managed zero length.
3176 * We can do this indefinitely so there's
3177 * no point in going on. Simply try to
3178 * match the remainder of the pattern.
3180 return patmatch(next);
3182 scan[P_CT_PTR].p = (unsigned char *)patinput;
3184 if (max < 0 || cur < max) {
3185 char *patinput_thistime = patinput;
3186 scan[P_CT_CURRENT].l = cur + 1;
3187 if (patmatch(scan + P_CT_OPERAND))
3188 return 1;
3189 patinput = patinput_thistime;
3191 if (cur < min)
3192 return 0;
3193 return patmatch(next);
3195 case P_END:
3196 if (!(fail = (patinput < patinend && !(patflags & PAT_NOANCH))))
3197 return 1;
3198 break;
3199 #ifdef DEBUG
3200 default:
3201 dputs("BUG: bad operand in patmatch.");
3202 return 0;
3203 break;
3204 #endif
3207 if (fail) {
3208 if (errsfound < (patglobflags & 0xff) &&
3209 (forceerrs == -1 || errsfound < forceerrs)) {
3211 * Approximation code. There are four possibilities
3213 * 1. omit character from input string
3214 * 2. transpose characters in input and pattern strings
3215 * 3. omit character in both input and pattern strings
3216 * 4. omit character from pattern string.
3218 * which we try in that order.
3220 * Of these, 2, 3 and 4 require an exact match string
3221 * (P_EXACTLY) while 1, 2 and 3 require that we not
3222 * have reached the end of the input string.
3224 * Note in each case after making the approximation we
3225 * need to retry the *same* pattern; this is what
3226 * requires exactpos, a slightly doleful way of
3227 * communicating with the exact character matcher.
3229 char *savexact = exactpos;
3230 save = patinput;
3231 savglobflags = patglobflags;
3232 saverrsfound = ++errsfound;
3233 fail = 0;
3235 DPUTS(P_OP(scan) != P_EXACTLY && exactpos,
3236 "BUG: non-exact match has set exactpos");
3238 /* Try omitting a character from the input string */
3239 if (patinput < patinend) {
3240 CHARINC(patinput, patinend);
3241 /* If we are not on an exact match, then this is
3242 * our last gasp effort, so we can optimize out
3243 * the recursive call.
3245 if (P_OP(scan) != P_EXACTLY)
3246 continue;
3247 if (patmatch(scan))
3248 return 1;
3251 if (P_OP(scan) == P_EXACTLY) {
3252 char *nextexact = savexact;
3253 DPUTS(!savexact,
3254 "BUG: exact match has not set exactpos");
3255 CHARINC(nextexact, exactend);
3257 if (save < patinend) {
3258 char *nextin = save;
3259 CHARINC(nextin, patinend);
3260 patglobflags = savglobflags;
3261 errsfound = saverrsfound;
3262 exactpos = savexact;
3265 * Try swapping two characters in patinput and
3266 * exactpos
3268 if (save < patinend && nextin < patinend &&
3269 nextexact < exactend) {
3270 patint_t cin0 = CHARREF(save, patinend);
3271 patint_t cpa0 = CHARREF(exactpos, exactend);
3272 patint_t cin1 = CHARREF(nextin, patinend);
3273 patint_t cpa1 = CHARREF(nextexact, exactend);
3275 if (CHARMATCH(cin0, cpa1) &&
3276 CHARMATCH(cin1, cpa0)) {
3277 patinput = nextin;
3278 CHARINC(patinput, patinend);
3279 exactpos = nextexact;
3280 CHARINC(exactpos, exactend);
3281 if (patmatch(scan))
3282 return 1;
3284 patglobflags = savglobflags;
3285 errsfound = saverrsfound;
3290 * Try moving up both strings.
3292 patinput = nextin;
3293 exactpos = nextexact;
3294 if (patmatch(scan))
3295 return 1;
3297 patinput = save;
3298 patglobflags = savglobflags;
3299 errsfound = saverrsfound;
3300 exactpos = savexact;
3303 DPUTS(exactpos == exactend, "approximating too far");
3305 * Try moving up the exact match pattern.
3306 * This must be the last attempt, so just loop
3307 * instead of calling recursively.
3309 CHARINC(exactpos, exactend);
3310 continue;
3313 exactpos = NULL;
3314 return 0;
3317 scan = next;
3320 return 0;
3324 /**/
3325 #ifdef MULTIBYTE_SUPPORT
3328 * See if character ch matches a pattern range specification.
3329 * The null-terminated specification is in range; the test
3330 * character is in ch.
3332 * indptr is used by completion matching, which is why this
3333 * function is exported. If indptr is not NULL we set *indptr
3334 * to the index of the character in the range string, adjusted
3335 * in the case of "A-B" ranges such that A would count as its
3336 * normal index (say IA), B would count as IA + (B-A), and any
3337 * character within the range as appropriate. We're not strictly
3338 * guaranteed this fits within a wint_t, but if this is Unicode
3339 * in 32 bits we have a fair amount of distance left over.
3341 * mtp is used in the same circumstances. *mtp returns the match type:
3342 * 0 for a standard character, else the PP_ index. It's not
3343 * useful if the match failed.
3346 /**/
3347 mod_export int
3348 mb_patmatchrange(char *range, wchar_t ch, wint_t *indptr, int *mtp)
3350 wchar_t r1, r2;
3352 if (indptr)
3353 *indptr = 0;
3355 * Careful here: unlike other strings, range is a NULL-terminated,
3356 * metafied string, because we need to treat the Posix and hyphenated
3357 * ranges specially.
3359 while (*range) {
3360 if (imeta(STOUC(*range))) {
3361 int swtype = STOUC(*range++) - STOUC(Meta);
3362 if (mtp)
3363 *mtp = swtype;
3364 switch (swtype) {
3365 case 0:
3366 /* ordinary metafied character */
3367 range--;
3368 if (metacharinc(&range) == ch)
3369 return 1;
3370 break;
3371 case PP_ALPHA:
3372 if (iswalpha(ch))
3373 return 1;
3374 break;
3375 case PP_ALNUM:
3376 if (iswalnum(ch))
3377 return 1;
3378 break;
3379 case PP_ASCII:
3380 if ((ch & ~0x7f) == 0)
3381 return 1;
3382 break;
3383 case PP_BLANK:
3384 if (ch == L' ' || ch == L'\t')
3385 return 1;
3386 break;
3387 case PP_CNTRL:
3388 if (iswcntrl(ch))
3389 return 1;
3390 break;
3391 case PP_DIGIT:
3392 if (iswdigit(ch))
3393 return 1;
3394 break;
3395 case PP_GRAPH:
3396 if (iswgraph(ch))
3397 return 1;
3398 break;
3399 case PP_LOWER:
3400 if (iswlower(ch))
3401 return 1;
3402 break;
3403 case PP_PRINT:
3404 if (iswprint(ch))
3405 return 1;
3406 break;
3407 case PP_PUNCT:
3408 if (iswpunct(ch))
3409 return 1;
3410 break;
3411 case PP_SPACE:
3412 if (iswspace(ch))
3413 return 1;
3414 break;
3415 case PP_UPPER:
3416 if (iswupper(ch))
3417 return 1;
3418 break;
3419 case PP_XDIGIT:
3420 if (iswxdigit(ch))
3421 return 1;
3422 break;
3423 case PP_IDENT:
3424 if (wcsitype(ch, IIDENT))
3425 return 1;
3426 break;
3427 case PP_IFS:
3428 if (wcsitype(ch, ISEP))
3429 return 1;
3430 break;
3431 case PP_IFSSPACE:
3432 /* must be ASCII space character */
3433 if (ch < 128 && iwsep((int)ch))
3434 return 1;
3435 break;
3436 case PP_WORD:
3437 if (wcsitype(ch, IWORD))
3438 return 1;
3439 break;
3440 case PP_RANGE:
3441 r1 = metacharinc(&range);
3442 r2 = metacharinc(&range);
3443 if (r1 <= ch && ch <= r2) {
3444 if (indptr)
3445 *indptr += ch - r1;
3446 return 1;
3448 /* Careful not to screw up counting with bogus range */
3449 if (indptr && r1 < r2) {
3451 * This gets incremented again below to get
3452 * us past the range end. This is correct.
3454 *indptr += r2 - r1;
3456 break;
3457 case PP_UNKWN:
3458 DPUTS(1, "BUG: unknown posix range passed through.\n");
3459 break;
3460 default:
3461 DPUTS(1, "BUG: unknown metacharacter in range.");
3462 break;
3464 } else if (metacharinc(&range) == ch) {
3465 if (mtp)
3466 *mtp = 0;
3467 return 1;
3469 if (indptr)
3470 (*indptr)++;
3472 return 0;
3477 * This is effectively the reverse of mb_patmatchrange().
3478 * Given a range descriptor of the same form, and an index into it,
3479 * try to determine the character that is matched. If the index
3480 * points to a [:...:] generic style match, set chr to WEOF and
3481 * return the type in mtp instead. Return 1 if successful, 0 if
3482 * there was no corresponding index. Note all pointer arguments
3483 * must be non-null.
3486 /**/
3487 mod_export int
3488 mb_patmatchindex(char *range, wint_t ind, wint_t *chr, int *mtp)
3490 wchar_t r1, r2, rchr;
3491 wint_t rdiff;
3493 *chr = WEOF;
3494 *mtp = 0;
3496 while (*range) {
3497 if (imeta(STOUC(*range))) {
3498 int swtype = STOUC(*range++) - STOUC(Meta);
3499 switch (swtype) {
3500 case 0:
3501 range--;
3502 rchr = metacharinc(&range);
3503 if (!ind) {
3504 *chr = (wint_t) rchr;
3505 return 1;
3507 break;
3509 case PP_ALPHA:
3510 case PP_ALNUM:
3511 case PP_ASCII:
3512 case PP_BLANK:
3513 case PP_CNTRL:
3514 case PP_DIGIT:
3515 case PP_GRAPH:
3516 case PP_LOWER:
3517 case PP_PRINT:
3518 case PP_PUNCT:
3519 case PP_SPACE:
3520 case PP_UPPER:
3521 case PP_XDIGIT:
3522 case PP_IDENT:
3523 case PP_IFS:
3524 case PP_IFSSPACE:
3525 case PP_WORD:
3526 if (!ind) {
3527 *mtp = swtype;
3528 return 1;
3530 break;
3532 case PP_RANGE:
3533 r1 = metacharinc(&range);
3534 r2 = metacharinc(&range);
3535 rdiff = (wint_t)r2 - (wint_t)r1;
3536 if (rdiff >= ind) {
3537 *chr = (wint_t)r1 + ind;
3538 return 1;
3540 /* note the extra decrement to ind below */
3541 ind -= rdiff;
3542 break;
3543 case PP_UNKWN:
3544 DPUTS(1, "BUG: unknown posix range passed through.\n");
3545 break;
3546 default:
3547 DPUTS(1, "BUG: unknown metacharacter in range.");
3548 break;
3550 } else {
3551 rchr = metacharinc(&range);
3552 if (!ind) {
3553 *chr = (wint_t)rchr;
3554 return 1;
3557 if (!ind--)
3558 break;
3561 /* No corresponding index. */
3562 return 0;
3565 /**/
3566 #endif /* MULTIBYTE_SUPPORT */
3569 * Identical function to mb_patmatchrange() above for single-byte
3570 * characters.
3573 /**/
3574 mod_export int
3575 patmatchrange(char *range, int ch, int *indptr, int *mtp)
3577 int r1, r2;
3579 if (indptr)
3580 *indptr = 0;
3582 * Careful here: unlike other strings, range is a NULL-terminated,
3583 * metafied string, because we need to treat the Posix and hyphenated
3584 * ranges specially.
3586 for (; *range; range++) {
3587 if (imeta(STOUC(*range))) {
3588 int swtype = STOUC(*range) - STOUC(Meta);
3589 if (mtp)
3590 *mtp = swtype;
3591 switch (swtype) {
3592 case 0:
3593 if (STOUC(*++range ^ 32) == ch)
3594 return 1;
3595 break;
3596 case PP_ALPHA:
3597 if (isalpha(ch))
3598 return 1;
3599 break;
3600 case PP_ALNUM:
3601 if (isalnum(ch))
3602 return 1;
3603 break;
3604 case PP_ASCII:
3605 if ((ch & ~0x7f) == 0)
3606 return 1;
3607 break;
3608 case PP_BLANK:
3609 if (ch == ' ' || ch == '\t')
3610 return 1;
3611 break;
3612 case PP_CNTRL:
3613 if (iscntrl(ch))
3614 return 1;
3615 break;
3616 case PP_DIGIT:
3617 if (isdigit(ch))
3618 return 1;
3619 break;
3620 case PP_GRAPH:
3621 if (isgraph(ch))
3622 return 1;
3623 break;
3624 case PP_LOWER:
3625 if (islower(ch))
3626 return 1;
3627 break;
3628 case PP_PRINT:
3629 if (ZISPRINT(ch))
3630 return 1;
3631 break;
3632 case PP_PUNCT:
3633 if (ispunct(ch))
3634 return 1;
3635 break;
3636 case PP_SPACE:
3637 if (isspace(ch))
3638 return 1;
3639 break;
3640 case PP_UPPER:
3641 if (isupper(ch))
3642 return 1;
3643 break;
3644 case PP_XDIGIT:
3645 if (isxdigit(ch))
3646 return 1;
3647 break;
3648 case PP_IDENT:
3649 if (iident(ch))
3650 return 1;
3651 break;
3652 case PP_IFS:
3653 if (isep(ch))
3654 return 1;
3655 break;
3656 case PP_IFSSPACE:
3657 if (iwsep(ch))
3658 return 1;
3659 break;
3660 case PP_WORD:
3661 if (iword(ch))
3662 return 1;
3663 break;
3664 case PP_RANGE:
3665 range++;
3666 r1 = STOUC(UNMETA(range));
3667 METACHARINC(range);
3668 r2 = STOUC(UNMETA(range));
3669 if (*range == Meta)
3670 range++;
3671 if (r1 <= ch && ch <= r2) {
3672 if (indptr)
3673 *indptr += ch - r1;
3674 return 1;
3676 if (indptr && r1 < r2)
3677 *indptr += r2 - r1;
3678 break;
3679 case PP_UNKWN:
3680 DPUTS(1, "BUG: unknown posix range passed through.\n");
3681 break;
3682 default:
3683 DPUTS(1, "BUG: unknown metacharacter in range.");
3684 break;
3686 } else if (STOUC(*range) == ch) {
3687 if (mtp)
3688 *mtp = 0;
3689 return 1;
3691 if (indptr)
3692 (*indptr)++;
3694 return 0;
3698 /**/
3699 #ifndef MULTIBYTE_SUPPORT
3702 * Identical function to mb_patmatchindex() above for single-byte
3703 * characters. Here -1 represents a character that needs a special type.
3705 * Unlike patmatchrange, we only need this in ZLE, which always
3706 * uses MULTIBYTE_SUPPORT if compiled in; hence we don't use
3707 * this function in that case.
3710 /**/
3711 mod_export int
3712 patmatchindex(char *range, int ind, int *chr, int *mtp)
3714 int r1, r2, rdiff, rchr;
3716 *chr = -1;
3717 *mtp = 0;
3719 for (; *range; range++) {
3720 if (imeta(STOUC(*range))) {
3721 int swtype = STOUC(*range) - STOUC(Meta);
3722 switch (swtype) {
3723 case 0:
3724 /* ordinary metafied character */
3725 rchr = STOUC(*++range) ^ 32;
3726 if (!ind) {
3727 *chr = rchr;
3728 return 1;
3730 break;
3732 case PP_ALPHA:
3733 case PP_ALNUM:
3734 case PP_ASCII:
3735 case PP_BLANK:
3736 case PP_CNTRL:
3737 case PP_DIGIT:
3738 case PP_GRAPH:
3739 case PP_LOWER:
3740 case PP_PRINT:
3741 case PP_PUNCT:
3742 case PP_SPACE:
3743 case PP_UPPER:
3744 case PP_XDIGIT:
3745 case PP_IDENT:
3746 case PP_IFS:
3747 case PP_IFSSPACE:
3748 case PP_WORD:
3749 if (!ind) {
3750 *mtp = swtype;
3751 return 1;
3753 break;
3755 case PP_RANGE:
3756 range++;
3757 r1 = STOUC(UNMETA(range));
3758 METACHARINC(range);
3759 r2 = STOUC(UNMETA(range));
3760 if (*range == Meta)
3761 range++;
3762 rdiff = r2 - r1;
3763 if (rdiff >= ind) {
3764 *chr = r1 + ind;
3765 return 1;
3767 /* note the extra decrement to ind below */
3768 ind -= rdiff;
3769 break;
3770 case PP_UNKWN:
3771 DPUTS(1, "BUG: unknown posix range passed through.\n");
3772 break;
3773 default:
3774 DPUTS(1, "BUG: unknown metacharacter in range.");
3775 break;
3777 } else {
3778 if (!ind) {
3779 *chr = STOUC(*range);
3780 return 1;
3783 if (!ind--)
3784 break;
3787 /* No corresponding index. */
3788 return 0;
3791 /**/
3792 #endif /* MULTIBYTE_SUPPORT */
3795 * Repeatedly match something simple and say how many times.
3796 * charstart is an array parallel to that starting at patinput
3797 * and records the start of (possibly multibyte) characters
3798 * to aid in later backtracking.
3801 /**/
3802 static int patrepeat(Upat p, char *charstart)
3804 int count = 0;
3805 patint_t tch, charmatch_cache;
3806 char *scan, *opnd;
3808 scan = patinput;
3809 opnd = (char *)P_OPERAND(p);
3811 switch(P_OP(p)) {
3812 #ifdef DEBUG
3813 case P_ANY:
3814 dputs("BUG: ?# did not get optimized to *");
3815 return 0;
3816 break;
3817 #endif
3818 case P_EXACTLY:
3819 DPUTS(P_LS_LEN(p) != 1, "closure following more than one character");
3820 tch = CHARREF(P_LS_STR(p), P_LS_STR(p) + P_LS_LEN(p));
3821 while (scan < patinend &&
3822 CHARMATCH_EXPR(CHARREF(scan, patinend), tch)) {
3823 charstart[scan-patinput] = 1;
3824 count++;
3825 CHARINC(scan, patinend);
3827 break;
3828 case P_ANYOF:
3829 case P_ANYBUT:
3830 while (scan < patinend) {
3831 #ifdef MULTIBYTE_SUPPORT
3832 wchar_t cr = CHARREF(scan, patinend);
3833 if (patglobflags & GF_MULTIBYTE) {
3834 if (mb_patmatchrange(opnd, cr, NULL, NULL) ^
3835 (P_OP(p) == P_ANYOF))
3836 break;
3837 } else if (patmatchrange(opnd, (int)cr, NULL, NULL) ^
3838 (P_OP(p) == P_ANYOF))
3839 break;
3840 #else
3841 if (patmatchrange(opnd, CHARREF(scan, patinend), NULL, NULL) ^
3842 (P_OP(p) == P_ANYOF))
3843 break;
3844 #endif
3845 charstart[scan-patinput] = 1;
3846 count++;
3847 CHARINC(scan, patinend);
3849 break;
3850 #ifdef DEBUG
3851 default:
3852 dputs("BUG: something very strange is happening in patrepeat");
3853 return 0;
3854 break;
3855 #endif
3858 patinput = scan;
3859 return count;
3862 /* Free a patprog. */
3864 /**/
3865 mod_export void
3866 freepatprog(Patprog prog)
3868 if (prog && prog != dummy_patprog1 && prog != dummy_patprog2)
3869 zfree(prog, prog->size);
3872 /* Disable or reenable a pattern character */
3874 /**/
3876 pat_enables(const char *cmd, char **patp, int enable)
3878 int ret = 0;
3879 const char **stringp;
3880 char *disp;
3882 if (!*patp) {
3883 int done = 0;
3884 for (stringp = zpc_strings, disp = zpc_disables;
3885 stringp < zpc_strings + ZPC_COUNT;
3886 stringp++, disp++) {
3887 if (!*stringp)
3888 continue;
3889 if (enable ? *disp : !*disp)
3890 continue;
3891 if (done)
3892 putc(' ', stdout);
3893 printf("'%s'", *stringp);
3894 done = 1;
3896 if (done)
3897 putc('\n', stdout);
3898 return 0;
3901 for (; *patp; patp++) {
3902 for (stringp = zpc_strings, disp = zpc_disables;
3903 stringp < zpc_strings + ZPC_COUNT;
3904 stringp++, disp++) {
3905 if (*stringp && !strcmp(*stringp, *patp)) {
3906 *disp = (char)!enable;
3907 break;
3910 if (stringp == zpc_strings + ZPC_COUNT) {
3911 zerrnam(cmd, "invalid pattern: %s", *patp);
3912 ret = 1;
3916 return ret;
3920 * Save the current state of pattern disables, returning the saved value.
3923 /**/
3924 unsigned int
3925 savepatterndisables(void)
3927 unsigned int disables, bit;
3928 char *disp;
3930 disables = 0;
3931 for (bit = 1, disp = zpc_disables;
3932 disp < zpc_disables + ZPC_COUNT;
3933 bit <<= 1, disp++) {
3934 if (*disp)
3935 disables |= bit;
3937 return disables;
3941 * Function scope saving pattern enables.
3944 /**/
3945 void
3946 startpatternscope(void)
3948 Zpc_disables_save newdis;
3950 newdis = (Zpc_disables_save)zalloc(sizeof(*newdis));
3951 newdis->next = zpc_disables_stack;
3952 newdis->disables = savepatterndisables();
3954 zpc_disables_stack = newdis;
3958 * Restore completely the state of pattern disables.
3961 /**/
3962 void
3963 restorepatterndisables(unsigned int disables)
3965 char *disp;
3966 unsigned int bit;
3968 for (bit = 1, disp = zpc_disables;
3969 disp < zpc_disables + ZPC_COUNT;
3970 bit <<= 1, disp++) {
3971 if (disables & bit)
3972 *disp = 1;
3973 else
3974 *disp = 0;
3979 * Function scope to restore pattern enables if localpatterns is turned on.
3982 /**/
3983 void
3984 endpatternscope(void)
3986 Zpc_disables_save olddis;
3988 olddis = zpc_disables_stack;
3989 zpc_disables_stack = olddis->next;
3991 if (isset(LOCALPATTERNS))
3992 restorepatterndisables(olddis->disables);
3994 zfree(olddis, sizeof(*olddis));
3997 /* Reinitialise pattern disables */
3999 /**/
4000 void
4001 clearpatterndisables(void)
4003 memset(zpc_disables, 0, ZPC_COUNT);
4007 /* Check to see if str is eligible for filename generation. */
4009 /**/
4010 mod_export int
4011 haswilds(char *str)
4013 char *start;
4015 /* `[' and `]' are legal even if bad patterns are usually not. */
4016 if ((*str == Inbrack || *str == Outbrack) && !str[1])
4017 return 0;
4019 /* If % is immediately followed by ?, then that ? is *
4020 * not treated as a wildcard. This is so you don't have *
4021 * to escape job references such as %?foo. */
4022 if (str[0] == '%' && str[1] == Quest)
4023 str[1] = '?';
4026 * Note that at this point zpc_special has not been set up.
4028 start = str;
4029 for (; *str; str++) {
4030 switch (*str) {
4031 case Inpar:
4032 if ((!isset(SHGLOB) && !zpc_disables[ZPC_INPAR]) ||
4033 (str > start && isset(KSHGLOB) &&
4034 ((str[-1] == Quest && !zpc_disables[ZPC_KSH_QUEST]) ||
4035 (str[-1] == Star && !zpc_disables[ZPC_KSH_STAR]) ||
4036 (str[-1] == '+' && !zpc_disables[ZPC_KSH_PLUS]) ||
4037 (str[-1] == '!' && !zpc_disables[ZPC_KSH_BANG]) ||
4038 (str[-1] == '@' && !zpc_disables[ZPC_KSH_AT]))))
4039 return 1;
4040 break;
4042 case Bar:
4043 if (!zpc_disables[ZPC_BAR])
4044 return 1;
4045 break;
4047 case Star:
4048 if (!zpc_disables[ZPC_STAR])
4049 return 1;
4050 break;
4052 case Inbrack:
4053 if (!zpc_disables[ZPC_INBRACK])
4054 return 1;
4055 break;
4057 case Inang:
4058 if (!zpc_disables[ZPC_INANG])
4059 return 1;
4060 break;
4062 case Quest:
4063 if (!zpc_disables[ZPC_QUEST])
4064 return 1;
4065 break;
4067 case Pound:
4068 if (isset(EXTENDEDGLOB) && !zpc_disables[ZPC_HASH])
4069 return 1;
4070 break;
4072 case Hat:
4073 if (isset(EXTENDEDGLOB) && !zpc_disables[ZPC_HAT])
4074 return 1;
4075 break;
4078 return 0;