2 * pattern.c - pattern matching
4 * This file is part of zsh, the Z shell.
6 * Copyright (c) 1999 Peter Stephenson
9 * Permission is hereby granted, without written agreement and without
10 * license or royalty fees, to use, copy, modify, and distribute this
11 * software and to distribute modified versions of this software for any
12 * purpose, provided that the above copyright notice and the following
13 * two paragraphs appear in all copies of this software.
15 * In no event shall Peter Stephenson or the Zsh Development Group be liable
16 * to any party for direct, indirect, special, incidental, or consequential
17 * damages arising out of the use of this software and its documentation,
18 * even if Peter Stephenson and the Zsh Development Group have been advised of
19 * the possibility of such damage.
21 * Peter Stephenson and the Zsh Development Group specifically disclaim any
22 * warranties, including, but not limited to, the implied warranties of
23 * merchantability and fitness for a particular purpose. The software
24 * provided hereunder is on an "as is" basis, and Peter Stephenson and the
25 * Zsh Development Group have no obligation to provide maintenance,
26 * support, updates, enhancements, or modifications.
28 * Pattern matching code derived from the regexp library by Henry
29 * Spencer, which has the following copyright.
31 * Copyright (c) 1986 by University of Toronto.
32 * Written by Henry Spencer. Not derived from licensed software.
34 * Permission is granted to anyone to use this software for any
35 * purpose on any computer system, and to redistribute it freely,
36 * subject to the following restrictions:
38 * 1. The author is not responsible for the consequences of use of
39 * this software, no matter how awful, even if they arise
42 * 2. The origin of this software must not be misrepresented, either
43 * by explicit claim or by omission.
45 * 3. Altered versions must be plainly marked as such, and must not
46 * be misrepresented as being the original software.
48 * Eagle-eyed readers will notice this is an altered version. Incredibly
49 * sharp-eyed readers might even find bits that weren't altered.
52 * And I experienced a sense that, like certain regular
53 * expressions, seemed to match the day from beginning to end, so
54 * that I did not need to identify the parenthesised subexpression
55 * that told of dawn, nor the group of characters that indicated
56 * the moment when my grandfather returned home with news of
57 * Swann's departure for Paris; and the whole length of the month
58 * of May, as if matched by a closure, fitted into the buffer of my
59 * life with no sign of overflowing, turning the days, like a
60 * procession of insects that could consist of this or that
61 * species, into a random and unstructured repetition of different
62 * sequences, anchored from the first day of the month to the last
63 * in the same fashion as the weeks when I knew I would not see
64 * Gilberte and would search in vain for any occurrences of the
65 * string in the avenue of hawthorns by Tansonville, without my
66 * having to delimit explicitly the start or finish of the pattern.
68 * M. Proust, "In Search of Lost Files",
69 * bk I, "The Walk by Bourne's Place".
75 * The following union is used mostly for alignment purposes.
76 * Normal nodes are longs, while certain nodes take a char * as an argument;
77 * here we make sure that they both work out to the same length.
78 * The compiled regexp we construct consists of upats stuck together;
79 * anything else to be added (strings, numbers) is stuck after and
80 * then aligned to a whole number of upat units.
82 * Note also that offsets are in terms of the sizes of these things.
89 typedef union upat
*Upat
;
91 #include "pattern.pro"
93 /* Number of active parenthesized expressions allowed in backreferencing */
96 /* definition number opnd? meaning */
97 #define P_END 0x00 /* no End of program. */
98 #define P_EXCSYNC 0x01 /* no Test if following exclude already failed */
99 #define P_EXCEND 0x02 /* no Test if exclude matched orig branch */
100 #define P_BACK 0x03 /* no Match "", "next" ptr points backward. */
101 #define P_EXACTLY 0x04 /* lstr Match this string. */
102 #define P_NOTHING 0x05 /* no Match empty string. */
103 #define P_ONEHASH 0x06 /* node Match this (simple) thing 0 or more times. */
104 #define P_TWOHASH 0x07 /* node Match this (simple) thing 1 or more times. */
105 #define P_GFLAGS 0x08 /* long Match nothing and set globbing flags */
106 #define P_ISSTART 0x09 /* no Match start of string. */
107 #define P_ISEND 0x0a /* no Match end of string. */
108 #define P_COUNTSTART 0x0b /* no Initialise P_COUNT */
109 #define P_COUNT 0x0c /* 3*long uc* node Match a number of repetitions */
110 /* numbered so we can test bit 5 for a branch */
111 #define P_BRANCH 0x20 /* node Match this alternative, or the next... */
112 #define P_WBRANCH 0x21 /* uc* node P_BRANCH, but match at least 1 char */
113 /* excludes are also branches, but have bit 4 set, too */
114 #define P_EXCLUDE 0x30 /* uc* node Exclude this from previous branch */
115 #define P_EXCLUDP 0x31 /* uc* node Exclude, using full file path so far */
116 /* numbered so we can test bit 6 so as not to match initial '.' */
117 #define P_ANY 0x40 /* no Match any one character. */
118 #define P_ANYOF 0x41 /* str Match any character in this string. */
119 #define P_ANYBUT 0x42 /* str Match any character not in this string. */
120 #define P_STAR 0x43 /* no Match any set of characters. */
121 #define P_NUMRNG 0x44 /* zr, zr Match a numeric range. */
122 #define P_NUMFROM 0x45 /* zr Match a number >= X */
123 #define P_NUMTO 0x46 /* zr Match a number <= X */
124 #define P_NUMANY 0x47 /* no Match any set of decimal digits */
125 /* spaces left for P_OPEN+n,... for backreferences */
126 #define P_OPEN 0x80 /* no Mark this point in input as start of n. */
127 #define P_CLOSE 0x90 /* no Analogous to OPEN. */
130 * zr the range type zrange_t: may be zlong or unsigned long
132 * uc* a pointer to unsigned char, used at run time and initialised
134 * str null-terminated, metafied string
135 * lstr length as long then string, not null-terminated, unmetafied.
140 * P_WBRANCH: This works like a branch and is used in complex closures,
141 * to ensure we don't succeed on a zero-length match of the pattern,
142 * since that would cause an infinite loop. We do this by recording
143 * the positions where we have already tried to match. See the
144 * P_WBRANCH test in patmatch().
146 * P_ANYOF, P_ANYBUT: the operand is a null terminated
147 * string. Normal characters match as expected. Characters
148 * in the range Meta+PP_ALPHA..Meta+PP_UNKNWN do the appropriate
149 * Posix range tests. This relies on imeta returning true for these
150 * characters. We treat unknown POSIX ranges as never matching.
151 * PP_RANGE means the next two (possibly metafied) characters form
152 * the limits of a range to test; it's too much like hard work to
155 * P_EXCLUDE, P_EXCSYNC, P_EXCEND: P_EXCLUDE appears in the pattern like
156 * P_BRANCH, but applies to the immediately preceding branch. The code in
157 * the corresponding branch is followed by a P_EXCSYNC, which simply
158 * acts as a marker that a P_EXCLUDE comes next. The P_EXCLUDE
159 * has a pointer to char embedded in it, which works
160 * like P_WBRANCH: if we get to the P_EXCSYNC, and we already matched
161 * up to the same position, fail. Thus we are forced to backtrack
162 * on closures in the P_BRANCH if the first attempt was excluded.
163 * Corresponding to P_EXCSYNC in the original branch, there is a
164 * P_EXCEND in the exclusion. If we get to this point, and we did
165 * *not* match in the original branch, the exclusion itself fails,
166 * otherwise it succeeds since we know the tail already matches,
167 * so P_EXCEND is the end of the exclusion test.
168 * The whole sorry mess looks like this, where the upper lines
169 * show the linkage of the branches, and the lower shows the linkage
170 * of their pattern arguments.
172 * --------------------- ----------------------
174 * ( <BRANCH>:apat-><EXCSYNC> <EXCLUDE>:excpat-><EXCEND> ) tail
177 * --------------------------------------
179 * P_EXCLUDP: this behaves exactly like P_EXCLUDE, with the sole exception
180 * that we prepend the path so far to the exclude pattern. This is
181 * for top level file globs, e.g. ** / *.c~*foo.c
182 * ^ I had to leave this space
183 * P_NUM*: zr is a zlong if that is 64-bit, else an unsigned long.
185 * P_COUNTSTART, P_COUNT: a P_COUNTSTART flags the start of a quantified
186 * closure (#cN,M) and is used to initialise the count. Executing
187 * the pattern leads back to the P_COUNT, while the next links of the
188 * P_COUNTSTART and P_COUNT lead to the tail of the pattern:
192 * <COUNTSTART><COUNT>pattern<BACK> tail
194 * ------------------------
/*
 * Accessors for a compiled pattern node p (a pointer to union upat).
 * The node's long member l packs the opcode into the low 8 bits and the
 * offset to the next node into the bits above them.
 */
/* Opcode of the node: the low 8 bits of l. */
197 #define P_OP(p) ((p)->l & 0xff)
/* Offset to the next node: l with the opcode byte shifted out. */
198 #define P_NEXT(p) ((p)->l >> 8)
/* The operand, if any, starts in the unit immediately after the node. */
199 #define P_OPERAND(p) ((p) + 1)
/* Non-zero if bit 0x20 is set, i.e. the opcode is one of the branch types. */
200 #define P_ISBRANCH(p) ((p)->l & 0x20)
/* True if both 0x20 and 0x10 are set, i.e. the opcode is an exclusion. */
201 #define P_ISEXCLUDE(p) (((p)->l & 0x30) == 0x30)
/* Non-zero if bit 0x40 is set: opcodes that must not match an initial '.'. */
202 #define P_NOTDOT(p) ((p)->l & 0x40)
204 /* Specific to lstr type, i.e. P_EXACTLY. */
205 #define P_LS_LEN(p) ((p)[1].l) /* can be used as lvalue */
206 #define P_LS_STR(p) ((char *)((p) + 2))
208 /* Specific to P_COUNT: arguments as offset in nodes from operator */
209 #define P_CT_CURRENT (1) /* Current count */
210 #define P_CT_MIN (2) /* Minimum count */
211 #define P_CT_MAX (3) /* Maximum count, -1 for none */
212 #define P_CT_PTR (4) /* Pointer to last match start */
213 #define P_CT_OPERAND (5) /* Operand of P_COUNT */
215 /* Flags needed when pattern is executed */
216 #define P_SIMPLE 0x01 /* Simple enough to be #/## operand. */
217 #define P_HSTART 0x02 /* Starts with # or ##'d pattern. */
218 #define P_PURESTR 0x04 /* Can be matched with a strcmp */
220 #if defined(ZSH_64_BIT_TYPE) || defined(LONG_IS_64_BIT)
221 typedef zlong zrange_t
;
222 #define ZRANGE_T_IS_SIGNED (1)
224 typedef unsigned long zrange_t
;
228 * Array of characters corresponding to zpc_chars enum, which it must match.
230 static const char zpc_chars
[ZPC_COUNT
] = {
231 '/', '\0', Bar
, Outpar
, Tilde
, Inpar
, Quest
, Star
, Inbrack
, Inang
,
232 Hat
, Pound
, Bnullkeep
, Quest
, Star
, '+', '!', '@'
236 * Corresponding strings used in enable/disable -p.
237 * NULL means no way of turning this on or off.
240 mod_export
const char *zpc_strings
[ZPC_COUNT
] = {
241 NULL
, NULL
, "|", NULL
, "~", "(", "?", "*", "[", "<",
242 "^", "#", NULL
, "?(", "*(", "+(", "!(", "@("
246 * Corresponding array of pattern disables as set by the user
247 * using "disable -p".
250 mod_export
char zpc_disables
[ZPC_COUNT
];
253 * Stack of saved (compressed) zpc_disables for function scope.
256 static struct zpc_disables_save
*zpc_disables_stack
;
259 * Characters which terminate a simple string (ZPC_COUNT) or
260 * an entire pattern segment (the first ZPC_SEG_COUNT).
261 * Each entry is either the corresponding character in zpc_chars
262 * or Marker which is guaranteed not to match a character in a
263 * pattern we are compiling.
265 * The complete list indicates characters that are special, so e.g.
266 * (testchar == special[ZPC_TILDE]) succeeds only if testchar is a Tilde
267 * *and* Tilde is currently special.
271 char zpc_special
[ZPC_COUNT
];
273 /* Default size for pattern buffer */
274 #define P_DEF_ALLOC 256
276 /* Flags used in compilation */
277 static char *patstart
, *patparse
; /* input pointers */
278 static int patnpar
; /* () count */
279 static char *patcode
; /* point of code emission */
280 static long patsize
; /* size of code */
281 static char *patout
; /* start of code emission string */
282 static long patalloc
; /* size allocated for same */
284 /* Flags used in both compilation and execution */
285 static int patflags
; /* flags passed down to patcompile */
286 static int patglobflags
; /* globbing flags & approx */
289 * Increment pointer to metafied multibyte string.
291 #ifdef MULTIBYTE_SUPPORT
292 typedef wint_t patint_t
;
296 #define METACHARINC(x) ((void)metacharinc(&x))
299 * TODO: the shiftstate isn't well handled; we don't guarantee
300 * to maintain it properly between characters. If we don't
301 * need it we should use mbtowc() instead.
303 static mbstate_t shiftstate
;
306 * Multibyte version: it's (almost) as easy to return the
307 * value as not, so do so since we sometimes need it.
310 metacharinc(char **x
)
314 size_t ret
= MB_INVALID
;
318 * Cheat if the top bit isn't set. This is second-guessing
319 * the library, but we know for sure that if the character
320 * set doesn't have the property that all bytes with the 8th
321 * bit clear are single characters then we are stuffed.
323 if (!(patglobflags
& GF_MULTIBYTE
) || !(STOUC(*inptr
) & 0x80))
326 inchar
= ztokens
[*inptr
++ - Pound
];
327 else if (*inptr
== Meta
) {
329 inchar
= *inptr
++ ^ 32;
334 return (wchar_t)STOUC(inchar
);
339 inchar
= ztokens
[*inptr
++ - Pound
];
340 else if (*inptr
== Meta
) {
342 inchar
= *inptr
++ ^ 32;
346 ret
= mbrtowc(&wc
, &inchar
, 1, &shiftstate
);
348 if (ret
== MB_INVALID
)
350 if (ret
== MB_INCOMPLETE
)
356 /* Error. Treat as single byte. */
357 /* Reset the shift state for next time. */
358 memset(&shiftstate
, 0, sizeof(shiftstate
));
359 return (wchar_t) STOUC(*(*x
)++);
363 typedef int patint_t
;
/*
 * Advance x past one metafied character: two bytes when x points at
 * Meta, otherwise one.  x appears more than once in the expansion, so
 * the argument should be a plain lvalue without side effects.
 */
367 #define METACHARINC(x) ((void)((x) += (*(x) == Meta) ? 2 : 1))
371 * Return unmetafied char from string (x is any char *).
372 * Used with MULTIBYTE_SUPPORT if the GF_MULTIBYTE is not
375 #define UNMETA(x) (*(x) == Meta ? (x)[1] ^ 32 : *(x))
377 /* Add n more characters, ensuring there is enough space. */
386 patadd(char *add
, int ch
, long n
, int paflags
)
388 /* Make sure everything gets aligned unless we get PA_NOALIGN. */
389 long newpatsize
= patsize
+ n
;
390 if (!(paflags
& PA_NOALIGN
))
391 newpatsize
= (newpatsize
+ sizeof(union upat
) - 1) &
392 ~(sizeof(union upat
) - 1);
393 if (patalloc
< newpatsize
) {
395 2*(newpatsize
> patalloc
? newpatsize
: patalloc
);
396 patout
= (char *)zrealloc((char *)patout
, newpatalloc
);
397 patcode
= patout
+ patsize
;
398 patalloc
= newpatalloc
;
400 patsize
= newpatsize
;
402 if (paflags
& PA_UNMETA
) {
404 * Unmetafy and untokenize the string as we go.
405 * The Meta characters in add aren't counted in n.
409 *patcode
++ = ztokens
[*add
++ - Pound
];
410 else if (*add
== Meta
) {
412 *patcode
++ = *add
++ ^ 32;
423 patcode
= patout
+ patsize
;
427 /* operates on pointers to union upat, returns a pointer */
/*
 * Follow the "next" link of node p.  The offset from P_NEXT(p) is stored
 * in rn_offs, which must be a variable in scope wherever this macro is
 * used.  An offset of zero yields NULL; for a P_BACK node the offset is
 * subtracted from p (the link points backward), otherwise it is added.
 * p is evaluated more than once.
 */
428 #define PATNEXT(p) ((rn_offs = P_NEXT(p)) ? \
429 (P_OP(p) == P_BACK) ? \
430 ((p)-rn_offs) : ((p)+rn_offs) : NULL)
433 * Set up zpc_special with characters that end a string segment.
434 * "Marker" cannot occur in the pattern we are compiling so
435 * is used to mark "invalid".
438 patcompcharsset(void)
443 /* Initialise enabled special characters */
444 memcpy(zpc_special
, zpc_chars
, ZPC_COUNT
);
445 /* Apply user disables from disable -p */
446 for (i
= 0, spp
= zpc_special
, disp
= zpc_disables
;
448 i
++, spp
++, disp
++) {
453 if (!isset(EXTENDEDGLOB
)) {
454 /* Extended glob characters are not active */
455 zpc_special
[ZPC_TILDE
] = zpc_special
[ZPC_HAT
] =
456 zpc_special
[ZPC_HASH
] = Marker
;
458 if (!isset(KSHGLOB
)) {
460 * Ksh glob characters are not active.
461 * * and ? are shared with normal globbing, but for their
462 * use here we are looking for a following Inpar.
464 zpc_special
[ZPC_KSH_QUEST
] = zpc_special
[ZPC_KSH_STAR
] =
465 zpc_special
[ZPC_KSH_PLUS
] = zpc_special
[ZPC_KSH_BANG
] =
466 zpc_special
[ZPC_KSH_AT
] = Marker
;
469 * Note that if we are using KSHGLOB, then we test for a following
470 * Inpar, not zpc_special[ZPC_INPAR]: the latter makes an Inpar on
471 * its own active. The zpc_special[ZPC_KSH_*] followed by any old Inpar
472 * discriminate ksh globbing.
476 * Grouping and numeric ranges are not valid.
477 * We do allow alternation, however; it's needed for
478 * "case". This may not be entirely consistent.
480 * Don't disable Outpar: we may need to match the end of KSHGLOB
481 * parentheses and it would be difficult to tell them apart.
483 zpc_special
[ZPC_INPAR
] = zpc_special
[ZPC_INANG
] = Marker
;
487 /* Called before parsing a set of file matches to initialize flags */
497 patglobflags
= GF_IGNCASE
;
498 if (isset(MULTIBYTE
))
499 patglobflags
|= GF_MULTIBYTE
;
503 * Top level pattern compilation subroutine
504 * exp is a null-terminated, metafied string.
505 * inflags is an or of some PAT_* flags.
506 * endexp, if non-null, is set to a pointer to the end of the
507 * part of exp which was compiled. This is used when
508 * compiling patterns for directories which must be
509 * matched recursively.
514 patcompile(char *exp
, int inflags
, char **endexp
)
520 char *lng
, *strp
= NULL
;
523 startoff
= sizeof(struct patprog
);
524 /* Ensure alignment of start of program string */
525 startoff
= (startoff
+ sizeof(union upat
) - 1) & ~(sizeof(union upat
) - 1);
527 /* Allocate reasonable sized chunk if none, reduce size if too big */
528 if (patalloc
!= P_DEF_ALLOC
)
529 patout
= (char *)zrealloc(patout
, patalloc
= P_DEF_ALLOC
);
530 patcode
= patout
+ startoff
;
531 patsize
= patcode
- patout
;
532 patstart
= patparse
= exp
;
534 * Note global patnpar numbers parentheses 1..9, while patnpar
535 * in struct is actual count of parentheses.
538 patflags
= inflags
& ~(PAT_PURES
|PAT_HAS_EXCLUDP
);
540 if (!(patflags
& PAT_FILE
)) {
542 zpc_special
[ZPC_SLASH
] = Marker
;
543 remnulargs(patparse
);
544 if (isset(MULTIBYTE
))
545 patglobflags
= GF_MULTIBYTE
;
549 if (patflags
& PAT_LCMATCHUC
)
550 patglobflags
|= GF_LCMATCHUC
;
552 * Have to be set now, since they get updated during compilation.
554 ((Patprog
)patout
)->globflags
= patglobflags
;
556 if (!(patflags
& PAT_ANY
)) {
557 /* Look for a really pure string, with no tokens at all. */
558 if (!(patglobflags
& ~GF_MULTIBYTE
)
561 * If the OS treats files case-insensitively and we
562 * are looking at files, we don't need to use pattern
563 * matching to find the file.
565 || (!(patglobflags
& ~GF_IGNCASE
) && (patflags
& PAT_FILE
))
570 * Waah! I wish I understood this.
571 * Empty metafied strings have an initial Nularg.
572 * This never corresponds to a real character in
573 * a glob pattern or string, so skip it.
577 for (strp
= exp
; *strp
&&
578 (!(patflags
& PAT_FILE
) || *strp
!= '/') && !itok(*strp
);
582 if (!strp
|| (*strp
&& *strp
!= '/')) {
583 /* No, do normal compilation. */
585 if (patcompswitch(0, &flags
) == 0)
589 * Yes, copy the string, and skip compilation altogether.
590 * Null terminate for the benefit of globbing.
591 * Leave metafied both for globbing and for our own
596 patadd(exp
, 0, len
+ 1, 0);
597 patout
[startoff
+ len
] = '\0';
598 patflags
|= PAT_PURES
;
602 /* end of compilation: safe to use pointers */
604 p
->startoff
= startoff
;
605 p
->patstartch
= '\0';
606 p
->globend
= patglobflags
;
611 p
->patnpar
= patnpar
-1;
614 pscan
= (Upat
)(patout
+ startoff
);
616 if (!(patflags
& PAT_ANY
) && P_OP(PATNEXT(pscan
)) == P_END
) {
617 /* only one top level choice */
618 pscan
= P_OPERAND(pscan
);
620 if (flags
& P_PURESTR
) {
622 * The pattern can be matched with a simple strncmp/strcmp.
623 * Careful in case we've overwritten the node for the next ptr.
625 char *dst
= patout
+ startoff
;
627 p
->flags
|= PAT_PURES
;
628 for (; pscan
; pscan
= next
) {
629 next
= PATNEXT(pscan
);
630 if (P_OP(pscan
) == P_EXACTLY
) {
631 char *opnd
= P_LS_STR(pscan
), *mtest
;
632 long oplen
= P_LS_LEN(pscan
), ilen
;
635 * Unfortunately we unmetafied the string
636 * and we need to put any metacharacters
637 * back now we know it's a pure string.
638 * This shouldn't happen too often, it's
639 * just that there are some cases such
640 * as . and .. in files where we really
641 * need a pure string even if there are
642 * pattern characters flying around.
644 for (mtest
= opnd
, ilen
= oplen
; ilen
;
649 char *oldpatout
= patout
;
650 patadd(NULL
, 0, nmeta
, 0);
655 opnd
= patout
+ (opnd
- oldpatout
);
656 dst
= patout
+ startoff
;
662 *dst
++ = *opnd
++ ^ 32;
669 p
->size
= dst
- patout
;
670 /* patmlen is really strlen. We don't need a null. */
671 p
->patmlen
= p
->size
- startoff
;
673 /* starting point info */
674 if (P_OP(pscan
) == P_EXACTLY
&& !p
->globflags
&&
676 p
->patstartch
= *P_LS_STR(pscan
);
678 * Find the longest literal string in something expensive.
679 * This is itself not all that cheap if we have
680 * case-insensitive matching or approximation, so don't.
682 if ((flags
& P_HSTART
) && !p
->globflags
) {
685 for (; pscan
; pscan
= PATNEXT(pscan
))
686 if (P_OP(pscan
) == P_EXACTLY
&&
687 P_LS_LEN(pscan
) >= len
) {
688 lng
= P_LS_STR(pscan
);
689 len
= P_LS_LEN(pscan
);
692 p
->mustoff
= lng
- patout
;
701 * The pattern was compiled in a fixed buffer: unless told otherwise,
702 * we stick the compiled pattern on the heap. This is necessary
703 * for files where we will often be compiling multiple segments at once.
704 * But if we get the ZDUP flag we always put it in zalloc()ed memory.
706 if (patflags
& PAT_ZDUP
) {
707 Patprog newp
= (Patprog
)zalloc(patsize
);
708 memcpy((char *)newp
, (char *)p
, patsize
);
710 } else if (!(patflags
& PAT_STATIC
)) {
711 Patprog newp
= (Patprog
)zhalloc(patsize
);
712 memcpy((char *)newp
, (char *)p
, patsize
);
722 * Main body or parenthesized subexpression in pattern
723 * Parenthesis (and any ksh_glob gubbins) will have been removed.
728 patcompswitch(int paren
, int *flagp
)
730 long starter
, br
, ender
, excsync
= 0;
732 int flags
, gfchanged
= 0;
733 long savglobflags
= (long)patglobflags
;
738 if (paren
&& (patglobflags
& GF_BACKREF
) && patnpar
<= NSUBEXP
) {
740 * parenthesized: make an open node.
741 * We can only refer to the first nine parentheses.
742 * For any others, we just use P_OPEN on its own; there's
743 * no gain in arbitrarily limiting the number of parentheses.
746 starter
= patnode(P_OPEN
+ parno
);
750 br
= patnode(P_BRANCH
);
751 if (!patcompbranch(&flags
, paren
))
753 if (patglobflags
!= (int)savglobflags
)
756 pattail(starter
, br
);
760 *flagp
|= flags
& (P_HSTART
|P_PURESTR
);
762 while (*patparse
== zpc_chars
[ZPC_BAR
] ||
763 (*patparse
== zpc_special
[ZPC_TILDE
] &&
764 (patparse
[1] == '/' ||
765 !memchr(zpc_special
, patparse
[1], ZPC_SEG_COUNT
)))) {
766 int tilde
= *patparse
++ == zpc_special
[ZPC_TILDE
];
767 long gfnode
= 0, newbr
;
769 *flagp
&= ~P_PURESTR
;
773 /* excsync remembers the P_EXCSYNC node before a chain of
774 * exclusions: all point back to this. only the
775 * original (non-excluded) branch gets a trailing P_EXCSYNC.
778 excsync
= patnode(P_EXCSYNC
);
779 patoptail(br
, excsync
);
782 * By default, approximations are turned off in exclusions:
783 * we need to do this here as otherwise the code compiling
784 * the exclusion doesn't know if the flags have really
785 * changed if the error count gets restored.
787 patglobflags
&= ~0xff;
788 if (!(patflags
& PAT_FILET
) || paren
) {
789 br
= patnode(P_EXCLUDE
);
792 * At top level (paren == 0) in a file glob !(patflags
793 * &PAT_FILET) do the exclusion prepending the file path
794 * so far. We need to flag this to avoid unnecessarily
797 br
= patnode(P_EXCLUDP
);
798 patflags
|= PAT_HAS_EXCLUDP
;
801 patadd((char *)&up
, 0, sizeof(up
), 0);
802 /* / is not treated as special if we are at top level */
803 if (!paren
&& zpc_special
[ZPC_SLASH
] == '/') {
805 zpc_special
[ZPC_SLASH
] = Marker
;
809 br
= patnode(P_BRANCH
);
811 * The position of the following statements means globflags
812 * set in the main branch carry over to the exclusion.
816 if (((Patprog
)patout
)->globflags
) {
818 * If at top level, we need to reinitialize flags to zero,
819 * since (#i)foo|bar only applies to foo and we stuck
820 * the #i into the global flags.
821 * We could have done it so that they only got set in the
822 * first branch, but it's quite convenient having any
823 * global flags set in the header and not buried in the
824 * pattern. (Or maybe it isn't and we should
825 * forget this bit and always stick in an explicit GFLAGS
826 * statement instead of using the header.)
827 * Also, this can't happen for file globs where there are
830 * No gfchanged, as nothing to follow branch at top
834 gfnode
= patnode(P_GFLAGS
);
836 patadd((char *)&up
, 0, sizeof(union upat
), 0);
839 patglobflags
= (int)savglobflags
;
842 newbr
= patcompbranch(&flags
, paren
);
844 /* restore special treatment of / */
845 zpc_special
[ZPC_SLASH
] = '/';
850 pattail(gfnode
, newbr
);
851 if (!tilde
&& patglobflags
!= (int)savglobflags
)
853 pattail(starter
, br
);
855 patoptail(br
, patnode(P_EXCEND
));
856 *flagp
|= flags
& P_HSTART
;
860 * Make a closing node, hooking it to the end.
861 * Note that we can't optimize P_NOTHING out here, since another
862 * branch at that point would indicate the current choices continue,
865 ender
= patnode(paren
? parno
? P_CLOSE
+parno
: P_NOTHING
: P_END
);
866 pattail(starter
, ender
);
869 * Hook the tails of the branches to the closing node,
870 * except for exclusions which terminate where they are.
872 for (ptr
= (Upat
)patout
+ starter
; ptr
; ptr
= PATNEXT(ptr
))
873 if (!P_ISEXCLUDE(ptr
))
874 patoptail(ptr
-(Upat
)patout
, ender
);
876 /* check for proper termination */
877 if ((paren
&& *patparse
++ != Outpar
) ||
878 (!paren
&& *patparse
&&
879 !((patflags
& PAT_FILE
) && *patparse
== '/')))
882 if (paren
&& gfchanged
) {
884 * Restore old values of flags when leaving parentheses.
885 * gfchanged detects a change in any branch (except exclusions
886 * which are separate), since we need to emit this even if
887 * a later branch happened to put the flags back.
889 pattail(ender
, patnode(P_GFLAGS
));
890 patglobflags
= (int)savglobflags
;
891 patadd((char *)&savglobflags
, 0, sizeof(long), 0);
898 * Compile something ended by Bar, Outpar, Tilde, or end of string.
899 * Note the BRANCH or EXCLUDE tag must already have been emitted:
900 * this returns the position of the operand of that.
905 patcompbranch(int *flagp
, int paren
)
907 long chain
, latest
= 0, starter
;
913 while (!memchr(zpc_special
, *patparse
, ZPC_SEG_COUNT
) ||
914 (*patparse
== zpc_special
[ZPC_TILDE
] && patparse
[1] != '/' &&
915 memchr(zpc_special
, patparse
[1], ZPC_SEG_COUNT
))) {
916 if ((*patparse
== zpc_special
[ZPC_INPAR
] &&
917 patparse
[1] == zpc_special
[ZPC_HASH
]) ||
918 (*patparse
== zpc_special
[ZPC_KSH_AT
] && patparse
[1] == Inpar
&&
919 patparse
[2] == zpc_special
[ZPC_HASH
])) {
920 /* Globbing flags. */
921 char *pp1
= patparse
;
922 int oldglobflags
= patglobflags
, ignore
;
924 patparse
+= (*patparse
== '@') ? 3 : 2;
925 if (!patgetglobflags(&patparse
, &assert, &ignore
))
930 * Start/end assertion looking like flags, but
931 * actually handled as a normal node
933 latest
= patnode(assert);
936 if (pp1
== patstart
) {
937 /* Right at start of pattern, the simplest case.
938 * Put them into the flags and don't emit anything.
940 ((Patprog
)patout
)->globflags
= patglobflags
;
942 } else if (!*patparse
) {
943 /* Right at the end, so just leave the flags for
944 * the next Patprog in the chain to pick up.
949 * Otherwise, we have to stick them in as a pattern
952 if (oldglobflags
!= patglobflags
) {
955 latest
= patnode(P_GFLAGS
);
957 patadd((char *)&up
, 0, sizeof(union upat
), 0);
963 } else if (!*patparse
)
967 } else if (*patparse
== zpc_special
[ZPC_HAT
]) {
969 * ^pat: anything but pat. For proper backtracking,
970 * etc., we turn this into (*~pat), except without the
974 latest
= patcompnot(0, &flags
);
976 latest
= patcomppiece(&flags
, paren
);
981 if (!(flags
& P_PURESTR
))
982 *flagp
&= ~P_PURESTR
;
984 *flagp
|= flags
& P_HSTART
;
986 pattail(chain
, latest
);
989 /* check if there was nothing in the loop, i.e. () */
991 starter
= patnode(P_NOTHING
);
996 /* get glob flags, return 1 for success, 0 for failure */
1000 patgetglobflags(char **strp
, long *assertp
, int *ignore
)
1002 char *nptr
, *ptr
= *strp
;
1007 /* (#X): assumes we are still positioned on the first X */
1008 for (; *ptr
&& *ptr
!= Outpar
; ptr
++) {
1010 /* Glob qualifiers, ignored in pattern code */
1011 while (*ptr
&& *ptr
!= Outpar
)
1018 /* Approximate matching, max no. of errors follows */
1019 ret
= zstrtol(++ptr
, &nptr
, 10);
1021 * We can't have more than 254, because we need 255 to
1022 * mark 254 errors in wbranch and exclude sync strings
1023 * (hypothetically --- hope no-one tries it).
1025 if (ret
< 0 || ret
> 254 || ptr
== nptr
)
1027 patglobflags
= (patglobflags
& ~0xff) | (ret
& 0xff);
1032 /* Lowercase in pattern matches lower or upper in target */
1033 patglobflags
= (patglobflags
& ~GF_IGNCASE
) | GF_LCMATCHUC
;
1037 /* Fully case insensitive */
1038 patglobflags
= (patglobflags
& ~GF_LCMATCHUC
) | GF_IGNCASE
;
1042 /* Restore case sensitivity */
1043 patglobflags
&= ~(GF_LCMATCHUC
|GF_IGNCASE
);
1047 /* Make backreferences */
1048 patglobflags
|= GF_BACKREF
;
1052 /* Don't make backreferences */
1053 patglobflags
&= ~GF_BACKREF
;
1057 /* Make references to complete match */
1058 patglobflags
|= GF_MATCHREF
;
1063 patglobflags
&= ~GF_MATCHREF
;
1067 *assertp
= P_ISSTART
;
1075 patglobflags
|= GF_MULTIBYTE
;
1079 patglobflags
&= ~GF_MULTIBYTE
;
1089 /* Start/end assertions must appear on their own. */
1090 if (*assertp
&& (*strp
)[1] != Outpar
)
1097 static const char *colon_stuffs
[] = {
1098 "alpha", "alnum", "ascii", "blank", "cntrl", "digit", "graph",
1099 "lower", "print", "punct", "space", "upper", "xdigit", "IDENT",
1100 "IFS", "IFSSPACE", "WORD", NULL
1104 * Handle the guts of a [:stuff:] character class element.
1105 * start is the beginning of "stuff" and len is its length.
1106 * This code is exported for the benefit of completion matching.
1111 range_type(char *start
, int len
)
1115 for (csp
= colon_stuffs
; *csp
; csp
++) {
1116 if (strlen(*csp
) == len
&& !strncmp(start
, *csp
, len
))
1117 return (csp
- colon_stuffs
) + PP_FIRST
;
1125 * Convert the contents of a [...] or [^...] expression (just the
1126 * ... part) back into a string. This is used by compfiles -p/-P
1127 * for some reason. The compiled form (a metafied string) is
1128 * passed in rangestr.
1130 * If outstr is non-NULL the compiled form is placed there. It
1131 * must be sufficiently long. A terminating NUL is appended.
1133 * Return the length required, not including the terminating NUL.
1135 * TODO: this is non-multibyte for now. It will need to be defined
1136 * appropriately with MULTIBYTE_SUPPORT when the completion matching
1142 pattern_range_to_string(char *rangestr
, char *outstr
)
1147 if (imeta(STOUC(*rangestr
))) {
1148 int swtype
= STOUC(*rangestr
) - STOUC(Meta
);
1151 /* Ordinary metafied character */
1155 *outstr
++ = rangestr
[1] ^ 32;
1159 } else if (swtype
== PP_RANGE
) {
1163 for (i
= 0; i
< 2; i
++) {
1164 if (*rangestr
== Meta
) {
1167 *outstr
++ = rangestr
[1];
1173 *outstr
++ = *rangestr
;
1184 } else if (swtype
>= PP_FIRST
&& swtype
<= PP_LAST
) {
1185 /* [:stuff:]; we need to output [: and :] */
1186 const char *found
= colon_stuffs
[swtype
- PP_FIRST
];
1187 int newlen
= strlen(found
);
1189 strcpy(outstr
, "[:");
1191 memcpy(outstr
, found
, newlen
);
1193 strcpy(outstr
, ":]");
1199 /* shouldn't happen */
1200 DPUTS(1, "BUG: unknown PP_ code in pattern range");
1204 /* ordinary character, guaranteed no Meta handling needed */
1206 *outstr
++ = *rangestr
;
1218 * compile a chunk such as a literal string or a [...] followed
1219 * by a possible hash operator
1224 patcomppiece(int *flagp
, int paren
)
1226 long starter
= 0, next
, op
, opnd
;
1227 int flags
, flags2
, kshchar
, len
, ch
, patch
, nmeta
;
1230 char *nptr
, *str0
, *ptr
, *patprev
;
1235 str0
= patprev
= patparse
;
1238 * Check if we have a string. First, we need to make sure
1239 * the string doesn't introduce a ksh-like parenthesized expression.
1242 if (*patparse
&& patparse
[1] == Inpar
) {
1243 if (*patparse
== zpc_special
[ZPC_KSH_PLUS
])
1244 kshchar
= STOUC('+');
1245 else if (*patparse
== zpc_special
[ZPC_KSH_BANG
])
1246 kshchar
= STOUC('!');
1247 else if (*patparse
== zpc_special
[ZPC_KSH_AT
])
1248 kshchar
= STOUC('@');
1249 else if (*patparse
== zpc_special
[ZPC_KSH_STAR
])
1250 kshchar
= STOUC('*');
1251 else if (*patparse
== zpc_special
[ZPC_KSH_QUEST
])
1252 kshchar
= STOUC('?');
1256 * If '(' is disabled as a pattern char, allow ')' as
1257 * an ordinary string character if there are no parentheses to
1258 * close. Don't allow it otherwise, it changes the syntax.
1260 if (zpc_special
[ZPC_INPAR
] != Marker
|| *patparse
!= Outpar
||
1263 * End of string (or no string at all) if ksh-type parentheses,
1264 * or special character, unless that character is a tilde and
1265 * the character following is an end-of-segment character. Thus
1266 * tildes are not special if there is nothing following to
1269 * Don't look for X()-style kshglobs at this point; we've
1270 * checked above for the case with parentheses and we don't
1271 * want to match without parentheses.
1274 (memchr(zpc_special
, *patparse
, ZPC_NO_KSH_GLOB
) &&
1275 (*patparse
!= zpc_special
[ZPC_TILDE
] ||
1276 patparse
[1] == '/' ||
1277 !memchr(zpc_special
, patparse
[1], ZPC_SEG_COUNT
)))) {
1282 /* Remember the previous character for backtracking */
1284 METACHARINC(patparse
);
1287 if (patparse
> str0
) {
1288 long slen
= patparse
- str0
;
1291 /* Ordinary string: cancel kshchar lookahead */
1294 * Assume it matches a simple string until we find otherwise.
1297 DPUTS(patparse
== str0
, "BUG: matched nothing in patcomppiece.");
1298 /* more than one character matched? */
1299 morelen
= (patprev
> str0
);
1301 * If we have more than one character, a following hash
1302 * or (#c...) only applies to the last, so backtrack one character.
1304 if ((*patparse
== zpc_special
[ZPC_HASH
] ||
1305 (*patparse
== zpc_special
[ZPC_INPAR
] &&
1306 patparse
[1] == zpc_special
[ZPC_HASH
] &&
1307 patparse
[2] == 'c') ||
1308 (*patparse
== zpc_special
[ZPC_KSH_AT
] &&
1309 patparse
[1] == Inpar
&&
1310 patparse
[2] == zpc_special
[ZPC_HASH
] &&
1311 patparse
[3] == 'c')) && morelen
)
1314 * If len is 1, we can't have an active # following, so doesn't
1315 * matter that we don't make X in `XX#' simple.
1319 starter
= patnode(P_EXACTLY
);
1321 /* Get length of string without metafication. */
1323 /* inherited from domatch, but why, exactly? */
1324 if (*str0
== Nularg
)
1326 for (ptr
= str0
; ptr
< patparse
; ptr
++) {
1332 slen
= (patparse
- str0
) - nmeta
;
1333 /* First add length, which is a long */
1334 patadd((char *)&slen
, 0, sizeof(long), 0);
1336 * Then the string, not null terminated.
1337 * Unmetafy and untokenize; pass the final length,
1338 * which is what we need to allocate, i.e. not including
1339 * a count for each Meta in the string.
1341 patadd(str0
, 0, slen
, PA_UNMETA
);
1342 nptr
= P_LS_STR((Upat
)patout
+ starter
);
1344 * It's much simpler to turn off pure string mode for
1345 * any case-insensitive or approximate matching; usually,
1346 * that is correct, or they wouldn't have been turned on.
1347 * However, we need to make sure we match a "." or ".."
1348 * in a file name as a pure string. There's a minor bug
1349 * that this will also apply to something like
1350 * ..(#a1).. (i.e. the (#a1) has no effect), but if you're
1351 * going to write funny patterns, you get no sympathy from me.
1356 * As above: don't use pattern matching for files
1357 * just because of case insensitivity if file system
1358 * is known to be case insensitive.
1360 * This is known to be necessary in at least one case:
1361 * if "mount -c /" is in effect, so that drives appear
1362 * directly under / instead of the usual /cygdrive, they
1363 * aren't shown by readdir(). So it's vital we don't use
1364 * globbing to find "/c", since that'll fail.
1366 ((patflags
& PAT_FILE
) ?
1367 (0xFF|GF_LCMATCHUC
) :
1368 (0xFF|GF_LCMATCHUC
|GF_IGNCASE
))
1370 (0xFF|GF_LCMATCHUC
|GF_IGNCASE
)
1373 if (!(patflags
& PAT_FILE
))
1374 flags
&= ~P_PURESTR
;
1375 else if (!(nptr
[0] == '.' &&
1376 (slen
== 1 || (nptr
[1] == '.' && slen
== 2))))
1377 flags
&= ~P_PURESTR
;
1384 METACHARINC(patparse
);
1387 DPUTS(zpc_special
[ZPC_QUEST
] == Marker
,
1388 "Treating '?' as pattern character although disabled");
1390 starter
= patnode(P_ANY
);
1393 DPUTS(zpc_special
[ZPC_STAR
] == Marker
,
1394 "Treating '*' as pattern character although disabled");
1395 /* kshchar is used as a sign that we can't have #'s. */
1397 starter
= patnode(P_STAR
);
1400 DPUTS(zpc_special
[ZPC_INBRACK
] == Marker
,
1401 "Treating '[' as pattern character although disabled");
1403 if (*patparse
== Hat
|| *patparse
== '^' || *patparse
== '!') {
1405 starter
= patnode(P_ANYBUT
);
1407 starter
= patnode(P_ANYOF
);
1409 * []...] means match a "]" or other included characters.
1410 * However, to be a bit helpful and for compatibility
1411 * with other shells, don't take in that sense if
1412 * there's no further "]". That's still imperfect,
1413 * but it's all we can do --- we're required to
1414 * treat [$var]*[$var] with empty var as [ ... ]
1417 if (*patparse
== Outbrack
&& strchr(patparse
+1, Outbrack
)) {
1419 patadd(NULL
, ']', 1, PA_NOALIGN
);
1421 while (*patparse
&& *patparse
!= Outbrack
) {
1422 /* Meta is not a token */
1423 if (*patparse
== Inbrack
&& patparse
[1] == ':' &&
1424 (nptr
= strchr(patparse
+2, ':')) &&
1425 nptr
[1] == Outbrack
) {
1428 len
= nptr
- patparse
;
1429 ch
= range_type(patparse
, len
);
1430 patparse
= nptr
+ 2;
1432 patadd(NULL
, STOUC(Meta
) + ch
, 1, PA_NOALIGN
);
1435 charstart
= patparse
;
1436 METACHARINC(patparse
);
1438 if (*patparse
== '-' && patparse
[1] &&
1439 patparse
[1] != Outbrack
) {
1440 patadd(NULL
, STOUC(Meta
)+PP_RANGE
, 1, PA_NOALIGN
);
1441 if (itok(*charstart
)) {
1442 patadd(0, STOUC(ztokens
[*charstart
- Pound
]), 1,
1445 patadd(charstart
, 0, patparse
-charstart
, PA_NOALIGN
);
1447 charstart
= ++patparse
; /* skip ASCII '-' */
1448 METACHARINC(patparse
);
1450 if (itok(*charstart
)) {
1451 patadd(0, STOUC(ztokens
[*charstart
- Pound
]), 1,
1454 patadd(charstart
, 0, patparse
-charstart
, PA_NOALIGN
);
1457 if (*patparse
!= Outbrack
)
1460 /* terminate null string and fix alignment */
1461 patadd(NULL
, 0, 1, 0);
1464 DPUTS(!kshchar
&& zpc_special
[ZPC_INPAR
] == Marker
,
1465 "Treating '(' as pattern character although disabled");
1466 DPUTS(isset(SHGLOB
) && !kshchar
,
1467 "Treating bare '(' as pattern character with SHGLOB");
1468 if (kshchar
== '!') {
1469 /* This is nasty, we should really either handle all
1470 * kshglobbing below or here. But most of the
1471 * others look like non-ksh patterns, while this one
1472 * doesn't, so we handle it here and leave the rest.
1473 * We treat it like an extendedglob ^, except that
1474 * it goes into parentheses.
1476 * If we did do kshglob here, we could support
1477 * the old behaviour that things like !(foo)##
1478 * work, but it makes the code more complicated at
1479 * the expense of allowing the user to do things
1482 if (!(starter
= patcompnot(1, &flags2
)))
1484 } else if (!(starter
= patcompswitch(1, &flags2
)))
1486 flags
|= flags2
& P_HSTART
;
1490 DPUTS(zpc_special
[ZPC_INANG
] == Marker
,
1491 "Treating '<' as pattern character although disabled");
1492 DPUTS(isset(SHGLOB
), "Treating <..> as numeric range with SHGLOB");
1493 len
= 0; /* beginning present 1, end present 2 */
1494 if (idigit(*patparse
)) {
1495 from
= (zrange_t
) zstrtol((char *)patparse
,
1496 (char **)&nptr
, 10);
1500 DPUTS(*patparse
!= '-', "BUG: - missing from numeric glob");
1502 if (idigit(*patparse
)) {
1503 to
= (zrange_t
) zstrtol((char *)patparse
,
1504 (char **)&nptr
, 10);
1508 if (*patparse
!= Outang
)
1513 starter
= patnode(P_NUMRNG
);
1514 patadd((char *)&from
, 0, sizeof(from
), 0);
1515 patadd((char *)&to
, 0, sizeof(to
), 0);
1518 starter
= patnode(P_NUMTO
);
1519 patadd((char *)&to
, 0, sizeof(to
), 0);
1522 starter
= patnode(P_NUMFROM
);
1523 patadd((char *)&from
, 0, sizeof(from
), 0);
1526 starter
= patnode(P_NUMANY
);
1529 /* This can't be simple, because it isn't.
1530 * Mention in manual that matching digits with [...]
1531 * is more efficient.
1535 DPUTS(zpc_special
[ZPC_HASH
] == Marker
,
1536 "Treating '#' as pattern character although disabled");
1537 DPUTS(!isset(EXTENDEDGLOB
), "BUG: # not treated as string");
1539 * A hash here is an error; it should follow something
1546 * Marker for restoring a backslash in output:
1547 * does not match a character.
1549 next
= patcomppiece(flagp
, paren
);
1551 * Can't match a pure string since we need to do this
1552 * as multiple chunks.
1554 *flagp
&= ~P_PURESTR
;
1559 dputs("BUG: character not handled in patcomppiece");
1567 if (!(hash
= (*patparse
== zpc_special
[ZPC_HASH
])) &&
1568 !(count
= ((*patparse
== zpc_special
[ZPC_INPAR
] &&
1569 patparse
[1] == zpc_special
[ZPC_HASH
] &&
1570 patparse
[2] == 'c') ||
1571 (*patparse
== zpc_special
[ZPC_KSH_AT
] &&
1572 patparse
[1] == Inpar
&&
1573 patparse
[2] == zpc_special
[ZPC_HASH
] &&
1574 patparse
[3] == 'c'))) &&
1575 (kshchar
<= 0 || kshchar
== '@' || kshchar
== '!')) {
1580 /* too much at once doesn't currently work */
1581 if (kshchar
&& (hash
|| count
))
1584 if (kshchar
== '*') {
1587 } else if (kshchar
== '+') {
1590 } else if (kshchar
== '?') {
1597 } else if (*++patparse
== zpc_special
[ZPC_HASH
]) {
1607 * Note optimizations with pointers into P_NOTHING branches: some
1608 * should logically point to next node after current piece.
1610 * Backtracking is also encoded in a slightly obscure way: the
1611 * code emitted ensures we test the non-empty branch of complex
1612 * patterns before the empty branch on each repetition. Hence
1613 * each time we fail on a non-empty branch, we try the empty branch,
1614 * which is equivalent to backtracking.
1616 if (op
== P_COUNT
) {
1618 union upat countargs
[P_CT_OPERAND
];
1619 char *opp
= patparse
;
1621 countargs
[0].l
= P_COUNT
;
1622 countargs
[P_CT_CURRENT
].l
= 0L;
1623 countargs
[P_CT_MIN
].l
= (long)zstrtol(patparse
, &patparse
, 10);
1624 if (patparse
== opp
) {
1625 /* missing number treated as zero */
1626 countargs
[P_CT_MIN
].l
= 0L;
1628 if (*patparse
!= ',' && *patparse
!= Comma
) {
1629 /* either max = min or error */
1630 if (*patparse
!= Outpar
)
1632 countargs
[P_CT_MAX
].l
= countargs
[P_CT_MIN
].l
;
1635 countargs
[P_CT_MAX
].l
= (long)zstrtol(patparse
, &patparse
, 10);
1636 if (*patparse
!= Outpar
)
1638 if (patparse
== opp
) {
1639 /* missing number treated as infinity: record as -1 */
1640 countargs
[P_CT_MAX
].l
= -1L;
1644 countargs
[P_CT_PTR
].p
= NULL
;
1645 /* Mark this chain as a min/max count... */
1646 patinsert(P_COUNTSTART
, starter
, (char *)countargs
, sizeof(countargs
));
1648 * The next of the operand is a loop back to the P_COUNT. This is
1649 * how we get recursion for the count. We don't loop back to
1650 * the P_COUNTSTART; that's used for initialising the count
1651 * and saving and restoring the count for any enclosing use
1654 opnd
= P_OPERAND(starter
) + P_CT_OPERAND
;
1655 pattail(opnd
, patnode(P_BACK
));
1656 pattail(opnd
, P_OPERAND(starter
));
1658 * The next of the counter operators is what follows the
1660 * This handles matching of the tail.
1662 next
= patnode(P_NOTHING
);
1663 pattail(starter
, next
);
1664 pattail(P_OPERAND(starter
), next
);
1665 } else if ((flags
& P_SIMPLE
) && (op
== P_ONEHASH
|| op
== P_TWOHASH
) &&
1666 P_OP((Upat
)patout
+starter
) == P_ANY
) {
1667 /* Optimize ?# to *. Silly thing to do, since who would
1668 * use ?# ? But it makes the later code shorter.
1670 Upat uptr
= (Upat
)patout
+ starter
;
1671 if (op
== P_TWOHASH
) {
1672 /* ?## becomes ?* */
1673 uptr
->l
= (uptr
->l
& ~0xff) | P_ANY
;
1674 pattail(starter
, patnode(P_STAR
));
1676 uptr
->l
= (uptr
->l
& ~0xff) | P_STAR
;
1678 } else if ((flags
& P_SIMPLE
) && op
&& !(patglobflags
& 0xff)) {
1679 /* Simplify, but not if we need to look for approximations. */
1680 patinsert(op
, starter
, NULL
, 0);
1681 } else if (op
== P_ONEHASH
) {
1682 /* Emit x# as (x&|), where & means "self". */
1684 patinsert(P_WBRANCH
, starter
, (char *)&up
, sizeof(up
));
1686 patoptail(starter
, patnode(P_BACK
)); /* and loop */
1687 patoptail(starter
, starter
); /* back */
1688 pattail(starter
, patnode(P_BRANCH
)); /* or */
1689 pattail(starter
, patnode(P_NOTHING
)); /* null. */
1690 } else if (op
== P_TWOHASH
) {
1691 /* Emit x## as x(&|) where & means "self". */
1692 next
= patnode(P_WBRANCH
); /* Either */
1694 patadd((char *)&up
, 0, sizeof(up
), 0);
1695 pattail(starter
, next
);
1696 pattail(patnode(P_BACK
), starter
); /* loop back */
1697 pattail(next
, patnode(P_BRANCH
)); /* or */
1698 pattail(starter
, patnode(P_NOTHING
)); /* null. */
1699 } else if (kshchar
== '?') {
1700 /* Emit ?(x) as (x|) */
1701 patinsert(P_BRANCH
, starter
, NULL
, 0); /* Either x */
1702 pattail(starter
, patnode(P_BRANCH
)); /* or */
1703 next
= patnode(P_NOTHING
); /* null */
1704 pattail(starter
, next
);
1705 patoptail(starter
, next
);
1707 if (*patparse
== zpc_special
[ZPC_HASH
])
1714 * Turn a ^foo (paren = 0) or !(foo) (paren = 1) into *~foo with
1715 * parentheses if necessary. As you see, that's really quite easy.
1720 patcompnot(int paren
, int *flagsp
)
1723 long excsync
, br
, excl
, n
, starter
;
1726 /* Here, we're matching a star at the start. */
1729 starter
= patnode(P_BRANCH
);
1730 br
= patnode(P_STAR
);
1731 excsync
= patnode(P_EXCSYNC
);
1732 pattail(br
, excsync
);
1733 pattail(starter
, excl
= patnode(P_EXCLUDE
));
1735 patadd((char *)&up
, 0, sizeof(up
), 0);
1736 if (!(br
= (paren
? patcompswitch(1, &dummy
) : patcompbranch(&dummy
, 0))))
1738 pattail(br
, patnode(P_EXCEND
));
1739 n
= patnode(P_NOTHING
); /* just so much easier */
1740 pattail(excsync
, n
);
1752 long starter
= (Upat
)patcode
- (Upat
)patout
;
1756 patadd((char *)&up
, 0, sizeof(union upat
), 0);
1761 * insert an operator in front of an already emitted operand:
1762 * we relocate the operand. there had better be nothing else after.
1767 patinsert(long op
, int opnd
, char *xtra
, int sz
)
1769 char *src
, *dst
, *opdst
;
1770 union upat buf
, *lptr
;
1773 patadd((char *)&buf
, 0, sizeof(buf
), 0);
1775 patadd(xtra
, 0, sz
, 0);
1776 src
= patcode
- sizeof(union upat
) - sz
;
1778 opdst
= patout
+ opnd
* sizeof(union upat
);
1782 /* A cast can't be an lvalue */
1785 opdst
+= sizeof(union upat
);
1790 /* set the 'next' pointer at the end of a node chain */
1794 pattail(long p
, long val
)
1799 scan
= (Upat
)patout
+ p
;
1801 if (!(temp
= PATNEXT(scan
)))
1806 offset
= (P_OP(scan
) == P_BACK
)
1807 ? (scan
- (Upat
)patout
) - val
: val
- (scan
- (Upat
)patout
);
1809 scan
->l
|= offset
<< 8;
1812 /* do pattail, but on operand of first argument; nop if operandless */
1815 static void patoptail(long p
, long val
)
1817 Upat ptr
= (Upat
)patout
+ p
;
1819 if (!p
|| !P_ISBRANCH(ptr
))
1822 pattail(P_OPERAND(p
), val
);
1824 pattail(P_OPERAND(p
) + 1, val
);
1831 static char *patinstart
; /* Start of input string */
1832 static char *patinend
; /* End of input string */
1833 static char *patinput
; /* String input pointer */
1834 static char *patinpath
; /* Full path for use with ~ exclusions */
1835 static int patinlen
; /* Length of last successful match.
1836 * Includes count of Meta characters.
1839 static char *patbeginp
[NSUBEXP
]; /* Pointer to backref beginnings */
1840 static char *patendp
[NSUBEXP
]; /* Pointer to backref ends */
1841 static int parsfound
; /* parentheses (with backrefs) found */
1843 static int globdots
; /* Glob initial dots? */
1846 * Character functions operating on unmetafied strings.
1848 #ifdef MULTIBYTE_SUPPORT
1850 /* Get a character from the start point in a string */
1851 #define CHARREF(x, y) charref((x), (y))
1853 charref(char *x
, char *y
)
1858 if (!(patglobflags
& GF_MULTIBYTE
) || !(STOUC(*x
) & 0x80))
1859 return (wchar_t) STOUC(*x
);
1861 ret
= mbrtowc(&wc
, x
, y
-x
, &shiftstate
);
1863 if (ret
== MB_INVALID
|| ret
== MB_INCOMPLETE
) {
1864 /* Error. Treat as single byte. */
1865 /* Reset the shift state for next time. */
1866 memset(&shiftstate
, 0, sizeof(shiftstate
));
1867 return (wchar_t) STOUC(*x
);
1873 /* Get a pointer to the next character */
1874 #define CHARNEXT(x, y) charnext((x), (y))
1876 charnext(char *x
, char *y
)
1881 if (!(patglobflags
& GF_MULTIBYTE
) || !(STOUC(*x
) & 0x80))
1884 ret
= mbrtowc(&wc
, x
, y
-x
, &shiftstate
);
1886 if (ret
== MB_INVALID
|| ret
== MB_INCOMPLETE
) {
1887 /* Error. Treat as single byte. */
1888 /* Reset the shift state for next time. */
1889 memset(&shiftstate
, 0, sizeof(shiftstate
));
1893 /* Nulls here are normal characters */
1894 return x
+ (ret
? ret
: 1);
1897 /* Increment a pointer past the current character. */
1898 #define CHARINC(x, y) ((x) = charnext((x), (y)))
1901 /* Get a character and increment */
1902 #define CHARREFINC(x, y, z) charrefinc(&(x), (y), (z))
1904 charrefinc(char **x
, char *y
, int *z
)
1909 if (!(patglobflags
& GF_MULTIBYTE
) || !(STOUC(**x
) & 0x80))
1910 return (wchar_t) STOUC(*(*x
)++);
1912 ret
= mbrtowc(&wc
, *x
, y
-*x
, &shiftstate
);
1914 if (ret
== MB_INVALID
|| ret
== MB_INCOMPLETE
) {
1915 /* Error. Treat as single byte, but flag. */
1917 /* Reset the shift state for next time. */
1918 memset(&shiftstate
, 0, sizeof(shiftstate
));
1919 return (wchar_t) STOUC(*(*x
)++);
1922 /* Nulls here are normal characters */
1923 *x
+= ret
? ret
: 1;
1930 * Count the number of characters between two pointers, smaller first
1932 * This is used when setting values in parameters, so we obey
1933 * the MULTIBYTE option (even if it's been overridden locally).
1935 #define CHARSUB(x,y) charsub(x, y)
1937 charsub(char *x
, char *y
)
1943 if (!isset(MULTIBYTE
))
1947 ret
= mbrtowc(&wc
, x
, y
-x
, &shiftstate
);
1949 if (ret
== MB_INVALID
|| ret
== MB_INCOMPLETE
) {
1950 /* Error. Treat remainder as single characters */
1951 return res
+ (y
- x
);
1954 /* Treat nulls as normal characters */
1964 #else /* no MULTIBYTE_SUPPORT */
1966 /* Get a character from the start point in a string */
1967 #define CHARREF(x, y) (STOUC(*(x)))
1968 /* Get a pointer to the next character */
1969 #define CHARNEXT(x, y) ((x)+1)
1970 /* Increment a pointer past the current character. */
1971 #define CHARINC(x, y) ((x)++)
1972 /* Get a character and increment */
1973 #define CHARREFINC(x, y, z) (STOUC(*(x)++))
1974 /* Count the number of characters between two pointers, smaller first */
1975 #define CHARSUB(x,y) ((y) - (x))
1977 #endif /* MULTIBYTE_SUPPORT */
1980 * The following need to be accessed in the globbing scanner for
1981 * a multi-component file path. See horror story in glob.c.
1984 int errsfound
; /* Total error count so far */
1987 int forceerrs
; /* Forced maximum error count */
1998 * Test prog against null-terminated, metafied string.
2003 pattry(Patprog prog
, char *string
)
2005 return pattryrefs(prog
, string
, -1, -1, 0, NULL
, NULL
, NULL
);
2009 * Test prog against string of given length, no null termination
2010 * but still metafied at this point. offset gives an offset
2011 * to include in reported match indices
2016 pattrylen(Patprog prog
, char *string
, int len
, int unmetalen
, int offset
)
2018 return pattryrefs(prog
, string
, len
, unmetalen
, offset
, NULL
, NULL
, NULL
);
2022 * Test prog against string with given lengths. The input
2023 * string is metafied; stringlen is the raw string length, and
2024 * unmetalen the number of characters in the original string (some
2025 * of which may now be metafied). Either value may be -1
2026 * to indicate a null-terminated string which will be counted. Note
2027 * there may be a severe penalty for this if a lot of matching is done
2030 * offset is the position in the original string (not seen by
2031 * the pattern module) at which we are trying to match.
2032 * This is added in to the positions recorded in patbeginp and patendp
2033 * when we are looking for substrings. Currently this only happens
2034 * in the parameter substitution code.
2036 * Note this is a character offset, i.e. a metafied character
2039 * The last three arguments are used to report the positions for the
2040 * backreferences. On entry, *nump should contain the maximum number
2041 * of positions to report. In this case the match, mbegin, mend
2042 * arrays are not altered.
2044 * If nump is NULL but endp is not NULL, then *endp is set to the
2045 * end position of the match, taking into account patinstart.
2050 pattryrefs(Patprog prog
, char *string
, int stringlen
, int unmetalen
,
2052 int *nump
, int *begp
, int *endp
)
2054 int i
, maxnpos
= 0, ret
, needfullpath
, unmetalenp
;
2056 char **sp
, **ep
, *tryalloced
, *ptr
;
2057 char *progstr
= (char *)prog
+ prog
->startoff
;
2063 /* inherited from domatch, but why, exactly? */
2064 if (*string
== Nularg
) {
2070 stringlen
= strlen(string
);
2071 origlen
= stringlen
;
2073 patflags
= prog
->flags
;
2075 * For a top-level ~-exclusion, we will need the full
2076 * path to exclude, so copy the path so far and append the
2077 * current test string.
2079 needfullpath
= (patflags
& PAT_HAS_EXCLUDP
) && pathpos
;
2081 /* Get the length of the full string when unmetafied. */
2083 unmetalen
= ztrsub(string
+ stringlen
, string
);
2085 unmetalenp
= ztrsub(pathbuf
+ pathpos
, pathbuf
);
2089 DPUTS(needfullpath
&& (patflags
& (PAT_PURES
|PAT_ANY
)),
2090 "rum sort of file exclusion");
2092 * Partly for efficiency, and partly for the convenience of
2093 * globbing, we don't unmetafy pure string patterns, and
2094 * there's no reason to if the pattern is just a *.
2096 if (!(patflags
& (PAT_PURES
|PAT_ANY
))
2097 && (needfullpath
|| unmetalen
!= stringlen
)) {
2099 * We need to copy if we need to prepend the path so far
2100 * (in which case we copy both chunks), or if we have
2106 dst
= tryalloced
= zalloc(unmetalen
+ unmetalenp
);
2109 /* loop twice, copy path buffer first time */
2113 /* just loop once, copy string with unmetafication */
2117 for (icopy
= 0; icopy
< 2; icopy
++) {
2118 for (i
= 0; i
< ncopy
; i
++) {
2121 *dst
++ = *ptr
++ ^ 32;
2128 /* next time append test string to path so far */
2134 patinstart
= tryalloced
+ unmetalenp
;
2135 patinpath
= tryalloced
;
2137 patinstart
= tryalloced
;
2140 stringlen
= unmetalen
;
2142 patinstart
= string
;
2143 tryalloced
= patinpath
= NULL
;
2146 patinend
= patinstart
+ stringlen
;
2148 * From now on we do not require NULL termination of
2149 * the test string. There should also be no more references
2150 * to the variable string.
2153 if (prog
->flags
& (PAT_PURES
|PAT_ANY
)) {
2155 * Either we are testing against a pure string,
2156 * or we can match anything at all.
2159 if (prog
->flags
& PAT_ANY
) {
2161 * Optimisation for a single "*": always matches
2162 * (except for no_glob_dots, see below).
2167 * Testing a pure string. See if initial
2170 int lendiff
= stringlen
- prog
->patmlen
;
2172 /* No, the pattern string is too long. */
2174 } else if (!memcmp(progstr
, patinstart
, prog
->patmlen
)) {
2176 * Initial component matches. Matches either
2177 * if lengths are the same or we are not anchored
2178 * to the end of the string.
2180 ret
= !lendiff
|| (prog
->flags
& PAT_NOANCH
);
2188 * For files, we won't match initial "."s unless
2191 if ((prog
->flags
& PAT_NOGLD
) && *patinstart
== '.') {
2195 * Remember the length in case used for ${..#..} etc.
2196 * In this case, we didn't unmetafy the string.
2198 patinlen
= (int)prog
->patmlen
;
2199 /* if matching files, must update globbing flags */
2200 patglobflags
= prog
->globend
;
2202 if ((patglobflags
& GF_MATCHREF
) &&
2203 !(patflags
& PAT_FILE
)) {
2204 char *str
= ztrduppfx(patinstart
, patinlen
);
2208 * Count the characters. We're not using CHARSUB()
2209 * because the string is still metafied.
2212 mlen
= MB_METASTRLEN2END(patinstart
, 0,
2213 patinstart
+ patinlen
);
2215 setsparam("MATCH", str
);
2217 (zlong
)(patoffset
+ !isset(KSHARRAYS
)));
2219 (zlong
)(mlen
+ patoffset
+
2220 !isset(KSHARRAYS
) - 1));
2226 zfree(tryalloced
, unmetalen
+ unmetalenp
);
2230 int q
= queue_signal_level();
2233 * Test for a `must match' string, unless we're scanning for a match
2234 * in which case we don't need to do this each time.
2237 if (!(prog
->flags
& PAT_SCAN
) && prog
->mustoff
)
2239 char *testptr
; /* start pointer into test string */
2240 char *teststop
; /* last point from which we can match */
2241 char *patptr
= (char *)prog
+ prog
->mustoff
;
2242 int patlen
= prog
->patmlen
;
2245 if (patlen
> stringlen
) {
2246 /* Too long, can't match. */
2249 teststop
= patinend
- patlen
;
2251 for (testptr
= patinstart
; testptr
<= teststop
; testptr
++)
2253 if (!memcmp(testptr
, patptr
, patlen
)) {
2265 zfree(tryalloced
, unmetalen
+ unmetalenp
);
2269 patglobflags
= prog
->globflags
;
2270 if (!(patflags
& PAT_FILE
)) {
2274 globdots
= !(patflags
& PAT_NOGLD
);
2277 patinput
= patinstart
;
2279 dont_queue_signals();
2281 if (patmatch((Upat
)progstr
)) {
2283 * we were lazy and didn't save the globflags if an exclusion
2284 * failed, so set it now
2286 patglobflags
= prog
->globend
;
2289 * Record length of successful match, including Meta
2290 * characters. Do it here so that patmatchlen() can return
2291 * it even if we delete the pattern strings.
2293 patinlen
= patinput
- patinstart
;
2295 * Optimization: if we didn't find any Meta characters
2296 * to begin with, we don't need to look for them now.
2298 if (unmetalen
!= origlen
) {
2299 for (ptr
= patinstart
; ptr
< patinput
; ptr
++)
2305 * Should we clear backreferences and matches on a failed
2308 if ((patglobflags
& GF_MATCHREF
) && !(patflags
& PAT_FILE
)) {
2310 * m flag: for global match. This carries no overhead
2311 * in the pattern matching part.
2313 * Remember the test pattern is already unmetafied.
2316 int mlen
= CHARSUB(patinstart
, patinput
);
2318 str
= metafy(patinstart
, patinput
- patinstart
, META_DUP
);
2319 setsparam("MATCH", str
);
2320 setiparam("MBEGIN", (zlong
)(patoffset
+ !isset(KSHARRAYS
)));
2322 (zlong
)(mlen
+ patoffset
+
2323 !isset(KSHARRAYS
) - 1));
2325 if (prog
->patnpar
&& nump
) {
2327 * b flag: for backreferences using parentheses. Reported
2330 *nump
= prog
->patnpar
;
2335 for (i
= 0; i
< prog
->patnpar
&& i
< maxnpos
; i
++) {
2336 if (parsfound
& (1 << i
)) {
2338 *begp
++ = CHARSUB(patinstart
, *sp
) + patoffset
;
2340 *endp
++ = CHARSUB(patinstart
, *ep
) + patoffset
2352 } else if (prog
->patnpar
&& !(patflags
& PAT_FILE
)) {
2354 * b flag: for backreferences using parentheses.
2356 int palen
= prog
->patnpar
+1;
2357 char **matcharr
, **mbeginarr
, **mendarr
;
2358 char numbuf
[DIGBUFSIZE
];
2360 matcharr
= zshcalloc(palen
*sizeof(char *));
2361 mbeginarr
= zshcalloc(palen
*sizeof(char *));
2362 mendarr
= zshcalloc(palen
*sizeof(char *));
2367 for (i
= 0; i
< prog
->patnpar
; i
++) {
2368 if (parsfound
& (1 << i
)) {
2369 matcharr
[i
] = metafy(*sp
, *ep
- *sp
, META_DUP
);
2371 * mbegin and mend give indexes into the string
2372 * in the standard notation, i.e. respecting
2373 * KSHARRAYS, and with the end index giving
2374 * the last character, not one beyond.
2375 * For example, foo=foo; [[ $foo = (f)oo ]] gives
2376 * (without KSHARRAYS) indexes 1 and 1, which
2377 * corresponds to indexing as ${foo[1,1]}.
2379 sprintf(numbuf
, "%ld",
2380 (long)(CHARSUB(patinstart
, *sp
) +
2382 !isset(KSHARRAYS
)));
2383 mbeginarr
[i
] = ztrdup(numbuf
);
2384 sprintf(numbuf
, "%ld",
2385 (long)(CHARSUB(patinstart
, *ep
) +
2387 !isset(KSHARRAYS
) - 1));
2388 mendarr
[i
] = ztrdup(numbuf
);
2390 /* Pattern wasn't set: either it was in an
2391 * unmatched branch, or a hashed parenthesis
2392 * that didn't match at all.
2394 matcharr
[i
] = ztrdup("");
2395 mbeginarr
[i
] = ztrdup("-1");
2396 mendarr
[i
] = ztrdup("-1");
2401 setaparam("match", matcharr
);
2402 setaparam("mbegin", mbeginarr
);
2403 setaparam("mend", mendarr
);
2406 if (!nump
&& endp
) {
2408 * We just need the overall end position.
2410 *endp
= CHARSUB(patinstart
, patinput
) + patoffset
;
2417 restore_queue_signals(q
);
2420 zfree(tryalloced
, unmetalen
+ unmetalenp
);
2427 * Return length of previous successful match. This is
2428 * in metafied bytes, i.e. includes a count of Meta characters.
2429 * Unusual and futile attempt at modular encapsulation.
2440 * Match literal characters with case insensitivity test: the first
2441 * comes from the input string, the second the current pattern.
2443 #ifdef MULTIBYTE_SUPPORT
2444 #define ISUPPER(x) iswupper(x)
2445 #define ISLOWER(x) iswlower(x)
2446 #define TOUPPER(x) towupper(x)
2447 #define TOLOWER(x) towlower(x)
2448 #define ISDIGIT(x) iswdigit(x)
2450 #define ISUPPER(x) isupper(x)
2451 #define ISLOWER(x) islower(x)
2452 #define TOUPPER(x) toupper(x)
2453 #define TOLOWER(x) tolower(x)
2454 #define ISDIGIT(x) idigit(x)
2456 #define CHARMATCH(chin, chpa) (chin == chpa || \
2457 ((patglobflags & GF_IGNCASE) ? \
2458 ((ISUPPER(chin) ? TOLOWER(chin) : chin) == \
2459 (ISUPPER(chpa) ? TOLOWER(chpa) : chpa)) : \
2460 (patglobflags & GF_LCMATCHUC) ? \
2461 (ISLOWER(chpa) && TOUPPER(chpa) == chin) : 0))
2464 * The same but caching an expression from the first argument,
2465 * Requires local charmatch_cache definition.
2467 #define CHARMATCH_EXPR(expr, chpa) \
2468 (charmatch_cache = (expr), CHARMATCH(charmatch_cache, chpa))
2471 * exactpos is used to remember how far down an exact string we have
2472 * matched, if we are doing approximation and can therefore redo from
2473 * the same point; we never need to otherwise.
2475 * exactend is a pointer to the end of the string, which isn't
2478 static char *exactpos
, *exactend
;
2481 * Main matching routine.
2483 * Testing the tail end of a match is usually done by recursion, but
2484 * we try to eliminate that in favour of looping for simple cases.
2491 /* Current and next nodes */
2492 Upat scan
= prog
, next
, opnd
;
2493 char *start
, *save
, *chrop
, *chrend
, *compend
;
2494 int savglobflags
, op
, no
, min
, fail
= 0, saverrsfound
;
2495 zrange_t from
, to
, comp
;
2498 while (scan
&& !errflag
) {
2499 next
= PATNEXT(scan
);
2501 if (!globdots
&& P_NOTDOT(scan
) && patinput
== patinstart
&&
2502 patinput
< patinend
&& *patinput
== '.')
2505 switch (P_OP(scan
)) {
2507 if (patinput
== patinend
)
2510 CHARINC(patinput
, patinend
);
2514 * acts as nothing if *chrop is null: this is used by
2521 chrop
= P_LS_STR(scan
);
2522 chrend
= chrop
+ P_LS_LEN(scan
);
2525 while (chrop
< chrend
&& patinput
< patinend
) {
2526 char *savpatinput
= patinput
;
2527 char *savchrop
= chrop
;
2528 int badin
= 0, badpa
= 0;
2530 * Care with character matching:
2531 * We do need to convert the character to wide
2532 * representation if possible, because we may need
2533 * to do case transformation. However, we should
2534 * be careful in case one, but not the other, wasn't
2535 * representable in the current locale---in that
2536 * case they don't match even if the returned
2537 * values (one properly converted, one raw) are
2540 patint_t chin
= CHARREFINC(patinput
, patinend
, &badin
);
2541 patint_t chpa
= CHARREFINC(chrop
, chrend
, &badpa
);
2542 if (!CHARMATCH(chin
, chpa
) || badin
!= badpa
) {
2544 patinput
= savpatinput
;
2549 if (chrop
< chrend
) {
2557 if (patinput
== patinend
)
2560 #ifdef MULTIBYTE_SUPPORT
2561 wchar_t cr
= CHARREF(patinput
, patinend
);
2562 char *scanop
= (char *)P_OPERAND(scan
);
2563 if (patglobflags
& GF_MULTIBYTE
) {
2564 if (mb_patmatchrange(scanop
, cr
, NULL
, NULL
) ^
2565 (P_OP(scan
) == P_ANYOF
))
2568 CHARINC(patinput
, patinend
);
2569 } else if (patmatchrange(scanop
, (int)cr
, NULL
, NULL
) ^
2570 (P_OP(scan
) == P_ANYOF
))
2573 CHARINC(patinput
, patinend
);
2575 if (patmatchrange((char *)P_OPERAND(scan
),
2576 CHARREF(patinput
, patinend
), NULL
, NULL
) ^
2577 (P_OP(scan
) == P_ANYOF
))
2580 CHARINC(patinput
, patinend
);
2588 * To do this properly, we really have to treat numbers as
2589 * closures: that's so things like <1-1000>33 will
2590 * match 633 (they didn't up to 3.1.6). To avoid making this
2591 * too inefficient, we see if there's an exact match next:
2592 * if there is, and it's not a digit, we return 1 after
2593 * the first attempt.
2596 start
= (char *)P_OPERAND(scan
);
2598 if (op
!= P_NUMTO
) {
2599 #ifdef ZSH_64_BIT_TYPE
2600 /* We can't rely on pointer alignment being good enough. */
2601 memcpy((char *)&from
, start
, sizeof(zrange_t
));
2603 from
= *((zrange_t
*) start
);
2605 start
+= sizeof(zrange_t
);
2607 if (op
!= P_NUMFROM
) {
2608 #ifdef ZSH_64_BIT_TYPE
2609 memcpy((char *)&to
, start
, sizeof(zrange_t
));
2611 to
= *((zrange_t
*) start
);
2614 start
= compend
= patinput
;
2616 while (patinput
< patinend
&& idigit(*patinput
)) {
2619 comp
+= *patinput
- '0';
2623 if (comp
& ((zrange_t
)1 << (sizeof(comp
)*8 -
2624 #ifdef ZRANGE_T_IS_SIGNED
2631 * Out of range (allowing for signedness, which
2632 * we need if we are using zlongs).
2633 * This is as far as we can go.
2634 * If we're doing a range "from", skip all the
2635 * remaining numbers. Otherwise, we can't
2636 * match beyond the previous point anyway.
2637 * Leave the pointer to the last calculated
2638 * position (compend) where it was before.
2640 if (op
== P_NUMFROM
) {
2641 while (patinput
< patinend
&& idigit(*patinput
))
2648 while (patinput
> start
) {
2649 /* if already too small, no power on earth can save it */
2650 if (comp
< from
&& patinput
<= compend
)
2652 if ((op
== P_NUMFROM
|| comp
<= to
) && patmatch(next
)) {
2655 if (!no
&& P_OP(next
) == P_EXACTLY
&&
2657 !idigit(STOUC(*P_LS_STR(next
)))) &&
2658 !(patglobflags
& 0xff))
2663 * With a range start and an unrepresentable test
2664 * number, we just back down the test string without
2665 * changing the number until we get to a representable
2668 if (patinput
< compend
)
2675 /* This is <->: any old set of digits, don't bother comparing */
2677 while (patinput
< patinend
&& idigit(*patinput
))
2681 while (patinput
> start
) {
2684 if (!no
&& P_OP(next
) == P_EXACTLY
&&
2686 !idigit(*P_LS_STR(next
))) &&
2687 !(patglobflags
& 0xff))
2700 patglobflags
= P_OPERAND(scan
)->l
;
2712 no
= P_OP(scan
) - P_OPEN
;
2715 if (patmatch(next
)) {
2717 * Don't set patbeginp if some later invocation of
2718 * the same parentheses already has.
2720 if (no
&& !(parsfound
& (1 << (no
- 1)))) {
2721 patbeginp
[no
-1] = save
;
2722 parsfound
|= 1 << (no
- 1);
2738 no
= P_OP(scan
) - P_CLOSE
;
2741 if (patmatch(next
)) {
2742 if (no
&& !(parsfound
& (1 << (no
+ 15)))) {
2743 patendp
[no
-1] = save
;
2744 parsfound
|= 1 << (no
+ 15);
2751 /* See the P_EXCLUDE code below for where syncptr comes from */
2753 unsigned char *syncptr
;
2755 after
= P_OPERAND(scan
);
2756 DPUTS(!P_ISEXCLUDE(after
),
2757 "BUG: EXCSYNC not followed by EXCLUDE.");
2758 DPUTS(!P_OPERAND(after
)->p
,
2759 "BUG: EXCSYNC not handled by EXCLUDE");
2760 syncptr
= P_OPERAND(after
)->p
+ (patinput
- patinstart
);
2762 * If we already matched from here, this time we fail.
2763 * See WBRANCH code for story about error count.
2765 if (*syncptr
&& errsfound
+ 1 >= *syncptr
)
2768 * Else record that we (possibly) matched this time.
2769 * No harm if we don't: then the previous test will just
2770 * short cut the attempted match that is bound to fail.
2771 * We never try to exclude something that has already
2774 *syncptr
= errsfound
+ 1;
2779 * This is followed by a P_EXCSYNC, but only in the P_EXCLUDE
2780 * branch. Actually, we don't bother following it: all we
2781 * need to know is that we successfully matched so far up
2782 * to the end of the asserted pattern; the endpoint
2783 * in the target string is nulled out.
2785 if (!(fail
= (patinput
< patinend
)))
2790 /* P_EXCLUDE shouldn't occur without a P_BRANCH */
2791 if (!P_ISBRANCH(next
)) {
2792 /* no choice, avoid recursion */
2793 DPUTS(P_OP(scan
) == P_WBRANCH
,
2794 "BUG: WBRANCH with no alternative.");
2795 next
= P_OPERAND(scan
);
2799 savglobflags
= patglobflags
;
2800 saverrsfound
= errsfound
;
2801 if (P_ISEXCLUDE(next
)) {
2803 * The strategy is to test the asserted pattern,
2804 * recording via P_EXCSYNC how far the part to
2805 * be excluded matched. We then set the
2806 * length of the test string to that
2807 * point and see if the exclusion as far as
2808 * P_EXCEND also matches that string.
2809 * We need to keep testing the asserted pattern
2810 * by backtracking, since the first attempt
2811 * may be excluded while a later attempt may not.
2812 * For this we keep a pointer just after
2813 * the P_EXCLUDE which is tested by the P_EXCSYNC
2814 * to see if we matched there last time, in which
2815 * case we fail. If there is nothing to backtrack
2816 * over, that doesn't matter: we should fail anyway.
2817 * The pointer also tells us where the asserted
2818 * pattern matched for use by the exclusion.
2820 * It's hard to allocate space for this
2821 * beforehand since we may need to do it
2824 * P.S. in case you were wondering, this code
2829 unsigned char *oldsyncstr
;
2830 char *matchpt
= NULL
;
2831 int ret
, savglobdots
, matchederrs
= 0;
2832 int savparsfound
= parsfound
;
2833 DPUTS(P_OP(scan
) == P_WBRANCH
,
2834 "BUG: excluded WBRANCH");
2835 syncstrp
= P_OPERAND(next
);
2837 * Unlike WBRANCH, each test at the same exclude
2838 * sync point (due to an external loop) is separate,
2839 * i.e testing (foo~bar)# is no different from
2840 * (foo~bar)(foo~bar)... from the exclusion point
2841 * of view, so we use a different sync string.
2843 oldsyncstr
= syncstrp
->p
;
2844 syncstrp
->p
= (unsigned char *)
2845 zshcalloc((patinend
- patinstart
) + 1);
2846 origpatinend
= patinend
;
2847 while ((ret
= patmatch(P_OPERAND(scan
)))) {
2848 unsigned char *syncpt
;
2849 char *savpatinstart
;
2850 int savforce
= forceerrs
;
2851 int savpatflags
= patflags
, synclen
;
2853 savglobdots
= globdots
;
2854 matchederrs
= errsfound
;
2855 matchpt
= patinput
; /* may not be end */
2856 globdots
= 1; /* OK to match . first */
2857 /* Find the point where the scan
2858 * matched the part to be excluded: because
2859 * of backtracking, the one
2860 * most recently matched will be the first.
2861 * (Luckily, backtracking is done after all
2862 * possibilities for approximation have been
2865 for (syncpt
= syncstrp
->p
; !*syncpt
; syncpt
++)
2867 synclen
= syncpt
- syncstrp
->p
;
2868 if (patinstart
+ synclen
!= patinend
) {
2870 * Temporarily mark the string as
2871 * ending at this point.
2873 DPUTS(patinstart
+ synclen
> matchpt
,
2874 "BUG: EXCSYNC failed");
2876 patinend
= patinstart
+ synclen
;
2878 * If this isn't really the end of the string,
2879 * remember this for the (#e) assertion.
2881 patflags
|= PAT_NOTEND
;
2883 savpatinstart
= patinstart
;
2884 next
= PATNEXT(scan
);
2885 while (next
&& P_ISEXCLUDE(next
)) {
2888 * turn off approximations in exclusions:
2889 * note we keep remaining patglobflags
2890 * set by asserted branch (or previous
2891 * excluded branches, for consistency).
2893 patglobflags
&= ~0xff;
2895 opnd
= P_OPERAND(next
) + 1;
2896 if (P_OP(next
) == P_EXCLUDP
&& patinpath
) {
2898 * Top level exclusion with a file,
2899 * applies to whole path so add the
2900 * segments already matched.
2901 * We copied these in front of the
2902 * test pattern, so patinend doesn't
2905 DPUTS(patinput
!= patinstart
,
2906 "BUG: not at start excluding path");
2907 patinput
= patinstart
= patinpath
;
2909 if (patmatch(opnd
)) {
2912 * Another subtlety: if we exclude the
2913 * match, any parentheses just found
2914 * become invalidated.
2916 parsfound
= savparsfound
;
2919 patinput
= savpatinstart
+
2920 (patinput
- patinstart
);
2921 patinstart
= savpatinstart
;
2925 next
= PATNEXT(next
);
2928 * Restore original end position.
2930 patinend
= origpatinend
;
2931 patflags
= savpatflags
;
2932 globdots
= savglobdots
;
2933 forceerrs
= savforce
;
2937 patglobflags
= savglobflags
;
2938 errsfound
= saverrsfound
;
2940 zfree((char *)syncstrp
->p
,
2941 (patinend
- patinstart
) + 1);
2942 syncstrp
->p
= oldsyncstr
;
2945 errsfound
= matchederrs
;
2948 while ((scan
= PATNEXT(scan
)) &&
2952 int ret
= 1, pfree
= 0;
2955 if (P_OP(scan
) == P_WBRANCH
) {
2957 * This is where we make sure that we are not
2958 * repeatedly matching zero-length strings in
2959 * a closure, which would cause an infinite loop,
2960 * and also remove exponential behaviour in
2961 * backtracking nested closures.
2962 * The P_WBRANCH operator leaves a space for a
2963 * uchar *, initialized to NULL, which is
2964 * turned into a string the same length as the
2965 * target string. Every time we match from a
2966 * particular point in the target string, we
2967 * stick a 1 at the corresponding point here.
2968 * If we come round to the same branch again, and
2969 * there is already a 1, then the test fails.
2971 opnd
= P_OPERAND(scan
);
2974 ptrp
->p
= (unsigned char *)
2975 zshcalloc((patinend
- patinstart
) + 1);
2978 ptr
= ptrp
->p
+ (patinput
- patinstart
);
2981 * Without approximation, this is just a
2982 * single bit test. With approximation, we
2983 * need to know how many errors there were
2984 * last time we made the test. If errsfound
2985 * is now smaller than it was, hence we can
2986 * make more approximations in the remaining
2987 * code, we continue with the test.
2988 * (This is why the max number of errors is
2991 if (*ptr
&& errsfound
+ 1 >= *ptr
)
2993 *ptr
= errsfound
+ 1;
2995 opnd
= P_OPERAND(scan
);
2997 ret
= patmatch(opnd
);
2999 zfree((char *)ptrp
->p
,
3000 (patinend
- patinstart
) + 1);
3005 scan
= PATNEXT(scan
);
3008 patglobflags
= savglobflags
;
3009 errsfound
= saverrsfound
;
3010 DPUTS(P_OP(scan
) == P_WBRANCH
,
3011 "BUG: WBRANCH not first choice.");
3012 next
= PATNEXT(scan
);
3013 } while (scan
&& P_ISBRANCH(scan
));
3018 /* Handle specially for speed, although really P_ONEHASH+P_ANY */
3019 while (P_OP(next
) == P_STAR
) {
3021 * If there's another * following we can optimise it
3022 * out. Chains of *'s can give pathologically bad
3026 next
= PATNEXT(scan
);
3032 * This is just simple cases, matching one character.
3033 * With approximations, we still handle * this way, since
3034 * no approximation is ever necessary, but other closures
3035 * are handled by the more complicated branching method
3038 /* Note that no counts possibly metafied characters */
3041 char *lastcharstart
;
3043 * Array to record the start of characters for
3046 VARARR(char, charstart
, patinend
-patinput
);
3047 memset(charstart
, 0, patinend
-patinput
);
3050 for (no
= 0; patinput
< patinend
;
3051 CHARINC(patinput
, patinend
))
3053 charstart
[patinput
-start
] = 1;
3056 /* simple optimization for reasonably common case */
3057 if (P_OP(next
) == P_END
)
3060 DPUTS(patglobflags
& 0xff,
3061 "BUG: wrong backtracking with approximation.");
3062 if (!globdots
&& P_NOTDOT(P_OPERAND(scan
)) &&
3063 patinput
== patinstart
&& patinput
< patinend
&&
3064 CHARREF(patinput
, patinend
) == ZWC('.'))
3066 no
= patrepeat(P_OPERAND(scan
), charstart
);
3068 min
= (op
== P_TWOHASH
) ? 1 : 0;
3070 * Lookahead to avoid useless matches. This is not possible
3071 * with approximation.
3073 if (P_OP(next
) == P_EXACTLY
&& P_LS_LEN(next
) &&
3074 !(patglobflags
& 0xff)) {
3075 char *nextop
= P_LS_STR(next
);
3076 #ifdef MULTIBYTE_SUPPORT
3077 /* else second argument of CHARREF isn't used */
3078 int nextlen
= P_LS_LEN(next
);
3081 * If that P_EXACTLY is last (common in simple patterns,
3082 * such as *.c), then it can be only be matched at one
3083 * point in the test string, so record that.
3085 if (P_OP(PATNEXT(next
)) == P_END
&&
3086 !(patflags
& PAT_NOANCH
)) {
3087 int ptlen
= patinend
- patinput
;
3088 int lenmatch
= patinend
-
3089 (min
? CHARNEXT(start
, patinend
) : start
);
3090 /* Are we in the right range? */
3091 if (P_LS_LEN(next
) > lenmatch
||
3092 P_LS_LEN(next
) < ptlen
)
3094 /* Yes, just position appropriately and test. */
3095 patinput
+= ptlen
- P_LS_LEN(next
);
3097 * Here we will need to be careful that patinput is not
3098 * in the middle of a multibyte character.
3100 /* Continue loop with P_EXACTLY test. */
3103 nextch
= CHARREF(nextop
, nextop
+ nextlen
);
3106 savglobflags
= patglobflags
;
3107 saverrsfound
= errsfound
;
3108 lastcharstart
= charstart
+ (patinput
- start
);
3111 patint_t charmatch_cache
;
3112 if (nextch
== PEOF
||
3113 (patinput
< patinend
&&
3114 CHARMATCH_EXPR(CHARREF(patinput
, patinend
),
3121 /* find start of previous full character */
3122 while (!*--lastcharstart
)
3123 DPUTS(lastcharstart
< charstart
,
3124 "lastcharstart invalid");
3125 patinput
= start
+ (lastcharstart
-charstart
);
3126 patglobflags
= savglobflags
;
3127 errsfound
= saverrsfound
;
3132 * As with branches, the patmatch(next) stuff for *
3133 * handles approximation, so we don't need to try
3138 if (patinput
!= patinstart
|| (patflags
& PAT_NOTSTART
))
3142 if (patinput
< patinend
|| (patflags
& PAT_NOTEND
))
3148 * Save and restore the current count and the
3149 * start pointer in case the pattern has been
3150 * executed by a previous repetition of a
3153 long *curptr
= &P_OPERAND(scan
)[P_CT_CURRENT
].l
;
3154 long savecount
= *curptr
;
3155 unsigned char *saveptr
= scan
[P_CT_PTR
].p
;
3159 ret
= patmatch(P_OPERAND(scan
));
3160 *curptr
= savecount
;
3161 scan
[P_CT_PTR
].p
= saveptr
;
3166 /* (#cN,M): execution is relatively straightforward */
3167 long cur
= scan
[P_CT_CURRENT
].l
;
3168 long min
= scan
[P_CT_MIN
].l
;
3169 long max
= scan
[P_CT_MAX
].l
;
3171 if (cur
&& cur
>= min
&&
3172 (unsigned char *)patinput
== scan
[P_CT_PTR
].p
) {
3174 * Not at the first attempt to match so
3175 * the previous attempt managed zero length.
3176 * We can do this indefinitely so there's
3177 * no point in going on. Simply try to
3178 * match the remainder of the pattern.
3180 return patmatch(next
);
3182 scan
[P_CT_PTR
].p
= (unsigned char *)patinput
;
3184 if (max
< 0 || cur
< max
) {
3185 char *patinput_thistime
= patinput
;
3186 scan
[P_CT_CURRENT
].l
= cur
+ 1;
3187 if (patmatch(scan
+ P_CT_OPERAND
))
3189 patinput
= patinput_thistime
;
3193 return patmatch(next
);
3196 if (!(fail
= (patinput
< patinend
&& !(patflags
& PAT_NOANCH
))))
3201 dputs("BUG: bad operand in patmatch.");
3208 if (errsfound
< (patglobflags
& 0xff) &&
3209 (forceerrs
== -1 || errsfound
< forceerrs
)) {
3211 * Approximation code. There are four possibilities
3213 * 1. omit character from input string
3214 * 2. transpose characters in input and pattern strings
3215 * 3. omit character in both input and pattern strings
3216 * 4. omit character from pattern string.
3218 * which we try in that order.
3220 * Of these, 2, 3 and 4 require an exact match string
3221 * (P_EXACTLY) while 1, 2 and 3 require that we not
3222 * have reached the end of the input string.
3224 * Note in each case after making the approximation we
3225 * need to retry the *same* pattern; this is what
3226 * requires exactpos, a slightly doleful way of
3227 * communicating with the exact character matcher.
3229 char *savexact
= exactpos
;
3231 savglobflags
= patglobflags
;
3232 saverrsfound
= ++errsfound
;
3235 DPUTS(P_OP(scan
) != P_EXACTLY
&& exactpos
,
3236 "BUG: non-exact match has set exactpos");
3238 /* Try omitting a character from the input string */
3239 if (patinput
< patinend
) {
3240 CHARINC(patinput
, patinend
);
3241 /* If we are not on an exact match, then this is
3242 * our last gasp effort, so we can optimize out
3243 * the recursive call.
3245 if (P_OP(scan
) != P_EXACTLY
)
3251 if (P_OP(scan
) == P_EXACTLY
) {
3252 char *nextexact
= savexact
;
3254 "BUG: exact match has not set exactpos");
3255 CHARINC(nextexact
, exactend
);
3257 if (save
< patinend
) {
3258 char *nextin
= save
;
3259 CHARINC(nextin
, patinend
);
3260 patglobflags
= savglobflags
;
3261 errsfound
= saverrsfound
;
3262 exactpos
= savexact
;
3265 * Try swapping two characters in patinput and
3268 if (save
< patinend
&& nextin
< patinend
&&
3269 nextexact
< exactend
) {
3270 patint_t cin0
= CHARREF(save
, patinend
);
3271 patint_t cpa0
= CHARREF(exactpos
, exactend
);
3272 patint_t cin1
= CHARREF(nextin
, patinend
);
3273 patint_t cpa1
= CHARREF(nextexact
, exactend
);
3275 if (CHARMATCH(cin0
, cpa1
) &&
3276 CHARMATCH(cin1
, cpa0
)) {
3278 CHARINC(patinput
, patinend
);
3279 exactpos
= nextexact
;
3280 CHARINC(exactpos
, exactend
);
3284 patglobflags
= savglobflags
;
3285 errsfound
= saverrsfound
;
3290 * Try moving up both strings.
3293 exactpos
= nextexact
;
3298 patglobflags
= savglobflags
;
3299 errsfound
= saverrsfound
;
3300 exactpos
= savexact
;
3303 DPUTS(exactpos
== exactend
, "approximating too far");
3305 * Try moving up the exact match pattern.
3306 * This must be the last attempt, so just loop
3307 * instead of calling recursively.
3309 CHARINC(exactpos
, exactend
);
3325 #ifdef MULTIBYTE_SUPPORT
3328 * See if character ch matches a pattern range specification.
3329 * The null-terminated specification is in range; the test
3330 * character is in ch.
3332 * indptr is used by completion matching, which is why this
3333 * function is exported. If indptr is not NULL we set *indptr
3334 * to the index of the character in the range string, adjusted
3335 * in the case of "A-B" ranges such that A would count as its
3336 * normal index (say IA), B would count as IA + (B-A), and any
3337 * character within the range as appropriate. We're not strictly
3338 * guaranteed this fits within a wint_t, but if this is Unicode
3339 * in 32 bits we have a fair amount of distance left over.
3341 * mtp is used in the same circumstances. *mtp returns the match type:
3342 * 0 for a standard character, else the PP_ index. It's not
3343 * useful if the match failed.
3348 mb_patmatchrange(char *range
, wchar_t ch
, wint_t *indptr
, int *mtp
)
3355 * Careful here: unlike other strings, range is a NULL-terminated,
3356 * metafied string, because we need to treat the Posix and hyphenated
3360 if (imeta(STOUC(*range
))) {
3361 int swtype
= STOUC(*range
++) - STOUC(Meta
);
3366 /* ordinary metafied character */
3368 if (metacharinc(&range
) == ch
)
3380 if ((ch
& ~0x7f) == 0)
3384 if (ch
== L
' ' || ch
== L
'\t')
3424 if (wcsitype(ch
, IIDENT
))
3428 if (wcsitype(ch
, ISEP
))
3432 /* must be ASCII space character */
3433 if (ch
< 128 && iwsep((int)ch
))
3437 if (wcsitype(ch
, IWORD
))
3441 r1
= metacharinc(&range
);
3442 r2
= metacharinc(&range
);
3443 if (r1
<= ch
&& ch
<= r2
) {
3448 /* Careful not to screw up counting with bogus range */
3449 if (indptr
&& r1
< r2
) {
3451 * This gets incremented again below to get
3452 * us past the range end. This is correct.
3458 DPUTS(1, "BUG: unknown posix range passed through.\n");
3461 DPUTS(1, "BUG: unknown metacharacter in range.");
3464 } else if (metacharinc(&range
) == ch
) {
3477 * This is effectively the reverse of mb_patmatchrange().
3478 * Given a range descriptor of the same form, and an index into it,
3479 * try to determine the character that is matched. If the index
3480 * points to a [:...:] generic style match, set chr to WEOF and
3481 * return the type in mtp instead. Return 1 if successful, 0 if
3482 * there was no corresponding index. Note all pointer arguments
3488 mb_patmatchindex(char *range
, wint_t ind
, wint_t *chr
, int *mtp
)
3490 wchar_t r1
, r2
, rchr
;
3497 if (imeta(STOUC(*range
))) {
3498 int swtype
= STOUC(*range
++) - STOUC(Meta
);
3502 rchr
= metacharinc(&range
);
3504 *chr
= (wint_t) rchr
;
3533 r1
= metacharinc(&range
);
3534 r2
= metacharinc(&range
);
3535 rdiff
= (wint_t)r2
- (wint_t)r1
;
3537 *chr
= (wint_t)r1
+ ind
;
3540 /* note the extra decrement to ind below */
3544 DPUTS(1, "BUG: unknown posix range passed through.\n");
3547 DPUTS(1, "BUG: unknown metacharacter in range.");
3551 rchr
= metacharinc(&range
);
3553 *chr
= (wint_t)rchr
;
3561 /* No corresponding index. */
3566 #endif /* MULTIBYTE_SUPPORT */
3569 * Identical function to mb_patmatchrange() above for single-byte
3575 patmatchrange(char *range
, int ch
, int *indptr
, int *mtp
)
3582 * Careful here: unlike other strings, range is a NULL-terminated,
3583 * metafied string, because we need to treat the Posix and hyphenated
3586 for (; *range
; range
++) {
3587 if (imeta(STOUC(*range
))) {
3588 int swtype
= STOUC(*range
) - STOUC(Meta
);
3593 if (STOUC(*++range
^ 32) == ch
)
3605 if ((ch
& ~0x7f) == 0)
3609 if (ch
== ' ' || ch
== '\t')
3666 r1
= STOUC(UNMETA(range
));
3668 r2
= STOUC(UNMETA(range
));
3671 if (r1
<= ch
&& ch
<= r2
) {
3676 if (indptr
&& r1
< r2
)
3680 DPUTS(1, "BUG: unknown posix range passed through.\n");
3683 DPUTS(1, "BUG: unknown metacharacter in range.");
3686 } else if (STOUC(*range
) == ch
) {
3699 #ifndef MULTIBYTE_SUPPORT
3702 * Identical function to mb_patmatchindex() above for single-byte
3703 * characters. Here -1 represents a character that needs a special type.
3705 * Unlike patmatchrange, we only need this in ZLE, which always
3706 * uses MULTIBYTE_SUPPORT if compiled in; hence we don't use
3707 * this function in that case.
3712 patmatchindex(char *range
, int ind
, int *chr
, int *mtp
)
3714 int r1
, r2
, rdiff
, rchr
;
3719 for (; *range
; range
++) {
3720 if (imeta(STOUC(*range
))) {
3721 int swtype
= STOUC(*range
) - STOUC(Meta
);
3724 /* ordinary metafied character */
3725 rchr
= STOUC(*++range
) ^ 32;
3757 r1
= STOUC(UNMETA(range
));
3759 r2
= STOUC(UNMETA(range
));
3767 /* note the extra decrement to ind below */
3771 DPUTS(1, "BUG: unknown posix range passed through.\n");
3774 DPUTS(1, "BUG: unknown metacharacter in range.");
3779 *chr
= STOUC(*range
);
3787 /* No corresponding index. */
3792 #endif /* MULTIBYTE_SUPPORT */
3795 * Repeatedly match something simple and say how many times.
3796 * charstart is an array parallel to that starting at patinput
3797 * and records the start of (possibly multibyte) characters
3798 * to aid in later backtracking.
3802 static int patrepeat(Upat p
, char *charstart
)
3805 patint_t tch
, charmatch_cache
;
3809 opnd
= (char *)P_OPERAND(p
);
3814 dputs("BUG: ?# did not get optimized to *");
3819 DPUTS(P_LS_LEN(p
) != 1, "closure following more than one character");
3820 tch
= CHARREF(P_LS_STR(p
), P_LS_STR(p
) + P_LS_LEN(p
));
3821 while (scan
< patinend
&&
3822 CHARMATCH_EXPR(CHARREF(scan
, patinend
), tch
)) {
3823 charstart
[scan
-patinput
] = 1;
3825 CHARINC(scan
, patinend
);
3830 while (scan
< patinend
) {
3831 #ifdef MULTIBYTE_SUPPORT
3832 wchar_t cr
= CHARREF(scan
, patinend
);
3833 if (patglobflags
& GF_MULTIBYTE
) {
3834 if (mb_patmatchrange(opnd
, cr
, NULL
, NULL
) ^
3835 (P_OP(p
) == P_ANYOF
))
3837 } else if (patmatchrange(opnd
, (int)cr
, NULL
, NULL
) ^
3838 (P_OP(p
) == P_ANYOF
))
3841 if (patmatchrange(opnd
, CHARREF(scan
, patinend
), NULL
, NULL
) ^
3842 (P_OP(p
) == P_ANYOF
))
3845 charstart
[scan
-patinput
] = 1;
3847 CHARINC(scan
, patinend
);
3852 dputs("BUG: something very strange is happening in patrepeat");
3862 /* Free a patprog. */
3866 freepatprog(Patprog prog
)
3868 if (prog
&& prog
!= dummy_patprog1
&& prog
!= dummy_patprog2
)
3869 zfree(prog
, prog
->size
);
3872 /* Disable or reenable a pattern character */
3876 pat_enables(const char *cmd
, char **patp
, int enable
)
3879 const char **stringp
;
3884 for (stringp
= zpc_strings
, disp
= zpc_disables
;
3885 stringp
< zpc_strings
+ ZPC_COUNT
;
3886 stringp
++, disp
++) {
3889 if (enable
? *disp
: !*disp
)
3893 printf("'%s'", *stringp
);
3901 for (; *patp
; patp
++) {
3902 for (stringp
= zpc_strings
, disp
= zpc_disables
;
3903 stringp
< zpc_strings
+ ZPC_COUNT
;
3904 stringp
++, disp
++) {
3905 if (*stringp
&& !strcmp(*stringp
, *patp
)) {
3906 *disp
= (char)!enable
;
3910 if (stringp
== zpc_strings
+ ZPC_COUNT
) {
3911 zerrnam(cmd
, "invalid pattern: %s", *patp
);
3920 * Save the current state of pattern disables, returning the saved value.
3925 savepatterndisables(void)
3927 unsigned int disables
, bit
;
3931 for (bit
= 1, disp
= zpc_disables
;
3932 disp
< zpc_disables
+ ZPC_COUNT
;
3933 bit
<<= 1, disp
++) {
3941 * Function scope saving pattern enables.
3946 startpatternscope(void)
3948 Zpc_disables_save newdis
;
3950 newdis
= (Zpc_disables_save
)zalloc(sizeof(*newdis
));
3951 newdis
->next
= zpc_disables_stack
;
3952 newdis
->disables
= savepatterndisables();
3954 zpc_disables_stack
= newdis
;
3958 * Restore completely the state of pattern disables.
3963 restorepatterndisables(unsigned int disables
)
3968 for (bit
= 1, disp
= zpc_disables
;
3969 disp
< zpc_disables
+ ZPC_COUNT
;
3970 bit
<<= 1, disp
++) {
3979 * Function scope to restore pattern enables if localpatterns is turned on.
3984 endpatternscope(void)
3986 Zpc_disables_save olddis
;
3988 olddis
= zpc_disables_stack
;
3989 zpc_disables_stack
= olddis
->next
;
3991 if (isset(LOCALPATTERNS
))
3992 restorepatterndisables(olddis
->disables
);
3994 zfree(olddis
, sizeof(*olddis
));
3997 /* Reinitialise pattern disables */
4001 clearpatterndisables(void)
4003 memset(zpc_disables
, 0, ZPC_COUNT
);
4007 /* Check to see if str is eligible for filename generation. */
4015 /* `[' and `]' are legal even if bad patterns are usually not. */
4016 if ((*str
== Inbrack
|| *str
== Outbrack
) && !str
[1])
4019 /* If % is immediately followed by ?, then that ? is *
4020 * not treated as a wildcard. This is so you don't have *
4021 * to escape job references such as %?foo. */
4022 if (str
[0] == '%' && str
[1] == Quest
)
4026 * Note that at this point zpc_special has not been set up.
4029 for (; *str
; str
++) {
4032 if ((!isset(SHGLOB
) && !zpc_disables
[ZPC_INPAR
]) ||
4033 (str
> start
&& isset(KSHGLOB
) &&
4034 ((str
[-1] == Quest
&& !zpc_disables
[ZPC_KSH_QUEST
]) ||
4035 (str
[-1] == Star
&& !zpc_disables
[ZPC_KSH_STAR
]) ||
4036 (str
[-1] == '+' && !zpc_disables
[ZPC_KSH_PLUS
]) ||
4037 (str
[-1] == '!' && !zpc_disables
[ZPC_KSH_BANG
]) ||
4038 (str
[-1] == '@' && !zpc_disables
[ZPC_KSH_AT
]))))
4043 if (!zpc_disables
[ZPC_BAR
])
4048 if (!zpc_disables
[ZPC_STAR
])
4053 if (!zpc_disables
[ZPC_INBRACK
])
4058 if (!zpc_disables
[ZPC_INANG
])
4063 if (!zpc_disables
[ZPC_QUEST
])
4068 if (isset(EXTENDEDGLOB
) && !zpc_disables
[ZPC_HASH
])
4073 if (isset(EXTENDEDGLOB
) && !zpc_disables
[ZPC_HAT
])