stages/*: change license to Apache 2.0
[dragora.git] / patches / unzip / unzip-6.0-fix-recmatch.patch
blob2a8583c8ca16228741c06e5d08d825cb35885e89
1 diff -up unzip60/match.c.recmatch unzip60/match.c
2 --- unzip60/match.c.recmatch 2005-08-14 13:00:36.000000000 -0400
3 +++ unzip60/match.c 2013-05-28 10:29:57.949077543 -0400
4 @@ -27,16 +27,14 @@
6 ---------------------------------------------------------------------------
8 - Copyright on recmatch() from Zip's util.c (although recmatch() was almost
9 - certainly written by Mark Adler...ask me how I can tell :-) ):
10 + Copyright on recmatch() from Zip's util.c
11 + Copyright (c) 1990-2005 Info-ZIP. All rights reserved.
13 - Copyright (C) 1990-1992 Mark Adler, Richard B. Wales, Jean-loup Gailly,
14 - Kai Uwe Rommel and Igor Mandrichenko.
15 + See the accompanying file LICENSE, version 2004-May-22 or later
16 + for terms of use.
17 + If, for some reason, both of these files are missing, the Info-ZIP license
18 + also may be found at: ftp://ftp.info-zip.org/pub/infozip/license.html
20 - Permission is granted to any individual or institution to use, copy,
21 - or redistribute this software so long as all of the original files are
22 - included unmodified, that it is not sold for profit, and that this copy-
23 - right notice is retained.
25 ---------------------------------------------------------------------------
27 @@ -53,7 +51,7 @@
29 A set is composed of characters or ranges; a range looks like ``character
30 hyphen character'' (as in 0-9 or A-Z). [0-9a-zA-Z_] is the minimal set of
31 - characters allowed in the [..] pattern construct. Other characters are
32 + characters allowed in the [..] pattern construct. Other characters are
33 allowed (i.e., 8-bit characters) if your system will support them.
35 To suppress the special syntactic significance of any of ``[]*?!^-\'', in-
36 @@ -101,8 +99,32 @@
37 # define WILDCHAR '?'
38 # define BEG_RANGE '['
39 # define END_RANGE ']'
40 +# define WILDCHR_SINGLE '?'
41 +# define DIRSEP_CHR '/'
42 +# define WILDCHR_MULTI '*'
43 #endif
45 +#ifdef WILD_STOP_AT_DIR
46 + int wild_stop_at_dir = 1; /* default wildcards do not include / in matches */
47 +#else
48 + int wild_stop_at_dir = 0; /* default wildcards do include / in matches */
49 +#endif
53 +/*
54 + * case mapping functions. case_map is used to ignore case in comparisons,
55 + * to_up is used to force upper case even on Unix (for dosify option).
56 + */
57 +#ifdef USE_CASE_MAP
58 +# define case_map(c) upper[(c) & 0xff]
59 +# define to_up(c) upper[(c) & 0xff]
60 +#else
61 +# define case_map(c) (c)
62 +# define to_up(c) ((c) >= 'a' && (c) <= 'z' ? (c)-'a'+'A' : (c))
63 +#endif /* USE_CASE_MAP */
66 #if 0 /* GRR: add this to unzip.h someday... */
67 #if !(defined(MSDOS) && defined(DOSWILD))
68 #ifdef WILD_STOP_AT_DIR
69 @@ -114,8 +136,8 @@ int recmatch OF((ZCONST uch *pattern, ZC
70 int ignore_case __WDLPRO));
71 #endif
72 #endif /* 0 */
73 -static int recmatch OF((ZCONST uch *pattern, ZCONST uch *string,
74 - int ignore_case __WDLPRO));
75 +static int recmatch OF((ZCONST char *, ZCONST char *,
76 + int));
77 static char *isshexp OF((ZCONST char *p));
78 static int namecmp OF((ZCONST char *s1, ZCONST char *s2));
80 @@ -154,192 +176,240 @@ int match(string, pattern, ignore_case _
82 dospattern[j-1] = '\0'; /* nuke the end "." */
84 - j = recmatch((uch *)dospattern, (uch *)string, ignore_case __WDL);
85 + j = recmatch(dospattern, string, ignore_case);
86 free(dospattern);
87 return j == 1;
88 } else
89 #endif /* MSDOS && DOSWILD */
90 - return recmatch((uch *)pattern, (uch *)string, ignore_case __WDL) == 1;
91 + return recmatch(pattern, string, ignore_case) == 1;
94 +#ifdef _MBCS
96 +char *___tmp_ptr;
98 +#endif
100 -static int recmatch(p, s, ic __WDL)
101 - ZCONST uch *p; /* sh pattern to match */
102 - ZCONST uch *s; /* string to which to match it */
103 - int ic; /* true for case insensitivity */
104 - __WDLDEF /* directory sepchar for WildStopAtDir mode, or 0 */
105 +static int recmatch(p, s, cs)
106 +ZCONST char *p; /* sh pattern to match */
107 +ZCONST char *s; /* string to match it to */
108 +int cs; /* flag: force case-sensitive matching. NOTE(review): match() passes ignore_case for this argument -- polarity looks inverted; confirm */
109 /* Recursively compare the sh pattern p with the string s and return 1 if
110 - * they match, and 0 or 2 if they don't or if there is a syntax error in the
111 - * pattern. This routine recurses on itself no more deeply than the number
112 - * of characters in the pattern. */
113 + they match, and 0 or 2 if they don't or if there is a syntax error in the
114 + pattern. This routine recurses on itself no deeper than the number of
115 + characters in the pattern. */
117 - unsigned int c; /* pattern char or start of range in [-] loop */
118 + int c; /* pattern char or start of range in [-] loop */
119 + /* Get first character, the pattern for new recmatch calls follows */
120 + /* borrowed from Zip's global.c */
121 + int no_wild = 0;
122 + int allow_regex=1;
123 + /* This fix provided by akt@m5.dion.ne.jp for Japanese.
124 + See 21 July 2006 mail.
125 + It only applies when p is pointing to a doublebyte character and
126 + things like / and wildcards are not doublebyte. This probably
127 + should not be needed. */
129 - /* Get first character, the pattern for new recmatch calls follows */
130 - c = *p; INCSTR(p);
131 +#ifdef _MBCS
132 + if (CLEN(p) == 2) {
133 + if (CLEN(s) == 2) {
134 + return (*p == *s && *(p+1) == *(s+1)) ?
135 + recmatch(p + 2, s + 2, cs) : 0;
136 + } else {
137 + return 0;
140 +#endif /* ?_MBCS */
142 - /* If that was the end of the pattern, match if string empty too */
143 - if (c == 0)
144 - return *s == 0;
145 + c = *POSTINCSTR(p);
147 - /* '?' (or '%') matches any character (but not an empty string). */
148 - if (c == WILDCHAR)
149 -#ifdef WILD_STOP_AT_DIR
150 - /* If uO.W_flag is non-zero, it won't match '/' */
151 - return (*s && (!sepc || *s != (uch)sepc))
152 - ? recmatch(p, s + CLEN(s), ic, sepc) : 0;
153 -#else
154 - return *s ? recmatch(p, s + CLEN(s), ic) : 0;
155 -#endif
156 + /* If that was the end of the pattern, match if string empty too */
157 + if (c == 0)
158 + return *s == 0;
160 + /* '?' (or '%' or '#') matches any character (but not an empty string) */
161 + if (c == WILDCHR_SINGLE) {
162 + if (wild_stop_at_dir)
163 + return (*s && *s != DIRSEP_CHR) ? recmatch(p, s + CLEN(s), cs) : 0;
164 + else
165 + return *s ? recmatch(p, s + CLEN(s), cs) : 0;
168 - /* '*' matches any number of characters, including zero */
169 + /* WILDCHR_MULTI ('*') matches any number of characters, including zero */
170 #ifdef AMIGA
171 - if (c == '#' && *p == '?') /* "#?" is Amiga-ese for "*" */
172 - c = '*', p++;
173 + if (!no_wild && c == '#' && *p == '?') /* "#?" is Amiga-ese for "*" */
174 + c = WILDCHR_MULTI, p++;
175 #endif /* AMIGA */
176 - if (c == '*') {
177 -#ifdef WILD_STOP_AT_DIR
178 - if (sepc) {
179 - /* check for single "*" or double "**" */
180 -# ifdef AMIGA
181 - if ((c = p[0]) == '#' && p[1] == '?') /* "#?" is Amiga-ese for "*" */
182 - c = '*', p++;
183 - if (c != '*') {
184 -# else /* !AMIGA */
185 - if (*p != '*') {
186 -# endif /* ?AMIGA */
187 - /* single "*": this doesn't match the dirsep character */
188 - for (; *s && *s != (uch)sepc; INCSTR(s))
189 - if ((c = recmatch(p, s, ic, sepc)) != 0)
190 - return (int)c;
191 - /* end of pattern: matched if at end of string, else continue */
192 - if (*p == '\0')
193 - return (*s == 0);
194 - /* continue to match if at sepc in pattern, else give up */
195 - return (*p == (uch)sepc || (*p == '\\' && p[1] == (uch)sepc))
196 - ? recmatch(p, s, ic, sepc) : 2;
198 - /* "**": this matches slashes */
199 - ++p; /* move p behind the second '*' */
200 - /* and continue with the non-W_flag code variant */
202 -#endif /* WILD_STOP_AT_DIR */
203 + if (!no_wild && c == WILDCHR_MULTI)
205 + if (wild_stop_at_dir) {
206 + /* Check for an immediately following WILDCHR_MULTI */
207 +# ifdef AMIGA
208 + if ((c = p[0]) == '#' && p[1] == '?') /* "#?" is Amiga-ese for "*" */
209 + c = WILDCHR_MULTI, p++;
210 + if (c != WILDCHR_MULTI) {
211 +# else /* !AMIGA */
212 + if (*p != WILDCHR_MULTI) {
213 +# endif /* ?AMIGA */
214 + /* Single WILDCHR_MULTI ('*'): this doesn't match slashes */
215 + for (; *s && *s != DIRSEP_CHR; INCSTR(s))
216 + if ((c = recmatch(p, s, cs)) != 0)
217 + return c;
218 + /* end of pattern: matched if at end of string, else continue */
219 if (*p == 0)
220 - return 1;
221 - if (isshexp((ZCONST char *)p) == NULL) {
222 - /* Optimization for rest of pattern being a literal string:
223 - * If there are no other shell expression chars in the rest
224 - * of the pattern behind the multi-char wildcard, then just
225 - * compare the literal string tail.
226 - */
227 - ZCONST uch *srest;
229 - srest = s + (strlen((ZCONST char *)s) - strlen((ZCONST char *)p));
230 - if (srest - s < 0)
231 - /* remaining literal string from pattern is longer than rest
232 - * of test string, there can't be a match
233 - */
234 - return 0;
235 - else
236 - /* compare the remaining literal pattern string with the last
237 - * bytes of the test string to check for a match
238 - */
239 + return (*s == 0);
240 + /* continue to match if at DIRSEP_CHR in pattern, else give up */
241 + return (*p == DIRSEP_CHR || (*p == '\\' && p[1] == DIRSEP_CHR))
242 + ? recmatch(p, s, cs) : 2;
244 + /* Two consecutive WILDCHR_MULTI ("**"): this matches DIRSEP_CHR ('/') */
245 + p++; /* move p past the second WILDCHR_MULTI */
246 + /* continue with the normal non-WILD_STOP_AT_DIR code */
247 + } /* wild_stop_at_dir */
249 + /* Not wild_stop_at_dir */
250 + if (*p == 0)
251 + return 1;
252 + if (!isshexp((char *)p))
254 + /* optimization for rest of pattern being a literal string */
256 + /* optimization to handle patterns like *.txt */
257 + /* if the first char in the pattern is '*' and there */
258 + /* are no other shell expression chars, i.e. a literal string */
259 + /* then just compare the literal string at the end */
261 + ZCONST char *srest;
263 + srest = s + (strlen(s) - strlen(p));
264 + if (srest - s < 0)
265 + /* remaining literal string from pattern is longer than rest of
266 + test string, there can't be a match
267 + */
268 + return 0;
269 + else
270 + /* compare the remaining literal pattern string with the last bytes
271 + of the test string to check for a match */
272 #ifdef _MBCS
274 - ZCONST uch *q = s;
276 + ZCONST char *q = s;
278 - /* MBCS-aware code must not scan backwards into a string from
279 - * the end.
280 - * So, we have to move forward by character from our well-known
281 - * character position s in the test string until we have
282 - * advanced to the srest position.
283 - */
284 - while (q < srest)
285 - INCSTR(q);
286 - /* In case the byte *srest is a trailing byte of a multibyte
287 - * character in the test string s, we have actually advanced
288 - * past the position (srest).
289 - * For this case, the match has failed!
290 - */
291 - if (q != srest)
292 - return 0;
293 - return ((ic
294 - ? namecmp((ZCONST char *)p, (ZCONST char *)q)
295 - : strcmp((ZCONST char *)p, (ZCONST char *)q)
296 - ) == 0);
298 + /* MBCS-aware code must not scan backwards into a string from
299 + * the end.
300 + * So, we have to move forward by character from our well-known
301 + * character position s in the test string until we have advanced
302 + * to the srest position.
303 + */
304 + while (q < srest)
305 + INCSTR(q);
306 + /* In case the byte *srest is a trailing byte of a multibyte
307 + * character, we have actually advanced past the position (srest).
308 + * For this case, the match has failed!
309 + */
310 + if (q != srest)
311 + return 0;
312 + return ((cs ? strcmp(p, q) : namecmp(p, q)) == 0);
314 #else /* !_MBCS */
315 - return ((ic
316 - ? namecmp((ZCONST char *)p, (ZCONST char *)srest)
317 - : strcmp((ZCONST char *)p, (ZCONST char *)srest)
318 - ) == 0);
319 + return ((cs ? strcmp(p, srest) : namecmp(p, srest)) == 0);
320 #endif /* ?_MBCS */
321 - } else {
322 - /* pattern contains more wildcards, continue with recursion... */
323 - for (; *s; INCSTR(s))
324 - if ((c = recmatch(p, s, ic __WDL)) != 0)
325 - return (int)c;
326 - return 2; /* 2 means give up--match will return false */
330 - /* Parse and process the list of characters and ranges in brackets */
331 - if (c == BEG_RANGE) {
332 - int e; /* flag true if next char to be taken literally */
333 - ZCONST uch *q; /* pointer to end of [-] group */
334 - int r; /* flag true to match anything but the range */
336 - if (*s == 0) /* need a character to match */
337 - return 0;
338 - p += (r = (*p == '!' || *p == '^')); /* see if reverse */
339 - for (q = p, e = 0; *q; INCSTR(q)) /* find closing bracket */
340 - if (e)
341 - e = 0;
342 - else
343 - if (*q == '\\') /* GRR: change to ^ for MS-DOS, OS/2? */
344 - e = 1;
345 - else if (*q == END_RANGE)
346 - break;
347 - if (*q != END_RANGE) /* nothing matches if bad syntax */
348 - return 0;
349 - for (c = 0, e = (*p == '-'); p < q; INCSTR(p)) {
350 - /* go through the list */
351 - if (!e && *p == '\\') /* set escape flag if \ */
352 - e = 1;
353 - else if (!e && *p == '-') /* set start of range if - */
354 - c = *(p-1);
355 - else {
356 - unsigned int cc = Case(*s);
358 - if (*(p+1) != '-')
359 - for (c = c ? c : *p; c <= *p; c++) /* compare range */
360 - if ((unsigned)Case(c) == cc) /* typecast for MSC bug */
361 - return r ? 0 : recmatch(q + 1, s + 1, ic __WDL);
362 - c = e = 0; /* clear range, escape flags */
365 - return r ? recmatch(q + CLEN(q), s + CLEN(s), ic __WDL) : 0;
366 - /* bracket match failed */
367 + else
369 + /* pattern contains more wildcards, continue with recursion... */
370 + for (; *s; INCSTR(s))
371 + if ((c = recmatch(p, s, cs)) != 0)
372 + return c;
373 + return 2; /* 2 means give up--match will return false */
377 - /* if escape ('\\'), just compare next character */
378 - if (c == '\\' && (c = *p++) == 0) /* if \ at end, then syntax error */
379 - return 0;
380 +#ifndef VMS /* No bracket matching in VMS */
381 + /* Parse and process the list of characters and ranges in brackets */
382 + if (!no_wild && allow_regex && c == '[')
384 + int e; /* flag true if next char to be taken literally */
385 + ZCONST char *q; /* pointer to end of [-] group */
386 + int r; /* flag true to match anything but the range */
388 + if (*s == 0) /* need a character to match */
389 + return 0;
390 + p += (r = (*p == '!' || *p == '^')); /* see if reverse */
391 + for (q = p, e = 0; *q; q++) /* find closing bracket */
392 + if (e)
393 + e = 0;
394 + else
395 + if (*q == '\\')
396 + e = 1;
397 + else if (*q == ']')
398 + break;
399 + if (*q != ']') /* nothing matches if bad syntax */
400 + return 0;
401 + for (c = 0, e = *p == '-'; p < q; p++) /* go through the list */
403 + if (e == 0 && *p == '\\') /* set escape flag if \ */
404 + e = 1;
405 + else if (e == 0 && *p == '-') /* set start of range if - */
406 + c = *(p-1);
407 + else
409 + uch cc = (cs ? (uch)*s : case_map((uch)*s));
410 + uch uc = (uch) c;
411 + if (*(p+1) != '-')
412 + for (uc = uc ? uc : (uch)*p; uc <= (uch)*p; uc++)
413 + /* compare range */
414 + if ((cs ? uc : case_map(uc)) == cc)
415 + return r ? 0 : recmatch(q + CLEN(q), s + CLEN(s), cs);
416 + c = e = 0; /* clear range, escape flags */
419 + return r ? recmatch(q + CLEN(q), s + CLEN(s), cs) : 0;
420 + /* bracket match failed */
422 +#endif /* !VMS */
424 - /* just a character--compare it */
425 -#ifdef QDOS
426 - return QMatch(Case((uch)c), Case(*s)) ?
427 - recmatch(p, s + CLEN(s), ic __WDL) : 0;
428 -#else
429 - return Case((uch)c) == Case(*s) ?
430 - recmatch(p, s + CLEN(s), ic __WDL) : 0;
431 -#endif
432 + /* If escape ('\'), just compare next character */
433 + if (!no_wild && c == '\\')
434 + if ((c = *p++) == '\0') /* if \ at end, then syntax error */
435 + return 0;
437 +#ifdef VMS
438 + /* 2005-11-06 SMS.
439 + Handle "..." wildcard in p with "." or "]" in s.
440 + */
441 + if ((c == '.') && (*p == '.') && (*(p+ CLEN( p)) == '.') &&
442 + ((*s == '.') || (*s == ']')))
444 + /* Match "...]" with "]". Continue after "]" in both. */
445 + if ((*(p+ 2* CLEN( p)) == ']') && (*s == ']'))
446 + return recmatch( (p+ 3* CLEN( p)), (s+ CLEN( s)), cs);
448 + /* Else, look for a reduced match in s, until "]" in s or the end of s. */
449 + for (; *s && (*s != ']'); INCSTR(s))
450 + if (*s == '.')
451 + /* If reduced match, then continue after "..." in p, "." in s. */
452 + if ((c = recmatch( (p+ CLEN( p)), s, cs)) != 0)
453 + return (int)c;
455 + /* Match "...]" with "]". Continue after "]" in both. */
456 + if ((*(p+ 2* CLEN( p)) == ']') && (*s == ']'))
457 + return recmatch( (p+ 3* CLEN( p)), (s+ CLEN( s)), cs);
459 + /* No reduced match. Quit. */
460 + return 2;
463 +#endif /* def VMS */
465 + /* Just a character--compare it */
466 + return (cs ? c == *s : case_map((uch)c) == case_map((uch)*s)) ?
467 + recmatch(p, s + CLEN(s), cs) : 0;
470 -} /* end function recmatch() */
474 +/*************************************************************************************************/
475 static char *isshexp(p)
476 ZCONST char *p;
477 /* If p is a sh expression, a pointer to the first special character is