port to arm64
[lnanohttp.git] / ulinux / utils / ascii / match / match.c
bloba62515c8078017c2963deeacd95106248ffca00c
1 #ifndef ULINUX_UTILS_ASCII_MATCH_MATCH_C
2 #define ULINUX_UTILS_ASCII_MATCH_MATCH_C
3 /*
4 * This is a derived work based on GNU glibc implementation on 20130409.
5 * Switch to GNU Lesser GPLv3 protection.
6 * author:Sylvain BERTRAND
7 */
8 /******************************************************************************/
9 /*
10 * NOTE:do *NOT* even try to read that code without ksh extended glob
11 * specifications, and keeping an eye on the abbreviations below
13 * upstream code is hard to grasp
15 /******************************************************************************/
16 #include <stdbool.h>
17 #include <ulinux/compiler_types.h>
18 #include <ulinux/types.h>
19 #include <ulinux/sysc.h>
20 #include <ulinux/error.h>
22 #include <ulinux/utils/mem.h>
23 #include <ulinux/utils/ascii/ascii.h>
24 #include <ulinux/utils/ascii/string/string.h>
25 #include <ulinux/utils/ascii/match/match.h>
26 #include <ulinux/mmap.h>
28 /*----------------------------------------------------------------------------*/
29 /* "One Compilation Unit" support */
30 #ifdef ULINUX_UTILS_EXTERNAL
31 #define ULINUX_EXPORT
32 #else
33 #define ULINUX_EXPORT static
34 #endif
35 /*----------------------------------------------------------------------------*/
37 /*----------------------------------------------------------------------------*/
38 /* ulinux namespace */
39 #define loop for(;;)
40 #define ANONYMOUS ULINUX_MAP_ANONYMOUS
41 #define CASEFOLD ULINUX_MATCH_CASEFOLD
42 #define ERR ULINUX_MATCH_ERR
43 #define ERR_NOMEM ULINUX_MATCH_ERR_NOMEM
44 #define EXTMATCH ULINUX_MATCH_EXTMATCH
45 #define is_alnum ulinux_is_alnum
46 #define is_alpha ulinux_is_alpha
47 #define is_blank ulinux_is_blank
48 #define is_cntrl ulinux_is_cntrl
49 #define is_digit ulinux_is_digit
50 #define is_graph ulinux_is_graph
51 #define is_lower ulinux_is_lower
52 #define is_print ulinux_is_print
53 #define is_punct ulinux_is_punct
54 #define is_space ulinux_is_space
55 #define is_upper ulinux_is_upper
56 #define is_xdigit ulinux_is_xdigit
57 #define ISERR ULINUX_ISERR
58 #define sl ulinux_sl
59 #define MATCH ULINUX_MATCH_MATCH
60 #define memchr ulinux_memchr
61 #define memcpy ulinux_memcpy
62 #define mmap(a,b,c) ulinux_sysc(mmap,6,0,a,b,c,-1,0)
63 #define mremap(a,b,c) ulinux_sysc(mremap,4,a,b,c,0)
64 #define munmap(a,b) ulinux_sysc(munmap,2,a,b)
65 #define NOMATCH ULINUX_MATCH_NOMATCH
66 #define PRIVATE ULINUX_MAP_PRIVATE
67 #define RD ULINUX_PROT_READ
68 #define s64 ulinux_s64
69 #define s8 ulinux_s8
70 #define u16 ulinux_u16
71 #define u64 ulinux_u64
72 #define u8 ulinux_u8
73 #define strcat ulinux_strcat
74 #define strcmp ulinux_strcmp
75 #define strlen ulinux_strlen
76 #define WR ULINUX_PROT_WRITE
77 /*----------------------------------------------------------------------------*/
78 /* local symbols and types */
79 #define match_params ulinux_utils_ascii_match_match_params
80 #define fold ulinux_utils_ascii_match_fold
81 #define endp ulinux_utils_ascii_match_endp
82 #define star_handler ulinux_utils_ascii_match_star_handler
83 #define bracket_skip ulinux_utils_ascii_match_bracket_skip
84 #define bracket_matched ulinux_utils_ascii_match_bracket_matched
85 #define bracket_normal ulinux_utils_ascii_match_bracket_normal
86 #define bracket_char_class_get ulinux_utils_ascii_match_bracket_char_class_get
87 #define bracket_escape ulinux_utils_ascii_match_bracket_escape
88 #define bracket_class ulinux_utils_ascii_match_bracket_class
89 #define bracket_handler ulinux_utils_ascii_match_bracket_handler
90 #define question_mark_handler ulinux_utils_ascii_match_question_mark_handler
91 #define misc_handler ulinux_utils_ascii_match_misc_handler
92 #define escape_handler ulinux_utils_ascii_match_escape_handler
93 #define special_char ulinux_utils_ascii_match_special_char
94 #define match ulinux_utils_ascii_match_match
95 #define p ulinux_utils_ascii_match_p
96 #define ep ulinux_utils_ascii_match_ep
97 #define ep_p_new ulinux_utils_ascii_match_ep_p_new
98 #define ep_p_get ulinux_utils_ascii_match_ep_p_get
99 #define ep_p_next ulinux_utils_ascii_match_ep_p_next
100 #define ep_split ulinux_utils_ascii_match_ep_split
101 #define match_at_least_once ulinux_utils_ascii_match_match_at_least_once
102 #define match_only_once ulinux_utils_ascii_match_match_only_once
103 #define match_none ulinux_utils_ascii_match_match_none
104 #define extended_match_try ulinux_utils_ascii_match_extended_match_try
105 /*----------------------------------------------------------------------------*/
107 * ABBREVIATIONS
108 * addr address
109 * c character
110 * char(s) character(s)
111 * cur current
112 * ep(s) extended pattern(s)
113 * err error
114 * nomem no memory
115 * of offset
116 * p(s) pattern(s)
117 * params parameters
118 * pc pattern char
119 * r return value
120 * rd read
121 * rem remainder
122 * sc string char
123 * str string
124 * sz size
125 * wr write
127 /*----------------------------------------------------------------------------*/
128 #define CHAR_CLASS_MAX (sizeof("xdigit") - 1)
129 #define STREQ(s1,s2) (strcmp(s1, s2) == 0)
130 /*----------------------------------------------------------------------------*/
131 /* return values */
132 /*#define ULINUX_MATCH_ERR_NOMEM -3*/
133 /*#define ULINUX_MATCH_ERR -2*/
134 #define NO_VALID_P -1
135 /*#define ULINUX_MATCH_MATCH 0*/
136 #define OK 0
137 /*#define ULINUX_MATCH_NOMATCH 1*/
138 /*----------------------------------------------------------------------------*/
140 * In the context a match call, it's used to store the location of the
141 * "ending" * following, at the same depth, a "starting" *.
142 * The matcher is locked in the star_handler between 2 stars at the
143 * same depth.
145 struct match_params {
146 void *p;
147 void *str;
150 static u8 fold(u8 c, u8 flgs)
152 if(!(flgs & CASEFOLD))
153 return c;
154 return ulinux_2lower(c);
158 * Recursively skip a p. Return a pointer right after it, or a pointer on the
159 *start of the p to skip if the 0 terminating char is encountered before the
160 * end of the p.
162 static u8 *end_p(u8 *p)
164 u8 *pc = p;
165 loop {
166 if (*++pc == '\0')
167 return p; /* this is an invalid p */
168 else if (*pc == '[') {/* handle brackets special */
170 * Skip the not sign. We have to recognize it because of
171 * a possibly following ']'.
173 if ((*++pc == '!') || (*pc == '^'))
174 ++pc;
175 if(*pc == ']')
176 ++pc; /* a leading ']' is recognized as such */
177 /* skip over all chars of the list */
178 loop {
179 if (*pc == ']')
180 break;
181 if(*pc++ == '\0')
182 return p; /* this is no valid p */
184 } else if (((*pc == '?') || (*pc == '*') || (*pc == '+')
185 || (*pc == '@') || (*pc == '!')) && (pc[1] == '(')) {
186 pc = end_p(pc + 1);
187 } else if (*pc == ')') {
188 break;
191 return pc + 1;
194 static s8 extended_match_try(u8 type, u8 *p, u8 *str, u8 *str_end, u8 flgs);
195 static s8 match(u8 *p, u8 *str, u8 *str_end, u8 flgs,
196 struct match_params *end_star);
198 /*----------------------------------------------------------------------------*/
199 /* return value for the following handlers */
200 #define BIT(x) (1 << x)
201 #define R_GET(r) ((s8)(r & 0xff))
202 #define R_SET(r) ((u16)(r & 0xff))
203 #define RETURN_R BIT(9)
204 #define MATCH_NORMAL BIT(10)
205 #define NEXT_STR_CHAR BIT(11)
206 #define NEXT_P_CHAR BIT(12)
207 #define TEST_STR_END_REACHED BIT(13)
208 /*----------------------------------------------------------------------------*/
210 static u16 star_handler(u8 *c, u8 **pc, u8 **sc, u8 *str_end, u8 flgs,
211 struct match_params *end_star)
213 struct match_params local_end_star;
215 if ((flgs & EXTMATCH) && (**pc == '(')) {
216 s8 r = extended_match_try(*c, *pc, *sc, str_end, flgs);
217 if (r != NO_VALID_P)
218 return RETURN_R | R_SET(r);
219 } else if (end_star) {
220 /* this star will exit the current star context matching */
221 end_star->p = *pc - 1;
222 end_star->str = *sc;
223 return RETURN_R | R_SET(MATCH);
226 loop {
227 *c = *(*pc)++;
230 * do "absorb" in the current * the following sequence of
231 * ?,*,?() and *()
233 if ((*c != '?') && (*c != '*'))
234 break;
237 * ?() and *() are "zero or one|more" occurences of matched
238 * patterns, then absorbed them in the *
240 if ((**pc == '(') && (flgs & EXTMATCH)) {
241 u8 *endp = end_p(*pc);
242 if (endp != *pc) { /* This is a p. Skip over it */
243 *pc = endp;
244 continue;
248 if (*c == '?') { /* match one any char from the string */
249 /* a ? needs to match one char */
250 if (*sc == str_end) {
251 /* there isn't another char; no match */
252 return RETURN_R | R_SET(NOMATCH);
253 } else {
255 * one char of the str is consumed in matching
256 * this ? wildcard, so *??? won't match if there
257 * are less than three chars
259 ++(*sc);
264 /* the wildcard(s) is/are the last element of the p flag is set */
265 if (*c == '\0')
266 return RETURN_R | R_SET(MATCH);
268 local_end_star.p = 0;
270 if ((*c == '[') || ((flgs & EXTMATCH) && ((*c == '@') || (*c == '+')
271 || (*c=='!')) && (**pc == '('))) {
272 /* the * is followed by a bracket or an "not absorbed" ep */
273 (*pc)--;
275 * See explanation of star context matching right below. Instead
276 * of a char like below, it's an ep.
278 loop {
279 s8 r;
280 if (*sc == str_end)
281 break;
282 r = match(*pc, *sc, str_end, flgs, &local_end_star);
283 if (r == MATCH) {
284 if (local_end_star.p == 0)
285 return RETURN_R | R_SET(MATCH);
286 break;
288 ++(*sc);
290 } else { /* the * is followed by a "normal" char */
291 if (*c == '\\')
292 *c=**pc;
293 *c = fold(*c, flgs);
294 (*pc)--;
296 * scan the str till we find a char which is the same than the
297 * first p char right after the *, then start to match in star
298 * context, namely till we reach the end, or we find another
299 * will will end the matching in that context
301 loop {
302 if (*sc == str_end)
303 break;
304 if(fold(**sc, flgs) == *c) {
305 s8 r = match(*pc, *sc, str_end, flgs,
306 &local_end_star);
307 if (r == MATCH) {
308 if (local_end_star.p == 0)
309 return RETURN_R | R_SET(MATCH);
310 break;
313 ++(*sc);
315 if (local_end_star.p != 0) {
316 /* we are finish to match the str in star context */
317 *pc = local_end_star.p;
318 *sc = local_end_star.str;
319 return NEXT_P_CHAR;
322 /* if we come here no match is possible with the wildcard */
323 return RETURN_R | R_SET(NOMATCH);
326 static u16 bracket_skip(u8 *c, u8 **pc)
328 loop {
329 ignore_next:
330 *c = *(*pc)++;
332 if (*c == '\0') {
333 /* [... (unterminated) loses */
334 return RETURN_R | R_SET(NOMATCH);
337 if (*c == '\\') {
338 if (**pc == '\0')
339 return RETURN_R | R_SET(NOMATCH);
340 /* XXX 1003.2d11 is unclear if this is right */
341 ++(*pc);
342 } else if ((*c == '[') && (**pc == ':')) {
343 u8 char_class_n = 0;
344 u8 *char_class_start_pc = *pc;
346 loop {
347 *c = *++(*pc);
348 if (++char_class_n == (CHAR_CLASS_MAX + 1))
349 return RETURN_R | R_SET(NOMATCH);
351 if ((**pc == ':') && ((*pc)[1] == ']'))
352 break;
354 if ((*c < 'a') || (*c >= 'z')) {
355 /* is_lower(c) && is_weaker_alpha(c) */
356 *pc = char_class_start_pc;
357 goto ignore_next; /* XXX: goto here */
360 (*pc) += 2;
361 *c = *(*pc)++;
363 if (*c == ']')
364 break;
366 return OK;
369 static u16 bracket_matched(u8 *c, u8 **pc, u8 not)
371 u16 r = bracket_skip(c, pc);
372 if (r != OK)
373 return r;
375 if (not)
376 return RETURN_R | R_SET(NOMATCH);
377 return NEXT_STR_CHAR;
380 /* input:*c may not be folded */
381 static u16 bracket_normal(u8 *c, u8 **pc, u8 not, u8 sc_folded)
383 u8 c_old;
384 u8 is_range = ((**pc=='-') && ((*pc)[1] != '\0') && ((*pc)[1] != ']'));
386 if (!is_range && (*c == sc_folded))
387 return bracket_matched(c, pc, not);
389 c_old = *c;
390 *c = *(*pc)++;
392 if ((*c == '-') && (**pc != ']')) {
393 u8 c_end = *(*pc)++;
395 if (c_end == '\\')
396 c_end = *(*pc)++;
397 if (c_end == '\0')
398 return RETURN_R | R_SET(NOMATCH);
400 /* it is a range, ascii collation */
401 if ((c_old <= sc_folded) && (sc_folded <= c_end))
402 return bracket_matched(c, pc, not);
403 *c = *(*pc)++;
405 return OK;
408 static u16 bracket_char_class_get(u8 *c, u8 **pc, u8 not, u8 sc_folded,
409 u8 *char_class)
411 u8 char_class_n = 0;
412 u8 *char_class_start_pc = *pc;
414 loop {
415 if (char_class_n == (CHAR_CLASS_MAX + 1))
416 return RETURN_R | R_SET(NOMATCH);
418 *c = *++(*pc);
419 if ((*c == ':') && ((*pc)[1] == ']')) {
420 (*pc)+=2;
421 break;
424 if ((*c < 'a') || (*c >= 'z')) {
425 /* is_lower(c) && is_weaker_alpha(c) */
426 u16 r;
428 * This cannot possibly be a char class name. Rewind and
429 * try to match it as a normal range.
431 *pc = char_class_start_pc;
432 *c = '[';
433 r = bracket_normal(c, pc, not, sc_folded);
434 if (r != OK)
435 return r;
437 char_class[char_class_n++] = *c;
439 char_class[char_class_n] = '\0';
440 return OK;
443 static u16 bracket_escape(u8 *c, u8 **pc, u8 flgs, u8 not, u8 sc_folded)
445 u16 r;
447 if (**pc == '\0')
448 return RETURN_R | R_SET(NOMATCH);
449 *c = fold(**pc, flgs);
450 ++(*pc);
451 r = bracket_normal(c, pc, not, sc_folded);
452 if (r != OK)
453 return r;
454 return OK;
457 /* input: *c=='[' **pc==':' */
458 static u16 bracket_class(u8 *c, u8 **pc, u8 **sc, u8 not, u8 sc_folded)
460 /*don't forget the 0 terminating char*/
461 u8 char_class[CHAR_CLASS_MAX + 1];
462 u16 r = bracket_char_class_get(c, pc, not, sc_folded, &char_class[0]);
463 if (r != OK)
464 return r;
466 if ( (STREQ(char_class,"alnum") && is_alnum(**sc))
467 || (STREQ(char_class,"alpha") && is_alpha(**sc))
468 || (STREQ(char_class,"blank") && is_blank(**sc))
469 || (STREQ(char_class,"cntrl") && is_cntrl(**sc))
470 || (STREQ(char_class,"digit") && is_digit(**sc))
471 || (STREQ(char_class,"graph") && is_graph(**sc))
472 || (STREQ(char_class,"lower") && is_lower(**sc))
473 || (STREQ(char_class,"print") && is_print(**sc))
474 || (STREQ(char_class,"punct") && is_punct(**sc))
475 || (STREQ(char_class,"space") && is_space(**sc))
476 || (STREQ(char_class,"upper") && is_upper(**sc))
477 || (STREQ(char_class,"xdigit") && is_xdigit(**sc)))
478 return bracket_matched(c, pc, not);
479 *c = *(*pc)++;
480 return OK;
483 static u16 bracket_handler(u8 *c, u8 **pc, u8 **sc, void *str_end, u8 flgs)
485 u8 not;
486 u8 sc_folded;
488 u8 *pc_init = *pc;
489 if (*sc == str_end)
490 return RETURN_R | R_SET(NOMATCH);
492 not =((**pc == '!') || (**pc == '^'));
493 if (not)
494 ++(*pc);
496 sc_folded = fold(**sc, flgs);
498 *c = *(*pc)++;
499 loop {
500 if (*c == '\\') {
501 u16 r = bracket_escape(c, pc, flgs, not, sc_folded);
502 if (r != OK)
503 return r;
504 } else if ((*c == '[') && (**pc == ':')) {
505 u16 r = bracket_class(c, pc, sc, not, sc_folded);
506 if (r != OK)
507 return r;
508 } else if (c == '\0') {
510 * [ unterminated, rewind and tell above to match normal
512 *pc = pc_init;
513 *c = '[';
514 return MATCH_NORMAL;
515 } else {
516 u16 r;
517 *c = fold(*c, flgs);
518 r = bracket_normal(c, pc, not, sc_folded);
519 if (r != 0)
520 return r;
522 if (*c == ']')
523 break;
525 if (!not)
526 return RETURN_R | R_SET(NOMATCH);
527 return NEXT_STR_CHAR;
530 static u16 question_mark_handler(u8 c, u8 *pc, u8 *sc, void *str_end, u8 flgs)
532 if ((flgs & EXTMATCH) && (*pc=='(')) {
533 s8 r = extended_match_try(c, pc, sc, str_end, flgs);
534 if (r != NO_VALID_P)
535 return RETURN_R | R_SET(r);
537 return TEST_STR_END_REACHED;
540 static u16 misc_handler(u8 c, u8 *pc, u8 *sc, void *str_end, u8 flgs)
542 if ((flgs & EXTMATCH) && (*pc=='(')) {
543 s8 r = extended_match_try(c, pc, sc, str_end, flgs);
544 if (r != NO_VALID_P)
545 return RETURN_R | R_SET(r);
547 return MATCH_NORMAL;
550 static u16 escape_handler(u8 *c, u8 **pc, u8 flgs)
552 u16 r = MATCH_NORMAL;
553 *c = *(*pc)++;
554 if (c=='\0')
555 return r | RETURN_R | R_SET(NOMATCH); /*railing \ loses*/
556 *c = fold(*c, flgs);
557 return r;
560 static u16 special_char(u8 *c, u8 **pc, u8 **sc, void *str_end, u8 flgs,
561 struct match_params *end_star)
563 u16 r = RETURN_R | R_SET(ERR);
564 switch (*c) {
565 case '[':
566 r = bracket_handler(c, pc, sc, str_end, flgs);
567 break;
568 case '*':
569 r = star_handler(c, pc, sc, str_end, flgs, end_star);
570 break;
571 case '?':
572 r = question_mark_handler(*c, *pc, *sc, str_end, flgs);
573 break;
574 case '+':
575 case '@':
576 case '!':
577 r = misc_handler(*c, *pc, *sc, str_end, flgs);
578 break;
579 case '\\':
580 r = escape_handler(c, pc, flgs);
581 break;
583 return r;
586 /* str_end points right after the last char of str, the '\0' char */
587 static s8 match(u8 *p, u8 *str, u8 *str_end, u8 flgs,
588 struct match_params *end_star)
590 u8 c;
592 u8 *pc = p;
593 u8 *sc = str;
595 loop {
596 u16 r;
598 c = *pc++;
599 if (c=='\0')
600 break;
602 c = fold(c, flgs);
604 switch (c) {
605 case '[':
606 case '*':
607 case '?':
608 case '+':
609 case '@':
610 case '!':
611 case '\\':
612 r = special_char(&c, &pc, &sc, str_end, flgs, end_star);
613 break;
614 default:
615 r = MATCH_NORMAL;
618 if (!(r & NEXT_STR_CHAR)) {
619 if (r & RETURN_R)
620 return R_GET(r);
621 if (r & NEXT_P_CHAR)
622 continue;
623 if ((r & MATCH_NORMAL) || (r & TEST_STR_END_REACHED))
624 if (sc == str_end)
625 return NOMATCH;
626 if(r & MATCH_NORMAL)
627 if (c != fold(*sc, flgs))
628 return NOMATCH;
630 ++sc;
633 if (sc == str_end)
634 return MATCH;
635 return NOMATCH;
638 /*============================================================================*/
639 /* ep */
640 struct p {
641 u64 sz; /* not accounting the 0 terminating char */
642 u8 str[];
645 struct ep {
646 u64 full_p_sz; /* the p sz from which the ep belongs to */
647 struct p *ps;
648 s64 last; /* offset */
649 u64 sz;
650 u8 *p_rem; /* remainder of the p (right and side of the ep) */
651 u8 type;
655 * based on the ep type, we may need to book enough room to put a worst case
656 * scenario of a concatenated sub-p with the ep p rem
658 static s8 ep_p_new(struct ep *ep,u8 *p_str,u64 p_sz)
661 * We will do some p concatenation for the following p types. Then book
662 * room for the worst case scenario.
664 u64 map_sz;
665 sl addr;
666 struct p *p;
668 u64 str_sz = (ep->type == '?') || (ep->type == '@') ? ep->full_p_sz
669 : p_sz;
671 if (!ep->ps) {/* first allocation */
672 /* count the 0 terminating char */
673 map_sz = sizeof(*ep->ps) + str_sz + 1;
674 addr = mmap(map_sz, RD | WR,PRIVATE | ANONYMOUS);
675 if (!addr || ISERR(addr))
676 return ERR_NOMEM;
678 ep->last=0;
679 } else {
680 struct p *p_last;
681 /* count the 0 terminating char */
682 u64 new_p_sz = sizeof(*ep->ps) + str_sz + 1;
683 map_sz = ep->sz + new_p_sz;
684 addr = mremap(ep->ps, ep->sz, map_sz);
685 if (!addr || ISERR(addr))
686 return ERR_NOMEM; /* ep is unmapped elsewhere */
688 p_last=(struct p*)((u8*)(ep->ps) + ep->last);
689 /* count the 0 terminating char */
690 ep->last += sizeof(*p_last) + p_last->sz + 1;
692 ep->ps = (struct p*)addr;
693 ep->sz = map_sz;
694 p = (struct p*)((u8*)(ep->ps) + ep->last);
695 p->sz = str_sz;
696 memcpy(&p->str[0], p_str, p_sz); /* copy only p_sz chars */
697 p->str[p_sz] = 0; /* set the 0 terminating char */
698 return OK;
701 static struct p *ep_p_get(struct ep *ep, s64 of)
703 return (struct p*)((u8*)(ep->ps) + of);
706 #define NO_MORE_P -1
707 static s64 ep_p_next(struct ep *ep, s64 p_of)
709 struct p *p = (struct p*)((u8*)(ep->ps) + p_of);
710 if (ep->sz == p_of + sizeof(*p) + p->sz + 1)
711 return NO_MORE_P;
712 return p_of + sizeof(*p) + p->sz + 1;
714 /*ep end*/
715 /*============================================================================*/
718 * This function splits the ep in sub-ps which use '|' char as a separator. Deal
719 * with nested eps. Sub-ps are stored (copied) in a list (the ep struct)
720 * input parameters:
721 * - p to point on the opening ( of the ep
722 * - ep is where the sub-patterns will be stored (copied)
723 * output parameters:
724 * - p will point right after the closing ) fo the ep
725 * - ep will contain copies of all sub-ps of the ep
726 * if successful ep will contain at least one p
728 static s8 ep_split(u8 *p, struct ep *ep)
730 void *sub_p_start;
731 s64 depth; /* depth, if we have nested eps */
733 s8 r = OK;
734 u8 *pc = p;
736 ++pc; /* skip the opening ( */
737 sub_p_start=pc;
738 depth = 0;
740 loop {
741 if (depth < 0)
742 break;
744 if (*pc == '\0') {
745 r = NO_VALID_P;
746 goto out;
747 }else if (*pc == '[') { /* handle brackets special */
749 * Skip the not sign. We have to recognize it because of
750 * a possibly following ']'.
752 ++pc;
753 if ((*pc == '!') || (*pc == '^'))
754 ++pc;
756 if (*pc == ']')
757 ++pc; /* a leading ']' is recognized as such */
759 loop { /* skip over all chars of the list */
760 if (*pc == ']')
761 break;
762 if (*pc == '\0') {
763 r = NO_VALID_P;
764 goto out;
766 ++pc;
768 } else if (((*pc == '?') || (*pc == '*') || (*pc == '+')
769 || (*pc == '@') || (*pc == '!')) && (pc[1] == '(')) {
770 ++depth; /* remember the nesting depth of nested eps */
771 } else if (*pc==')') {
772 /* this means we found the end of the ep */
773 if (depth ==0 ) {
774 r = ep_p_new(ep, sub_p_start,
775 pc - (u8*)sub_p_start);
776 if (r != OK)
777 goto out;
779 depth--;
780 } else if(*pc == '|') {
781 if (depth == 0) {
782 r = ep_p_new(ep, sub_p_start,
783 pc - (u8*)sub_p_start);
784 if (r != OK)
785 goto out;
786 sub_p_start =pc + 1;
789 ++pc;
791 /* store a pointer on the rem of the p, right after the closing ) */
792 ep->p_rem = pc;
793 out:
794 return r;
797 static s8 match_at_least_once(struct ep *ep, u8 *p, u8 *str, u8 *str_end,
798 u8 flgs)
800 s64 cur_p_of = 0;
801 loop {
802 struct p *cur_p = ep_p_get(ep, cur_p_of);
803 u8 *sc = str;
805 loop {
806 s8 r;
808 if (sc > (u8*)str_end)
809 break;
811 /* first match the prefix with the cur p */
812 r = match(cur_p->str, str, sc, flgs, 0);
813 if (r == MATCH) {
815 * This was successful. Now match the str rem
816 * with the p rem.
818 r = match(ep->p_rem, sc, str_end, flgs, 0);
819 if (r == NOMATCH) {
821 * Unable to match the p rem with the
822 * str rem, then try to match again the
823 * ep by rewinding the p from the start.
824 * We can because we are to match the ep
825 * more than once
827 if (sc != str) {
828 r = match((u8*)p - 1, sc,
829 str_end, flgs, 0);
830 if (r == MATCH)
831 return MATCH;
833 } else if (r == MATCH) {
834 return MATCH;
835 } else
836 return r; /* oops! */
838 ++sc;
840 cur_p_of = ep_p_next(ep, cur_p_of);
841 if (cur_p_of == NO_MORE_P)
842 break;
844 return NOMATCH; /* none of the ps lead to a match */
847 static s8 match_only_once(struct ep *ep, u8 *str, u8 *str_end, u8 flgs)
849 s64 cur_p_of = 0; /* offset */
850 loop {
851 struct p *cur_p = ep_p_get(ep, cur_p_of);
853 * I cannot believe it but `strcat' is actually acceptable here.
854 * Match the entire str with the prefix from the ep and the
855 * rem of the p following the ep. We made room in the p str
856 * buffer in order to store the concatened p.
858 s8 r = match(strcat(cur_p->str, ep->p_rem), str, str_end, flgs,
860 if (r == MATCH)
861 return MATCH;
863 cur_p_of = ep_p_next(ep, cur_p_of);
864 if (cur_p_of == NO_MORE_P)
865 break;
867 return NOMATCH; /* none of the ps lead to a match */
870 static s8 match_none(struct ep *ep, u8 *str, u8 *str_end, u8 flgs)
872 u8 *sc = str;
873 loop {
874 s64 cur_p_of;
876 if (sc > (u8*)str_end)
877 break;
879 cur_p_of = 0;
880 loop {
881 s8 r;
883 struct p *cur_p = ep_p_get(ep, cur_p_of);
885 r = match(cur_p->str, str, sc, flgs, 0);
886 if (r == MATCH)
887 break;
889 cur_p_of = ep_p_next(ep, cur_p_of);
890 if (cur_p_of == NO_MORE_P)
891 break;
895 * if none of the ps matched see whether the p rem match the str
896 * rem
898 if (cur_p_of == NO_MORE_P) {
899 s8 r = match(ep->p_rem, sc, str_end, flgs, 0);
900 if (r == MATCH)
901 return MATCH;
903 ++sc;
905 /* none of the ps together with the rem of the p lead to a match */
906 return NOMATCH;
909 /* will return WATCH/NOMATCH/NO_VALID_P *and* misc errors */
910 static s8 extended_match_try(u8 type, u8 *p, u8 *str, u8 *str_end, u8 flgs)
912 struct ep ep;
913 s8 r;
915 ep.type = type;
916 /* count the ep type char right befor the opening ( */
917 ep.full_p_sz = strlen(p) + 1;
918 ep.ps = 0;
919 ep.last = 0;
920 ep.sz = 0;
921 ep.p_rem = 0;
923 r = ep_split(p, &ep);
924 if (r != OK)
925 goto out;
927 /* from here, p_rem points right after the closing ) of the ep */
929 r = MATCH;
930 switch (type) {
931 case '*':
932 if (match(ep.p_rem, str, str_end, flgs, 0) == MATCH)
933 break;
934 /* FALLTHROUGH to at least once sub-p matched */
935 case '+':
936 r = match_at_least_once(&ep, p, str, str_end, flgs);
937 break;
938 case '?':
939 if (match(ep.p_rem, str, str_end, flgs, 0) == MATCH)
940 break;
941 /* FALLTHROUGH to only once sub-p matched */
942 case '@':
943 r = match_only_once(&ep, str, str_end, flgs);
944 break;
945 case '!':
946 r = match_none(&ep, str, str_end, flgs);
947 break;
948 default:
949 r = NO_VALID_P;
951 out:
952 if (ep.ps)
953 munmap(ep.ps, ep.sz);
954 return r;
957 ULINUX_EXPORT s8 ulinux_match(u8 *pattern, u8 *str, u8 flgs)
959 return match(pattern, str,(u8*)str + strlen(str), flgs, 0);
962 /*----------------------------------------------------------------------------*/
963 /* local cleanup */
964 #undef loop
965 #undef ANONYMOUS
966 #undef CASEFOLD
967 #undef ERR
968 #undef ERR_NOMEM
969 #undef EXTMATCH
970 #undef is_alnum
971 #undef is_alpha
972 #undef is_blank
973 #undef is_cntrl
974 #undef is_digit
975 #undef is_graph
976 #undef is_lower
977 #undef is_print
978 #undef is_punct
979 #undef is_space
980 #undef is_upper
981 #undef is_xdigit
982 #undef ISERR
983 #undef sl
984 #undef MATCH
985 #undef memchr
986 #undef memcpy
987 #undef mmap
988 #undef mremap
989 #undef munmap
990 #undef NOMATCH
991 #undef PRIVATE
992 #undef RD
993 #undef s64
994 #undef s8
995 #undef u16
996 #undef u64
997 #undef u8
998 #undef strcat
999 #undef strcmp
1000 #undef strlen
1001 #undef WR
1002 /*----------------------------------------------------------------------------*/
1003 #undef CHAR_CLASS_MAX
1004 #undef STREQ
1005 #undef NO_VALID_P
1006 #undef OK
1007 /*----------------------------------------------------------------------------*/
1008 #undef BIT
1009 #undef R_GET
1010 #undef R_SET
1011 #undef RETURN_R
1012 #undef MATCH_NORMAL
1013 #undef NEXT_STR_CHAR
1014 #undef NEXT_P_CHAR
1015 #undef TEST_STR_END_REACHED
1016 /*----------------------------------------------------------------------------*/
1017 #undef NO_MORE_P
1018 /*----------------------------------------------------------------------------*/
1019 #undef ULINUX_EXPORT
1020 #endif