ulinux/utils/ascii/match/match.c

   1 #ifndef ULINUX_UTILS_ASCII_MATCH_MATCH_C
   2 #define ULINUX_UTILS_ASCII_MATCH_MATCH_C
   3 /*
   4  * This is a derived work based on GNU glibc implementation on 20130409.
   5  * Switch to GNU Lesser GPLv3 protection.
   6  * author:Sylvain BERTRAND
   7  */
   8 /******************************************************************************/
   9 /*
  10  * NOTE:do *NOT* even try to read that code without ksh extended glob
  11  * specifications, and keeping an eye on the abbreviations below
  12  *
  13  * upstream code is hard to grasp
  14  */
  15 /******************************************************************************/
  16 #include <stdbool.h>
  17 #include <ulinux/compiler_types.h>
  18 #include <ulinux/types.h>
  19 #include <ulinux/sysc.h>
  20 #include <ulinux/error.h>
  21
  22 #include <ulinux/utils/mem.h>
  23 #include <ulinux/utils/ascii/ascii.h>
  24 #include <ulinux/utils/ascii/string/string.h>
  25 #include <ulinux/utils/ascii/match/match.h>
  26 #include <ulinux/mmap.h>
  27
  28 /*----------------------------------------------------------------------------*/
  29 /* "One Compilation Unit" support */
  30 #ifdef ULINUX_UTILS_EXTERNAL
  31 #define ULINUX_EXPORT
  32 #else
  33 #define ULINUX_EXPORT static
  34 #endif
  35 /*----------------------------------------------------------------------------*/
  36
  37 /*----------------------------------------------------------------------------*/
  38 /* ulinux namespace */
  39 #define loop for(;;)
  40 #define ANONYMOUS ULINUX_MAP_ANONYMOUS
  41 #define CASEFOLD ULINUX_MATCH_CASEFOLD
  42 #define ERR ULINUX_MATCH_ERR
  43 #define ERR_NOMEM ULINUX_MATCH_ERR_NOMEM
  44 #define EXTMATCH ULINUX_MATCH_EXTMATCH
  45 #define is_alnum ulinux_is_alnum
  46 #define is_alpha ulinux_is_alpha
  47 #define is_blank ulinux_is_blank
  48 #define is_cntrl ulinux_is_cntrl
  49 #define is_digit ulinux_is_digit
  50 #define is_graph ulinux_is_graph
  51 #define is_lower ulinux_is_lower
  52 #define is_print ulinux_is_print
  53 #define is_punct ulinux_is_punct
  54 #define is_space ulinux_is_space
  55 #define is_upper ulinux_is_upper
  56 #define is_xdigit ulinux_is_xdigit
  57 #define ISERR ULINUX_ISERR
  58 #define sl ulinux_sl
  59 #define MATCH ULINUX_MATCH_MATCH
  60 #define memchr ulinux_memchr
  61 #define memcpy ulinux_memcpy
  62 #define mmap(a,b,c) ulinux_sysc(mmap,6,0,a,b,c,-1,0)
  63 #define mremap(a,b,c) ulinux_sysc(mremap,4,a,b,c,0)
  64 #define munmap(a,b) ulinux_sysc(munmap,2,a,b)
  65 #define NOMATCH ULINUX_MATCH_NOMATCH
  66 #define PRIVATE ULINUX_MAP_PRIVATE
  67 #define RD ULINUX_PROT_READ
  68 #define s64 ulinux_s64
  69 #define s8 ulinux_s8
  70 #define u16 ulinux_u16
  71 #define u64 ulinux_u64
  72 #define u8 ulinux_u8
  73 #define strcat ulinux_strcat
  74 #define strcmp ulinux_strcmp
  75 #define strlen ulinux_strlen
  76 #define WR ULINUX_PROT_WRITE
  77 /*----------------------------------------------------------------------------*/
  78 /* local symbols and types */
  79 #define match_params ulinux_utils_ascii_match_match_params
  80 #define fold ulinux_utils_ascii_match_fold
  81 #define endp ulinux_utils_ascii_match_endp
  82 #define star_handler ulinux_utils_ascii_match_star_handler
  83 #define bracket_skip ulinux_utils_ascii_match_bracket_skip
  84 #define bracket_matched ulinux_utils_ascii_match_bracket_matched
  85 #define bracket_normal ulinux_utils_ascii_match_bracket_normal
  86 #define bracket_char_class_get ulinux_utils_ascii_match_bracket_char_class_get
  87 #define bracket_escape ulinux_utils_ascii_match_bracket_escape
  88 #define bracket_class ulinux_utils_ascii_match_bracket_class
  89 #define bracket_handler ulinux_utils_ascii_match_bracket_handler
  90 #define question_mark_handler ulinux_utils_ascii_match_question_mark_handler
  91 #define misc_handler ulinux_utils_ascii_match_misc_handler
  92 #define escape_handler ulinux_utils_ascii_match_escape_handler
  93 #define special_char ulinux_utils_ascii_match_special_char
  94 #define match ulinux_utils_ascii_match_match
  95 #define p ulinux_utils_ascii_match_p
  96 #define ep ulinux_utils_ascii_match_ep
  97 #define ep_p_new ulinux_utils_ascii_match_ep_p_new
  98 #define ep_p_get ulinux_utils_ascii_match_ep_p_get
  99 #define ep_p_next ulinux_utils_ascii_match_ep_p_next
 100 #define ep_split ulinux_utils_ascii_match_ep_split
 101 #define match_at_least_once ulinux_utils_ascii_match_match_at_least_once
 102 #define match_only_once ulinux_utils_ascii_match_match_only_once
 103 #define match_none ulinux_utils_ascii_match_match_none
 104 #define extended_match_try ulinux_utils_ascii_match_extended_match_try
 105 /*----------------------------------------------------------------------------*/
 106 /*
 107  * ABBREVIATIONS
 108  * addr    address
 109  * c       character
 110  * char(s) character(s)
 111  * cur     current
 112  * ep(s)   extended pattern(s)
 113  * err     error
 114  * nomem   no memory
 115  * of      offset
 116  * p(s)    pattern(s)
 117  * params  parameters
 118  * pc      pattern char
 119  * r       return value
 120  * rd      read
 121  * rem     remainder
 122  * sc      string char
 123  * str     string
 124  * sz      size
 125  * wr      write
 126  */
 127 /*----------------------------------------------------------------------------*/
 128 #define CHAR_CLASS_MAX (sizeof("xdigit") - 1)
 129 #define STREQ(s1,s2) (strcmp(s1, s2) == 0)
 130 /*----------------------------------------------------------------------------*/
 131 /* return values */
 132 /*#define ULINUX_MATCH_ERR_NOMEM -3*/
 133 /*#define ULINUX_MATCH_ERR       -2*/
 134 #define NO_VALID_P               -1
 135 /*#define ULINUX_MATCH_MATCH      0*/
 136 #define OK                        0
 137 /*#define ULINUX_MATCH_NOMATCH    1*/
 138 /*----------------------------------------------------------------------------*/
 139 /*
 140  * In the context a match call, it's used to store the location of the
 141  * "ending" * following, at the same depth, a "starting" *.
 142  * The matcher is locked in the star_handler between 2 stars at the
 143  * same depth.
 144  */
 145 struct match_params {
 146         void *p;
 147         void *str;
 148 };
 149
 150 static u8 fold(u8 c, u8 flgs)
 151 {
 152         if(!(flgs & CASEFOLD))
 153                 return c;
 154         return ulinux_2lower(c);
 155 }
 156
 157 /*
 158  * Recursively skip a p. Return a pointer right after it, or a pointer on the
 159  *start of the p to skip if the 0 terminating char is encountered before the
 160  * end of the p.
 161  */
 162 static u8 *end_p(u8 *p)
 163 {
 164         u8 *pc = p;
 165         loop {
 166                 if (*++pc == '\0')
 167                         return p; /* this is an invalid p */
 168                 else if (*pc == '[') {/* handle brackets special */
 169                         /*
 170                          * Skip the not sign. We have to recognize it because of
 171                          * a possibly following ']'.
 172                          */
 173                         if ((*++pc == '!') || (*pc == '^'))
 174                                 ++pc;
 175                         if(*pc == ']')
 176                                 ++pc; /* a leading ']' is recognized as such */
 177                         /* skip over all chars of the list */
 178                         loop {
 179                                 if (*pc == ']')
 180                                         break;
 181                                 if(*pc++ == '\0')
 182                                         return p; /* this is no valid p */
 183                         }
 184                 } else if (((*pc == '?') || (*pc == '*') || (*pc == '+')
 185                         || (*pc == '@') || (*pc == '!')) && (pc[1] == '(')) {
 186                         pc = end_p(pc + 1);
 187                 } else if (*pc == ')') {
 188                         break;
 189                 }
 190         }
 191         return pc + 1;
 192 }
 193
 194 static s8 extended_match_try(u8 type, u8 *p, u8 *str, u8 *str_end, u8 flgs);
 195 static s8 match(u8 *p, u8 *str, u8 *str_end, u8 flgs,
 196                                                  struct match_params *end_star);
 197
 198 /*----------------------------------------------------------------------------*/
 199 /* return value for the following handlers */
 200 #define BIT(x)               (1 << x)
 201 #define R_GET(r)             ((s8)(r & 0xff))
 202 #define R_SET(r)             ((u16)(r & 0xff))
 203 #define RETURN_R             BIT(9)
 204 #define MATCH_NORMAL         BIT(10)
 205 #define NEXT_STR_CHAR        BIT(11)
 206 #define NEXT_P_CHAR          BIT(12)
 207 #define TEST_STR_END_REACHED BIT(13)
 208 /*----------------------------------------------------------------------------*/
 209
 210 static u16 star_handler(u8 *c, u8 **pc, u8 **sc, u8 *str_end, u8 flgs,
 211                                                   struct match_params *end_star)
 212 {
 213         struct match_params local_end_star;
 214
 215         if ((flgs & EXTMATCH) && (**pc == '(')) {
 216                 s8 r = extended_match_try(*c, *pc, *sc, str_end, flgs);
 217                 if (r != NO_VALID_P)
 218                         return RETURN_R | R_SET(r);
 219         } else if (end_star) {
 220                 /* this star will exit the current star context matching */
 221                 end_star->p = *pc - 1;
 222                 end_star->str = *sc;
 223                 return RETURN_R | R_SET(MATCH);
 224         }
 225
 226         loop {
 227                 *c = *(*pc)++;
 228
 229                 /*
 230                  * do "absorb" in the current * the following sequence of
 231                  * ?,*,?() and *()
 232                  */
 233                 if ((*c != '?') && (*c != '*'))
 234                         break;
 235
 236                 /*
 237                  * ?() and *() are "zero or one|more" occurences of matched
 238                  * patterns, then absorbed them in the *
 239                  */
 240                 if ((**pc == '(') && (flgs & EXTMATCH)) {
 241                         u8 *endp = end_p(*pc);
 242                         if (endp != *pc) { /* This is a p. Skip over it */
 243                                 *pc = endp;
 244                                 continue;
 245                         }
 246                 }
 247
 248                 if (*c == '?') { /* match one any char from the string */
 249                         /* a ? needs to match one char */
 250                         if (*sc == str_end) {
 251                                 /* there isn't another char; no match */
 252                                 return RETURN_R | R_SET(NOMATCH);
 253                         } else {
 254                                 /*
 255                                  * one char of the str is consumed in matching
 256                                  * this ? wildcard, so *??? won't match if there
 257                                  * are less than three chars
 258                                  */
 259                                 ++(*sc);
 260                         }
 261                 }
 262         }
 263
 264         /* the wildcard(s) is/are the last element of the p flag is set */
 265         if (*c == '\0')
 266                 return RETURN_R | R_SET(MATCH);
 267
 268         local_end_star.p = 0;
 269
 270         if ((*c == '[') || ((flgs & EXTMATCH) && ((*c == '@') || (*c == '+')
 271                                         || (*c=='!')) && (**pc == '('))) {
 272                 /* the * is followed by a bracket or an "not absorbed" ep */
 273                 (*pc)--;
 274                 /*
 275                  * See explanation of star context matching right below. Instead
 276                  * of a char like below, it's an ep.
 277                  */
 278                 loop {
 279                         s8 r;
 280                         if (*sc == str_end)
 281                                 break;
 282                         r = match(*pc, *sc, str_end, flgs, &local_end_star);
 283                         if (r == MATCH) {
 284                                 if (local_end_star.p == 0)
 285                                         return RETURN_R | R_SET(MATCH);
 286                                 break;
 287                         }
 288                         ++(*sc);
 289                 }
 290         } else { /* the * is followed by a "normal" char */
 291                 if (*c == '\\')
 292                         *c=**pc;
 293                 *c = fold(*c, flgs);
 294                 (*pc)--;
 295                 /*
 296                  * scan the str till we find a char which is the same than the
 297                  * first p char right after the *, then start to match in star
 298                  * context, namely till we reach the end, or we find another
 299                  * will will end the matching in that context
 300                  */
 301                 loop {
 302                         if (*sc == str_end)
 303                                 break;
 304                         if(fold(**sc, flgs) == *c) {
 305                                 s8 r = match(*pc, *sc, str_end, flgs,
 306                                                         &local_end_star);
 307                                 if (r == MATCH) {
 308                                         if (local_end_star.p == 0)
 309                                                 return RETURN_R | R_SET(MATCH);
 310                                         break;
 311                                 }
 312                         }
 313                         ++(*sc);
 314                 }
 315                 if (local_end_star.p != 0) {
 316                         /* we are finish to match the str in star context */
 317                         *pc = local_end_star.p;
 318                         *sc = local_end_star.str;
 319                         return NEXT_P_CHAR;
 320                 }
 321         }
 322         /* if we come here no match is possible with the wildcard */
 323         return RETURN_R | R_SET(NOMATCH);
 324 }
 325
 326 static u16 bracket_skip(u8 *c, u8 **pc)
 327 {
 328         loop {
 329 ignore_next:
 330                 *c = *(*pc)++;
 331
 332                 if (*c == '\0') {
 333                         /* [... (unterminated) loses */
 334                         return RETURN_R | R_SET(NOMATCH);
 335                 }
 336
 337                 if (*c == '\\') {
 338                         if (**pc == '\0')
 339                                 return RETURN_R | R_SET(NOMATCH);
 340                         /* XXX 1003.2d11 is unclear if this is right */
 341                         ++(*pc);
 342                 } else if ((*c == '[') && (**pc == ':')) {
 343                         u8 char_class_n = 0;
 344                         u8 *char_class_start_pc = *pc;
 345
 346                         loop {
 347                                 *c = *++(*pc);
 348                                 if (++char_class_n == (CHAR_CLASS_MAX + 1))
 349                                         return RETURN_R | R_SET(NOMATCH);
 350
 351                                 if ((**pc == ':') && ((*pc)[1] == ']'))
 352                                         break;
 353
 354                                 if ((*c < 'a') || (*c >= 'z')) {
 355                                         /* is_lower(c) && is_weaker_alpha(c) */
 356                                         *pc = char_class_start_pc;
 357                                         goto ignore_next; /* XXX: goto here  */
 358                                 }
 359                         }
 360                         (*pc) += 2;
 361                         *c = *(*pc)++;
 362                 }
 363                 if (*c == ']')
 364                         break;
 365         }
 366         return OK;
 367 }
 368
 369 static u16 bracket_matched(u8 *c, u8 **pc, u8 not)
 370 {
 371         u16 r = bracket_skip(c, pc);
 372         if (r != OK)
 373                 return r;
 374
 375         if (not)
 376                 return RETURN_R | R_SET(NOMATCH);
 377         return NEXT_STR_CHAR;
 378 }
 379
 380 /* input:*c may not be folded */
 381 static u16 bracket_normal(u8 *c, u8 **pc, u8 not, u8 sc_folded)
 382 {
 383         u8 c_old;
 384         u8 is_range = ((**pc=='-') && ((*pc)[1] != '\0') && ((*pc)[1] != ']'));
 385
 386         if (!is_range && (*c == sc_folded))
 387                 return bracket_matched(c, pc, not);
 388
 389         c_old = *c;
 390         *c = *(*pc)++;
 391
 392         if ((*c == '-') && (**pc != ']')) {
 393                 u8 c_end = *(*pc)++;
 394
 395                 if (c_end == '\\')
 396                         c_end = *(*pc)++;
 397                 if (c_end == '\0')
 398                         return RETURN_R | R_SET(NOMATCH);
 399
 400                 /* it is a range, ascii collation */
 401                 if ((c_old <= sc_folded) && (sc_folded <= c_end))
 402                         return bracket_matched(c, pc, not);
 403                 *c = *(*pc)++;
 404         }
 405         return OK;
 406 }
 407
 408 static u16 bracket_char_class_get(u8 *c, u8 **pc, u8 not, u8 sc_folded,
 409                                                                  u8 *char_class)
 410 {
 411         u8 char_class_n = 0;
 412         u8 *char_class_start_pc = *pc;
 413
 414         loop {
 415                 if (char_class_n == (CHAR_CLASS_MAX + 1))
 416                         return RETURN_R | R_SET(NOMATCH);
 417
 418                 *c = *++(*pc);
 419                 if ((*c == ':') && ((*pc)[1] == ']')) {
 420                         (*pc)+=2;
 421                         break;
 422                 }
 423
 424                 if ((*c < 'a') || (*c >= 'z')) {
 425                         /* is_lower(c) && is_weaker_alpha(c) */
 426                         u16 r;
 427                         /*
 428                          * This cannot possibly be a char class name. Rewind and
 429                          * try to match it as a normal range.
 430                          */
 431                         *pc = char_class_start_pc;
 432                         *c = '[';
 433                         r = bracket_normal(c, pc, not, sc_folded);
 434                         if (r != OK)
 435                                 return r;
 436                 }
 437                 char_class[char_class_n++] = *c;
 438         }
 439         char_class[char_class_n] = '\0';
 440         return OK;
 441 }
 442
 443 static u16 bracket_escape(u8 *c, u8 **pc, u8 flgs, u8 not, u8 sc_folded)
 444 {
 445         u16 r;
 446
 447         if (**pc == '\0')
 448                 return RETURN_R | R_SET(NOMATCH);
 449         *c = fold(**pc, flgs);
 450         ++(*pc);
 451         r = bracket_normal(c, pc, not, sc_folded);
 452         if (r != OK)
 453                 return r;
 454         return OK;
 455 }
 456
 457 /* input: *c=='[' **pc==':' */
 458 static u16 bracket_class(u8 *c, u8 **pc, u8 **sc, u8 not, u8 sc_folded)
 459 {
 460         /*don't forget the 0 terminating char*/
 461         u8 char_class[CHAR_CLASS_MAX + 1];
 462         u16 r = bracket_char_class_get(c, pc, not, sc_folded, &char_class[0]);
 463         if (r != OK)
 464                 return r;
 465
 466         if (            (STREQ(char_class,"alnum") && is_alnum(**sc))
 467                 ||      (STREQ(char_class,"alpha") && is_alpha(**sc))
 468                 ||      (STREQ(char_class,"blank") && is_blank(**sc))
 469                 ||      (STREQ(char_class,"cntrl") && is_cntrl(**sc))
 470                 ||      (STREQ(char_class,"digit") && is_digit(**sc))
 471                 ||      (STREQ(char_class,"graph") && is_graph(**sc))
 472                 ||      (STREQ(char_class,"lower") && is_lower(**sc))
 473                 ||      (STREQ(char_class,"print") && is_print(**sc))
 474                 ||      (STREQ(char_class,"punct") && is_punct(**sc))
 475                 ||      (STREQ(char_class,"space") && is_space(**sc))
 476                 ||      (STREQ(char_class,"upper") && is_upper(**sc))
 477                 ||      (STREQ(char_class,"xdigit") && is_xdigit(**sc)))
 478                 return bracket_matched(c, pc, not);
 479         *c = *(*pc)++;
 480         return OK;
 481 }
 482
 483 static u16 bracket_handler(u8 *c, u8 **pc, u8 **sc, void *str_end, u8 flgs)
 484 {
 485         u8 not;
 486         u8 sc_folded;
 487
 488         u8 *pc_init = *pc;
 489         if (*sc == str_end)
 490                 return RETURN_R | R_SET(NOMATCH);
 491
 492         not =((**pc == '!') || (**pc == '^'));
 493         if (not)
 494                 ++(*pc);
 495
 496         sc_folded = fold(**sc, flgs);
 497
 498         *c = *(*pc)++;
 499         loop {
 500                 if (*c == '\\') {
 501                         u16 r = bracket_escape(c, pc, flgs, not, sc_folded);
 502                         if (r != OK)
 503                                 return r;
 504                 } else if ((*c == '[') && (**pc == ':')) {
 505                         u16 r = bracket_class(c, pc, sc, not, sc_folded);
 506                         if (r != OK)
 507                                 return r;
 508                 } else if (c == '\0') {
 509                         /*
 510                          * [ unterminated, rewind and tell above to match normal
 511                          */
 512                         *pc = pc_init;
 513                         *c = '[';
 514                         return MATCH_NORMAL;
 515                 } else {
 516                         u16 r;
 517                         *c = fold(*c, flgs);
 518                         r = bracket_normal(c, pc, not, sc_folded);
 519                         if (r != 0)
 520                                 return r;
 521                 }
 522                 if (*c == ']')
 523                         break;
 524         }
 525         if (!not)
 526                 return RETURN_R | R_SET(NOMATCH);
 527         return NEXT_STR_CHAR;
 528 }
 529
 530 static u16 question_mark_handler(u8 c, u8 *pc, u8 *sc, void *str_end, u8 flgs)
 531 {
 532         if ((flgs & EXTMATCH) && (*pc=='(')) {
 533                 s8 r = extended_match_try(c, pc, sc, str_end, flgs);
 534                 if (r != NO_VALID_P)
 535                         return RETURN_R | R_SET(r);
 536         }
 537         return TEST_STR_END_REACHED;
 538 }
 539
 540 static u16 misc_handler(u8 c, u8 *pc, u8 *sc, void *str_end, u8 flgs)
 541 {
 542         if ((flgs & EXTMATCH) && (*pc=='(')) {
 543                 s8 r = extended_match_try(c, pc, sc, str_end, flgs);
 544                 if (r != NO_VALID_P)
 545                         return RETURN_R | R_SET(r);
 546         }
 547         return MATCH_NORMAL;
 548 }
 549
 550 static u16 escape_handler(u8 *c, u8 **pc, u8 flgs)
 551 {
 552         u16 r = MATCH_NORMAL;
 553         *c = *(*pc)++;
 554         if (c=='\0')
 555                 return r | RETURN_R | R_SET(NOMATCH); /*railing \ loses*/
 556         *c = fold(*c, flgs);
 557         return r;
 558 }
 559
 560 static u16 special_char(u8 *c, u8 **pc, u8 **sc, void *str_end, u8 flgs,
 561                                                   struct match_params *end_star)
 562 {
 563         u16 r = RETURN_R | R_SET(ERR);
 564         switch (*c) {
 565         case '[':
 566                 r = bracket_handler(c, pc, sc, str_end, flgs);
 567                 break;
 568         case '*':
 569                 r = star_handler(c, pc, sc, str_end, flgs, end_star);
 570                 break;
 571         case '?':
 572                 r = question_mark_handler(*c, *pc, *sc, str_end, flgs);
 573                 break;
 574         case '+':
 575         case '@':
 576         case '!':
 577                 r = misc_handler(*c, *pc, *sc, str_end, flgs);
 578                 break;
 579         case '\\':
 580                 r = escape_handler(c, pc, flgs);
 581                 break;
 582         }
 583         return r;
 584 }
 585
 586 /* str_end points right after the last char of str, the '\0' char */
 587 static s8 match(u8 *p, u8 *str, u8 *str_end, u8 flgs,
 588                                                 struct match_params *end_star)
 589 {
 590         u8 c;
 591
 592         u8 *pc = p;
 593         u8 *sc = str;
 594
 595         loop {
 596                 u16 r;
 597
 598                 c = *pc++;
 599                 if (c=='\0')
 600                         break;
 601
 602                 c = fold(c, flgs);
 603
 604                 switch (c) {
 605                 case '[':
 606                 case '*':
 607                 case '?':
 608                 case '+':
 609                 case '@':
 610                 case '!':
 611                 case '\\':
 612                         r = special_char(&c, &pc, &sc, str_end, flgs, end_star);
 613                         break;
 614                 default:
 615                         r = MATCH_NORMAL;
 616                 }
 617
 618                 if (!(r & NEXT_STR_CHAR)) {
 619                         if (r & RETURN_R)
 620                                 return R_GET(r);
 621                         if (r & NEXT_P_CHAR)
 622                                 continue;
 623                         if ((r & MATCH_NORMAL) || (r & TEST_STR_END_REACHED))
 624                                 if (sc == str_end)
 625                                         return NOMATCH;
 626                         if(r & MATCH_NORMAL)
 627                                 if (c != fold(*sc, flgs))
 628                                         return NOMATCH;
 629                 }
 630                 ++sc;
 631         }
 632
 633         if (sc == str_end)
 634                 return MATCH;
 635         return NOMATCH;
 636 }
 637
 638 /*============================================================================*/
 639 /* ep */
 640 struct p {
 641         u64 sz; /* not accounting the 0 terminating char */
 642         u8 str[];
 643 };
 644
 645 struct ep {
 646         u64 full_p_sz;  /* the p sz from which the ep belongs to */
 647         struct p *ps;
 648         s64 last;       /* offset */
 649         u64 sz;
 650         u8 *p_rem;      /* remainder of the p (right and side of the ep) */
 651         u8 type;
 652 };
 653
 654 /*
 655  * based on the ep type, we may need to book enough room to put a worst case
 656  * scenario of a concatenated sub-p with the ep p rem
 657  */
 658 static s8 ep_p_new(struct ep *ep,u8 *p_str,u64 p_sz)
 659 {
 660         /*
 661          * We will do some p concatenation for the following p types. Then book
 662          * room for the worst case scenario.
 663          */
 664         u64 map_sz;
 665         sl addr;
 666         struct p *p;
 667
 668         u64 str_sz = (ep->type == '?') || (ep->type == '@') ? ep->full_p_sz
 669                                                                         : p_sz;
 670
 671         if (!ep->ps) {/* first allocation */
 672                 /* count the 0 terminating char */
 673                 map_sz = sizeof(*ep->ps) + str_sz + 1;
 674                 addr = mmap(map_sz, RD | WR,PRIVATE | ANONYMOUS);
 675                 if (!addr || ISERR(addr))
 676                         return ERR_NOMEM;
 677
 678                 ep->last=0;
 679         } else {
 680                 struct p *p_last;
 681                 /* count the 0 terminating char */
 682                 u64 new_p_sz = sizeof(*ep->ps) + str_sz + 1;
 683                 map_sz = ep->sz + new_p_sz;
 684                 addr = mremap(ep->ps, ep->sz, map_sz);
 685                 if (!addr || ISERR(addr))
 686                         return ERR_NOMEM; /* ep is unmapped elsewhere */
 687
 688                 p_last=(struct p*)((u8*)(ep->ps) + ep->last);
 689                 /* count the 0 terminating char */
 690                 ep->last += sizeof(*p_last) + p_last->sz + 1;
 691         }
 692         ep->ps = (struct p*)addr;
 693         ep->sz = map_sz;
 694         p = (struct p*)((u8*)(ep->ps) + ep->last);
 695         p->sz = str_sz;
 696         memcpy(&p->str[0], p_str, p_sz); /* copy only p_sz chars */
 697         p->str[p_sz] = 0; /* set the 0 terminating char */
 698         return OK;
 699 }
 700
 701 static struct p *ep_p_get(struct ep *ep, s64 of)
 702 {
 703         return (struct p*)((u8*)(ep->ps) + of);
 704 }
 705
 706 #define NO_MORE_P -1
 707 static s64 ep_p_next(struct ep *ep, s64 p_of)
 708 {
 709         struct p *p = (struct p*)((u8*)(ep->ps) + p_of);
 710         if (ep->sz == p_of + sizeof(*p) + p->sz + 1)
 711                 return NO_MORE_P;
 712         return p_of + sizeof(*p) + p->sz + 1;
 713 }
 714 /*ep end*/
 715 /*============================================================================*/
 716
 717 /*
 718  * This function splits the ep in sub-ps which use '|' char as a separator. Deal
 719  * with nested eps. Sub-ps are stored (copied) in a list (the ep struct)
 720  * input parameters:
 721  *   - p to point on the opening ( of the ep
 722  *   - ep is where the sub-patterns will be stored (copied)
 723  * output parameters:
 724  *   - p will point right after the closing ) fo the ep
 725  *   - ep will contain copies of all sub-ps of the ep
 726  * if successful ep  will contain at least one p
 727  */
 728 static s8 ep_split(u8 *p, struct ep *ep)
 729 {
 730         void *sub_p_start;
 731         s64 depth; /* depth, if we have nested eps */
 732
 733         s8 r = OK;
 734         u8 *pc = p;
 735
 736         ++pc; /* skip the opening ( */
 737         sub_p_start=pc;
 738         depth = 0;
 739
 740         loop {
 741                 if (depth < 0)
 742                         break;
 743
 744                 if (*pc == '\0') {
 745                         r = NO_VALID_P;
 746                         goto out;
 747                 }else if (*pc == '[') { /* handle brackets special */
 748                         /*
 749                          * Skip the not sign. We have to recognize it because of
 750                          * a possibly following ']'.
 751                          */
 752                         ++pc;
 753                         if ((*pc == '!') || (*pc == '^'))
 754                                 ++pc;
 755
 756                         if (*pc == ']')
 757                                 ++pc; /* a leading ']' is recognized as such */
 758
 759                         loop { /* skip over all chars of the list */
 760                                 if (*pc == ']')
 761                                         break;
 762                                 if (*pc == '\0') {
 763                                         r = NO_VALID_P;
 764                                         goto out;
 765                                 }
 766                                 ++pc;
 767                         }
 768                 } else if (((*pc == '?') || (*pc == '*') || (*pc == '+')
 769                         || (*pc == '@') || (*pc == '!')) && (pc[1] == '(')) {
 770                         ++depth; /* remember the nesting depth of nested eps */
 771                 } else if (*pc==')') {
 772                         /* this means we found the end of the ep */
 773                         if (depth ==0 ) {
 774                                 r = ep_p_new(ep, sub_p_start,
 775                                                         pc - (u8*)sub_p_start);
 776                                 if (r != OK)
 777                                         goto out;
 778                         }
 779                         depth--;
 780                 } else if(*pc == '|') {
 781                         if (depth == 0) {
 782                                 r = ep_p_new(ep, sub_p_start,
 783                                                         pc - (u8*)sub_p_start);
 784                                 if (r != OK)
 785                                         goto out;
 786                                 sub_p_start =pc + 1;
 787                         }
 788                 }
 789                 ++pc;
 790         }
 791         /* store a pointer on the rem of the p, right after the closing ) */
 792         ep->p_rem = pc;
 793 out:
 794         return r;
 795 }
 796
 797 static s8 match_at_least_once(struct ep *ep, u8 *p, u8 *str, u8 *str_end,
 798                                                                         u8 flgs)
 799 {
 800         s64 cur_p_of = 0;
 801         loop {
 802                 struct p *cur_p = ep_p_get(ep, cur_p_of);
 803                 u8 *sc = str;
 804
 805                 loop {
 806                         s8 r;
 807
 808                         if (sc > (u8*)str_end)
 809                                 break;
 810
 811                         /* first match the prefix with the cur p */
 812                         r = match(cur_p->str, str, sc, flgs, 0);
 813                         if (r == MATCH) {
 814                                 /*
 815                                  * This was successful. Now match the str rem
 816                                  * with the p rem.
 817                                  */
 818                                 r = match(ep->p_rem, sc, str_end, flgs, 0);
 819                                 if (r == NOMATCH) {
 820                                         /*
 821                                          * Unable to match the p rem with the
 822                                          * str rem, then try to match again the
 823                                          * ep by rewinding the p from the start.
 824                                          * We can because we are to match the ep
 825                                          * more than once
 826                                          */
 827                                         if (sc != str) {
 828                                                 r = match((u8*)p - 1, sc,
 829                                                         str_end, flgs, 0);
 830                                                 if (r == MATCH)
 831                                                         return MATCH;
 832                                         }
 833                                 } else if (r == MATCH) {
 834                                         return MATCH;
 835                                 } else
 836                                         return r; /* oops! */
 837                         }
 838                         ++sc;
 839                 }
 840                 cur_p_of = ep_p_next(ep, cur_p_of);
 841                 if (cur_p_of == NO_MORE_P)
 842                         break;
 843         }
 844         return NOMATCH; /* none of the ps lead to a match */
 845 }
 846
 847 static s8 match_only_once(struct ep *ep, u8 *str, u8 *str_end, u8 flgs)
 848 {
 849         s64 cur_p_of = 0; /* offset */
 850         loop {
 851                 struct p *cur_p = ep_p_get(ep, cur_p_of);
 852                 /*
 853                  * I cannot believe it but `strcat' is actually acceptable here.
 854                  * Match the entire str with the prefix from the ep and the
 855                  * rem of the p following the ep. We made room in the p str
 856                  * buffer in order to store the concatened p.
 857                  */
 858                 s8 r = match(strcat(cur_p->str, ep->p_rem), str, str_end, flgs,
 859                                                                         0);
 860                 if (r == MATCH)
 861                         return MATCH;
 862
 863                 cur_p_of = ep_p_next(ep, cur_p_of);
 864                 if (cur_p_of == NO_MORE_P)
 865                         break;
 866         }
 867         return NOMATCH; /* none of the ps lead to a match */
 868 }
 869
 870 static s8 match_none(struct ep *ep, u8 *str, u8 *str_end, u8 flgs)
 871 {
 872         u8 *sc = str;
 873         loop {
 874                 s64 cur_p_of;
 875
 876                 if (sc > (u8*)str_end)
 877                         break;
 878
 879                 cur_p_of = 0;
 880                 loop {
 881                         s8 r;
 882
 883                         struct p *cur_p = ep_p_get(ep, cur_p_of);
 884
 885                         r = match(cur_p->str, str, sc, flgs, 0);
 886                         if (r == MATCH)
 887                         break;
 888
 889                         cur_p_of = ep_p_next(ep, cur_p_of);
 890                         if (cur_p_of == NO_MORE_P)
 891                                 break;
 892                 }
 893
 894                 /*
 895                  * if none of the ps matched see whether the p rem match the str
 896                  * rem
 897                  */
 898                 if (cur_p_of == NO_MORE_P) {
 899                         s8 r = match(ep->p_rem, sc, str_end, flgs, 0);
 900                         if (r == MATCH)
 901                                 return MATCH;
 902                 }
 903                 ++sc;
 904         }
 905         /* none of the ps together with the rem of the p lead to a match */
 906         return NOMATCH;
 907 }
 908
 909 /* will return WATCH/NOMATCH/NO_VALID_P *and* misc errors */
 910 static s8 extended_match_try(u8 type, u8 *p, u8 *str, u8 *str_end, u8 flgs)
 911 {
 912         struct ep ep;
 913         s8 r;
 914
 915         ep.type = type;
 916         /* count the ep type char right befor the opening ( */
 917         ep.full_p_sz = strlen(p) + 1;
 918         ep.ps = 0;
 919         ep.last = 0;
 920         ep.sz = 0;
 921         ep.p_rem = 0;
 922
 923         r = ep_split(p, &ep);
 924         if (r != OK)
 925                 goto out;
 926
 927         /* from here, p_rem points right after the closing ) of the ep */
 928
 929         r = MATCH;
 930         switch (type) {
 931         case '*':
 932                 if (match(ep.p_rem, str, str_end, flgs, 0) == MATCH)
 933                         break;
 934                 /* FALLTHROUGH to at least once sub-p matched */
 935                 case '+':
 936                         r = match_at_least_once(&ep, p, str, str_end, flgs);
 937                         break;
 938                 case '?':
 939                         if (match(ep.p_rem, str, str_end, flgs, 0) == MATCH)
 940                                 break;
 941                         /* FALLTHROUGH to only once sub-p matched */
 942                 case '@':
 943                         r = match_only_once(&ep, str, str_end, flgs);
 944                         break;
 945                 case '!':
 946                         r = match_none(&ep, str, str_end, flgs);
 947                         break;
 948                 default:
 949                         r = NO_VALID_P;
 950         }
 951 out:
 952         if (ep.ps)
 953                 munmap(ep.ps, ep.sz);
 954         return r;
 955 }
 956
 957 ULINUX_EXPORT s8 ulinux_match(u8 *pattern, u8 *str, u8 flgs)
 958 {
 959         return match(pattern, str,(u8*)str + strlen(str), flgs, 0);
 960 }
 961
 962 /*----------------------------------------------------------------------------*/
 963 /* local cleanup */
 964 #undef loop
 965 #undef ANONYMOUS
 966 #undef CASEFOLD
 967 #undef ERR
 968 #undef ERR_NOMEM
 969 #undef EXTMATCH
 970 #undef is_alnum
 971 #undef is_alpha
 972 #undef is_blank
 973 #undef is_cntrl
 974 #undef is_digit
 975 #undef is_graph
 976 #undef is_lower
 977 #undef is_print
 978 #undef is_punct
 979 #undef is_space
 980 #undef is_upper
 981 #undef is_xdigit
 982 #undef ISERR
 983 #undef sl
 984 #undef MATCH
 985 #undef memchr
 986 #undef memcpy
 987 #undef mmap
 988 #undef mremap
 989 #undef munmap
 990 #undef NOMATCH
 991 #undef PRIVATE
 992 #undef RD
 993 #undef s64
 994 #undef s8
 995 #undef u16
 996 #undef u64
 997 #undef u8
 998 #undef strcat
 999 #undef strcmp
1000 #undef strlen
1001 #undef WR
1002 /*----------------------------------------------------------------------------*/
1003 #undef CHAR_CLASS_MAX
1004 #undef STREQ
1005 #undef NO_VALID_P
1006 #undef OK
1007 /*----------------------------------------------------------------------------*/
1008 #undef BIT
1009 #undef R_GET
1010 #undef R_SET
1011 #undef RETURN_R
1012 #undef MATCH_NORMAL
1013 #undef NEXT_STR_CHAR
1014 #undef NEXT_P_CHAR
1015 #undef TEST_STR_END_REACHED
1016 /*----------------------------------------------------------------------------*/
1017 #undef NO_MORE_P
1018 /*----------------------------------------------------------------------------*/
1019 #undef ULINUX_EXPORT
1020 #endif