6069 libdisasm: instrlen arch op should have a sane default
[illumos-gate.git] / usr / src / lib / libc / port / regex / regex.c
blob769761daef4dcc8cc71d5568d99eded17a118e81
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
27 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
28 /* All Rights Reserved */
30 #pragma ident "%Z%%M% %I% %E% SMI"
33 * IMPORTANT NOTE:
35 * regex() WORKS **ONLY** WITH THE ASCII AND THE Solaris EUC CHARACTER SETS.
36 * IT IS **NOT** CHARACTER SET INDEPENDENT.
40 #pragma weak _regex = regex
42 #include "lint.h"
43 /* CONSTANTS SHARED WITH regcmp() */
44 #include "regex.h"
45 #include "mtlib.h"
46 #include <limits.h>
47 #include <stdarg.h>
48 #include <stdlib.h>
49 #include <thread.h>
50 #include <widec.h>
51 #include "tsd.h"
54 /* PRIVATE CONSTANTS */
/*
 * The three ADD_*_TO_GROUP_LENGTH values occupy the low two bits that
 * ADDED_LENGTH_BITS selects.
 * NOTE(review): presumably they flag an extra 256/512/768 bytes of group
 * length in the compiled expression -- the code using them is outside this
 * excerpt; confirm against regcmp().
 */
#define	ADD_256_TO_GROUP_LENGTH	0x01
#define	ADD_512_TO_GROUP_LENGTH	0x02
#define	ADD_768_TO_GROUP_LENGTH	0x03
#define	ADDED_LENGTH_BITS	0x03

/* keeps the low eight bits of a byte that was widened to an int */
#define	SINGLE_BYTE_MASK	0xFF

/* number of slots in stringp_stack[] */
#define	STRINGP_STACK_SIZE	50
64 /* PRIVATE TYPE DEFINITIONS */
/*
 * Which outcome a character-class test is looking for: the character
 * being a member of the class (IN_CLASS) or not being one (NOT_IN_CLASS).
 */
typedef enum {
	NOT_IN_CLASS = 0,
	IN_CLASS = 1
} char_test_condition_t;
/*
 * Result of testing one character against a character class.
 * CONDITION_TRUE / CONDITION_FALSE say whether the requested condition
 * held; CHAR_TEST_ERROR reports a class that could not be decoded.
 */
typedef enum {
	TESTING_CHAR = 0,
	CONDITION_TRUE = 1,
	CONDITION_FALSE = 2,
	CHAR_TEST_ERROR = 3
} char_test_result_t;
79 /* PRIVATE GLOBAL VARIABLES */
81 static mutex_t regex_lock = DEFAULTMUTEX;
82 static int return_arg_number[NSUBSTRINGS];
83 static const char *substring_endp[NSUBSTRINGS];
84 static const char *substring_startp[NSUBSTRINGS];
85 static const char *stringp_stack[STRINGP_STACK_SIZE];
86 static const char **stringp_stackp;
89 /* DECLARATIONS OF PRIVATE FUNCTIONS */
91 static int
92 get_wchar(wchar_t *wcharp,
93 const char *stringp);
95 static void
96 get_match_counts(int *nmust_matchp,
97 int *nextra_matches_allowedp,
98 const char *count_stringp);
100 static boolean_t
101 in_wchar_range(wchar_t test_char,
102 wchar_t lower_char,
103 wchar_t upper_char);
105 static const char *
106 pop_stringp(void);
108 static const char *
109 previous_charp(const char *current_charp);
111 static const char *
112 push_stringp(const char *stringp);
114 static char_test_result_t
115 test_char_against_ascii_class(char test_char,
116 const char *classp,
117 char_test_condition_t test_condition);
119 static char_test_result_t
120 test_char_against_multibyte_class(wchar_t test_char,
121 const char *classp,
122 char_test_condition_t test_condition);
125 /* FOR COMPATIBILITY WITH PREVIOUS ASCII VERSIONS OF regcmp() */
127 static char_test_result_t
128 test_char_against_old_ascii_class(char test_char,
129 const char *classp,
130 char_test_condition_t test_condition);
132 static const char *
133 test_repeated_ascii_char(const char *repeat_startp,
134 const char *stringp,
135 const char *regexp);
137 static const char *
138 test_repeated_multibyte_char(const char *repeat_startp,
139 const char *stringp,
140 const char *regexp);
142 static const char *
143 test_repeated_group(const char *repeat_startp,
144 const char *stringp,
145 const char *regexp);
147 static const char *
148 test_string(const char *stringp,
149 const char *regexp);
152 /* DEFINITIONS OF PUBLIC VARIABLES */
154 char *__loc1;
157 * reserve thread-specific storage for __loc1
159 char **
160 ____loc1(void)
162 if (thr_main())
163 return (&__loc1);
164 return ((char **)tsdalloc(_T_REGEX_LOC1, sizeof (char *), NULL));
167 #define __loc1 (*(____loc1()))
169 /* DEFINITION OF regex() */
171 extern char *
172 regex(const char *regexp, const char *stringp, ...)
174 va_list arg_listp;
175 int char_size;
176 const char *end_of_matchp;
177 wchar_t regex_wchar;
178 char *return_argp[NSUBSTRINGS];
179 char *returned_substringp;
180 int substringn;
181 const char *substringp;
182 wchar_t string_wchar;
184 if (____loc1() == (char **)0) {
185 return ((char *)0);
186 } else {
187 lmutex_lock(&regex_lock);
188 __loc1 = (char *)0;
191 if ((stringp == (char *)0) || (regexp == (char *)0)) {
192 lmutex_unlock(&regex_lock);
193 return ((char *)0);
197 /* INITIALIZE SUBSTRINGS THAT MIGHT BE RETURNED IN VARARGS */
199 substringn = 0;
200 va_start(arg_listp, stringp);
201 while (substringn < NSUBSTRINGS) {
202 return_argp[substringn] = va_arg(arg_listp, char *);
203 substring_startp[substringn] = (char *)0;
204 return_arg_number[substringn] = -1;
205 substringn++;
207 va_end(arg_listp);
210 /* TEST THE STRING AGAINST THE REGULAR EXPRESSION */
212 end_of_matchp = (char *)0;
213 stringp_stackp = &stringp_stack[STRINGP_STACK_SIZE];
215 if ((int)*regexp == (int)START_OF_STRING_MARK) {
218 * the match must start at the beginning of the string
221 __loc1 = (char *)stringp;
222 regexp++;
223 end_of_matchp = test_string(stringp, regexp);
225 } else if ((int)*regexp == (int)ASCII_CHAR) {
228 * test a string against a regular expression
229 * that starts with a single ASCII character:
231 * move to each character in the string that matches
232 * the first character in the regular expression
233 * and test the remaining string
236 while ((*stringp != *(regexp + 1)) && (*stringp != '\0')) {
237 stringp++;
239 while ((end_of_matchp == (char *)0) && (*stringp != '\0')) {
240 end_of_matchp = test_string(stringp, regexp);
241 if (end_of_matchp != (char *)0) {
242 __loc1 = (char *)stringp;
243 } else {
244 stringp++;
245 while ((*stringp != *(regexp + 1)) && (*stringp != '\0')) {
246 stringp++;
251 } else if (!multibyte) {
254 * if the value of the "multibyte" macro defined in <euc.h>
255 * is false, regex() is running in an ASCII locale;
256 * test an ASCII string against an ASCII regular expression
257 * that doesn't start with a single ASCII character:
259 * move forward in the string one byte at a time, testing
260 * the remaining string against the regular expression
263 end_of_matchp = test_string(stringp, regexp);
264 while ((end_of_matchp == (char *)0) && (*stringp != '\0')) {
265 stringp++;
266 end_of_matchp = test_string(stringp, regexp);
268 if (end_of_matchp != (char *)0) {
269 __loc1 = (char *)stringp;
272 } else if ((int)*regexp == (int)MULTIBYTE_CHAR) {
275 * test a multibyte string against a multibyte regular expression
276 * that starts with a single multibyte character:
278 * move to each character in the string that matches
279 * the first character in the regular expression
280 * and test the remaining string
283 (void) get_wchar(&regex_wchar, regexp + 1);
284 char_size = get_wchar(&string_wchar, stringp);
285 while ((string_wchar != regex_wchar) && (char_size > 0)) {
286 stringp += char_size;
287 char_size = get_wchar(&string_wchar, stringp);
289 while ((end_of_matchp == (char *)0) && (char_size > 0)) {
290 end_of_matchp = test_string(stringp, regexp);
291 if (end_of_matchp != (char *)0) {
292 __loc1 = (char *)stringp;
293 } else {
294 stringp += char_size;
295 char_size = get_wchar(&string_wchar, stringp);
296 while ((string_wchar != regex_wchar) && (char_size > 0)) {
297 stringp += char_size;
298 char_size = get_wchar(&string_wchar, stringp);
303 } else {
306 * test a multibyte string against a multibyte regular expression
307 * that doesn't start with a single multibyte character
309 * move forward in the string one multibyte character at a time,
310 * testing the remaining string against the regular expression
313 end_of_matchp = test_string(stringp, regexp);
314 char_size = get_wchar(&string_wchar, stringp);
315 while ((end_of_matchp == (char *)0) && (char_size > 0)) {
316 stringp += char_size;
317 end_of_matchp = test_string(stringp, regexp);
318 char_size = get_wchar(&string_wchar, stringp);
320 if (end_of_matchp != (char *)0) {
321 __loc1 = (char *)stringp;
326 * Return substrings that matched subexpressions for which
327 * matching substrings are to be returned.
329 * NOTE:
331 * According to manual page regcmp(3G), regex() returns substrings
332 * that match subexpressions even when no substring matches the
333 * entire regular expression.
336 substringn = 0;
337 while (substringn < NSUBSTRINGS) {
338 substringp = substring_startp[substringn];
339 if ((substringp != (char *)0) &&
340 (return_arg_number[substringn] >= 0)) {
341 returned_substringp =
342 return_argp[return_arg_number[substringn]];
343 if (returned_substringp != (char *)0) {
344 while (substringp < substring_endp[substringn]) {
345 *returned_substringp = (char)*substringp;
346 returned_substringp++;
347 substringp++;
349 *returned_substringp = '\0';
352 substringn++;
354 lmutex_unlock(&regex_lock);
355 return ((char *)end_of_matchp);
356 } /* regex() */
359 /* DEFINITIONS OF PRIVATE FUNCTIONS */
/*
 * get_wchar() - decode the character at the front of a string
 *
 * wcharp   receives the decoded character
 * stringp  string to decode from; may be null
 *
 * Returns the number of bytes the character occupies:
 *   0   stringp is null or addresses the terminating NUL
 *   1   the first byte is 7-bit ASCII
 *   n   whatever mbtowc() reports for anything else; a value <= 0 means
 *       the bytes could not be decoded
 *
 * *wcharp is always written.  When mbtowc() fails it is set to L'\0',
 * because callers compare the decoded character before they look at the
 * returned size and must not read an indeterminate value.
 */
static int
get_wchar(wchar_t *wcharp,
	const char *stringp)
{
	int	char_size;

	if ((stringp == NULL) || (*stringp == '\0')) {
		*wcharp = L'\0';
		return (0);
	}

	if ((unsigned char)*stringp <= (unsigned char)0x7f) {
		*wcharp = (wchar_t)((unsigned int)*stringp);
		return (1);
	}

	char_size = mbtowc(wcharp, stringp, MB_LEN_MAX);
	if (char_size <= 0) {
		/* mbtowc() need not have stored anything on failure */
		*wcharp = L'\0';
	}
	return (char_size);
}
382 static void
383 get_match_counts(int *nmust_matchp,
384 int *nextra_matches_allowedp,
385 const char *count_stringp)
387 int minimum_match_count;
388 int maximum_match_count;
390 minimum_match_count =
391 (int)((unsigned int)*count_stringp & SINGLE_BYTE_MASK);
392 *nmust_matchp = minimum_match_count;
394 count_stringp++;
395 maximum_match_count =
396 (int)((unsigned int)*count_stringp & SINGLE_BYTE_MASK);
397 if (maximum_match_count == (int)UNLIMITED) {
398 *nextra_matches_allowedp = (int)UNLIMITED;
399 } else {
400 *nextra_matches_allowedp =
401 maximum_match_count - minimum_match_count;
403 return;
405 } /* get_match_counts() */
407 static boolean_t
408 in_wchar_range(wchar_t test_char,
409 wchar_t lower_char,
410 wchar_t upper_char)
412 return (((lower_char <= 0x7f) && (upper_char <= 0x7f) &&
413 (lower_char <= test_char) && (test_char <= upper_char)) ||
414 (((test_char & WCHAR_CSMASK) == (lower_char & WCHAR_CSMASK)) &&
415 ((test_char & WCHAR_CSMASK) == (upper_char & WCHAR_CSMASK)) &&
416 (lower_char <= test_char) && (test_char <= upper_char)));
418 } /* in_wchar_range() */
420 static const char *
421 pop_stringp(void)
423 const char *stringp;
425 if (stringp_stackp >= &stringp_stack[STRINGP_STACK_SIZE]) {
426 return ((char *)0);
427 } else {
428 stringp = *stringp_stackp;
429 stringp_stackp++;
430 return (stringp);
435 static const char *
436 previous_charp(const char *current_charp)
439 * returns the pointer to the previous character in
440 * a string of multibyte characters
443 const char *prev_cs0 = current_charp - 1;
444 const char *prev_cs1 = current_charp - eucw1;
445 const char *prev_cs2 = current_charp - eucw2 - 1;
446 const char *prev_cs3 = current_charp - eucw3 - 1;
447 const char *prev_charp;
449 if ((unsigned char)*prev_cs0 <= 0x7f) {
450 prev_charp = prev_cs0;
451 } else if ((unsigned char)*prev_cs2 == SS2) {
452 prev_charp = prev_cs2;
453 } else if ((unsigned char)*prev_cs3 == SS3) {
454 prev_charp = prev_cs3;
455 } else {
456 prev_charp = prev_cs1;
458 return (prev_charp);
460 } /* previous_charp() */
462 static const char *
463 push_stringp(const char *stringp)
465 if (stringp_stackp <= &stringp_stack[0]) {
466 return ((char *)0);
467 } else {
468 stringp_stackp--;
469 *stringp_stackp = stringp;
470 return (stringp);
475 static char_test_result_t
476 test_char_against_ascii_class(char test_char,
477 const char *classp,
478 char_test_condition_t test_condition)
481 * tests a character for membership in an ASCII character class compiled
482 * by the internationalized version of regcmp();
484 * NOTE: The internationalized version of regcmp() compiles
485 * the range a-z in an ASCII character class to aTHRUz.
488 int nbytes_to_check;
490 nbytes_to_check = (int)*classp;
491 classp++;
492 nbytes_to_check--;
494 while (nbytes_to_check > 0) {
495 if (test_char == *classp) {
496 if (test_condition == IN_CLASS)
497 return (CONDITION_TRUE);
498 else
499 return (CONDITION_FALSE);
500 } else if (*classp == THRU) {
501 if ((*(classp - 1) <= test_char) &&
502 (test_char <= *(classp + 1))) {
503 if (test_condition == IN_CLASS)
504 return (CONDITION_TRUE);
505 else
506 return (CONDITION_FALSE);
507 } else {
508 classp += 2;
509 nbytes_to_check -= 2;
511 } else {
512 classp++;
513 nbytes_to_check--;
516 if (test_condition == NOT_IN_CLASS) {
517 return (CONDITION_TRUE);
518 } else {
519 return (CONDITION_FALSE);
521 } /* test_char_against_ascii_class() */
523 static char_test_result_t
524 test_char_against_multibyte_class(wchar_t test_char,
525 const char *classp,
526 char_test_condition_t test_condition)
529 * tests a character for membership in a multibyte character class;
531 * NOTE: The range a-z in a multibyte character class compiles to
532 * aTHRUz.
535 int char_size;
536 wchar_t current_char;
537 int nbytes_to_check;
538 wchar_t previous_char;
540 nbytes_to_check = (int)*classp;
541 classp++;
542 nbytes_to_check--;
544 char_size = get_wchar(&current_char, classp);
545 if (char_size <= 0) {
546 return (CHAR_TEST_ERROR);
547 } else if (test_char == current_char) {
548 if (test_condition == IN_CLASS) {
549 return (CONDITION_TRUE);
550 } else {
551 return (CONDITION_FALSE);
553 } else {
554 classp += char_size;
555 nbytes_to_check -= char_size;
558 while (nbytes_to_check > 0) {
559 previous_char = current_char;
560 char_size = get_wchar(&current_char, classp);
561 if (char_size <= 0) {
562 return (CHAR_TEST_ERROR);
563 } else if (test_char == current_char) {
564 if (test_condition == IN_CLASS) {
565 return (CONDITION_TRUE);
566 } else {
567 return (CONDITION_FALSE);
569 } else if (current_char == THRU) {
570 classp += char_size;
571 nbytes_to_check -= char_size;
572 char_size = get_wchar(&current_char, classp);
573 if (char_size <= 0) {
574 return (CHAR_TEST_ERROR);
575 } else if (in_wchar_range(test_char, previous_char,
576 current_char)) {
577 if (test_condition == IN_CLASS) {
578 return (CONDITION_TRUE);
579 } else {
580 return (CONDITION_FALSE);
582 } else {
583 classp += char_size;
584 nbytes_to_check -= char_size;
586 } else {
587 classp += char_size;
588 nbytes_to_check -= char_size;
591 if (test_condition == NOT_IN_CLASS) {
592 return (CONDITION_TRUE);
593 } else {
594 return (CONDITION_FALSE);
596 } /* test_char_against_multibyte_class() */
599 /* FOR COMPATIBILITY WITH PREVIOUS ASCII VERSIONS OF regcmp() */
601 static char_test_result_t
602 test_char_against_old_ascii_class(char test_char,
603 const char *classp,
604 char_test_condition_t test_condition)
607 * tests a character for membership in an ASCII character class compiled
608 * by the ASCII version of regcmp();
610 * NOTE: ASCII versions of regcmp() compile the range a-z in an
611 * ASCII character class to THRUaz. The internationalized
612 * version compiles the same range to aTHRUz.
615 int nbytes_to_check;
617 nbytes_to_check = (int)*classp;
618 classp++;
619 nbytes_to_check--;
621 while (nbytes_to_check > 0) {
622 if (test_char == *classp) {
623 if (test_condition == IN_CLASS) {
624 return (CONDITION_TRUE);
625 } else {
626 return (CONDITION_FALSE);
628 } else if (*classp == THRU) {
629 if ((*(classp + 1) <= test_char) &&
630 (test_char <= *(classp + 2))) {
631 if (test_condition == IN_CLASS) {
632 return (CONDITION_TRUE);
633 } else {
634 return (CONDITION_FALSE);
636 } else {
637 classp += 3;
638 nbytes_to_check -= 3;
640 } else {
641 classp++;
642 nbytes_to_check--;
645 if (test_condition == NOT_IN_CLASS) {
646 return (CONDITION_TRUE);
647 } else {
648 return (CONDITION_FALSE);
650 } /* test_char_against_old_ascii_class() */
/*
 * test_repeated_ascii_char() - match the rest of the expression after a
 * repeated single-byte element, backing off one byte at a time
 *
 * repeat_startp  earliest position the remainder may start at
 * stringp        position just past the longest run of repeats
 * regexp         remainder of the compiled expression
 *
 * Tries test_string() at stringp and then at each earlier byte down to
 * repeat_startp.  Returns the first non-null result, or a null pointer
 * if every attempt fails.
 */
static const char *
test_repeated_ascii_char(const char *repeat_startp,
	const char *stringp,
	const char *regexp)
{
	const char	*matchp;

	for (;;) {
		matchp = test_string(stringp, regexp);
		if ((matchp != NULL) || (stringp <= repeat_startp)) {
			return (matchp);
		}
		stringp--;
	}
}
/*
 * test_repeated_multibyte_char() - match the rest of the expression after
 * a repeated multibyte element, backing off one character at a time
 *
 * repeat_startp  earliest position the remainder may start at
 * stringp        position just past the longest run of repeats
 * regexp         remainder of the compiled expression
 *
 * Tries test_string() at stringp and then at each position that
 * previous_charp() steps back to, for as long as stringp is still beyond
 * repeat_startp.  Returns the first non-null result, or a null pointer
 * if every attempt fails.
 */
static const char *
test_repeated_multibyte_char(const char *repeat_startp,
	const char *stringp,
	const char *regexp)
{
	const char	*matchp;

	for (;;) {
		matchp = test_string(stringp, regexp);
		if ((matchp != NULL) || (stringp <= repeat_startp)) {
			return (matchp);
		}
		stringp = previous_charp(stringp);
	}
}
/*
 * test_repeated_group() - match the rest of the expression after a
 * repeated group, backing off to previously saved string positions
 *
 * repeat_startp  earliest position the remainder may start at
 * stringp        position just past the last repeat of the group
 * regexp         remainder of the compiled expression
 *
 * Tries test_string() at stringp and then at each position popped from
 * the saved-position stack, for as long as stringp is still beyond
 * repeat_startp.  Returns the first non-null result, or a null pointer
 * if every attempt fails or the stack runs out.
 */
static const char *
test_repeated_group(const char *repeat_startp,
	const char *stringp,
	const char *regexp)
{
	const char	*matchp;

	for (;;) {
		matchp = test_string(stringp, regexp);
		if ((matchp != NULL) || (stringp <= repeat_startp)) {
			return (matchp);
		}
		stringp = pop_stringp();
		if (stringp == NULL) {
			return (NULL);
		}
	}
}
703 static const char *
704 test_string(const char *stringp,
705 const char *regexp)
708 * returns a pointer to the first character following the first
709 * substring of the string addressed by stringp that matches
710 * the compiled regular expression addressed by regexp
713 unsigned int group_length;
714 int nextra_matches_allowed;
715 int nmust_match;
716 wchar_t regex_wchar;
717 int regex_char_size;
718 const char *repeat_startp;
719 unsigned int return_argn;
720 wchar_t string_wchar;
721 int string_char_size;
722 unsigned int substringn;
723 char_test_condition_t test_condition;
724 const char *test_stringp;
726 for (;;) {
729 * Exit the loop via a return whenever there's a match
730 * or it's clear that there can be no match.
733 switch ((int)*regexp) {
736 * No fall-through.
737 * Each case ends with either a return or with stringp
738 * addressing the next character to be tested and regexp
739 * addressing the next compiled regular expression
741 * NOTE: The comments for each case give the meaning
742 * of the compiled regular expression decoded by the case
743 * and the character string that the compiled regular
744 * expression uses to encode the case. Each single
745 * character encoded in the compiled regular expression
746 * is shown enclosed in angle brackets (<>). Each
747 * compiled regular expression begins with a marker
748 * character which is shown as a named constant
749 * (e.g. <ASCII_CHAR>). Character constants are shown
750 * enclosed in single quotes (e.g. <'$'>). All other
751 * single characters encoded in the compiled regular
752 * expression are shown as lower case variable names
753 * (e.g. <ascii_char> or <multibyte_char>). Multicharacter
754 * strings encoded in the compiled regular expression
755 * are shown as variable names followed by ellipses
756 * (e.g. <compiled_regex...>).
759 case ASCII_CHAR: /* single ASCII char */
761 /* encoded as <ASCII_CHAR><ascii_char> */
763 regexp++;
764 if (*regexp == *stringp) {
765 regexp++;
766 stringp++;
767 } else {
768 return ((char *)0);
770 break; /* end case ASCII_CHAR */
772 case MULTIBYTE_CHAR: /* single multibyte char */
774 /* encoded as <MULTIBYTE_CHAR><multibyte_char> */
776 regexp++;
777 regex_char_size = get_wchar(&regex_wchar, regexp);
778 string_char_size = get_wchar(&string_wchar, stringp);
779 if ((string_char_size <= 0) || (string_wchar != regex_wchar)) {
780 return ((char *)0);
781 } else {
782 regexp += regex_char_size;
783 stringp += string_char_size;
785 break; /* end case MULTIBYTE_CHAR */
787 case ANY_CHAR: /* any single ASCII or multibyte char */
789 /* encoded as <ANY_CHAR> */
791 if (!multibyte) {
792 if (*stringp == '\0') {
793 return ((char *)0);
794 } else {
795 regexp++;
796 stringp++;
798 } else {
799 string_char_size = get_wchar(&string_wchar, stringp);
800 if (string_char_size <= 0) {
801 return ((char *)0);
802 } else {
803 regexp++;
804 stringp += string_char_size;
807 break; /* end case ANY_CHAR */
809 case IN_ASCII_CHAR_CLASS: /* [.....] */
810 case NOT_IN_ASCII_CHAR_CLASS:
813 * encoded as <IN_ASCII_CHAR_CLASS><class_length><class...>
814 * or <NOT_IN_ASCII_CHAR_CLASS><class_length><class...>
816 * NOTE: <class_length> includes the <class_length> byte
819 if ((int)*regexp == (int)IN_ASCII_CHAR_CLASS) {
820 test_condition = IN_CLASS;
821 } else {
822 test_condition = NOT_IN_CLASS;
824 regexp++; /* point to the <class_length> byte */
826 if ((*stringp != '\0') &&
827 (test_char_against_ascii_class(*stringp, regexp,
828 test_condition) == CONDITION_TRUE)) {
829 regexp += (int)*regexp; /* add the class length to regexp */
830 stringp++;
831 } else {
832 return ((char *)0);
834 break; /* end case IN_ASCII_CHAR_CLASS */
836 case IN_MULTIBYTE_CHAR_CLASS: /* [....] */
837 case NOT_IN_MULTIBYTE_CHAR_CLASS:
840 * encoded as <IN_MULTIBYTE_CHAR_CLASS><class_length><class...>
841 * or <NOT_IN_MULTIBYTE_CHAR_CLASS><class_length><class...>
843 * NOTE: <class_length> includes the <class_length> byte
846 if ((int)*regexp == (int)IN_MULTIBYTE_CHAR_CLASS) {
847 test_condition = IN_CLASS;
848 } else {
849 test_condition = NOT_IN_CLASS;
851 regexp++; /* point to the <class_length> byte */
853 string_char_size = get_wchar(&string_wchar, stringp);
854 if ((string_char_size > 0) &&
855 (test_char_against_multibyte_class(string_wchar, regexp,
856 test_condition) == CONDITION_TRUE)) {
857 regexp += (int)*regexp; /* add the class length to regexp */
858 stringp += string_char_size;
859 } else {
860 return ((char *)0);
862 break; /* end case IN_MULTIBYTE_CHAR_CLASS */
864 case IN_OLD_ASCII_CHAR_CLASS: /* [...] */
865 case NOT_IN_OLD_ASCII_CHAR_CLASS:
868 * encoded as <IN_OLD_ASCII_CHAR_CLASS><class_length><class...>
869 * or <NOT_IN_OLD_ASCII_CHAR_CLASS><class_length><class...>
871 * NOTE: <class_length> includes the <class_length> byte
874 if ((int)*regexp == (int)IN_OLD_ASCII_CHAR_CLASS) {
875 test_condition = IN_CLASS;
876 } else {
877 test_condition = NOT_IN_CLASS;
879 regexp++; /* point to the <class_length> byte */
881 if ((*stringp != '\0') &&
882 (test_char_against_old_ascii_class(*stringp, regexp,
883 test_condition) == CONDITION_TRUE)) {
884 regexp += (int)*regexp; /* add the class length to regexp */
885 stringp++;
886 } else {
887 return ((char *)0);
889 break; /* end case [NOT_]IN_OLD_ASCII_CHAR_CLASS */
891 case SIMPLE_GROUP: /* (.....) */
893 /* encoded as <SIMPLE_GROUP><group_length> */
895 regexp += 2;
896 break; /* end case SIMPLE_GROUP */
898 case END_GROUP: /* (.....) */
900 /* encoded as <END_GROUP><groupn> */
902 regexp += 2;
903 break; /* end case END_GROUP */
905 case SAVED_GROUP: /* (.....)$0-9 */
907 /* encoded as <SAVED_GROUP><substringn> */
909 regexp++;
910 substringn = (unsigned int)*regexp;
911 if (substringn >= NSUBSTRINGS)
912 return ((char *)0);
913 substring_startp[substringn] = stringp;
914 regexp++;
915 break; /* end case SAVED_GROUP */
917 case END_SAVED_GROUP: /* (.....)$0-9 */
920 * encoded as <END_SAVED_GROUP><substringn>\
921 * <return_arg_number[substringn]>
924 regexp++;
925 substringn = (unsigned int)*regexp;
926 if (substringn >= NSUBSTRINGS)
927 return ((char *)0);
928 substring_endp[substringn] = stringp;
929 regexp++;
930 return_argn = (unsigned int)*regexp;
931 if (return_argn >= NSUBSTRINGS)
932 return ((char *)0);
933 return_arg_number[substringn] = return_argn;
934 regexp++;
935 break; /* end case END_SAVED_GROUP */
937 case ASCII_CHAR|ZERO_OR_MORE: /* char* */
939 /* encoded as <ASCII_CHAR|ZERO_OR_MORE><ascii_char> */
941 regexp++;
942 repeat_startp = stringp;
943 while (*stringp == *regexp) {
944 stringp++;
946 regexp++;
947 return (test_repeated_ascii_char(repeat_startp,
948 stringp, regexp));
950 /* end case ASCII_CHAR|ZERO_OR_MORE */
952 case ASCII_CHAR|ONE_OR_MORE: /* char+ */
954 /* encoded as <ASCII_CHAR|ONE_OR_MORE><ascii_char> */
956 regexp++;
957 if (*stringp != *regexp) {
958 return ((char *)0);
959 } else {
960 stringp++;
961 repeat_startp = stringp;
962 while (*stringp == *regexp) {
963 stringp++;
965 regexp++;
966 return (test_repeated_ascii_char(repeat_startp, stringp,
967 regexp));
969 /* end case ASCII_CHAR|ONE_OR_MORE */
971 case ASCII_CHAR|COUNT: /* char{min_count,max_count} */
974 * encoded as <ASCII_CHAR|COUNT><ascii_char>\
975 * <minimum_match_count><maximum_match_count>
978 regexp++;
979 get_match_counts(&nmust_match, &nextra_matches_allowed,
980 regexp + 1);
981 while ((*stringp == *regexp) && (nmust_match > 0)) {
982 nmust_match--;
983 stringp++;
985 if (nmust_match > 0) {
986 return ((char *)0);
987 } else if (nextra_matches_allowed == UNLIMITED) {
988 repeat_startp = stringp;
989 while (*stringp == *regexp) {
990 stringp++;
992 regexp += 3;
993 return (test_repeated_ascii_char(repeat_startp, stringp,
994 regexp));
995 } else {
996 repeat_startp = stringp;
997 while ((*stringp == *regexp) &&
998 (nextra_matches_allowed > 0)) {
999 nextra_matches_allowed--;
1000 stringp++;
1002 regexp += 3;
1003 return (test_repeated_ascii_char(repeat_startp, stringp,
1004 regexp));
1006 /* end case ASCII_CHAR|COUNT */
1008 case MULTIBYTE_CHAR|ZERO_OR_MORE: /* char* */
1010 /* encoded as <MULTIBYTE_CHAR|ZERO_OR_MORE><multibyte_char> */
1012 regexp++;
1013 regex_char_size = get_wchar(&regex_wchar, regexp);
1014 repeat_startp = stringp;
1015 string_char_size = get_wchar(&string_wchar, stringp);
1016 while ((string_char_size > 0) &&
1017 (string_wchar == regex_wchar)) {
1018 stringp += string_char_size;
1019 string_char_size = get_wchar(&string_wchar, stringp);
1021 regexp += regex_char_size;
1022 return (test_repeated_multibyte_char(repeat_startp, stringp,
1023 regexp));
1025 /* end case MULTIBYTE_CHAR|ZERO_OR_MORE */
1027 case MULTIBYTE_CHAR|ONE_OR_MORE: /* char+ */
1029 /* encoded as <MULTIBYTE_CHAR|ONE_OR_MORE><multibyte_char> */
1031 regexp++;
1032 regex_char_size = get_wchar(&regex_wchar, regexp);
1033 string_char_size = get_wchar(&string_wchar, stringp);
1034 if ((string_char_size <= 0) || (string_wchar != regex_wchar)) {
1035 return ((char *)0);
1036 } else {
1037 stringp += string_char_size;
1038 repeat_startp = stringp;
1039 string_char_size = get_wchar(&string_wchar, stringp);
1040 while ((string_char_size > 0) &&
1041 (string_wchar == regex_wchar)) {
1042 stringp += string_char_size;
1043 string_char_size = get_wchar(&string_wchar, stringp);
1045 regexp += regex_char_size;
1046 return (test_repeated_multibyte_char(repeat_startp, stringp,
1047 regexp));
1049 /* end case MULTIBYTE_CHAR|ONE_OR_MORE */
1051 case MULTIBYTE_CHAR|COUNT: /* char{min_count,max_count} */
1054 * encoded as <MULTIBYTE_CHAR|COUNT><multibyte_char>\
1055 * <minimum_match_count><maximum_match_count>
1058 regexp++;
1059 regex_char_size = get_wchar(&regex_wchar, regexp);
1060 get_match_counts(&nmust_match, &nextra_matches_allowed,
1061 regexp + regex_char_size);
1062 string_char_size = get_wchar(&string_wchar, stringp);
1063 while ((string_char_size > 0) &&
1064 (string_wchar == regex_wchar) &&
1065 (nmust_match > 0)) {
1067 nmust_match--;
1068 stringp += string_char_size;
1069 string_char_size = get_wchar(&string_wchar, stringp);
1071 if (nmust_match > 0) {
1072 return ((char *)0);
1073 } else if (nextra_matches_allowed == UNLIMITED) {
1074 repeat_startp = stringp;
1075 while ((string_char_size > 0) &&
1076 (string_wchar == regex_wchar)) {
1077 stringp += string_char_size;
1078 string_char_size = get_wchar(&string_wchar, stringp);
1080 regexp += regex_char_size + 2;
1081 return (test_repeated_multibyte_char(repeat_startp, stringp,
1082 regexp));
1083 } else {
1084 repeat_startp = stringp;
1085 while ((string_char_size > 0) &&
1086 (string_wchar == regex_wchar) &&
1087 (nextra_matches_allowed > 0)) {
1088 nextra_matches_allowed--;
1089 stringp += string_char_size;
1090 string_char_size = get_wchar(&string_wchar, stringp);
1092 regexp += regex_char_size + 2;
1093 return (test_repeated_multibyte_char(repeat_startp, stringp,
1094 regexp));
1096 /* end case MULTIBYTE_CHAR|COUNT */
1098 case ANY_CHAR|ZERO_OR_MORE: /* .* */
1100 /* encoded as <ANY_CHAR|ZERO_OR_MORE> */
1102 repeat_startp = stringp;
1103 if (!multibyte) {
1104 while (*stringp != '\0') {
1105 stringp++;
1107 regexp++;
1108 return (test_repeated_ascii_char(repeat_startp, stringp,
1109 regexp));
1110 } else {
1111 string_char_size = get_wchar(&string_wchar, stringp);
1112 while (string_char_size > 0) {
1113 stringp += string_char_size;
1114 string_char_size = get_wchar(&string_wchar, stringp);
1116 regexp++;
1117 return (test_repeated_multibyte_char(repeat_startp, stringp,
1118 regexp));
1120 /* end case <ANY_CHAR|ZERO_OR_MORE> */
1122 case ANY_CHAR|ONE_OR_MORE: /* .+ */
1124 /* encoded as <ANY_CHAR|ONE_OR_MORE> */
1126 if (!multibyte) {
1127 if (*stringp == '\0') {
1128 return ((char *)0);
1129 } else {
1130 stringp++;
1131 repeat_startp = stringp;
1132 while (*stringp != '\0') {
1133 stringp++;
1135 regexp++;
1136 return (test_repeated_ascii_char(repeat_startp, stringp,
1137 regexp));
1139 } else {
1140 string_char_size = get_wchar(&string_wchar, stringp);
1141 if (string_char_size <= 0) {
1142 return ((char *)0);
1143 } else {
1144 stringp += string_char_size;
1145 repeat_startp = stringp;
1146 string_char_size = get_wchar(&string_wchar, stringp);
1147 while (string_char_size > 0) {
1148 stringp += string_char_size;
1149 string_char_size =
1150 get_wchar(&string_wchar, stringp);
1152 regexp++;
1153 return (test_repeated_multibyte_char(repeat_startp,
1154 stringp, regexp));
1157 /* end case <ANY_CHAR|ONE_OR_MORE> */
1159 case ANY_CHAR|COUNT: /* .{min_count,max_count} */
1162 * encoded as <ANY_CHAR|COUNT>\
1163 * <minimum_match_count><maximum_match_count>
1166 get_match_counts(&nmust_match, &nextra_matches_allowed,
1167 regexp + 1);
1168 if (!multibyte) {
1169 while ((*stringp != '\0') && (nmust_match > 0)) {
1170 nmust_match--;
1171 stringp++;
1173 if (nmust_match > 0) {
1174 return ((char *)0);
1175 } else if (nextra_matches_allowed == UNLIMITED) {
1176 repeat_startp = stringp;
1177 while (*stringp != '\0') {
1178 stringp++;
1180 regexp += 3;
1181 return (test_repeated_ascii_char(repeat_startp, stringp,
1182 regexp));
1183 } else {
1184 repeat_startp = stringp;
1185 while ((*stringp != '\0') &&
1186 (nextra_matches_allowed > 0)) {
1187 nextra_matches_allowed--;
1188 stringp++;
1190 regexp += 3;
1191 return (test_repeated_ascii_char(repeat_startp, stringp,
1192 regexp));
1194 } else { /* multibyte character */
1196 string_char_size = get_wchar(&string_wchar, stringp);
1197 while ((string_char_size > 0) && (nmust_match > 0)) {
1198 nmust_match--;
1199 stringp += string_char_size;
1200 string_char_size = get_wchar(&string_wchar, stringp);
1202 if (nmust_match > 0) {
1203 return ((char *)0);
1204 } else if (nextra_matches_allowed == UNLIMITED) {
1205 repeat_startp = stringp;
1206 while (string_char_size > 0) {
1207 stringp += string_char_size;
1208 string_char_size =
1209 get_wchar(&string_wchar, stringp);
1211 regexp += 3;
1212 return (test_repeated_multibyte_char(repeat_startp,
1213 stringp, regexp));
1214 } else {
1215 repeat_startp = stringp;
1216 while ((string_char_size > 0) &&
1217 (nextra_matches_allowed > 0)) {
1218 nextra_matches_allowed--;
1219 stringp += string_char_size;
1220 string_char_size =
1221 get_wchar(&string_wchar, stringp);
1223 regexp += 3;
1224 return (test_repeated_multibyte_char(repeat_startp,
1225 stringp, regexp));
1227 } /* end case ANY_CHAR|COUNT */
1229 case IN_ASCII_CHAR_CLASS|ZERO_OR_MORE: /* [.....]* */
1230 case NOT_IN_ASCII_CHAR_CLASS|ZERO_OR_MORE:
1233 * encoded as <IN_ASCII_CHAR_CLASS|ZERO_OR_MORE>\
1234 * <class_length><class ...>
1235 * or <NOT_IN_ASCII_CHAR_CLASS|ZERO_OR_MORE>\
1236 * <class_length><class ...>
1238 * NOTE: <class_length> includes the <class_length> byte
1241 if ((int)*regexp == (int)(IN_ASCII_CHAR_CLASS|ZERO_OR_MORE)) {
1242 test_condition = IN_CLASS;
1243 } else {
1244 test_condition = NOT_IN_CLASS;
1246 regexp++; /* point to the <class_length> byte */
1248 repeat_startp = stringp;
1249 while ((*stringp != '\0') &&
1250 (test_char_against_ascii_class(*stringp, regexp,
1251 test_condition) == CONDITION_TRUE)) {
1252 stringp++;
1254 regexp += (int)*regexp; /* add the class length to regexp */
1255 return (test_repeated_ascii_char(repeat_startp, stringp,
1256 regexp));
1258 /* end case IN_ASCII_CHAR_CLASS|ZERO_OR_MORE */
1260 case IN_ASCII_CHAR_CLASS|ONE_OR_MORE: /* [.....]+ */
1261 case NOT_IN_ASCII_CHAR_CLASS|ONE_OR_MORE:
1264 * encoded as <IN_ASCII_CHAR_CLASS|ONE_OR_MORE>\
1265 * <class_length><class ...>
1266 * or <NOT_IN_ASCII_CHAR_CLASS|ONE_OR_MORE>\
1267 * <class_length><class ...>
1269 * NOTE: <class_length> includes the <class_length> byte
1272 if ((int)*regexp == (int)(IN_ASCII_CHAR_CLASS|ONE_OR_MORE)) {
1273 test_condition = IN_CLASS;
1274 } else {
1275 test_condition = NOT_IN_CLASS;
1277 regexp++; /* point to the <class_length> byte */
1279 if ((*stringp == '\0') ||
1280 (test_char_against_ascii_class(*stringp, regexp,
1281 test_condition) != CONDITION_TRUE)) {
1282 return ((char *)0);
1283 } else {
1284 stringp++;
1285 repeat_startp = stringp;
1286 while ((*stringp != '\0') &&
1287 (test_char_against_ascii_class(*stringp, regexp,
1288 test_condition) == CONDITION_TRUE)) {
1289 stringp++;
1291 regexp += (int)*regexp; /* add the class length to regexp */
1292 return (test_repeated_ascii_char(repeat_startp, stringp,
1293 regexp));
1295 /* end case IN_ASCII_CHAR_CLASS|ONE_OR_MORE */
1297 case IN_ASCII_CHAR_CLASS | COUNT: /* [.....]{min_count,max_count} */
1298 case NOT_IN_ASCII_CHAR_CLASS | COUNT:
1301 * encoded as <IN_ASCII_CHAR_CLASS|COUNT><class_length>\
1302 * <class ...><minimum_match_count>\
1303 * <maximum_match_count>
1304 * or <NOT_IN_ASCII_CHAR_CLASS|COUNT><class_length>\
1305 * <class ...><minimum_match_count>\
1306 * <maximum_match_count>
1308 * NOTE: <class_length> includes the <class_length> byte,
1309 * but not the <minimum_match_count> or
1310 * <maximum_match_count> bytes
1313 if ((int)*regexp == (int)(IN_ASCII_CHAR_CLASS|COUNT)) {
1314 test_condition = IN_CLASS;
1315 } else {
1316 test_condition = NOT_IN_CLASS;
1318 regexp++; /* point to the <class_length> byte */
1320 get_match_counts(&nmust_match, &nextra_matches_allowed,
1321 regexp + (int)*regexp);
1322 while ((*stringp != '\0') &&
1323 (test_char_against_ascii_class(*stringp, regexp,
1324 test_condition) == CONDITION_TRUE) &&
1325 (nmust_match > 0)) {
1326 nmust_match--;
1327 stringp++;
1329 if (nmust_match > 0) {
1330 return ((char *)0);
1331 } else if (nextra_matches_allowed == UNLIMITED) {
1332 repeat_startp = stringp;
1333 while ((*stringp != '\0') &&
1334 (test_char_against_ascii_class(*stringp, regexp,
1335 test_condition) == CONDITION_TRUE)) {
1336 stringp++;
1338 regexp += (int)*regexp + 2;
1339 return (test_repeated_ascii_char(repeat_startp, stringp,
1340 regexp));
1341 } else {
1342 repeat_startp = stringp;
1343 while ((*stringp != '\0') &&
1344 (test_char_against_ascii_class(*stringp, regexp,
1345 test_condition) == CONDITION_TRUE) &&
1346 (nextra_matches_allowed > 0)) {
1347 nextra_matches_allowed--;
1348 stringp++;
1350 regexp += (int)*regexp + 2;
1351 return (test_repeated_ascii_char(repeat_startp, stringp,
1352 regexp));
1354 /* end case IN_ASCII_CHAR_CLASS|COUNT */
1356 case IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE: /* [.....]* */
1357 case NOT_IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE:
1360 * encoded as <IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE>\
1361 * <class_length><class ...>
1362 * or <NOT_IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE>\
1363 * <class_length><class ...>
1365 * NOTE: <class_length> includes the <class_length> byte
1368 if ((int)*regexp ==
1369 (int)(IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE)) {
1370 test_condition = IN_CLASS;
1371 } else {
1372 test_condition = NOT_IN_CLASS;
1374 regexp++; /* point to the <class_length> byte */
1376 repeat_startp = stringp;
1377 string_char_size = get_wchar(&string_wchar, stringp);
1378 while ((string_char_size > 0) &&
1379 (test_char_against_multibyte_class(string_wchar, regexp,
1380 test_condition) == CONDITION_TRUE)) {
1381 stringp += string_char_size;
1382 string_char_size = get_wchar(&string_wchar, stringp);
1384 regexp += (int)*regexp; /* add the class length to regexp */
1385 return (test_repeated_multibyte_char(repeat_startp, stringp,
1386 regexp));
1388 /* end case IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE */
1390 case IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE: /* [.....]+ */
1391 case NOT_IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE:
1394 * encoded as <IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE>\
1395 * <class_length><class ...>
1396 * or <NOT_IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE>\
1397 * <class_length><class ...>
1399 * NOTE: <class_length> includes the <class_length> byte
1402 if ((int)*regexp ==
1403 (int)(IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE)) {
1404 test_condition = IN_CLASS;
1405 } else {
1406 test_condition = NOT_IN_CLASS;
1408 regexp++; /* point to the <class_length> byte */
1410 string_char_size = get_wchar(&string_wchar, stringp);
1411 if ((string_char_size <= 0) ||
1412 (test_char_against_multibyte_class(string_wchar, regexp,
1413 test_condition) != CONDITION_TRUE)) {
1414 return ((char *)0);
1415 } else {
1416 stringp += string_char_size;
1417 repeat_startp = stringp;
1418 string_char_size = get_wchar(&string_wchar, stringp);
1419 while ((string_char_size > 0) &&
1420 (test_char_against_multibyte_class(string_wchar,
1421 regexp, test_condition) == CONDITION_TRUE)) {
1422 stringp += string_char_size;
1423 string_char_size = get_wchar(&string_wchar, stringp);
1425 regexp += (int)*regexp; /* add the class length to regexp */
1426 return (test_repeated_multibyte_char(repeat_startp, stringp,
1427 regexp));
1429 /* end case IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE */
1431 case IN_MULTIBYTE_CHAR_CLASS|COUNT: /* [...]{min_count,max_count} */
1432 case NOT_IN_MULTIBYTE_CHAR_CLASS|COUNT:
1435 * encoded as <IN_MULTIBYTE_CHAR_CLASS|COUNT>\
1436 * <class_length><class ...><min_count><max_count>
1437 * or <NOT_IN_MULTIBYTE_CHAR_CLASS|COUNT>\
1438 * <class_length><class ...><min_count><max_count>
1440 * NOTE: <class_length> includes the <class_length> byte
1441 * but not the <minimum_match_count> or
1442 * <maximum_match_count> bytes
1445 if ((int)*regexp == (int)(IN_MULTIBYTE_CHAR_CLASS|COUNT)) {
1446 test_condition = IN_CLASS;
1447 } else {
1448 test_condition = NOT_IN_CLASS;
1450 regexp++; /* point to the <class_length> byte */
1452 get_match_counts(&nmust_match, &nextra_matches_allowed,
1453 regexp + (int)*regexp);
1454 string_char_size = get_wchar(&string_wchar, stringp);
1455 while ((string_char_size > 0) &&
1456 (test_char_against_multibyte_class(string_wchar, regexp,
1457 test_condition) == CONDITION_TRUE) &&
1458 (nmust_match > 0)) {
1459 nmust_match--;
1460 stringp += string_char_size;
1461 string_char_size = get_wchar(&string_wchar, stringp);
1463 if (nmust_match > 0) {
1464 return ((char *)0);
1465 } else if (nextra_matches_allowed == UNLIMITED) {
1466 repeat_startp = stringp;
1467 while ((string_char_size > 0) &&
1468 (test_char_against_multibyte_class(string_wchar,
1469 regexp, test_condition) == CONDITION_TRUE)) {
1470 stringp += string_char_size;
1471 string_char_size = get_wchar(&string_wchar, stringp);
1473 regexp += (int)*regexp + 2;
1474 return (test_repeated_multibyte_char(repeat_startp, stringp,
1475 regexp));
1476 } else {
1477 repeat_startp = stringp;
1478 while ((string_char_size > 0) &&
1479 (test_char_against_multibyte_class(string_wchar,
1480 regexp, test_condition) == CONDITION_TRUE) &&
1481 (nextra_matches_allowed > 0)) {
1482 nextra_matches_allowed--;
1483 stringp += string_char_size;
1484 string_char_size = get_wchar(&string_wchar, stringp);
1486 regexp += (int)*regexp + 2;
1487 return (test_repeated_multibyte_char(repeat_startp, stringp,
1488 regexp));
1490 /* end case IN_MULTIBYTE_CHAR_CLASS|COUNT */
1492 case IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE: /* [.....]* */
1493 case NOT_IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE:
1496 * encoded as <IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE>\
1497 * <class_length><class ...>
1498 * or <NOT_IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE>\
1499 * <class_length><class ...>
1501 * NOTE: <class_length> includes the <class_length> byte
1504 if ((int)*regexp ==
1505 (int)(IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE)) {
1506 test_condition = IN_CLASS;
1507 } else {
1508 test_condition = NOT_IN_CLASS;
1510 regexp++; /* point to the <class_length> byte */
1512 repeat_startp = stringp;
1513 while ((*stringp != '\0') &&
1514 (test_char_against_old_ascii_class(*stringp, regexp,
1515 test_condition) == CONDITION_TRUE)) {
1516 stringp++;
1518 regexp += (int)*regexp; /* add the class length to regexp */
1519 return (test_repeated_ascii_char(repeat_startp, stringp,
1520 regexp));
1522 /* end case IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE */
1524 case IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE: /* [.....]+ */
1525 case NOT_IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE:
1528 * encoded as <IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE>\
1529 * <class_length><class ...>
1530 * or <NOT_IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE>\
1531 * <class_length><class ...>
1533 * NOTE: <class_length> includes the <class_length> byte
1536 if ((int)*regexp ==
1537 (int)(IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE)) {
1538 test_condition = IN_CLASS;
1539 } else {
1540 test_condition = NOT_IN_CLASS;
1542 regexp++; /* point to the <class_length> byte */
1544 if ((*stringp == '\0') ||
1545 (test_char_against_old_ascii_class(*stringp, regexp,
1546 test_condition) != CONDITION_TRUE)) {
1547 return ((char *)0);
1548 } else {
1549 stringp++;
1550 repeat_startp = stringp;
1551 while ((*stringp != '\0') &&
1552 (test_char_against_old_ascii_class(*stringp, regexp,
1553 test_condition) == CONDITION_TRUE)) {
1554 stringp++;
1556 regexp += (int)*regexp; /* add the class length to regexp */
1557 return (test_repeated_ascii_char(repeat_startp, stringp,
1558 regexp));
1560 /* end case IN_OLD_ASCII_CHAR_CLASS | ONE_OR_MORE */
1562 case IN_OLD_ASCII_CHAR_CLASS|COUNT: /* [...]{min_count,max_count} */
1563 case NOT_IN_OLD_ASCII_CHAR_CLASS|COUNT:
1566 * encoded as <IN_OLD_ASCII_CHAR_CLASS|COUNT><class_length>\
1567 * <class ...><minimum_match_count>\
1568 * <maximum_match_count>
1569 * or <NOT_IN_OLD_ASCII_CHAR_CLASS|COUNT>\
1570 * <class_length><class ...><minimum_match_count>\
1571 * <maximum_match_count>
1573 * NOTE: <class_length> includes the <class_length> byte
1574 * but not the <minimum_match_count> or
1575 * <maximum_match_count> bytes
1578 if ((int)*regexp == (int)(IN_OLD_ASCII_CHAR_CLASS|COUNT)) {
1579 test_condition = IN_CLASS;
1580 } else {
1581 test_condition = NOT_IN_CLASS;
1583 regexp++; /* point to the <class_length> byte */
1585 get_match_counts(&nmust_match, &nextra_matches_allowed,
1586 regexp + (int)*regexp);
1587 while ((*stringp != '\0') &&
1588 (test_char_against_old_ascii_class(*stringp, regexp,
1589 test_condition) == CONDITION_TRUE) &&
1590 (nmust_match > 0)) {
1591 nmust_match--;
1592 stringp++;
1594 if (nmust_match > 0) {
1595 return ((char *)0);
1596 } else if (nextra_matches_allowed == UNLIMITED) {
1597 repeat_startp = stringp;
1598 while ((*stringp != '\0') &&
1599 (test_char_against_old_ascii_class(*stringp, regexp,
1600 test_condition) == CONDITION_TRUE)) {
1601 stringp++;
1603 regexp += (int)*regexp + 2;
1604 return (test_repeated_ascii_char(repeat_startp, stringp,
1605 regexp));
1606 } else {
1607 repeat_startp = stringp;
1608 while ((*stringp != '\0') &&
1609 (test_char_against_old_ascii_class(*stringp, regexp,
1610 test_condition) == CONDITION_TRUE) &&
1611 (nextra_matches_allowed > 0)) {
1612 nextra_matches_allowed--;
1613 stringp++;
1615 regexp += (int)*regexp + 2;
1616 return (test_repeated_ascii_char(repeat_startp, stringp,
1617 regexp));
1619 /* end case IN_OLD_ASCII_CHAR_CLASS|COUNT */
1621 case ZERO_OR_MORE_GROUP: /* (.....)* */
1622 case ZERO_OR_MORE_GROUP|ADD_256_TO_GROUP_LENGTH:
1623 case ZERO_OR_MORE_GROUP|ADD_512_TO_GROUP_LENGTH:
1624 case ZERO_OR_MORE_GROUP|ADD_768_TO_GROUP_LENGTH:
1627 * encoded as <ZERO_OR_MORE_GROUP|ADDED_LENGTH_BITS>\
1628 * <group_length><compiled_regex...>\
1629 * <END_GROUP|ZERO_OR_MORE><groupn>
1631 * NOTE:
1633 * group_length + (256 * ADDED_LENGTH_BITS) ==
1634 * length_of(<compiled_regex...><END_GROUP|ZERO_OR_MORE>\
1635 * <groupn>)
1639 group_length =
1640 (((unsigned int)*regexp & ADDED_LENGTH_BITS) <<
1641 TIMES_256_SHIFT);
1642 regexp++;
1643 group_length += (unsigned int)*regexp;
1644 regexp++;
1645 repeat_startp = stringp;
1646 test_stringp = test_string(stringp, regexp);
1647 while (test_stringp != (char *)0) {
1648 if (push_stringp(stringp) == (char *)0)
1649 return ((char *)0);
1650 stringp = test_stringp;
1651 test_stringp = test_string(stringp, regexp);
1653 regexp += group_length;
1654 return (test_repeated_group(repeat_startp, stringp, regexp));
1656 /* end case ZERO_OR_MORE_GROUP */
1658 case END_GROUP|ZERO_OR_MORE: /* (.....)* */
1660 /* encoded as <END_GROUP|ZERO_OR_MORE> */
1662 /* return from recursive call to test_string() */
1664 return ((char *)stringp);
1666 /* end case END_GROUP|ZERO_OR_MORE */
1668 case ONE_OR_MORE_GROUP: /* (.....)+ */
1669 case ONE_OR_MORE_GROUP|ADD_256_TO_GROUP_LENGTH:
1670 case ONE_OR_MORE_GROUP|ADD_512_TO_GROUP_LENGTH:
1671 case ONE_OR_MORE_GROUP|ADD_768_TO_GROUP_LENGTH:
1674 * encoded as <ONE_OR_MORE_GROUP|ADDED_LENGTH_BITS>\
1675 * <group_length><compiled_regex...>\
1676 * <END_GROUP|ONE_OR_MORE><groupn>
1678 * NOTE:
1680 * group_length + (256 * ADDED_LENGTH_BITS) ==
1681 * length_of(<compiled_regex...><END_GROUP|ONE_OR_MORE>\
1682 * <groupn>)
1685 group_length =
1686 (((unsigned int)*regexp & ADDED_LENGTH_BITS) <<
1687 TIMES_256_SHIFT);
1688 regexp++;
1689 group_length += (unsigned int)*regexp;
1690 regexp++;
1691 stringp = test_string(stringp, regexp);
1692 if (stringp == (char *)0)
1693 return ((char *)0);
1694 repeat_startp = stringp;
1695 test_stringp = test_string(stringp, regexp);
1696 while (test_stringp != (char *)0) {
1697 if (push_stringp(stringp) == (char *)0)
1698 return ((char *)0);
1699 stringp = test_stringp;
1700 test_stringp = test_string(stringp, regexp);
1702 regexp += group_length;
1703 return (test_repeated_group(repeat_startp, stringp, regexp));
1705 /* end case ONE_OR_MORE_GROUP */
1707 case END_GROUP|ONE_OR_MORE: /* (.....)+ */
1709 /* encoded as <END_GROUP|ONE_OR_MORE><groupn> */
1711 /* return from recursive call to test_string() */
1713 return ((char *)stringp);
1715 /* end case END_GROUP|ONE_OR_MORE */
1717 case COUNTED_GROUP: /* (.....){min_count,max_count} */
1718 case COUNTED_GROUP|ADD_256_TO_GROUP_LENGTH:
1719 case COUNTED_GROUP|ADD_512_TO_GROUP_LENGTH:
1720 case COUNTED_GROUP|ADD_768_TO_GROUP_LENGTH:
1723 * encoded as <COUNTED_GROUP|ADDED_LENGTH_BITS><group_length>\
1724 * <compiled_regex...>\<END_GROUP|COUNT><groupn>\
1725 * <minimum_match_count><maximum_match_count>
1727 * NOTE:
1729 * group_length + (256 * ADDED_LENGTH_BITS) ==
1730 * length_of(<compiled_regex...><END_GROUP|COUNT><groupn>)
1732 * but does not include the <minimum_match_count> or
1733 * <maximum_match_count> bytes
1736 group_length =
1737 (((unsigned int)*regexp & ADDED_LENGTH_BITS) <<
1738 TIMES_256_SHIFT);
1739 regexp++;
1740 group_length += (unsigned int)*regexp;
1741 regexp++;
1742 get_match_counts(&nmust_match, &nextra_matches_allowed,
1743 regexp + group_length);
1744 test_stringp = test_string(stringp, regexp);
1745 while ((test_stringp != (char *)0) && (nmust_match > 0)) {
1746 stringp = test_stringp;
1747 nmust_match--;
1748 test_stringp = test_string(stringp, regexp);
1750 if (nmust_match > 0) {
1751 return ((char *)0);
1752 } else if (nextra_matches_allowed == UNLIMITED) {
1753 repeat_startp = stringp;
1754 while (test_stringp != (char *)0) {
1755 if (push_stringp(stringp) == (char *)0)
1756 return ((char *)0);
1757 stringp = test_stringp;
1758 test_stringp = test_string(stringp, regexp);
1760 regexp += group_length + 2;
1761 return (test_repeated_group(repeat_startp, stringp,
1762 regexp));
1763 } else {
1764 repeat_startp = stringp;
1765 while ((test_stringp != (char *)0) &&
1766 (nextra_matches_allowed > 0)) {
1767 nextra_matches_allowed--;
1768 if (push_stringp(stringp) == (char *)0)
1769 return ((char *)0);
1770 stringp = test_stringp;
1771 test_stringp = test_string(stringp, regexp);
1773 regexp += group_length + 2;
1774 return (test_repeated_group(repeat_startp, stringp,
1775 regexp));
1777 /* end case COUNTED_GROUP */
1779 case END_GROUP|COUNT: /* (.....){min_count,max_count} */
1781 /* encoded as <END_GROUP|COUNT> */
1783 /* return from recursive call to test_string() */
1785 return (stringp);
1787 /* end case END_GROUP|COUNT */
1789 case END_OF_STRING_MARK:
1791 /* encoded as <END_OF_STRING_MARK><END_REGEX> */
1793 if (*stringp == '\0') {
1794 regexp++;
1795 } else {
1796 return ((char *)0);
1798 break; /* end case END_OF_STRING_MARK */
1800 case END_REGEX: /* end of the compiled regular expression */
1802 /* encoded as <END_REGEX> */
1804 return (stringp);
1806 /* end case END_REGEX */
1808 default:
1810 return ((char *)0);
1812 } /* end switch (*regexp) */
1814 } /* end for (;;) */
1816 } /* test_string() */