1 /* Extended regular expression matching and search library, version
2 0.12. (Implements POSIX draft P10003.2/D11.2, except for
3 internationalization features.)
5 Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998 Free Software Foundation, Inc.
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
22 /* AIX requires this to be the first thing in the file. */
23 #if defined (_AIX) && !defined (REGEX_MALLOC)
31 /* Converts the pointer to the char to BEG-based offset from the start. */
32 #define PTR_TO_OFFSET(d) \
33 POS_AS_IN_BUFFER (MATCHING_IN_FIRST_STRING \
34 ? (d) - string1 : (d) - (string2 - size1))
35 #define POS_AS_IN_BUFFER(p) ((p) + (NILP (re_match_object) || BUFFERP (re_match_object)))
37 #define PTR_TO_OFFSET(d) 0
44 /* We need this for `regex.h', and perhaps for the Emacs include files. */
45 #include <sys/types.h>
47 /* This is for other GNU distributions with internationalized messages. */
48 #if HAVE_LIBINTL_H || defined (_LIBC)
51 # define gettext(msgid) (msgid)
55 /* This define is so xgettext can find the internationalizable
57 #define gettext_noop(String) String
60 /* The `emacs' switch turns on certain matching commands
61 that make sense only in Emacs. */
67 /* Make syntax table lookup grant data in gl_state. */
68 #define SYNTAX_ENTRY_VIA_PROPERTY
74 #define malloc xmalloc
75 #define realloc xrealloc
80 /* If we are not linking with Emacs proper,
81 we can't use the relocating allocator
82 even if config.h says that we can. */
85 #if defined (STDC_HEADERS) || defined (_LIBC)
92 /* When used in Emacs's lib-src, we need to get bzero and bcopy somehow.
93 If nothing else has been done, use the method below. */
94 #ifdef INHIBIT_STRING_HEADER
95 #if !(defined (HAVE_BZERO) && defined (HAVE_BCOPY))
96 #if !defined (bzero) && !defined (bcopy)
97 #undef INHIBIT_STRING_HEADER
102 /* This is the normal way of making sure we have a bcopy and a bzero.
103 This is used in most programs--a few other programs avoid this
104 by defining INHIBIT_STRING_HEADER. */
105 #ifndef INHIBIT_STRING_HEADER
106 #if defined (HAVE_STRING_H) || defined (STDC_HEADERS) || defined (_LIBC)
109 #define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
112 #define bcopy(s, d, n) memcpy ((d), (s), (n))
115 #define bzero(s, n) memset ((s), 0, (n))
122 /* Define the syntax stuff for \<, \>, etc. */
124 /* This must be nonzero for the wordchar and notwordchar pattern
125 commands in re_match_2. */
130 #ifdef SWITCH_ENUM_BUG
131 #define SWITCH_ENUM_CAST(x) ((int)(x))
133 #define SWITCH_ENUM_CAST(x) (x)
138 extern char *re_syntax_table
;
140 #else /* not SYNTAX_TABLE */
142 /* How many characters in the character set. */
143 #define CHAR_SET_SIZE 256
145 static char re_syntax_table
[CHAR_SET_SIZE
];
156 bzero (re_syntax_table
, sizeof re_syntax_table
);
158 for (c
= 'a'; c
<= 'z'; c
++)
159 re_syntax_table
[c
] = Sword
;
161 for (c
= 'A'; c
<= 'Z'; c
++)
162 re_syntax_table
[c
] = Sword
;
164 for (c
= '0'; c
<= '9'; c
++)
165 re_syntax_table
[c
] = Sword
;
167 re_syntax_table
['_'] = Sword
;
172 #endif /* not SYNTAX_TABLE */
174 #define SYNTAX(c) re_syntax_table[c]
176 /* Dummy macros for non-Emacs environments. */
177 #define BASE_LEADING_CODE_P(c) (0)
178 #define WORD_BOUNDARY_P(c1, c2) (0)
179 #define CHAR_HEAD_P(p) (1)
180 #define SINGLE_BYTE_CHAR_P(c) (1)
181 #define SAME_CHARSET_P(c1, c2) (1)
182 #define MULTIBYTE_FORM_LENGTH(p, s) (1)
183 #define STRING_CHAR(p, s) (*(p))
184 #define STRING_CHAR_AND_LENGTH(p, s, actual_len) ((actual_len) = 1, *(p))
185 #define GET_CHAR_AFTER_2(c, p, str1, end1, str2, end2) \
186 (c = ((p) == (end1) ? *(str2) : *(p)))
187 #define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2) \
188 (c = ((p) == (str2) ? *((end1) - 1) : *((p) - 1)))
189 #endif /* not emacs */
191 /* Get the interface, including the syntax bits. */
194 /* isalpha etc. are used for the character classes. */
199 /* 1 if C is an ASCII character. */
200 #define IS_REAL_ASCII(c) ((c) < 0200)
202 /* 1 if C is a unibyte character. */
203 #define ISUNIBYTE(c) (SINGLE_BYTE_CHAR_P ((c)))
205 /* The Emacs definitions should not be directly affected by locales. */
207 /* In Emacs, these are only used for single-byte characters. */
208 #define ISDIGIT(c) ((c) >= '0' && (c) <= '9')
209 #define ISCNTRL(c) ((c) < ' ')
210 #define ISXDIGIT(c) (((c) >= '0' && (c) <= '9') \
211 || ((c) >= 'a' && (c) <= 'f') \
212 || ((c) >= 'A' && (c) <= 'F'))
214 /* This is only used for single-byte characters. */
215 #define ISBLANK(c) ((c) == ' ' || (c) == '\t')
217 /* The rest must handle multibyte characters. */
219 #define ISGRAPH(c) (SINGLE_BYTE_CHAR_P (c) \
220 ? (c) > ' ' && !((c) >= 0177 && (c) <= 0237) \
223 #define ISPRINT(c) (SINGLE_BYTE_CHAR_P (c) \
224 ? (c) >= ' ' && !((c) >= 0177 && (c) <= 0237) \
227 #define ISALNUM(c) (IS_REAL_ASCII (c) \
228 ? (((c) >= 'a' && (c) <= 'z') \
229 || ((c) >= 'A' && (c) <= 'Z') \
230 || ((c) >= '0' && (c) <= '9')) \
231 : SYNTAX (c) == Sword)
233 #define ISALPHA(c) (IS_REAL_ASCII (c) \
234 ? (((c) >= 'a' && (c) <= 'z') \
235 || ((c) >= 'A' && (c) <= 'Z')) \
236 : SYNTAX (c) == Sword)
238 #define ISLOWER(c) (LOWERCASEP (c))
240 #define ISPUNCT(c) (IS_REAL_ASCII (c) \
241 ? ((c) > ' ' && (c) < 0177 \
242 && !(((c) >= 'a' && (c) <= 'z') \
243 || ((c) >= 'A' && (c) <= 'Z') \
244 || ((c) >= '0' && (c) <= '9'))) \
245 : SYNTAX (c) != Sword)
247 #define ISSPACE(c) (SYNTAX (c) == Swhitespace)
249 #define ISUPPER(c) (UPPERCASEP (c))
251 #define ISWORD(c) (SYNTAX (c) == Sword)
253 #else /* not emacs */
255 /* Jim Meyering writes:
257 "... Some ctype macros are valid only for character codes that
258 isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when
259 using /bin/cc or gcc but without giving an ansi option). So, all
260 ctype uses should be through macros like ISPRINT... If
261 STDC_HEADERS is defined, then autoconf has verified that the ctype
262 macros don't need to be guarded with references to isascii. ...
263 Defining isascii to 1 should let any compiler worth its salt
264 eliminate the && through constant folding." */
266 #if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII))
269 #define ISASCII(c) isascii(c)
272 /* 1 if C is an ASCII character. */
273 #define IS_REAL_ASCII(c) ((c) < 0200)
275 /* This distinction is not meaningful, except in Emacs. */
276 #define ISUNIBYTE(c) 1
278 #define ISDIGIT(c) (ISASCII (c) && isdigit (c))
279 #define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
280 #define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
283 #define ISBLANK(c) (ISASCII (c) && isblank (c))
285 #define ISBLANK(c) ((c) == ' ' || (c) == '\t')
288 #define ISGRAPH(c) (ISASCII (c) && isgraph (c))
290 #define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c))
293 #define ISPRINT(c) (ISASCII (c) && isprint (c))
294 #define ISDIGIT(c) (ISASCII (c) && isdigit (c))
295 #define ISALNUM(c) (ISASCII (c) && isalnum (c))
296 #define ISALPHA(c) (ISASCII (c) && isalpha (c))
297 #define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
298 #define ISLOWER(c) (ISASCII (c) && islower (c))
299 #define ISPUNCT(c) (ISASCII (c) && ispunct (c))
300 #define ISSPACE(c) (ISASCII (c) && isspace (c))
301 #define ISUPPER(c) (ISASCII (c) && isupper (c))
302 #define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
304 #define ISWORD(c) ISALPHA(c)
306 #endif /* not emacs */
309 #define NULL (void *)0
312 /* We remove any previous definition of `SIGN_EXTEND_CHAR',
313 since ours (we hope) works properly with all combinations of
314 machines, compilers, `char' and `unsigned char' argument types.
315 (Per Bothner suggested the basic approach.) */
316 #undef SIGN_EXTEND_CHAR
318 #define SIGN_EXTEND_CHAR(c) ((signed char) (c))
319 #else /* not __STDC__ */
320 /* As in Harbison and Steele. */
321 #define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
324 /* Should we use malloc or alloca? If REGEX_MALLOC is not defined, we
325 use `alloca' instead of `malloc'. This is because using malloc in
326 re_search* or re_match* could cause memory leaks when C-g is used in
327 Emacs; also, malloc is slower and causes storage fragmentation. On
328 the other hand, malloc is more portable, and easier to debug.
330 Because we sometimes use alloca, some routines have to be macros,
331 not functions -- `alloca'-allocated space disappears at the end of the
332 function it is called in. */
336 #define REGEX_ALLOCATE malloc
337 #define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
338 #define REGEX_FREE free
340 #else /* not REGEX_MALLOC */
342 /* Emacs already defines alloca, sometimes. */
345 /* Make alloca work the best possible way. */
347 #define alloca __builtin_alloca
348 #else /* not __GNUC__ */
351 #else /* not __GNUC__ or HAVE_ALLOCA_H */
352 #if 0 /* It is a bad idea to declare alloca. We always cast the result. */
353 #ifndef _AIX /* Already did AIX, up at the top. */
355 #endif /* not _AIX */
357 #endif /* not HAVE_ALLOCA_H */
358 #endif /* not __GNUC__ */
360 #endif /* not alloca */
362 #define REGEX_ALLOCATE alloca
364 /* Assumes a `char *destination' variable. */
365 #define REGEX_REALLOCATE(source, osize, nsize) \
366 (destination = (char *) alloca (nsize), \
367 bcopy (source, destination, osize), \
370 /* No need to do anything to free, after alloca. */
371 #define REGEX_FREE(arg) ((void)0) /* Do nothing! But inhibit gcc warning. */
373 #endif /* not REGEX_MALLOC */
375 /* Define how to allocate the failure stack. */
377 #if defined (REL_ALLOC) && defined (REGEX_MALLOC)
379 #define REGEX_ALLOCATE_STACK(size) \
380 r_alloc (&failure_stack_ptr, (size))
381 #define REGEX_REALLOCATE_STACK(source, osize, nsize) \
382 r_re_alloc (&failure_stack_ptr, (nsize))
383 #define REGEX_FREE_STACK(ptr) \
384 r_alloc_free (&failure_stack_ptr)
386 #else /* not using relocating allocator */
390 #define REGEX_ALLOCATE_STACK malloc
391 #define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize)
392 #define REGEX_FREE_STACK free
394 #else /* not REGEX_MALLOC */
396 #define REGEX_ALLOCATE_STACK alloca
398 #define REGEX_REALLOCATE_STACK(source, osize, nsize) \
399 REGEX_REALLOCATE (source, osize, nsize)
400 /* No need to explicitly free anything. */
401 #define REGEX_FREE_STACK(arg)
403 #endif /* not REGEX_MALLOC */
404 #endif /* not using relocating allocator */
407 /* True if `size1' is non-NULL and PTR is pointing anywhere inside
408 `string1' or just past its end. This works if PTR is NULL, which is
410 #define FIRST_STRING_P(ptr) \
411 (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
413 /* (Re)Allocate N items of type T using malloc, or fail. */
414 #define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
415 #define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
416 #define RETALLOC_IF(addr, n, t) \
417 if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t)
418 #define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
420 #define BYTEWIDTH 8 /* In bits. */
422 #define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
426 #define MAX(a, b) ((a) > (b) ? (a) : (b))
427 #define MIN(a, b) ((a) < (b) ? (a) : (b))
429 typedef char boolean
;
433 static int re_match_2_internal ();
435 /* These are the command codes that appear in compiled regular
436 expressions. Some opcodes are followed by argument bytes. A
437 command code can specify any interpretation whatsoever for its
438 arguments. Zero bytes may appear in the compiled regular expression. */
444 /* Succeed right away--no more backtracking. */
447 /* Followed by one byte giving n, then by n literal bytes. */
450 /* Matches any (more or less) character. */
453 /* Matches any one char belonging to specified set. First
454 following byte is number of bitmap bytes. Then come bytes
455 for a bitmap saying which chars are in. Bits in each byte
456 are ordered low-bit-first. A character is in the set if its
457 bit is 1. A character too large to have a bit in the map is
458 automatically not in the set.
460 If the length byte has the 0x80 bit set, then that stuff
461 is followed by a range table:
462 2 bytes of flags for character sets (low 8 bits, high 8 bits)
463 See RANGE_TABLE_WORK_BITS below.
464 2 bytes, the number of pairs that follow
465 pairs, each 2 multibyte characters,
466 each multibyte character represented as 3 bytes. */
469 /* Same parameters as charset, but match any character that is
470 not one of those specified. */
473 /* Start remembering the text that is matched, for storing in a
474 register. Followed by one byte with the register number, in
475 the range 0 to one less than the pattern buffer's re_nsub
476 field. Then followed by one byte with the number of groups
477 inner to this one. (This last has to be part of the
478 start_memory only because we need it in the on_failure_jump
482 /* Stop remembering the text that is matched and store it in a
483 memory register. Followed by one byte with the register
484 number, in the range 0 to one less than `re_nsub' in the
485 pattern buffer, and one byte with the number of inner groups,
486 just like `start_memory'. (We need the number of inner
487 groups here because we don't have any easy way of finding the
488 corresponding start_memory when we're at a stop_memory.) */
491 /* Match a duplicate of something remembered. Followed by one
492 byte containing the register number. */
495 /* Fail unless at beginning of line. */
498 /* Fail unless at end of line. */
501 /* Succeeds if at beginning of buffer (if emacs) or at beginning
502 of string to be matched (if not). */
505 /* Analogously, for end of buffer/string. */
508 /* Followed by two byte relative address to which to jump. */
511 /* Same as jump, but marks the end of an alternative. */
514 /* Followed by two-byte relative address of place to resume at
515 in case of failure. */
518 /* Like on_failure_jump, but pushes a placeholder instead of the
519 current string position when executed. */
520 on_failure_keep_string_jump
,
522 /* Throw away latest failure point and then jump to following
523 two-byte relative address. */
526 /* Change to pop_failure_jump if know won't have to backtrack to
527 match; otherwise change to jump. This is used to jump
528 back to the beginning of a repeat. If what follows this jump
529 clearly won't match what the repeat does, such that we can be
530 sure that there is no use backtracking out of repetitions
531 already matched, then we change it to a pop_failure_jump.
532 Followed by two-byte address. */
535 /* Jump to following two-byte address, and push a dummy failure
536 point. This failure point will be thrown away if an attempt
537 is made to use it for a failure. A `+' construct makes this
538 before the first repeat. Also used as an intermediary kind
539 of jump when compiling an alternative. */
542 /* Push a dummy failure point and continue. Used at the end of
546 /* Followed by two-byte relative address and two-byte number n.
547 After matching N times, jump to the address upon failure. */
550 /* Followed by two-byte relative address, and two-byte number n.
551 Jump to the address N times, then fail. */
554 /* Set the following two-byte relative address to the
555 subsequent two-byte number. The address *includes* the two
559 wordchar
, /* Matches any word-constituent character. */
560 notwordchar
, /* Matches any char that is not a word-constituent. */
562 wordbeg
, /* Succeeds if at word beginning. */
563 wordend
, /* Succeeds if at word end. */
565 wordbound
, /* Succeeds if at a word boundary. */
566 notwordbound
/* Succeeds if not at a word boundary. */
569 ,before_dot
, /* Succeeds if before point. */
570 at_dot
, /* Succeeds if at point. */
571 after_dot
, /* Succeeds if after point. */
573 /* Matches any character whose syntax is specified. Followed by
574 a byte which contains a syntax code, e.g., Sword. */
577 /* Matches any character whose syntax is not that specified. */
580 /* Matches any character whose category-set contains the specified
581 category. The operator is followed by a byte which contains a
582 category code (mnemonic ASCII character). */
585 /* Matches any character whose category-set does not contain the
586 specified category. The operator is followed by a byte which
587 contains the category code (mnemonic ASCII character). */
592 /* Common operations on the compiled pattern. */
594 /* Store NUMBER in two contiguous bytes starting at DESTINATION. */
596 #define STORE_NUMBER(destination, number) \
598 (destination)[0] = (number) & 0377; \
599 (destination)[1] = (number) >> 8; \
602 /* Same as STORE_NUMBER, except increment DESTINATION to
603 the byte after where the number is stored. Therefore, DESTINATION
604 must be an lvalue. */
606 #define STORE_NUMBER_AND_INCR(destination, number) \
608 STORE_NUMBER (destination, number); \
609 (destination) += 2; \
612 /* Put into DESTINATION a number stored in two contiguous bytes starting
615 #define EXTRACT_NUMBER(destination, source) \
617 (destination) = *(source) & 0377; \
618 (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8; \
623 extract_number (dest
, source
)
625 unsigned char *source
;
627 int temp
= SIGN_EXTEND_CHAR (*(source
+ 1));
628 *dest
= *source
& 0377;
632 #ifndef EXTRACT_MACROS /* To debug the macros. */
633 #undef EXTRACT_NUMBER
634 #define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
635 #endif /* not EXTRACT_MACROS */
639 /* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
640 SOURCE must be an lvalue. */
642 #define EXTRACT_NUMBER_AND_INCR(destination, source) \
644 EXTRACT_NUMBER (destination, source); \
650 extract_number_and_incr (destination
, source
)
652 unsigned char **source
;
654 extract_number (destination
, *source
);
658 #ifndef EXTRACT_MACROS
659 #undef EXTRACT_NUMBER_AND_INCR
660 #define EXTRACT_NUMBER_AND_INCR(dest, src) \
661 extract_number_and_incr (&dest, &src)
662 #endif /* not EXTRACT_MACROS */
666 /* Store a multibyte character in three contiguous bytes starting
667 DESTINATION, and increment DESTINATION to the byte after where the
668 character is stored. Therefore, DESTINATION must be an lvalue. */
670 #define STORE_CHARACTER_AND_INCR(destination, character) \
672 (destination)[0] = (character) & 0377; \
673 (destination)[1] = ((character) >> 8) & 0377; \
674 (destination)[2] = (character) >> 16; \
675 (destination) += 3; \
678 /* Put into DESTINATION a character stored in three contiguous bytes
679 starting at SOURCE. */
681 #define EXTRACT_CHARACTER(destination, source) \
683 (destination) = ((source)[0] \
684 | ((source)[1] << 8) \
685 | ((source)[2] << 16)); \
689 /* Macros for charset. */
691 /* Size of bitmap of charset P in bytes. P is a start of charset,
692 i.e. *P is (re_opcode_t) charset or (re_opcode_t) charset_not. */
693 #define CHARSET_BITMAP_SIZE(p) ((p)[1] & 0x7F)
695 /* Nonzero if charset P has range table. */
696 #define CHARSET_RANGE_TABLE_EXISTS_P(p) ((p)[1] & 0x80)
698 /* Return the address of range table of charset P. But not the start
699 of table itself, but the before where the number of ranges is
700 stored. `2 +' means to skip re_opcode_t and size of bitmap,
701 and the 2 bytes of flags at the start of the range table. */
702 #define CHARSET_RANGE_TABLE(p) (&(p)[4 + CHARSET_BITMAP_SIZE (p)])
704 /* Extract the bit flags that start a range table. */
705 #define CHARSET_RANGE_TABLE_BITS(p) \
706 ((p)[2 + CHARSET_BITMAP_SIZE (p)] \
707 + (p)[3 + CHARSET_BITMAP_SIZE (p)] * 0x100)
709 /* Test if C is listed in the bitmap of charset P. */
710 #define CHARSET_LOOKUP_BITMAP(p, c) \
711 ((c) < CHARSET_BITMAP_SIZE (p) * BYTEWIDTH \
712 && (p)[2 + (c) / BYTEWIDTH] & (1 << ((c) % BYTEWIDTH)))
714 /* Return the address of end of RANGE_TABLE. COUNT is number of
715 ranges (which is a pair of (start, end)) in the RANGE_TABLE. `* 2'
716 is start of range and end of range. `* 3' is size of each start
718 #define CHARSET_RANGE_TABLE_END(range_table, count) \
719 ((range_table) + (count) * 2 * 3)
721 /* Test if C is in RANGE_TABLE. A flag NOT is negated if C is in.
722 COUNT is number of ranges in RANGE_TABLE. */
723 #define CHARSET_LOOKUP_RANGE_TABLE_RAW(not, c, range_table, count) \
726 int range_start, range_end; \
728 unsigned char *range_table_end \
729 = CHARSET_RANGE_TABLE_END ((range_table), (count)); \
731 for (p = (range_table); p < range_table_end; p += 2 * 3) \
733 EXTRACT_CHARACTER (range_start, p); \
734 EXTRACT_CHARACTER (range_end, p + 3); \
736 if (range_start <= (c) && (c) <= range_end) \
745 /* Test if C is in range table of CHARSET. The flag NOT is negated if
746 C is listed in it. */
747 #define CHARSET_LOOKUP_RANGE_TABLE(not, c, charset) \
750 /* Number of ranges in range table. */ \
752 unsigned char *range_table = CHARSET_RANGE_TABLE (charset); \
754 EXTRACT_NUMBER_AND_INCR (count, range_table); \
755 CHARSET_LOOKUP_RANGE_TABLE_RAW ((not), (c), range_table, count); \
759 /* If DEBUG is defined, Regex prints many voluminous messages about what
760 it is doing (if the variable `debug' is nonzero). If linked with the
761 main program in `iregex.c', you can enter patterns and strings
762 interactively. And if linked with the main program in `main.c' and
763 the other test files, you can run the already-written tests. */
767 /* We use standard I/O for debugging. */
770 /* It is useful to test things that ``must'' be true when debugging. */
773 static int debug
= 0;
775 #define DEBUG_STATEMENT(e) e
776 #define DEBUG_PRINT1(x) if (debug) printf (x)
777 #define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)
778 #define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)
779 #define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)
780 #define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \
781 if (debug) print_partial_compiled_pattern (s, e)
782 #define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \
783 if (debug) print_double_string (w, s1, sz1, s2, sz2)
786 /* Print the fastmap in human-readable form. */
789 print_fastmap (fastmap
)
792 unsigned was_a_range
= 0;
795 while (i
< (1 << BYTEWIDTH
))
801 while (i
< (1 << BYTEWIDTH
) && fastmap
[i
])
817 /* Print a compiled pattern string in human-readable form, starting at
818 the START pointer into it and ending just before the pointer END. */
821 print_partial_compiled_pattern (start
, end
)
822 unsigned char *start
;
826 unsigned char *p
= start
;
827 unsigned char *pend
= end
;
835 /* Loop over pattern commands. */
838 printf ("%d:\t", p
- start
);
840 switch ((re_opcode_t
) *p
++)
848 printf ("/exactn/%d", mcnt
);
859 printf ("/start_memory/%d/%d", mcnt
, *p
++);
864 printf ("/stop_memory/%d/%d", mcnt
, *p
++);
868 printf ("/duplicate/%d", *p
++);
878 register int c
, last
= -100;
879 register int in_range
= 0;
880 int length
= *p
& 0x7f;
881 int has_range_table
= *p
& 0x80;
882 int range_length
= p
[length
+ 2] + p
[length
+ 3] * 0x100;
884 printf ("/charset [%s",
885 (re_opcode_t
) *(p
- 1) == charset_not
? "^" : "");
887 assert (p
+ *p
< pend
);
889 for (c
= 0; c
< 256; c
++)
891 && (p
[1 + (c
/8)] & (1 << (c
% 8))))
893 /* Are we starting a range? */
894 if (last
+ 1 == c
&& ! in_range
)
899 /* Have we broken a range? */
900 else if (last
+ 1 != c
&& in_range
)
920 printf ("has-range-table");
922 /* ??? Should print the range table; for now,
925 p
+= 4 + 6 * range_length
;
937 case on_failure_jump
:
938 extract_number_and_incr (&mcnt
, &p
);
939 printf ("/on_failure_jump to %d", p
+ mcnt
- start
);
942 case on_failure_keep_string_jump
:
943 extract_number_and_incr (&mcnt
, &p
);
944 printf ("/on_failure_keep_string_jump to %d", p
+ mcnt
- start
);
947 case dummy_failure_jump
:
948 extract_number_and_incr (&mcnt
, &p
);
949 printf ("/dummy_failure_jump to %d", p
+ mcnt
- start
);
952 case push_dummy_failure
:
953 printf ("/push_dummy_failure");
957 extract_number_and_incr (&mcnt
, &p
);
958 printf ("/maybe_pop_jump to %d", p
+ mcnt
- start
);
961 case pop_failure_jump
:
962 extract_number_and_incr (&mcnt
, &p
);
963 printf ("/pop_failure_jump to %d", p
+ mcnt
- start
);
967 extract_number_and_incr (&mcnt
, &p
);
968 printf ("/jump_past_alt to %d", p
+ mcnt
- start
);
972 extract_number_and_incr (&mcnt
, &p
);
973 printf ("/jump to %d", p
+ mcnt
- start
);
977 extract_number_and_incr (&mcnt
, &p
);
978 extract_number_and_incr (&mcnt2
, &p
);
979 printf ("/succeed_n to %d, %d times", p
+ mcnt
- start
, mcnt2
);
983 extract_number_and_incr (&mcnt
, &p
);
984 extract_number_and_incr (&mcnt2
, &p
);
985 printf ("/jump_n to %d, %d times", p
+ mcnt
- start
, mcnt2
);
989 extract_number_and_incr (&mcnt
, &p
);
990 extract_number_and_incr (&mcnt2
, &p
);
991 printf ("/set_number_at location %d to %d", p
+ mcnt
- start
, mcnt2
);
995 printf ("/wordbound");
999 printf ("/notwordbound");
1003 printf ("/wordbeg");
1007 printf ("/wordend");
1011 printf ("/before_dot");
1019 printf ("/after_dot");
1023 printf ("/syntaxspec");
1025 printf ("/%d", mcnt
);
1029 printf ("/notsyntaxspec");
1031 printf ("/%d", mcnt
);
1036 printf ("/wordchar");
1040 printf ("/notwordchar");
1052 printf ("?%d", *(p
-1));
1058 printf ("%d:\tend of pattern.\n", p
- start
);
1063 print_compiled_pattern (bufp
)
1064 struct re_pattern_buffer
*bufp
;
1066 unsigned char *buffer
= bufp
->buffer
;
1068 print_partial_compiled_pattern (buffer
, buffer
+ bufp
->used
);
1069 printf ("%d bytes used/%d bytes allocated.\n", bufp
->used
, bufp
->allocated
);
1071 if (bufp
->fastmap_accurate
&& bufp
->fastmap
)
1073 printf ("fastmap: ");
1074 print_fastmap (bufp
->fastmap
);
1077 printf ("re_nsub: %d\t", bufp
->re_nsub
);
1078 printf ("regs_alloc: %d\t", bufp
->regs_allocated
);
1079 printf ("can_be_null: %d\t", bufp
->can_be_null
);
1080 printf ("newline_anchor: %d\n", bufp
->newline_anchor
);
1081 printf ("no_sub: %d\t", bufp
->no_sub
);
1082 printf ("not_bol: %d\t", bufp
->not_bol
);
1083 printf ("not_eol: %d\t", bufp
->not_eol
);
1084 printf ("syntax: %d\n", bufp
->syntax
);
1085 /* Perhaps we should print the translate table? */
1090 print_double_string (where
, string1
, size1
, string2
, size2
)
1092 const char *string1
;
1093 const char *string2
;
1103 if (FIRST_STRING_P (where
))
1105 for (this_char
= where
- string1
; this_char
< size1
; this_char
++)
1106 putchar (string1
[this_char
]);
1111 for (this_char
= where
- string2
; this_char
< size2
; this_char
++)
1112 putchar (string2
[this_char
]);
1116 #else /* not DEBUG */
1121 #define DEBUG_STATEMENT(e)
1122 #define DEBUG_PRINT1(x)
1123 #define DEBUG_PRINT2(x1, x2)
1124 #define DEBUG_PRINT3(x1, x2, x3)
1125 #define DEBUG_PRINT4(x1, x2, x3, x4)
1126 #define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
1127 #define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
1129 #endif /* not DEBUG */
1131 /* Set by `re_set_syntax' to the current regexp syntax to recognize. Can
1132 also be assigned to arbitrarily: each pattern buffer stores its own
1133 syntax, so it can be changed between regex compilations. */
1134 /* This has no initializer because initialized variables in Emacs
1135 become read-only after dumping. */
1136 reg_syntax_t re_syntax_options
;
1139 /* Specify the precise syntax of regexps for compilation. This provides
1140 for compatibility for various utilities which historically have
1141 different, incompatible syntaxes.
1143 The argument SYNTAX is a bit mask comprised of the various bits
1144 defined in regex.h. We return the old syntax. */
1147 re_set_syntax (syntax
)
1148 reg_syntax_t syntax
;
1150 reg_syntax_t ret
= re_syntax_options
;
1152 re_syntax_options
= syntax
;
1156 /* This table gives an error message for each of the error codes listed
1157 in regex.h. Obviously the order here has to be same as there.
1158 POSIX doesn't require that we do anything for REG_NOERROR,
1159 but why not be nice? */
1161 static const char *re_error_msgid
[] =
1163 gettext_noop ("Success"), /* REG_NOERROR */
1164 gettext_noop ("No match"), /* REG_NOMATCH */
1165 gettext_noop ("Invalid regular expression"), /* REG_BADPAT */
1166 gettext_noop ("Invalid collation character"), /* REG_ECOLLATE */
1167 gettext_noop ("Invalid character class name"), /* REG_ECTYPE */
1168 gettext_noop ("Trailing backslash"), /* REG_EESCAPE */
1169 gettext_noop ("Invalid back reference"), /* REG_ESUBREG */
1170 gettext_noop ("Unmatched [ or [^"), /* REG_EBRACK */
1171 gettext_noop ("Unmatched ( or \\("), /* REG_EPAREN */
1172 gettext_noop ("Unmatched \\{"), /* REG_EBRACE */
1173 gettext_noop ("Invalid content of \\{\\}"), /* REG_BADBR */
1174 gettext_noop ("Invalid range end"), /* REG_ERANGE */
1175 gettext_noop ("Memory exhausted"), /* REG_ESPACE */
1176 gettext_noop ("Invalid preceding regular expression"), /* REG_BADRPT */
1177 gettext_noop ("Premature end of regular expression"), /* REG_EEND */
1178 gettext_noop ("Regular expression too big"), /* REG_ESIZE */
1179 gettext_noop ("Unmatched ) or \\)"), /* REG_ERPAREN */
1182 /* Avoiding alloca during matching, to placate r_alloc. */
1184 /* Define MATCH_MAY_ALLOCATE unless we need to make sure that the
1185 searching and matching functions should not call alloca. On some
1186 systems, alloca is implemented in terms of malloc, and if we're
1187 using the relocating allocator routines, then malloc could cause a
1188 relocation, which might (if the strings being searched are in the
1189 ralloc heap) shift the data out from underneath the regexp
1192 Here's another reason to avoid allocation: Emacs
1193 processes input from X in a signal handler; processing X input may
1194 call malloc; if input arrives while a matching routine is calling
1195 malloc, then we're scrod. But Emacs can't just block input while
1196 calling matching routines; then we don't notice interrupts when
1197 they come in. So, Emacs blocks input around all regexp calls
1198 except the matching calls, which it leaves unprotected, in the
1199 faith that they will not malloc. */
1201 /* Normally, this is fine. */
1202 #define MATCH_MAY_ALLOCATE
1204 /* When using GNU C, we are not REALLY using the C alloca, no matter
1205 what config.h may say. So don't take precautions for it. */
1210 /* The match routines may not allocate if (1) they would do it with malloc
1211 and (2) it's not safe for them to use malloc.
1212 Note that if REL_ALLOC is defined, matching would not use malloc for the
1213 failure stack, but we would still use it for the register vectors;
1214 so REL_ALLOC should not affect this. */
1215 #if (defined (C_ALLOCA) || defined (REGEX_MALLOC)) && defined (emacs)
1216 #undef MATCH_MAY_ALLOCATE
1220 /* Failure stack declarations and macros; both re_compile_fastmap and
1221 re_match_2 use a failure stack. These have to be macros because of
1222 REGEX_ALLOCATE_STACK. */
1225 /* Approximate number of failure points for which to initially allocate space
1226 when matching. If this number is exceeded, we allocate more
1227 space, so it is not a hard limit. */
1228 #ifndef INIT_FAILURE_ALLOC
1229 #define INIT_FAILURE_ALLOC 20
1232 /* Roughly the maximum number of failure points on the stack. Would be
1233 exactly that if always used TYPICAL_FAILURE_SIZE items each time we failed.
1234 This is a variable only so users of regex can assign to it; we never
1235 change it ourselves. */
1236 #if defined (MATCH_MAY_ALLOCATE)
1237 /* Note that 4400 is enough to cause a crash on Alpha OSF/1,
1238 whose default stack limit is 2mb. In order for a larger
1239 value to work reliably, you have to try to make it accord
1240 with the process stack limit. */
1241 int re_max_failures
= 40000;
1243 int re_max_failures
= 4000;
1246 union fail_stack_elt
1248 unsigned char *pointer
;
1252 typedef union fail_stack_elt fail_stack_elt_t
;
1256 fail_stack_elt_t
*stack
;
1258 unsigned avail
; /* Offset of next open position. */
1261 #define FAIL_STACK_EMPTY() (fail_stack.avail == 0)
1262 #define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
1263 #define FAIL_STACK_FULL() (fail_stack.avail == fail_stack.size)
1266 /* Define macros to initialize and free the failure stack.
1267 Do `return -2' if the alloc fails. */
1269 #ifdef MATCH_MAY_ALLOCATE
1270 #define INIT_FAIL_STACK() \
1272 fail_stack.stack = (fail_stack_elt_t *) \
1273 REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * TYPICAL_FAILURE_SIZE \
1274 * sizeof (fail_stack_elt_t)); \
1276 if (fail_stack.stack == NULL) \
1279 fail_stack.size = INIT_FAILURE_ALLOC; \
1280 fail_stack.avail = 0; \
1283 #define RESET_FAIL_STACK() REGEX_FREE_STACK (fail_stack.stack)
1285 #define INIT_FAIL_STACK() \
1287 fail_stack.avail = 0; \
1290 #define RESET_FAIL_STACK()
1294 /* Double the size of FAIL_STACK, up to a limit
1295 which allows approximately `re_max_failures' items.
1297 Return 1 if succeeds, and 0 if either ran out of memory
1298 allocating space for it or it was already too large.
1300 REGEX_REALLOCATE_STACK requires `destination' be declared. */
1302 /* Factor to increase the failure stack size by
1303 when we increase it.
1304 This used to be 2, but 2 was too wasteful
1305 because the old discarded stacks added up to as much space
1306 were as ultimate, maximum-size stack. */
1307 #define FAIL_STACK_GROWTH_FACTOR 4
1309 #define GROW_FAIL_STACK(fail_stack) \
1310 (((fail_stack).size * sizeof (fail_stack_elt_t) \
1311 >= re_max_failures * TYPICAL_FAILURE_SIZE) \
1313 : ((fail_stack).stack \
1314 = (fail_stack_elt_t *) \
1315 REGEX_REALLOCATE_STACK ((fail_stack).stack, \
1316 (fail_stack).size * sizeof (fail_stack_elt_t), \
1317 MIN (re_max_failures * TYPICAL_FAILURE_SIZE, \
1318 ((fail_stack).size * sizeof (fail_stack_elt_t) \
1319 * FAIL_STACK_GROWTH_FACTOR))), \
1321 (fail_stack).stack == NULL \
1323 : ((fail_stack).size \
1324 = (MIN (re_max_failures * TYPICAL_FAILURE_SIZE, \
1325 ((fail_stack).size * sizeof (fail_stack_elt_t) \
1326 * FAIL_STACK_GROWTH_FACTOR)) \
1327 / sizeof (fail_stack_elt_t)), \
1331 /* Push pointer POINTER on FAIL_STACK.
1332 Return 1 if was able to do so and 0 if ran out of memory allocating
1334 #define PUSH_PATTERN_OP(POINTER, FAIL_STACK) \
1335 ((FAIL_STACK_FULL () \
1336 && !GROW_FAIL_STACK (FAIL_STACK)) \
1338 : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER, \
1341 /* Push a pointer value onto the failure stack.
1342 Assumes the variable `fail_stack'. Probably should only
1343 be called from within `PUSH_FAILURE_POINT'. */
1344 #define PUSH_FAILURE_POINTER(item) \
1345 fail_stack.stack[fail_stack.avail++].pointer = (unsigned char *) (item)
1347 /* This pushes an integer-valued item onto the failure stack.
1348 Assumes the variable `fail_stack'. Probably should only
1349 be called from within `PUSH_FAILURE_POINT'. */
1350 #define PUSH_FAILURE_INT(item) \
1351 fail_stack.stack[fail_stack.avail++].integer = (item)
1353 /* Push a fail_stack_elt_t value onto the failure stack.
1354 Assumes the variable `fail_stack'. Probably should only
1355 be called from within `PUSH_FAILURE_POINT'. */
1356 #define PUSH_FAILURE_ELT(item) \
1357 fail_stack.stack[fail_stack.avail++] = (item)
1359 /* These three POP... operations complement the three PUSH... operations.
1360 All assume that `fail_stack' is nonempty. */
1361 #define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer
1362 #define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer
1363 #define POP_FAILURE_ELT() fail_stack.stack[--fail_stack.avail]
1365 /* Used to omit pushing failure point id's when we're not debugging. */
1367 #define DEBUG_PUSH PUSH_FAILURE_INT
1368 #define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_INT ()
1370 #define DEBUG_PUSH(item)
1371 #define DEBUG_POP(item_addr)
1375 /* Push the information about the state we will need
1376 if we ever fail back to it.
1378 Requires variables fail_stack, regstart, regend, reg_info, and
1379 num_regs be declared. GROW_FAIL_STACK requires `destination' be
1382 Does `return FAILURE_CODE' if runs out of memory. */
1384 #define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code) \
1386 char *destination; \
1387 /* Must be int, so when we don't save any registers, the arithmetic \
1388 of 0 + -1 isn't done as unsigned. */ \
1391 DEBUG_STATEMENT (failure_id++); \
1392 DEBUG_STATEMENT (nfailure_points_pushed++); \
1393 DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id); \
1394 DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail);\
1395 DEBUG_PRINT2 (" size: %d\n", (fail_stack).size);\
1397 DEBUG_PRINT2 (" slots needed: %d\n", NUM_FAILURE_ITEMS); \
1398 DEBUG_PRINT2 (" available: %d\n", REMAINING_AVAIL_SLOTS); \
1400 /* Ensure we have enough space allocated for what we will push. */ \
1401 while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS) \
1403 if (!GROW_FAIL_STACK (fail_stack)) \
1404 return failure_code; \
1406 DEBUG_PRINT2 ("\n Doubled stack; size now: %d\n", \
1407 (fail_stack).size); \
1408 DEBUG_PRINT2 (" slots available: %d\n", REMAINING_AVAIL_SLOTS);\
1411 /* Push the info, starting with the registers. */ \
1412 DEBUG_PRINT1 ("\n"); \
1415 for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \
1418 DEBUG_PRINT2 (" Pushing reg: %d\n", this_reg); \
1419 DEBUG_STATEMENT (num_regs_pushed++); \
1421 DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \
1422 PUSH_FAILURE_POINTER (regstart[this_reg]); \
1424 DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \
1425 PUSH_FAILURE_POINTER (regend[this_reg]); \
1427 DEBUG_PRINT2 (" info: 0x%x\n ", reg_info[this_reg]); \
1428 DEBUG_PRINT2 (" match_null=%d", \
1429 REG_MATCH_NULL_STRING_P (reg_info[this_reg])); \
1430 DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg])); \
1431 DEBUG_PRINT2 (" matched_something=%d", \
1432 MATCHED_SOMETHING (reg_info[this_reg])); \
1433 DEBUG_PRINT2 (" ever_matched=%d", \
1434 EVER_MATCHED_SOMETHING (reg_info[this_reg])); \
1435 DEBUG_PRINT1 ("\n"); \
1436 PUSH_FAILURE_ELT (reg_info[this_reg].word); \
1439 DEBUG_PRINT2 (" Pushing low active reg: %d\n", lowest_active_reg);\
1440 PUSH_FAILURE_INT (lowest_active_reg); \
1442 DEBUG_PRINT2 (" Pushing high active reg: %d\n", highest_active_reg);\
1443 PUSH_FAILURE_INT (highest_active_reg); \
1445 DEBUG_PRINT2 (" Pushing pattern 0x%x: ", pattern_place); \
1446 DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend); \
1447 PUSH_FAILURE_POINTER (pattern_place); \
1449 DEBUG_PRINT2 (" Pushing string 0x%x: `", string_place); \
1450 DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, \
1452 DEBUG_PRINT1 ("'\n"); \
1453 PUSH_FAILURE_POINTER (string_place); \
1455 DEBUG_PRINT2 (" Pushing failure id: %u\n", failure_id); \
1456 DEBUG_PUSH (failure_id); \
1459 /* This is the number of items that are pushed and popped on the stack
1460 for each register. */
1461 #define NUM_REG_ITEMS 3
1463 /* Individual items aside from the registers. */
1465 #define NUM_NONREG_ITEMS 5 /* Includes failure point id. */
1467 #define NUM_NONREG_ITEMS 4
1470 /* Estimate the size of data pushed by a typical failure stack entry.
1471 An estimate is all we need, because all we use this for
1472 is to choose a limit for how big to make the failure stack. */
1474 #define TYPICAL_FAILURE_SIZE 20
1476 /* This is how many items we actually use for a failure point.
1477 It depends on the regexp. */
1478 #define NUM_FAILURE_ITEMS \
1480 ? 0 : highest_active_reg - lowest_active_reg + 1) \
1484 /* How many items can still be added to the stack without overflowing it. */
1485 #define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
1488 /* Pops what PUSH_FAIL_STACK pushes.
1490 We restore into the parameters, all of which should be lvalues:
1491 STR -- the saved data position.
1492 PAT -- the saved pattern position.
1493 LOW_REG, HIGH_REG -- the highest and lowest active registers.
1494 REGSTART, REGEND -- arrays of string positions.
1495 REG_INFO -- array of information about each subexpression.
1497 Also assumes the variables `fail_stack' and (if debugging), `bufp',
1498 `pend', `string1', `size1', `string2', and `size2'. */
1500 #define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
1502 DEBUG_STATEMENT (fail_stack_elt_t failure_id;) \
1504 const unsigned char *string_temp; \
1506 assert (!FAIL_STACK_EMPTY ()); \
1508 /* Remove failure points and point to how many regs pushed. */ \
1509 DEBUG_PRINT1 ("POP_FAILURE_POINT:\n"); \
1510 DEBUG_PRINT2 (" Before pop, next avail: %d\n", fail_stack.avail); \
1511 DEBUG_PRINT2 (" size: %d\n", fail_stack.size); \
1513 assert (fail_stack.avail >= NUM_NONREG_ITEMS); \
1515 DEBUG_POP (&failure_id.integer); \
1516 DEBUG_PRINT2 (" Popping failure id: %u\n", failure_id.integer); \
1518 /* If the saved string location is NULL, it came from an \
1519 on_failure_keep_string_jump opcode, and we want to throw away the \
1520 saved NULL, thus retaining our current position in the string. */ \
1521 string_temp = POP_FAILURE_POINTER (); \
1522 if (string_temp != NULL) \
1523 str = (const char *) string_temp; \
1525 DEBUG_PRINT2 (" Popping string 0x%x: `", str); \
1526 DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \
1527 DEBUG_PRINT1 ("'\n"); \
1529 pat = (unsigned char *) POP_FAILURE_POINTER (); \
1530 DEBUG_PRINT2 (" Popping pattern 0x%x: ", pat); \
1531 DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend); \
1533 /* Restore register info. */ \
1534 high_reg = (unsigned) POP_FAILURE_INT (); \
1535 DEBUG_PRINT2 (" Popping high active reg: %d\n", high_reg); \
1537 low_reg = (unsigned) POP_FAILURE_INT (); \
1538 DEBUG_PRINT2 (" Popping low active reg: %d\n", low_reg); \
1541 for (this_reg = high_reg; this_reg >= low_reg; this_reg--) \
1543 DEBUG_PRINT2 (" Popping reg: %d\n", this_reg); \
1545 reg_info[this_reg].word = POP_FAILURE_ELT (); \
1546 DEBUG_PRINT2 (" info: 0x%x\n", reg_info[this_reg]); \
1548 regend[this_reg] = (const char *) POP_FAILURE_POINTER (); \
1549 DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \
1551 regstart[this_reg] = (const char *) POP_FAILURE_POINTER (); \
1552 DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \
1556 for (this_reg = highest_active_reg; this_reg > high_reg; this_reg--) \
1558 reg_info[this_reg].word.integer = 0; \
1559 regend[this_reg] = 0; \
1560 regstart[this_reg] = 0; \
1562 highest_active_reg = high_reg; \
1565 set_regs_matched_done = 0; \
1566 DEBUG_STATEMENT (nfailure_points_popped++); \
1567 } /* POP_FAILURE_POINT */
1571 /* Structure for per-register (a.k.a. per-group) information.
1572 Other register information, such as the
1573 starting and ending positions (which are addresses), and the list of
1574 inner groups (which is a bits list) are maintained in separate
1577 We are making a (strictly speaking) nonportable assumption here: that
1578 the compiler will pack our bit fields into something that fits into
1579 the type of `word', i.e., is something that fits into one item on the
1584 fail_stack_elt_t word
;
1587 /* This field is one if this group can match the empty string,
1588 zero if not. If not yet determined, `MATCH_NULL_UNSET_VALUE'. */
1589 #define MATCH_NULL_UNSET_VALUE 3
1590 unsigned match_null_string_p
: 2;
1591 unsigned is_active
: 1;
1592 unsigned matched_something
: 1;
1593 unsigned ever_matched_something
: 1;
1595 } register_info_type
;
1597 #define REG_MATCH_NULL_STRING_P(R) ((R).bits.match_null_string_p)
1598 #define IS_ACTIVE(R) ((R).bits.is_active)
1599 #define MATCHED_SOMETHING(R) ((R).bits.matched_something)
1600 #define EVER_MATCHED_SOMETHING(R) ((R).bits.ever_matched_something)
1603 /* Call this when have matched a real character; it sets `matched' flags
1604 for the subexpressions which we are currently inside. Also records
1605 that those subexprs have matched. */
1606 #define SET_REGS_MATCHED() \
1609 if (!set_regs_matched_done) \
1612 set_regs_matched_done = 1; \
1613 for (r = lowest_active_reg; r <= highest_active_reg; r++) \
1615 MATCHED_SOMETHING (reg_info[r]) \
1616 = EVER_MATCHED_SOMETHING (reg_info[r]) \
1623 /* Registers are set to a sentinel when they haven't yet matched. */
1624 static char reg_unset_dummy
;
1625 #define REG_UNSET_VALUE (®_unset_dummy)
1626 #define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
1628 /* Subroutine declarations and macros for regex_compile. */
1630 static void store_op1 (), store_op2 ();
1631 static void insert_op1 (), insert_op2 ();
1632 static boolean
at_begline_loc_p (), at_endline_loc_p ();
1633 static boolean
group_in_compile_stack ();
1634 static reg_errcode_t
compile_range ();
1636 /* Fetch the next character in the uncompiled pattern---translating it
1637 if necessary. Also cast from a signed character in the constant
1638 string passed to us by the user to an unsigned char that we can use
1639 as an array index (in, e.g., `translate'). */
1641 #define PATFETCH(c) \
1642 do {if (p == pend) return REG_EEND; \
1643 c = (unsigned char) *p++; \
1644 if (RE_TRANSLATE_P (translate)) c = RE_TRANSLATE (translate, c); \
1648 /* Fetch the next character in the uncompiled pattern, with no
1650 #define PATFETCH_RAW(c) \
1651 do {if (p == pend) return REG_EEND; \
1652 c = (unsigned char) *p++; \
1655 /* Go backwards one character in the pattern. */
1656 #define PATUNFETCH p--
1659 /* If `translate' is non-null, return translate[D], else just D. We
1660 cast the subscript to translate because some data is declared as
1661 `char *', to avoid warnings when a string constant is passed. But
1662 when we use a character as a subscript we must make it unsigned. */
1664 #define TRANSLATE(d) \
1665 (RE_TRANSLATE_P (translate) \
1666 ? (unsigned) RE_TRANSLATE (translate, (unsigned) (d)) : (d))
1670 /* Macros for outputting the compiled pattern into `buffer'. */
1672 /* If the buffer isn't allocated when it comes in, use this. */
1673 #define INIT_BUF_SIZE 32
1675 /* Make sure we have at least N more bytes of space in buffer. */
1676 #define GET_BUFFER_SPACE(n) \
1677 while (b - bufp->buffer + (n) > bufp->allocated) \
1680 /* Make sure we have one more byte of buffer space and then add C to it. */
1681 #define BUF_PUSH(c) \
1683 GET_BUFFER_SPACE (1); \
1684 *b++ = (unsigned char) (c); \
1688 /* Ensure we have two more bytes of buffer space and then append C1 and C2. */
1689 #define BUF_PUSH_2(c1, c2) \
1691 GET_BUFFER_SPACE (2); \
1692 *b++ = (unsigned char) (c1); \
1693 *b++ = (unsigned char) (c2); \
1697 /* As with BUF_PUSH_2, except for three bytes. */
1698 #define BUF_PUSH_3(c1, c2, c3) \
1700 GET_BUFFER_SPACE (3); \
1701 *b++ = (unsigned char) (c1); \
1702 *b++ = (unsigned char) (c2); \
1703 *b++ = (unsigned char) (c3); \
1707 /* Store a jump with opcode OP at LOC to location TO. We store a
1708 relative address offset by the three bytes the jump itself occupies. */
1709 #define STORE_JUMP(op, loc, to) \
1710 store_op1 (op, loc, (to) - (loc) - 3)
1712 /* Likewise, for a two-argument jump. */
1713 #define STORE_JUMP2(op, loc, to, arg) \
1714 store_op2 (op, loc, (to) - (loc) - 3, arg)
1716 /* Like `STORE_JUMP', but for inserting. Assume `b' is the buffer end. */
1717 #define INSERT_JUMP(op, loc, to) \
1718 insert_op1 (op, loc, (to) - (loc) - 3, b)
1720 /* Like `STORE_JUMP2', but for inserting. Assume `b' is the buffer end. */
1721 #define INSERT_JUMP2(op, loc, to, arg) \
1722 insert_op2 (op, loc, (to) - (loc) - 3, arg, b)
1725 /* This is not an arbitrary limit: the arguments which represent offsets
1726 into the pattern are two bytes long. So if 2^16 bytes turns out to
1727 be too small, many things would have to change. */
1728 #define MAX_BUF_SIZE (1L << 16)
1731 /* Extend the buffer by twice its current size via realloc and
1732 reset the pointers that pointed into the old block to point to the
1733 correct places in the new one. If extending the buffer results in it
1734 being larger than MAX_BUF_SIZE, then flag memory exhausted. */
1735 #define EXTEND_BUFFER() \
1737 unsigned char *old_buffer = bufp->buffer; \
1738 if (bufp->allocated == MAX_BUF_SIZE) \
1740 bufp->allocated <<= 1; \
1741 if (bufp->allocated > MAX_BUF_SIZE) \
1742 bufp->allocated = MAX_BUF_SIZE; \
1743 bufp->buffer = (unsigned char *) realloc (bufp->buffer, bufp->allocated);\
1744 if (bufp->buffer == NULL) \
1745 return REG_ESPACE; \
1746 /* If the buffer moved, move all the pointers into it. */ \
1747 if (old_buffer != bufp->buffer) \
1749 b = (b - old_buffer) + bufp->buffer; \
1750 begalt = (begalt - old_buffer) + bufp->buffer; \
1751 if (fixup_alt_jump) \
1752 fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\
1754 laststart = (laststart - old_buffer) + bufp->buffer; \
1755 if (pending_exact) \
1756 pending_exact = (pending_exact - old_buffer) + bufp->buffer; \
1761 /* Since we have one byte reserved for the register number argument to
1762 {start,stop}_memory, the maximum number of groups we can report
1763 things about is what fits in that byte. */
1764 #define MAX_REGNUM 255
1766 /* But patterns can have more than `MAX_REGNUM' registers. We just
1767 ignore the excess. */
1768 typedef unsigned regnum_t
;
1771 /* Macros for the compile stack. */
1773 /* Since offsets can go either forwards or backwards, this type needs to
1774 be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1. */
1775 typedef int pattern_offset_t
;
1779 pattern_offset_t begalt_offset
;
1780 pattern_offset_t fixup_alt_jump
;
1781 pattern_offset_t inner_group_offset
;
1782 pattern_offset_t laststart_offset
;
1784 } compile_stack_elt_t
;
1789 compile_stack_elt_t
*stack
;
1791 unsigned avail
; /* Offset of next open position. */
1792 } compile_stack_type
;
1795 #define INIT_COMPILE_STACK_SIZE 32
1797 #define COMPILE_STACK_EMPTY (compile_stack.avail == 0)
1798 #define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size)
1800 /* The next available element. */
1801 #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
1804 /* Structure to manage work area for range table. */
1805 struct range_table_work_area
1807 int *table
; /* actual work area. */
1808 int allocated
; /* allocated size for work area in bytes. */
1809 int used
; /* actually used size in words. */
1810 int bits
; /* flag to record character classes */
1813 /* Make sure that WORK_AREA can hold more N multibyte characters. */
1814 #define EXTEND_RANGE_TABLE_WORK_AREA(work_area, n) \
1816 if (((work_area).used + (n)) * sizeof (int) > (work_area).allocated) \
1818 (work_area).allocated += 16 * sizeof (int); \
1819 if ((work_area).table) \
1821 = (int *) realloc ((work_area).table, (work_area).allocated); \
1824 = (int *) malloc ((work_area).allocated); \
1825 if ((work_area).table == 0) \
1826 FREE_STACK_RETURN (REG_ESPACE); \
1830 #define SET_RANGE_TABLE_WORK_AREA_BIT(work_area, bit) \
1831 (work_area).bits |= (bit)
1833 /* These bits represent the various character classes such as [:alnum:]
1834 in a charset's range table. */
1835 #define BIT_ALNUM 0x1
1836 #define BIT_ALPHA 0x2
1837 #define BIT_WORD 0x4
1838 #define BIT_ASCII 0x8
1839 #define BIT_NONASCII 0x10
1840 #define BIT_GRAPH 0x20
1841 #define BIT_LOWER 0x40
1842 #define BIT_PRINT 0x80
1843 #define BIT_PUNCT 0x100
1844 #define BIT_SPACE 0x200
1845 #define BIT_UPPER 0x400
1846 #define BIT_UNIBYTE 0x800
1847 #define BIT_MULTIBYTE 0x1000
1849 /* Set a range (RANGE_START, RANGE_END) to WORK_AREA. */
1850 #define SET_RANGE_TABLE_WORK_AREA(work_area, range_start, range_end) \
1852 EXTEND_RANGE_TABLE_WORK_AREA ((work_area), 2); \
1853 (work_area).table[(work_area).used++] = (range_start); \
1854 (work_area).table[(work_area).used++] = (range_end); \
1857 /* Free allocated memory for WORK_AREA. */
1858 #define FREE_RANGE_TABLE_WORK_AREA(work_area) \
1860 if ((work_area).table) \
1861 free ((work_area).table); \
1864 #define CLEAR_RANGE_TABLE_WORK_USED(work_area) ((work_area).used = 0, (work_area).bits = 0)
1865 #define RANGE_TABLE_WORK_USED(work_area) ((work_area).used)
1866 #define RANGE_TABLE_WORK_BITS(work_area) ((work_area).bits)
1867 #define RANGE_TABLE_WORK_ELT(work_area, i) ((work_area).table[i])
1870 /* Set the bit for character C in a list. */
1871 #define SET_LIST_BIT(c) \
1872 (b[((unsigned char) (c)) / BYTEWIDTH] \
1873 |= 1 << (((unsigned char) c) % BYTEWIDTH))
1876 /* Get the next unsigned number in the uncompiled pattern. */
1877 #define GET_UNSIGNED_NUMBER(num) \
1881 while (ISDIGIT (c)) \
1885 num = num * 10 + c - '0'; \
1893 #define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. */
1895 #define IS_CHAR_CLASS(string) \
1896 (STREQ (string, "alpha") || STREQ (string, "upper") \
1897 || STREQ (string, "lower") || STREQ (string, "digit") \
1898 || STREQ (string, "alnum") || STREQ (string, "xdigit") \
1899 || STREQ (string, "space") || STREQ (string, "print") \
1900 || STREQ (string, "punct") || STREQ (string, "graph") \
1901 || STREQ (string, "cntrl") || STREQ (string, "blank") \
1902 || STREQ (string, "word") \
1903 || STREQ (string, "ascii") || STREQ (string, "nonascii") \
1904 || STREQ (string, "unibyte") || STREQ (string, "multibyte"))
1906 #ifndef MATCH_MAY_ALLOCATE
1908 /* If we cannot allocate large objects within re_match_2_internal,
1909 we make the fail stack and register vectors global.
1910 The fail stack, we grow to the maximum size when a regexp
1912 The register vectors, we adjust in size each time we
1913 compile a regexp, according to the number of registers it needs. */
1915 static fail_stack_type fail_stack
;
1917 /* Size with which the following vectors are currently allocated.
1918 That is so we can make them bigger as needed,
1919 but never make them smaller. */
1920 static int regs_allocated_size
;
1922 static const char ** regstart
, ** regend
;
1923 static const char ** old_regstart
, ** old_regend
;
1924 static const char **best_regstart
, **best_regend
;
1925 static register_info_type
*reg_info
;
1926 static const char **reg_dummy
;
1927 static register_info_type
*reg_info_dummy
;
1929 /* Make the register vectors big enough for NUM_REGS registers,
1930 but don't make them smaller. */
1933 regex_grow_registers (num_regs
)
1936 if (num_regs
> regs_allocated_size
)
1938 RETALLOC_IF (regstart
, num_regs
, const char *);
1939 RETALLOC_IF (regend
, num_regs
, const char *);
1940 RETALLOC_IF (old_regstart
, num_regs
, const char *);
1941 RETALLOC_IF (old_regend
, num_regs
, const char *);
1942 RETALLOC_IF (best_regstart
, num_regs
, const char *);
1943 RETALLOC_IF (best_regend
, num_regs
, const char *);
1944 RETALLOC_IF (reg_info
, num_regs
, register_info_type
);
1945 RETALLOC_IF (reg_dummy
, num_regs
, const char *);
1946 RETALLOC_IF (reg_info_dummy
, num_regs
, register_info_type
);
1948 regs_allocated_size
= num_regs
;
1952 #endif /* not MATCH_MAY_ALLOCATE */
1954 /* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
1955 Returns one of error codes defined in `regex.h', or zero for success.
1957 Assumes the `allocated' (and perhaps `buffer') and `translate'
1958 fields are set in BUFP on entry.
1960 If it succeeds, results are put in BUFP (if it returns an error, the
1961 contents of BUFP are undefined):
1962 `buffer' is the compiled pattern;
1963 `syntax' is set to SYNTAX;
1964 `used' is set to the length of the compiled pattern;
1965 `fastmap_accurate' is zero;
1966 `re_nsub' is the number of subexpressions in PATTERN;
1967 `not_bol' and `not_eol' are zero;
1969 The `fastmap' and `newline_anchor' fields are neither
1970 examined nor set. */
1972 /* Return, freeing storage we allocated. */
1973 #define FREE_STACK_RETURN(value) \
1975 FREE_RANGE_TABLE_WORK_AREA (range_table_work); \
1976 free (compile_stack.stack); \
1980 static reg_errcode_t
1981 regex_compile (pattern
, size
, syntax
, bufp
)
1982 const char *pattern
;
1984 reg_syntax_t syntax
;
1985 struct re_pattern_buffer
*bufp
;
1987 /* We fetch characters from PATTERN here. Even though PATTERN is
1988 `char *' (i.e., signed), we declare these variables as unsigned, so
1989 they can be reliably used as array indices. */
1990 register unsigned int c
, c1
;
1992 /* A random temporary spot in PATTERN. */
1995 /* Points to the end of the buffer, where we should append. */
1996 register unsigned char *b
;
1998 /* Keeps track of unclosed groups. */
1999 compile_stack_type compile_stack
;
2001 /* Points to the current (ending) position in the pattern. */
2003 /* `const' makes AIX compiler fail. */
2006 const char *p
= pattern
;
2008 const char *pend
= pattern
+ size
;
2010 /* How to translate the characters in the pattern. */
2011 RE_TRANSLATE_TYPE translate
= bufp
->translate
;
2013 /* Address of the count-byte of the most recently inserted `exactn'
2014 command. This makes it possible to tell if a new exact-match
2015 character can be added to that command or if the character requires
2016 a new `exactn' command. */
2017 unsigned char *pending_exact
= 0;
2019 /* Address of start of the most recently finished expression.
2020 This tells, e.g., postfix * where to find the start of its
2021 operand. Reset at the beginning of groups and alternatives. */
2022 unsigned char *laststart
= 0;
2024 /* Address of beginning of regexp, or inside of last group. */
2025 unsigned char *begalt
;
2027 /* Place in the uncompiled pattern (i.e., the {) to
2028 which to go back if the interval is invalid. */
2029 const char *beg_interval
;
2031 /* Address of the place where a forward jump should go to the end of
2032 the containing expression. Each alternative of an `or' -- except the
2033 last -- ends with a forward jump of this sort. */
2034 unsigned char *fixup_alt_jump
= 0;
2036 /* Counts open-groups as they are encountered. Remembered for the
2037 matching close-group on the compile stack, so the same register
2038 number is put in the stop_memory as the start_memory. */
2039 regnum_t regnum
= 0;
2041 /* Work area for range table of charset. */
2042 struct range_table_work_area range_table_work
;
2045 DEBUG_PRINT1 ("\nCompiling pattern: ");
2048 unsigned debug_count
;
2050 for (debug_count
= 0; debug_count
< size
; debug_count
++)
2051 putchar (pattern
[debug_count
]);
2056 /* Initialize the compile stack. */
2057 compile_stack
.stack
= TALLOC (INIT_COMPILE_STACK_SIZE
, compile_stack_elt_t
);
2058 if (compile_stack
.stack
== NULL
)
2061 compile_stack
.size
= INIT_COMPILE_STACK_SIZE
;
2062 compile_stack
.avail
= 0;
2064 range_table_work
.table
= 0;
2065 range_table_work
.allocated
= 0;
2067 /* Initialize the pattern buffer. */
2068 bufp
->syntax
= syntax
;
2069 bufp
->fastmap_accurate
= 0;
2070 bufp
->not_bol
= bufp
->not_eol
= 0;
2072 /* Set `used' to zero, so that if we return an error, the pattern
2073 printer (for debugging) will think there's no pattern. We reset it
2077 /* Always count groups, whether or not bufp->no_sub is set. */
2081 /* bufp->multibyte is set before regex_compile is called, so don't alter
2083 #else /* not emacs */
2084 /* Nothing is recognized as a multibyte character. */
2085 bufp
->multibyte
= 0;
2088 #if !defined (emacs) && !defined (SYNTAX_TABLE)
2089 /* Initialize the syntax table. */
2090 init_syntax_once ();
2093 if (bufp
->allocated
== 0)
2096 { /* If zero allocated, but buffer is non-null, try to realloc
2097 enough space. This loses if buffer's address is bogus, but
2098 that is the user's responsibility. */
2099 RETALLOC (bufp
->buffer
, INIT_BUF_SIZE
, unsigned char);
2102 { /* Caller did not allocate a buffer. Do it for them. */
2103 bufp
->buffer
= TALLOC (INIT_BUF_SIZE
, unsigned char);
2105 if (!bufp
->buffer
) FREE_STACK_RETURN (REG_ESPACE
);
2107 bufp
->allocated
= INIT_BUF_SIZE
;
2110 begalt
= b
= bufp
->buffer
;
2112 /* Loop through the uncompiled pattern until we're at the end. */
2121 if ( /* If at start of pattern, it's an operator. */
2123 /* If context independent, it's an operator. */
2124 || syntax
& RE_CONTEXT_INDEP_ANCHORS
2125 /* Otherwise, depends on what's come before. */
2126 || at_begline_loc_p (pattern
, p
, syntax
))
2136 if ( /* If at end of pattern, it's an operator. */
2138 /* If context independent, it's an operator. */
2139 || syntax
& RE_CONTEXT_INDEP_ANCHORS
2140 /* Otherwise, depends on what's next. */
2141 || at_endline_loc_p (p
, pend
, syntax
))
2151 if ((syntax
& RE_BK_PLUS_QM
)
2152 || (syntax
& RE_LIMITED_OPS
))
2156 /* If there is no previous pattern... */
2159 if (syntax
& RE_CONTEXT_INVALID_OPS
)
2160 FREE_STACK_RETURN (REG_BADRPT
);
2161 else if (!(syntax
& RE_CONTEXT_INDEP_OPS
))
2166 /* Are we optimizing this jump? */
2167 boolean keep_string_p
= false;
2169 /* 1 means zero (many) matches is allowed. */
2170 char zero_times_ok
= 0, many_times_ok
= 0;
2172 /* If there is a sequence of repetition chars, collapse it
2173 down to just one (the right one). We can't combine
2174 interval operators with these because of, e.g., `a{2}*',
2175 which should only match an even number of `a's. */
2179 zero_times_ok
|= c
!= '+';
2180 many_times_ok
|= c
!= '?';
2188 || (!(syntax
& RE_BK_PLUS_QM
) && (c
== '+' || c
== '?')))
2191 else if (syntax
& RE_BK_PLUS_QM
&& c
== '\\')
2193 if (p
== pend
) FREE_STACK_RETURN (REG_EESCAPE
);
2196 if (!(c1
== '+' || c1
== '?'))
2211 /* If we get here, we found another repeat character. */
2214 /* Star, etc. applied to an empty pattern is equivalent
2215 to an empty pattern. */
2219 /* Now we know whether or not zero matches is allowed
2220 and also whether or not two or more matches is allowed. */
2222 { /* More than one repetition is allowed, so put in at the
2223 end a backward relative jump from `b' to before the next
2224 jump we're going to put in below (which jumps from
2225 laststart to after this jump).
2227 But if we are at the `*' in the exact sequence `.*\n',
2228 insert an unconditional jump backwards to the .,
2229 instead of the beginning of the loop. This way we only
2230 push a failure point once, instead of every time
2231 through the loop. */
2232 assert (p
- 1 > pattern
);
2234 /* Allocate the space for the jump. */
2235 GET_BUFFER_SPACE (3);
2237 /* We know we are not at the first character of the pattern,
2238 because laststart was nonzero. And we've already
2239 incremented `p', by the way, to be the character after
2240 the `*'. Do we have to do something analogous here
2241 for null bytes, because of RE_DOT_NOT_NULL? */
2242 if (TRANSLATE ((unsigned char)*(p
- 2)) == TRANSLATE ('.')
2245 && TRANSLATE ((unsigned char)*p
) == TRANSLATE ('\n')
2246 && !(syntax
& RE_DOT_NEWLINE
))
2247 { /* We have .*\n. */
2248 STORE_JUMP (jump
, b
, laststart
);
2249 keep_string_p
= true;
2252 /* Anything else. */
2253 STORE_JUMP (maybe_pop_jump
, b
, laststart
- 3);
2255 /* We've added more stuff to the buffer. */
2259 /* On failure, jump from laststart to b + 3, which will be the
2260 end of the buffer after this jump is inserted. */
2261 GET_BUFFER_SPACE (3);
2262 INSERT_JUMP (keep_string_p
? on_failure_keep_string_jump
2270 /* At least one repetition is required, so insert a
2271 `dummy_failure_jump' before the initial
2272 `on_failure_jump' instruction of the loop. This
2273 effects a skip over that instruction the first time
2274 we hit that loop. */
2275 GET_BUFFER_SPACE (3);
2276 INSERT_JUMP (dummy_failure_jump
, laststart
, laststart
+ 6);
2291 CLEAR_RANGE_TABLE_WORK_USED (range_table_work
);
2293 if (p
== pend
) FREE_STACK_RETURN (REG_EBRACK
);
2295 /* Ensure that we have enough space to push a charset: the
2296 opcode, the length count, and the bitset; 34 bytes in all. */
2297 GET_BUFFER_SPACE (34);
2301 /* We test `*p == '^' twice, instead of using an if
2302 statement, so we only need one BUF_PUSH. */
2303 BUF_PUSH (*p
== '^' ? charset_not
: charset
);
2307 /* Remember the first position in the bracket expression. */
2310 /* Push the number of bytes in the bitmap. */
2311 BUF_PUSH ((1 << BYTEWIDTH
) / BYTEWIDTH
);
2313 /* Clear the whole map. */
2314 bzero (b
, (1 << BYTEWIDTH
) / BYTEWIDTH
);
2316 /* charset_not matches newline according to a syntax bit. */
2317 if ((re_opcode_t
) b
[-2] == charset_not
2318 && (syntax
& RE_HAT_LISTS_NOT_NEWLINE
))
2319 SET_LIST_BIT ('\n');
2321 /* Read in characters and ranges, setting map bits. */
2325 boolean escaped_char
= false;
2327 if (p
== pend
) FREE_STACK_RETURN (REG_EBRACK
);
2331 /* \ might escape characters inside [...] and [^...]. */
2332 if ((syntax
& RE_BACKSLASH_ESCAPE_IN_LISTS
) && c
== '\\')
2334 if (p
== pend
) FREE_STACK_RETURN (REG_EESCAPE
);
2337 escaped_char
= true;
2341 /* Could be the end of the bracket expression. If it's
2342 not (i.e., when the bracket expression is `[]' so
2343 far), the ']' character bit gets set way below. */
2344 if (c
== ']' && p
!= p1
+ 1)
2348 /* If C indicates start of multibyte char, get the
2349 actual character code in C, and set the pattern
2350 pointer P to the next character boundary. */
2351 if (bufp
->multibyte
&& BASE_LEADING_CODE_P (c
))
2354 c
= STRING_CHAR_AND_LENGTH (p
, pend
- p
, len
);
2357 /* What should we do for the character which is
2358 greater than 0x7F, but not BASE_LEADING_CODE_P?
2361 /* See if we're at the beginning of a possible character
2364 else if (!escaped_char
&&
2365 syntax
& RE_CHAR_CLASSES
&& c
== '[' && *p
== ':')
2367 /* Leave room for the null. */
2368 char str
[CHAR_CLASS_MAX_LENGTH
+ 1];
2373 /* If pattern is `[[:'. */
2374 if (p
== pend
) FREE_STACK_RETURN (REG_EBRACK
);
2379 if (c
== ':' || c
== ']' || p
== pend
2380 || c1
== CHAR_CLASS_MAX_LENGTH
)
2386 /* If isn't a word bracketed by `[:' and `:]':
2387 undo the ending character, the letters, and
2388 leave the leading `:' and `[' (but set bits for
2390 if (c
== ':' && *p
== ']')
2393 boolean is_alnum
= STREQ (str
, "alnum");
2394 boolean is_alpha
= STREQ (str
, "alpha");
2395 boolean is_ascii
= STREQ (str
, "ascii");
2396 boolean is_blank
= STREQ (str
, "blank");
2397 boolean is_cntrl
= STREQ (str
, "cntrl");
2398 boolean is_digit
= STREQ (str
, "digit");
2399 boolean is_graph
= STREQ (str
, "graph");
2400 boolean is_lower
= STREQ (str
, "lower");
2401 boolean is_multibyte
= STREQ (str
, "multibyte");
2402 boolean is_nonascii
= STREQ (str
, "nonascii");
2403 boolean is_print
= STREQ (str
, "print");
2404 boolean is_punct
= STREQ (str
, "punct");
2405 boolean is_space
= STREQ (str
, "space");
2406 boolean is_unibyte
= STREQ (str
, "unibyte");
2407 boolean is_upper
= STREQ (str
, "upper");
2408 boolean is_word
= STREQ (str
, "word");
2409 boolean is_xdigit
= STREQ (str
, "xdigit");
2411 if (!IS_CHAR_CLASS (str
))
2412 FREE_STACK_RETURN (REG_ECTYPE
);
2414 /* Throw away the ] at the end of the character
2418 if (p
== pend
) FREE_STACK_RETURN (REG_EBRACK
);
2420 /* Most character classes in a multibyte match
2421 just set a flag. Exceptions are is_blank,
2422 is_digit, is_cntrl, and is_xdigit, since
2423 they can only match ASCII characters. We
2424 don't need to handle them for multibyte. */
2426 if (bufp
->multibyte
)
2430 if (is_alnum
) bit
= BIT_ALNUM
;
2431 if (is_alpha
) bit
= BIT_ALPHA
;
2432 if (is_ascii
) bit
= BIT_ASCII
;
2433 if (is_graph
) bit
= BIT_GRAPH
;
2434 if (is_lower
) bit
= BIT_LOWER
;
2435 if (is_multibyte
) bit
= BIT_MULTIBYTE
;
2436 if (is_nonascii
) bit
= BIT_NONASCII
;
2437 if (is_print
) bit
= BIT_PRINT
;
2438 if (is_punct
) bit
= BIT_PUNCT
;
2439 if (is_space
) bit
= BIT_SPACE
;
2440 if (is_unibyte
) bit
= BIT_UNIBYTE
;
2441 if (is_upper
) bit
= BIT_UPPER
;
2442 if (is_word
) bit
= BIT_WORD
;
2444 SET_RANGE_TABLE_WORK_AREA_BIT (range_table_work
,
2448 /* Handle character classes for ASCII characters. */
2449 for (ch
= 0; ch
< 1 << BYTEWIDTH
; ch
++)
2451 int translated
= TRANSLATE (ch
);
2452 /* This was split into 3 if's to
2453 avoid an arbitrary limit in some compiler. */
2454 if ( (is_alnum
&& ISALNUM (ch
))
2455 || (is_alpha
&& ISALPHA (ch
))
2456 || (is_blank
&& ISBLANK (ch
))
2457 || (is_cntrl
&& ISCNTRL (ch
)))
2458 SET_LIST_BIT (translated
);
2459 if ( (is_digit
&& ISDIGIT (ch
))
2460 || (is_graph
&& ISGRAPH (ch
))
2461 || (is_lower
&& ISLOWER (ch
))
2462 || (is_print
&& ISPRINT (ch
)))
2463 SET_LIST_BIT (translated
);
2464 if ( (is_punct
&& ISPUNCT (ch
))
2465 || (is_space
&& ISSPACE (ch
))
2466 || (is_upper
&& ISUPPER (ch
))
2467 || (is_xdigit
&& ISXDIGIT (ch
)))
2468 SET_LIST_BIT (translated
);
2469 if ( (is_ascii
&& IS_REAL_ASCII (ch
))
2470 || (is_nonascii
&& !IS_REAL_ASCII (ch
))
2471 || (is_unibyte
&& ISUNIBYTE (ch
))
2472 || (is_multibyte
&& !ISUNIBYTE (ch
)))
2473 SET_LIST_BIT (translated
);
2475 if ( (is_word
&& ISWORD (ch
)))
2476 SET_LIST_BIT (translated
);
2479 /* Repeat the loop. */
2489 /* Because the `:' may starts the range, we
2490 can't simply set bit and repeat the loop.
2491 Instead, just set it to C and handle below. */
2496 if (p
< pend
&& p
[0] == '-' && p
[1] != ']')
2499 /* Discard the `-'. */
2502 /* Fetch the character which ends the range. */
2504 if (bufp
->multibyte
&& BASE_LEADING_CODE_P (c1
))
2507 c1
= STRING_CHAR_AND_LENGTH (p
, pend
- p
, len
);
2511 if (SINGLE_BYTE_CHAR_P (c
)
2512 && ! SINGLE_BYTE_CHAR_P (c1
))
2514 /* Handle a range such as \177-\377 in multibyte mode.
2515 Split that into two ranges,,
2516 the low one ending at 0237, and the high one
2517 starting at ...040. */
2518 int c1_base
= (c1
& ~0177) | 040;
2519 SET_RANGE_TABLE_WORK_AREA (range_table_work
, c
, c1
);
2522 else if (!SAME_CHARSET_P (c
, c1
))
2523 FREE_STACK_RETURN (REG_ERANGE
);
2526 /* Range from C to C. */
2529 /* Set the range ... */
2530 if (SINGLE_BYTE_CHAR_P (c
))
2531 /* ... into bitmap. */
2534 int range_start
= c
, range_end
= c1
;
2536 /* If the start is after the end, the range is empty. */
2537 if (range_start
> range_end
)
2539 if (syntax
& RE_NO_EMPTY_RANGES
)
2540 FREE_STACK_RETURN (REG_ERANGE
);
2541 /* Else, repeat the loop. */
2545 for (this_char
= range_start
; this_char
<= range_end
;
2547 SET_LIST_BIT (TRANSLATE (this_char
));
2551 /* ... into range table. */
2552 SET_RANGE_TABLE_WORK_AREA (range_table_work
, c
, c1
);
2555 /* Discard any (non)matching list bytes that are all 0 at the
2556 end of the map. Decrease the map-length byte too. */
2557 while ((int) b
[-1] > 0 && b
[b
[-1] - 1] == 0)
2561 /* Build real range table from work area. */
2562 if (RANGE_TABLE_WORK_USED (range_table_work
)
2563 || RANGE_TABLE_WORK_BITS (range_table_work
))
2566 int used
= RANGE_TABLE_WORK_USED (range_table_work
);
2568 /* Allocate space for COUNT + RANGE_TABLE. Needs two
2569 bytes for flags, two for COUNT, and three bytes for
2571 GET_BUFFER_SPACE (4 + used
* 3);
2573 /* Indicate the existence of range table. */
2574 laststart
[1] |= 0x80;
2576 /* Store the character class flag bits into the range table.
2577 If not in emacs, these flag bits are always 0. */
2578 *b
++ = RANGE_TABLE_WORK_BITS (range_table_work
) & 0xff;
2579 *b
++ = RANGE_TABLE_WORK_BITS (range_table_work
) >> 8;
2581 STORE_NUMBER_AND_INCR (b
, used
/ 2);
2582 for (i
= 0; i
< used
; i
++)
2583 STORE_CHARACTER_AND_INCR
2584 (b
, RANGE_TABLE_WORK_ELT (range_table_work
, i
));
2591 if (syntax
& RE_NO_BK_PARENS
)
2598 if (syntax
& RE_NO_BK_PARENS
)
2605 if (syntax
& RE_NEWLINE_ALT
)
2612 if (syntax
& RE_NO_BK_VBAR
)
2619 if (syntax
& RE_INTERVALS
&& syntax
& RE_NO_BK_BRACES
)
2620 goto handle_interval
;
2626 if (p
== pend
) FREE_STACK_RETURN (REG_EESCAPE
);
2628 /* Do not translate the character after the \, so that we can
2629 distinguish, e.g., \B from \b, even if we normally would
2630 translate, e.g., B to b. */
2636 if (syntax
& RE_NO_BK_PARENS
)
2637 goto normal_backslash
;
2643 if (COMPILE_STACK_FULL
)
2645 RETALLOC (compile_stack
.stack
, compile_stack
.size
<< 1,
2646 compile_stack_elt_t
);
2647 if (compile_stack
.stack
== NULL
) return REG_ESPACE
;
2649 compile_stack
.size
<<= 1;
2652 /* These are the values to restore when we hit end of this
2653 group. They are all relative offsets, so that if the
2654 whole pattern moves because of realloc, they will still
2656 COMPILE_STACK_TOP
.begalt_offset
= begalt
- bufp
->buffer
;
2657 COMPILE_STACK_TOP
.fixup_alt_jump
2658 = fixup_alt_jump
? fixup_alt_jump
- bufp
->buffer
+ 1 : 0;
2659 COMPILE_STACK_TOP
.laststart_offset
= b
- bufp
->buffer
;
2660 COMPILE_STACK_TOP
.regnum
= regnum
;
2662 /* We will eventually replace the 0 with the number of
2663 groups inner to this one. But do not push a
2664 start_memory for groups beyond the last one we can
2665 represent in the compiled pattern. */
2666 if (regnum
<= MAX_REGNUM
)
2668 COMPILE_STACK_TOP
.inner_group_offset
= b
- bufp
->buffer
+ 2;
2669 BUF_PUSH_3 (start_memory
, regnum
, 0);
2672 compile_stack
.avail
++;
2677 /* If we've reached MAX_REGNUM groups, then this open
2678 won't actually generate any code, so we'll have to
2679 clear pending_exact explicitly. */
2685 if (syntax
& RE_NO_BK_PARENS
) goto normal_backslash
;
2687 if (COMPILE_STACK_EMPTY
)
2688 if (syntax
& RE_UNMATCHED_RIGHT_PAREN_ORD
)
2689 goto normal_backslash
;
2691 FREE_STACK_RETURN (REG_ERPAREN
);
2695 { /* Push a dummy failure point at the end of the
2696 alternative for a possible future
2697 `pop_failure_jump' to pop. See comments at
2698 `push_dummy_failure' in `re_match_2'. */
2699 BUF_PUSH (push_dummy_failure
);
2701 /* We allocated space for this jump when we assigned
2702 to `fixup_alt_jump', in the `handle_alt' case below. */
2703 STORE_JUMP (jump_past_alt
, fixup_alt_jump
, b
- 1);
2706 /* See similar code for backslashed left paren above. */
2707 if (COMPILE_STACK_EMPTY
)
2708 if (syntax
& RE_UNMATCHED_RIGHT_PAREN_ORD
)
2711 FREE_STACK_RETURN (REG_ERPAREN
);
2713 /* Since we just checked for an empty stack above, this
2714 ``can't happen''. */
2715 assert (compile_stack
.avail
!= 0);
2717 /* We don't just want to restore into `regnum', because
2718 later groups should continue to be numbered higher,
2719 as in `(ab)c(de)' -- the second group is #2. */
2720 regnum_t this_group_regnum
;
2722 compile_stack
.avail
--;
2723 begalt
= bufp
->buffer
+ COMPILE_STACK_TOP
.begalt_offset
;
2725 = COMPILE_STACK_TOP
.fixup_alt_jump
2726 ? bufp
->buffer
+ COMPILE_STACK_TOP
.fixup_alt_jump
- 1
2728 laststart
= bufp
->buffer
+ COMPILE_STACK_TOP
.laststart_offset
;
2729 this_group_regnum
= COMPILE_STACK_TOP
.regnum
;
2730 /* If we've reached MAX_REGNUM groups, then this open
2731 won't actually generate any code, so we'll have to
2732 clear pending_exact explicitly. */
2735 /* We're at the end of the group, so now we know how many
2736 groups were inside this one. */
2737 if (this_group_regnum
<= MAX_REGNUM
)
2739 unsigned char *inner_group_loc
2740 = bufp
->buffer
+ COMPILE_STACK_TOP
.inner_group_offset
;
2742 *inner_group_loc
= regnum
- this_group_regnum
;
2743 BUF_PUSH_3 (stop_memory
, this_group_regnum
,
2744 regnum
- this_group_regnum
);
2750 case '|': /* `\|'. */
2751 if (syntax
& RE_LIMITED_OPS
|| syntax
& RE_NO_BK_VBAR
)
2752 goto normal_backslash
;
2754 if (syntax
& RE_LIMITED_OPS
)
2757 /* Insert before the previous alternative a jump which
2758 jumps to this alternative if the former fails. */
2759 GET_BUFFER_SPACE (3);
2760 INSERT_JUMP (on_failure_jump
, begalt
, b
+ 6);
2764 /* The alternative before this one has a jump after it
2765 which gets executed if it gets matched. Adjust that
2766 jump so it will jump to this alternative's analogous
2767 jump (put in below, which in turn will jump to the next
2768 (if any) alternative's such jump, etc.). The last such
2769 jump jumps to the correct final destination. A picture:
2775 If we are at `b', then fixup_alt_jump right now points to a
2776 three-byte space after `a'. We'll put in the jump, set
2777 fixup_alt_jump to right after `b', and leave behind three
2778 bytes which we'll fill in when we get to after `c'. */
2781 STORE_JUMP (jump_past_alt
, fixup_alt_jump
, b
);
2783 /* Mark and leave space for a jump after this alternative,
2784 to be filled in later either by next alternative or
2785 when know we're at the end of a series of alternatives. */
2787 GET_BUFFER_SPACE (3);
2796 /* If \{ is a literal. */
2797 if (!(syntax
& RE_INTERVALS
)
2798 /* If we're at `\{' and it's not the open-interval
2800 || ((syntax
& RE_INTERVALS
) && (syntax
& RE_NO_BK_BRACES
))
2801 || (p
- 2 == pattern
&& p
== pend
))
2802 goto normal_backslash
;
2806 /* If got here, then the syntax allows intervals. */
2808 /* At least (most) this many matches must be made. */
2809 int lower_bound
= -1, upper_bound
= -1;
2811 beg_interval
= p
- 1;
2815 if (syntax
& RE_NO_BK_BRACES
)
2816 goto unfetch_interval
;
2818 FREE_STACK_RETURN (REG_EBRACE
);
2821 GET_UNSIGNED_NUMBER (lower_bound
);
2825 GET_UNSIGNED_NUMBER (upper_bound
);
2826 if (upper_bound
< 0) upper_bound
= RE_DUP_MAX
;
2829 /* Interval such as `{1}' => match exactly once. */
2830 upper_bound
= lower_bound
;
2832 if (lower_bound
< 0 || upper_bound
> RE_DUP_MAX
2833 || lower_bound
> upper_bound
)
2835 if (syntax
& RE_NO_BK_BRACES
)
2836 goto unfetch_interval
;
2838 FREE_STACK_RETURN (REG_BADBR
);
2841 if (!(syntax
& RE_NO_BK_BRACES
))
2843 if (c
!= '\\') FREE_STACK_RETURN (REG_EBRACE
);
2850 if (syntax
& RE_NO_BK_BRACES
)
2851 goto unfetch_interval
;
2853 FREE_STACK_RETURN (REG_BADBR
);
2856 /* We just parsed a valid interval. */
2858 /* If it's invalid to have no preceding re. */
2861 if (syntax
& RE_CONTEXT_INVALID_OPS
)
2862 FREE_STACK_RETURN (REG_BADRPT
);
2863 else if (syntax
& RE_CONTEXT_INDEP_OPS
)
2866 goto unfetch_interval
;
2869 /* If the upper bound is zero, don't want to succeed at
2870 all; jump from `laststart' to `b + 3', which will be
2871 the end of the buffer after we insert the jump. */
2872 if (upper_bound
== 0)
2874 GET_BUFFER_SPACE (3);
2875 INSERT_JUMP (jump
, laststart
, b
+ 3);
2879 /* Otherwise, we have a nontrivial interval. When
2880 we're all done, the pattern will look like:
2881 set_number_at <jump count> <upper bound>
2882 set_number_at <succeed_n count> <lower bound>
2883 succeed_n <after jump addr> <succeed_n count>
2885 jump_n <succeed_n addr> <jump count>
2886 (The upper bound and `jump_n' are omitted if
2887 `upper_bound' is 1, though.) */
2889 { /* If the upper bound is > 1, we need to insert
2890 more at the end of the loop. */
2891 unsigned nbytes
= 10 + (upper_bound
> 1) * 10;
2893 GET_BUFFER_SPACE (nbytes
);
2895 /* Initialize lower bound of the `succeed_n', even
2896 though it will be set during matching by its
2897 attendant `set_number_at' (inserted next),
2898 because `re_compile_fastmap' needs to know.
2899 Jump to the `jump_n' we might insert below. */
2900 INSERT_JUMP2 (succeed_n
, laststart
,
2901 b
+ 5 + (upper_bound
> 1) * 5,
2905 /* Code to initialize the lower bound. Insert
2906 before the `succeed_n'. The `5' is the last two
2907 bytes of this `set_number_at', plus 3 bytes of
2908 the following `succeed_n'. */
2909 insert_op2 (set_number_at
, laststart
, 5, lower_bound
, b
);
2912 if (upper_bound
> 1)
2913 { /* More than one repetition is allowed, so
2914 append a backward jump to the `succeed_n'
2915 that starts this interval.
2917 When we've reached this during matching,
2918 we'll have matched the interval once, so
2919 jump back only `upper_bound - 1' times. */
2920 STORE_JUMP2 (jump_n
, b
, laststart
+ 5,
2924 /* The location we want to set is the second
2925 parameter of the `jump_n'; that is `b-2' as
2926 an absolute address. `laststart' will be
2927 the `set_number_at' we're about to insert;
2928 `laststart+3' the number to set, the source
2929 for the relative address. But we are
2930 inserting into the middle of the pattern --
2931 so everything is getting moved up by 5.
2932 Conclusion: (b - 2) - (laststart + 3) + 5,
2933 i.e., b - laststart.
2935 We insert this at the beginning of the loop
2936 so that if we fail during matching, we'll
2937 reinitialize the bounds. */
2938 insert_op2 (set_number_at
, laststart
, b
- laststart
,
2939 upper_bound
- 1, b
);
2944 beg_interval
= NULL
;
2949 /* If an invalid interval, match the characters as literals. */
2950 assert (beg_interval
);
2952 beg_interval
= NULL
;
2954 /* normal_char and normal_backslash need `c'. */
2957 if (!(syntax
& RE_NO_BK_BRACES
))
2959 if (p
> pattern
&& p
[-1] == '\\')
2960 goto normal_backslash
;
2965 /* There is no way to specify the before_dot and after_dot
2966 operators. rms says this is ok. --karl */
2974 BUF_PUSH_2 (syntaxspec
, syntax_spec_code
[c
]);
2980 BUF_PUSH_2 (notsyntaxspec
, syntax_spec_code
[c
]);
2986 BUF_PUSH_2 (categoryspec
, c
);
2992 BUF_PUSH_2 (notcategoryspec
, c
);
2999 BUF_PUSH (wordchar
);
3005 BUF_PUSH (notwordchar
);
3018 BUF_PUSH (wordbound
);
3022 BUF_PUSH (notwordbound
);
3033 case '1': case '2': case '3': case '4': case '5':
3034 case '6': case '7': case '8': case '9':
3035 if (syntax
& RE_NO_BK_REFS
)
3041 FREE_STACK_RETURN (REG_ESUBREG
);
3043 /* Can't back reference to a subexpression if inside of it. */
3044 if (group_in_compile_stack (compile_stack
, c1
))
3048 BUF_PUSH_2 (duplicate
, c1
);
3054 if (syntax
& RE_BK_PLUS_QM
)
3057 goto normal_backslash
;
3061 /* You might think it would be useful for \ to mean
3062 not to translate; but if we don't translate it
3063 it will never match anything. */
3071 /* Expects the character in `c'. */
3073 p1
= p
- 1; /* P1 points the head of C. */
3075 if (bufp
->multibyte
)
3077 c
= STRING_CHAR (p1
, pend
- p1
);
3079 /* Set P to the next character boundary. */
3080 p
+= MULTIBYTE_FORM_LENGTH (p1
, pend
- p1
) - 1;
3083 /* If no exactn currently being built. */
3086 /* If last exactn not at current position. */
3087 || pending_exact
+ *pending_exact
+ 1 != b
3089 /* We have only one byte following the exactn for the count. */
3090 || *pending_exact
>= (1 << BYTEWIDTH
) - (p
- p1
)
3092 /* If followed by a repetition operator. */
3093 || (p
!= pend
&& (*p
== '*' || *p
== '^'))
3094 || ((syntax
& RE_BK_PLUS_QM
)
3095 ? p
+ 1 < pend
&& *p
== '\\' && (p
[1] == '+' || p
[1] == '?')
3096 : p
!= pend
&& (*p
== '+' || *p
== '?'))
3097 || ((syntax
& RE_INTERVALS
)
3098 && ((syntax
& RE_NO_BK_BRACES
)
3099 ? p
!= pend
&& *p
== '{'
3100 : p
+ 1 < pend
&& p
[0] == '\\' && p
[1] == '{')))
3102 /* Start building a new exactn. */
3106 BUF_PUSH_2 (exactn
, 0);
3107 pending_exact
= b
- 1;
3111 if (! SINGLE_BYTE_CHAR_P (c
))
3113 unsigned char work
[4], *str
;
3114 int i
= CHAR_STRING (c
, work
, str
);
3116 for (j
= 0; j
< i
; j
++)
3130 } /* while p != pend */
3133 /* Through the pattern now. */
3136 STORE_JUMP (jump_past_alt
, fixup_alt_jump
, b
);
3138 if (!COMPILE_STACK_EMPTY
)
3139 FREE_STACK_RETURN (REG_EPAREN
);
3141 /* If we don't want backtracking, force success
3142 the first time we reach the end of the compiled pattern. */
3143 if (syntax
& RE_NO_POSIX_BACKTRACKING
)
3146 free (compile_stack
.stack
);
3148 /* We have succeeded; set the length of the buffer. */
3149 bufp
->used
= b
- bufp
->buffer
;
3154 DEBUG_PRINT1 ("\nCompiled pattern: \n");
3155 print_compiled_pattern (bufp
);
3159 #ifndef MATCH_MAY_ALLOCATE
3160 /* Initialize the failure stack to the largest possible stack. This
3161 isn't necessary unless we're trying to avoid calling alloca in
3162 the search and match routines. */
3164 int num_regs
= bufp
->re_nsub
+ 1;
3166 if (fail_stack
.size
< re_max_failures
* TYPICAL_FAILURE_SIZE
)
3168 fail_stack
.size
= re_max_failures
* TYPICAL_FAILURE_SIZE
;
3171 if (! fail_stack
.stack
)
3173 = (fail_stack_elt_t
*) xmalloc (fail_stack
.size
3174 * sizeof (fail_stack_elt_t
));
3177 = (fail_stack_elt_t
*) xrealloc (fail_stack
.stack
,
3179 * sizeof (fail_stack_elt_t
)));
3180 #else /* not emacs */
3181 if (! fail_stack
.stack
)
3183 = (fail_stack_elt_t
*) malloc (fail_stack
.size
3184 * sizeof (fail_stack_elt_t
));
3187 = (fail_stack_elt_t
*) realloc (fail_stack
.stack
,
3189 * sizeof (fail_stack_elt_t
)));
3190 #endif /* not emacs */
3193 regex_grow_registers (num_regs
);
3195 #endif /* not MATCH_MAY_ALLOCATE */
3198 } /* regex_compile */
3200 /* Subroutines for `regex_compile'. */
3202 /* Store OP at LOC followed by two-byte integer parameter ARG. */
3205 store_op1 (op
, loc
, arg
)
3210 *loc
= (unsigned char) op
;
3211 STORE_NUMBER (loc
+ 1, arg
);
3215 /* Like `store_op1', but for two two-byte parameters ARG1 and ARG2. */
3218 store_op2 (op
, loc
, arg1
, arg2
)
3223 *loc
= (unsigned char) op
;
3224 STORE_NUMBER (loc
+ 1, arg1
);
3225 STORE_NUMBER (loc
+ 3, arg2
);
3229 /* Copy the bytes from LOC to END to open up three bytes of space at LOC
3230 for OP followed by two-byte integer parameter ARG. */
3233 insert_op1 (op
, loc
, arg
, end
)
3239 register unsigned char *pfrom
= end
;
3240 register unsigned char *pto
= end
+ 3;
3242 while (pfrom
!= loc
)
3245 store_op1 (op
, loc
, arg
);
3249 /* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2. */
3252 insert_op2 (op
, loc
, arg1
, arg2
, end
)
3258 register unsigned char *pfrom
= end
;
3259 register unsigned char *pto
= end
+ 5;
3261 while (pfrom
!= loc
)
3264 store_op2 (op
, loc
, arg1
, arg2
);
3268 /* P points to just after a ^ in PATTERN. Return true if that ^ comes
3269 after an alternative or a begin-subexpression. We assume there is at
3270 least one character before the ^. */
3273 at_begline_loc_p (pattern
, p
, syntax
)
3274 const char *pattern
, *p
;
3275 reg_syntax_t syntax
;
3277 const char *prev
= p
- 2;
3278 boolean prev_prev_backslash
= prev
> pattern
&& prev
[-1] == '\\';
3281 /* After a subexpression? */
3282 (*prev
== '(' && (syntax
& RE_NO_BK_PARENS
|| prev_prev_backslash
))
3283 /* After an alternative? */
3284 || (*prev
== '|' && (syntax
& RE_NO_BK_VBAR
|| prev_prev_backslash
));
3288 /* The dual of at_begline_loc_p. This one is for $. We assume there is
3289 at least one character after the $, i.e., `P < PEND'. */
3292 at_endline_loc_p (p
, pend
, syntax
)
3293 const char *p
, *pend
;
3296 const char *next
= p
;
3297 boolean next_backslash
= *next
== '\\';
3298 const char *next_next
= p
+ 1 < pend
? p
+ 1 : 0;
3301 /* Before a subexpression? */
3302 (syntax
& RE_NO_BK_PARENS
? *next
== ')'
3303 : next_backslash
&& next_next
&& *next_next
== ')')
3304 /* Before an alternative? */
3305 || (syntax
& RE_NO_BK_VBAR
? *next
== '|'
3306 : next_backslash
&& next_next
&& *next_next
== '|');
3310 /* Returns true if REGNUM is in one of COMPILE_STACK's elements and
3311 false if it's not. */
3314 group_in_compile_stack (compile_stack
, regnum
)
3315 compile_stack_type compile_stack
;
3320 for (this_element
= compile_stack
.avail
- 1;
3323 if (compile_stack
.stack
[this_element
].regnum
== regnum
)
3329 /* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
3330 BUFP. A fastmap records which of the (1 << BYTEWIDTH) possible
3331 characters can start a string that matches the pattern. This fastmap
3332 is used by re_search to skip quickly over impossible starting points.
3334 Character codes above (1 << BYTEWIDTH) are not represented in the
3335 fastmap, but the leading codes are represented. Thus, the fastmap
3336 indicates which character sets could start a match.
3338 The caller must supply the address of a (1 << BYTEWIDTH)-byte data
3339 area as BUFP->fastmap.
3341 We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
3344 Returns 0 if we succeed, -2 if an internal error. */
3347 re_compile_fastmap (bufp
)
3348 struct re_pattern_buffer
*bufp
;
3351 #ifdef MATCH_MAY_ALLOCATE
3352 fail_stack_type fail_stack
;
3354 #ifndef REGEX_MALLOC
3357 /* We don't push any register information onto the failure stack. */
3358 unsigned num_regs
= 0;
3360 register char *fastmap
= bufp
->fastmap
;
3361 unsigned char *pattern
= bufp
->buffer
;
3362 unsigned long size
= bufp
->used
;
3363 unsigned char *p
= pattern
;
3364 register unsigned char *pend
= pattern
+ size
;
3366 /* This holds the pointer to the failure stack, when
3367 it is allocated relocatably. */
3368 fail_stack_elt_t
*failure_stack_ptr
;
3370 /* Assume that each path through the pattern can be null until
3371 proven otherwise. We set this false at the bottom of switch
3372 statement, to which we get only if a particular path doesn't
3373 match the empty string. */
3374 boolean path_can_be_null
= true;
3376 /* We aren't doing a `succeed_n' to begin with. */
3377 boolean succeed_n_p
= false;
3379 /* If all elements for base leading-codes in fastmap is set, this
3380 flag is set true. */
3381 boolean match_any_multibyte_characters
= false;
3383 /* Maximum code of simple (single byte) character. */
3384 int simple_char_max
;
3386 assert (fastmap
!= NULL
&& p
!= NULL
);
3389 bzero (fastmap
, 1 << BYTEWIDTH
); /* Assume nothing's valid. */
3390 bufp
->fastmap_accurate
= 1; /* It will be when we're done. */
3391 bufp
->can_be_null
= 0;
3395 if (p
== pend
|| *p
== succeed
)
3397 /* We have reached the (effective) end of pattern. */
3398 if (!FAIL_STACK_EMPTY ())
3400 bufp
->can_be_null
|= path_can_be_null
;
3402 /* Reset for next path. */
3403 path_can_be_null
= true;
3405 p
= fail_stack
.stack
[--fail_stack
.avail
].pointer
;
3413 /* We should never be about to go beyond the end of the pattern. */
3416 switch (SWITCH_ENUM_CAST ((re_opcode_t
) *p
++))
3419 /* I guess the idea here is to simply not bother with a fastmap
3420 if a backreference is used, since it's too hard to figure out
3421 the fastmap for the corresponding group. Setting
3422 `can_be_null' stops `re_search_2' from using the fastmap, so
3423 that is all we do. */
3425 bufp
->can_be_null
= 1;
3429 /* Following are the cases which match a character. These end
3440 int length
= (*p
& 0x7f);;
3443 for (j
= length
* BYTEWIDTH
- 1; j
>= 0; j
--)
3444 if (p
[j
/ BYTEWIDTH
] & (1 << (j
% BYTEWIDTH
)))
3450 /* Chars beyond end of map must be allowed. */
3452 int length
= (*p
& 0x7f);;
3455 for (j
= length
* BYTEWIDTH
; j
< (1 << BYTEWIDTH
); j
++)
3458 for (j
= length
* BYTEWIDTH
- 1; j
>= 0; j
--)
3459 if (!(p
[j
/ BYTEWIDTH
] & (1 << (j
% BYTEWIDTH
))))
3465 for (j
= 0; j
< (1 << BYTEWIDTH
); j
++)
3466 if (SYNTAX (j
) == Sword
)
3472 for (j
= 0; j
< (1 << BYTEWIDTH
); j
++)
3473 if (SYNTAX (j
) != Sword
)
3478 for (j
= CHARSET_BITMAP_SIZE (&p
[-1]) * BYTEWIDTH
- 1, p
++;
3480 if (p
[j
/ BYTEWIDTH
] & (1 << (j
% BYTEWIDTH
)))
3483 /* If we can match a character class, we can match
3484 any character set. */
3485 if (CHARSET_RANGE_TABLE_EXISTS_P (&p
[-2])
3486 && CHARSET_RANGE_TABLE_BITS (&p
[-2]) != 0)
3487 goto set_fastmap_for_multibyte_characters
;
3489 if (CHARSET_RANGE_TABLE_EXISTS_P (&p
[-2])
3490 && match_any_multibyte_characters
== false)
3492 /* Set fastmap[I] 1 where I is a base leading code of each
3493 multibyte character in the range table. */
3496 /* Make P points the range table. */
3497 p
+= CHARSET_BITMAP_SIZE (&p
[-2]);
3499 /* Extract the number of ranges in range table into COUNT. */
3500 EXTRACT_NUMBER_AND_INCR (count
, p
);
3501 for (; count
> 0; count
--, p
+= 2 * 3) /* XXX */
3503 /* Extract the start of each range. */
3504 EXTRACT_CHARACTER (c
, p
);
3505 j
= CHAR_CHARSET (c
);
3506 fastmap
[CHARSET_LEADING_CODE_BASE (j
)] = 1;
3513 /* Chars beyond end of bitmap are possible matches.
3514 All the single-byte codes can occur in multibyte buffers.
3515 So any that are not listed in the charset
3516 are possible matches, even in multibyte buffers. */
3517 simple_char_max
= (1 << BYTEWIDTH
);
3518 for (j
= CHARSET_BITMAP_SIZE (&p
[-1]) * BYTEWIDTH
;
3519 j
< simple_char_max
; j
++)
3522 for (j
= CHARSET_BITMAP_SIZE (&p
[-1]) * BYTEWIDTH
- 1, p
++;
3524 if (!(p
[j
/ BYTEWIDTH
] & (1 << (j
% BYTEWIDTH
))))
3527 if (bufp
->multibyte
)
3528 /* Any character set can possibly contain a character
3529 which doesn't match the specified set of characters. */
3531 set_fastmap_for_multibyte_characters
:
3532 if (match_any_multibyte_characters
== false)
3534 for (j
= 0x80; j
< 0xA0; j
++) /* XXX */
3535 if (BASE_LEADING_CODE_P (j
))
3537 match_any_multibyte_characters
= true;
3544 /* All the single-byte codes can occur in multibyte buffers,
3545 and they may have word syntax. So do consider them. */
3546 simple_char_max
= (1 << BYTEWIDTH
);
3547 for (j
= 0; j
< simple_char_max
; j
++)
3548 if (SYNTAX (j
) == Sword
)
3551 if (bufp
->multibyte
)
3552 /* Any character set can possibly contain a character
3553 whose syntax is `Sword'. */
3554 goto set_fastmap_for_multibyte_characters
;
3559 /* All the single-byte codes can occur in multibyte buffers,
3560 and they may not have word syntax. So do consider them. */
3561 simple_char_max
= (1 << BYTEWIDTH
);
3562 for (j
= 0; j
< simple_char_max
; j
++)
3563 if (SYNTAX (j
) != Sword
)
3566 if (bufp
->multibyte
)
3567 /* Any character set can possibly contain a character
3568 whose syntax is not `Sword'. */
3569 goto set_fastmap_for_multibyte_characters
;
3575 int fastmap_newline
= fastmap
['\n'];
3577 /* `.' matches anything, except perhaps newline.
3578 Even in a multibyte buffer, it should match any
3579 conceivable byte value for the fastmap. */
3580 if (bufp
->multibyte
)
3581 match_any_multibyte_characters
= true;
3583 simple_char_max
= (1 << BYTEWIDTH
);
3584 for (j
= 0; j
< simple_char_max
; j
++)
3587 /* ... except perhaps newline. */
3588 if (!(bufp
->syntax
& RE_DOT_NEWLINE
))
3589 fastmap
['\n'] = fastmap_newline
;
3591 /* Return if we have already set `can_be_null'; if we have,
3592 then the fastmap is irrelevant. Something's wrong here. */
3593 else if (bufp
->can_be_null
)
3596 /* Otherwise, have to check alternative paths. */
3607 /* This match depends on text properties. These end with
3608 aborting optimizations. */
3609 bufp
->can_be_null
= 1;
3613 simple_char_max
= bufp
->multibyte
? 0x80 : (1 << BYTEWIDTH
);
3614 for (j
= 0; j
< simple_char_max
; j
++)
3615 if (SYNTAX (j
) == (enum syntaxcode
) k
)
3618 if (bufp
->multibyte
)
3619 /* Any character set can possibly contain a character
3620 whose syntax is K. */
3621 goto set_fastmap_for_multibyte_characters
;
3626 simple_char_max
= bufp
->multibyte
? 0x80 : (1 << BYTEWIDTH
);
3627 for (j
= 0; j
< simple_char_max
; j
++)
3628 if (SYNTAX (j
) != (enum syntaxcode
) k
)
3631 if (bufp
->multibyte
)
3632 /* Any character set can possibly contain a character
3633 whose syntax is not K. */
3634 goto set_fastmap_for_multibyte_characters
;
3641 simple_char_max
= (1 << BYTEWIDTH
);
3642 for (j
= 0; j
< simple_char_max
; j
++)
3643 if (CHAR_HAS_CATEGORY (j
, k
))
3646 if (bufp
->multibyte
)
3647 /* Any character set can possibly contain a character
3648 whose category is K. */
3649 goto set_fastmap_for_multibyte_characters
;
3653 case notcategoryspec
:
3655 simple_char_max
= (1 << BYTEWIDTH
);
3656 for (j
= 0; j
< simple_char_max
; j
++)
3657 if (!CHAR_HAS_CATEGORY (j
, k
))
3660 if (bufp
->multibyte
)
3661 /* Any character set can possibly contain a character
3662 whose category is not K. */
3663 goto set_fastmap_for_multibyte_characters
;
3666 /* All cases after this match the empty string. These end with
3688 case push_dummy_failure
:
3693 case pop_failure_jump
:
3694 case maybe_pop_jump
:
3697 case dummy_failure_jump
:
3698 EXTRACT_NUMBER_AND_INCR (j
, p
);
3703 /* Jump backward implies we just went through the body of a
3704 loop and matched nothing. Opcode jumped to should be
3705 `on_failure_jump' or `succeed_n'. Just treat it like an
3706 ordinary jump. For a * loop, it has pushed its failure
3707 point already; if so, discard that as redundant. */
3708 if ((re_opcode_t
) *p
!= on_failure_jump
3709 && (re_opcode_t
) *p
!= succeed_n
)
3713 EXTRACT_NUMBER_AND_INCR (j
, p
);
3716 /* If what's on the stack is where we are now, pop it. */
3717 if (!FAIL_STACK_EMPTY ()
3718 && fail_stack
.stack
[fail_stack
.avail
- 1].pointer
== p
)
3724 case on_failure_jump
:
3725 case on_failure_keep_string_jump
:
3726 handle_on_failure_jump
:
3727 EXTRACT_NUMBER_AND_INCR (j
, p
);
3729 /* For some patterns, e.g., `(a?)?', `p+j' here points to the
3730 end of the pattern. We don't want to push such a point,
3731 since when we restore it above, entering the switch will
3732 increment `p' past the end of the pattern. We don't need
3733 to push such a point since we obviously won't find any more
3734 fastmap entries beyond `pend'. Such a pattern can match
3735 the null string, though. */
3738 if (!PUSH_PATTERN_OP (p
+ j
, fail_stack
))
3740 RESET_FAIL_STACK ();
3745 bufp
->can_be_null
= 1;
3749 EXTRACT_NUMBER_AND_INCR (k
, p
); /* Skip the n. */
3750 succeed_n_p
= false;
3757 /* Get to the number of times to succeed. */
3760 /* Increment p past the n for when k != 0. */
3761 EXTRACT_NUMBER_AND_INCR (k
, p
);
3765 succeed_n_p
= true; /* Spaghetti code alert. */
3766 goto handle_on_failure_jump
;
3783 abort (); /* We have listed all the cases. */
3786 /* Getting here means we have found the possible starting
3787 characters for one path of the pattern -- and that the empty
3788 string does not match. We need not follow this path further.
3789 Instead, look at the next alternative (remembered on the
3790 stack), or quit if no more. The test at the top of the loop
3791 does these things. */
3792 path_can_be_null
= false;
3796 /* Set `can_be_null' for the last path (also the first path, if the
3797 pattern is empty). */
3798 bufp
->can_be_null
|= path_can_be_null
;
3801 RESET_FAIL_STACK ();
3803 } /* re_compile_fastmap */
3805 /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
3806 ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use
3807 this memory for recording register information. STARTS and ENDS
3808 must be allocated using the malloc library routine, and must each
3809 be at least NUM_REGS * sizeof (regoff_t) bytes long.
3811 If NUM_REGS == 0, then subsequent matches should allocate their own
3814 Unless this function is called, the first search or match using
3815 PATTERN_BUFFER will allocate its own register data, without
3816 freeing the old data. */
3819 re_set_registers (bufp
, regs
, num_regs
, starts
, ends
)
3820 struct re_pattern_buffer
*bufp
;
3821 struct re_registers
*regs
;
3823 regoff_t
*starts
, *ends
;
3827 bufp
->regs_allocated
= REGS_REALLOCATE
;
3828 regs
->num_regs
= num_regs
;
3829 regs
->start
= starts
;
3834 bufp
->regs_allocated
= REGS_UNALLOCATED
;
3836 regs
->start
= regs
->end
= (regoff_t
*) 0;
3840 /* Searching routines. */
3842 /* Like re_search_2, below, but only one string is specified, and
3843 doesn't let you say where to stop matching. */
3846 re_search (bufp
, string
, size
, startpos
, range
, regs
)
3847 struct re_pattern_buffer
*bufp
;
3849 int size
, startpos
, range
;
3850 struct re_registers
*regs
;
3852 return re_search_2 (bufp
, NULL
, 0, string
, size
, startpos
, range
,
3856 /* End address of virtual concatenation of string. */
3857 #define STOP_ADDR_VSTRING(P) \
3858 (((P) >= size1 ? string2 + size2 : string1 + size1))
3860 /* Address of POS in the concatenation of virtual string. */
3861 #define POS_ADDR_VSTRING(POS) \
3862 (((POS) >= size1 ? string2 - size1 : string1) + (POS))
3864 /* Using the compiled pattern in BUFP->buffer, first tries to match the
3865 virtual concatenation of STRING1 and STRING2, starting first at index
3866 STARTPOS, then at STARTPOS + 1, and so on.
3868 STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
3870 RANGE is how far to scan while trying to match. RANGE = 0 means try
3871 only at STARTPOS; in general, the last start tried is STARTPOS +
3874 In REGS, return the indices of the virtual concatenation of STRING1
3875 and STRING2 that matched the entire BUFP->buffer and its contained
3878 Do not consider matching one past the index STOP in the virtual
3879 concatenation of STRING1 and STRING2.
3881 We return either the position in the strings at which the match was
3882 found, -1 if no match, or -2 if error (such as failure
3886 re_search_2 (bufp
, string1
, size1
, string2
, size2
, startpos
, range
, regs
, stop
)
3887 struct re_pattern_buffer
*bufp
;
3888 const char *string1
, *string2
;
3892 struct re_registers
*regs
;
3896 register char *fastmap
= bufp
->fastmap
;
3897 register RE_TRANSLATE_TYPE translate
= bufp
->translate
;
3898 int total_size
= size1
+ size2
;
3899 int endpos
= startpos
+ range
;
3900 int anchored_start
= 0;
3902 /* Nonzero if we have to concern multibyte character. */
3903 int multibyte
= bufp
->multibyte
;
3905 /* Check for out-of-range STARTPOS. */
3906 if (startpos
< 0 || startpos
> total_size
)
3909 /* Fix up RANGE if it might eventually take us outside
3910 the virtual concatenation of STRING1 and STRING2.
3911 Make sure we won't move STARTPOS below 0 or above TOTAL_SIZE. */
3913 range
= 0 - startpos
;
3914 else if (endpos
> total_size
)
3915 range
= total_size
- startpos
;
3917 /* If the search isn't to be a backwards one, don't waste time in a
3918 search for a pattern anchored at beginning of buffer. */
3919 if (bufp
->used
> 0 && (re_opcode_t
) bufp
->buffer
[0] == begbuf
&& range
> 0)
3928 /* In a forward search for something that starts with \=.
3929 don't keep searching past point. */
3930 if (bufp
->used
> 0 && (re_opcode_t
) bufp
->buffer
[0] == at_dot
&& range
> 0)
3932 range
= PT_BYTE
- BEGV_BYTE
- startpos
;
3938 /* Update the fastmap now if not correct already. */
3939 if (fastmap
&& !bufp
->fastmap_accurate
)
3940 if (re_compile_fastmap (bufp
) == -2)
3943 /* See whether the pattern is anchored. */
3944 if (bufp
->buffer
[0] == begline
)
3948 gl_state
.object
= re_match_object
;
3950 int adjpos
= NILP (re_match_object
) || BUFFERP (re_match_object
);
3951 int charpos
= SYNTAX_TABLE_BYTE_TO_CHAR (startpos
+ adjpos
);
3953 SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object
, charpos
, 1);
3957 /* Loop through the string, looking for a place to start matching. */
3960 /* If the pattern is anchored,
3961 skip quickly past places we cannot match.
3962 We don't bother to treat startpos == 0 specially
3963 because that case doesn't repeat. */
3964 if (anchored_start
&& startpos
> 0)
3966 if (! (bufp
->newline_anchor
3967 && ((startpos
<= size1
? string1
[startpos
- 1]
3968 : string2
[startpos
- size1
- 1])
3973 /* If a fastmap is supplied, skip quickly over characters that
3974 cannot be the start of a match. If the pattern can match the
3975 null string, however, we don't need to skip characters; we want
3976 the first null string. */
3977 if (fastmap
&& startpos
< total_size
&& !bufp
->can_be_null
)
3979 register const char *d
;
3980 register unsigned int buf_ch
;
3982 d
= POS_ADDR_VSTRING (startpos
);
3984 if (range
> 0) /* Searching forwards. */
3986 register int lim
= 0;
3989 if (startpos
< size1
&& startpos
+ range
>= size1
)
3990 lim
= range
- (size1
- startpos
);
3992 /* Written out as an if-else to avoid testing `translate'
3994 if (RE_TRANSLATE_P (translate
))
4001 buf_ch
= STRING_CHAR_AND_LENGTH (d
, range
- lim
,
4004 buf_ch
= RE_TRANSLATE (translate
, buf_ch
);
4009 range
-= buf_charlen
;
4014 && !fastmap
[(unsigned char)
4015 RE_TRANSLATE (translate
, (unsigned char) *d
)])
4022 while (range
> lim
&& !fastmap
[(unsigned char) *d
])
4028 startpos
+= irange
- range
;
4030 else /* Searching backwards. */
4032 int room
= (size1
== 0 || startpos
>= size1
4033 ? size2
+ size1
- startpos
4034 : size1
- startpos
);
4036 buf_ch
= STRING_CHAR (d
, room
);
4037 if (RE_TRANSLATE_P (translate
))
4038 buf_ch
= RE_TRANSLATE (translate
, buf_ch
);
4040 if (! (buf_ch
>= 0400
4041 || fastmap
[buf_ch
]))
4046 /* If can't match the null string, and that's all we have left, fail. */
4047 if (range
>= 0 && startpos
== total_size
&& fastmap
4048 && !bufp
->can_be_null
)
4051 val
= re_match_2_internal (bufp
, string1
, size1
, string2
, size2
,
4052 startpos
, regs
, stop
);
4053 #ifndef REGEX_MALLOC
4070 /* Update STARTPOS to the next character boundary. */
4073 const unsigned char *p
4074 = (const unsigned char *) POS_ADDR_VSTRING (startpos
);
4075 const unsigned char *pend
4076 = (const unsigned char *) STOP_ADDR_VSTRING (startpos
);
4077 int len
= MULTIBYTE_FORM_LENGTH (p
, pend
- p
);
4095 /* Update STARTPOS to the previous character boundary. */
4098 const unsigned char *p
4099 = (const unsigned char *) POS_ADDR_VSTRING (startpos
);
4102 /* Find the head of multibyte form. */
4103 while (!CHAR_HEAD_P (*p
))
4108 if (MULTIBYTE_FORM_LENGTH (p
, len
+ 1) != (len
+ 1))
4125 /* Declarations and macros for re_match_2. */
4127 static int bcmp_translate ();
4128 static boolean
alt_match_null_string_p (),
4129 common_op_match_null_string_p (),
4130 group_match_null_string_p ();
4132 /* This converts PTR, a pointer into one of the search strings `string1'
4133 and `string2' into an offset from the beginning of that string. */
4134 #define POINTER_TO_OFFSET(ptr) \
4135 (FIRST_STRING_P (ptr) \
4136 ? ((regoff_t) ((ptr) - string1)) \
4137 : ((regoff_t) ((ptr) - string2 + size1)))
4139 /* Macros for dealing with the split strings in re_match_2. */
4141 #define MATCHING_IN_FIRST_STRING (dend == end_match_1)
4143 /* Call before fetching a character with *d. This switches over to
4144 string2 if necessary. */
4145 #define PREFETCH() \
4148 /* End of string2 => fail. */ \
4149 if (dend == end_match_2) \
4151 /* End of string1 => advance to string2. */ \
4153 dend = end_match_2; \
4157 /* Test if at very beginning or at very end of the virtual concatenation
4158 of `string1' and `string2'. If only one string, it's `string2'. */
4159 #define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
4160 #define AT_STRINGS_END(d) ((d) == end2)
4163 /* Test if D points to a character which is word-constituent. We have
4164 two special cases to check for: if past the end of string1, look at
4165 the first character in string2; and if before the beginning of
4166 string2, look at the last character in string1. */
4167 #define WORDCHAR_P(d) \
4168 (SYNTAX ((d) == end1 ? *string2 \
4169 : (d) == string2 - 1 ? *(end1 - 1) : *(d)) \
4172 /* Disabled due to a compiler bug -- see comment at case wordbound */
4174 /* The comment at case wordbound is following one, but we don't use
4175 AT_WORD_BOUNDARY anymore to support multibyte form.
4177 The DEC Alpha C compiler 3.x generates incorrect code for the
4178 test WORDCHAR_P (d - 1) != WORDCHAR_P (d) in the expansion of
4179 AT_WORD_BOUNDARY, so this code is disabled. Expanding the
4180 macro and introducing temporary variables works around the bug. */
4183 /* Test if the character before D and the one at D differ with respect
4184 to being word-constituent. */
4185 #define AT_WORD_BOUNDARY(d) \
4186 (AT_STRINGS_BEG (d) || AT_STRINGS_END (d) \
4187 || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
4190 /* Free everything we malloc. */
4191 #ifdef MATCH_MAY_ALLOCATE
4192 #define FREE_VAR(var) if (var) { REGEX_FREE (var); var = NULL; } else
4193 #define FREE_VARIABLES() \
4195 REGEX_FREE_STACK (fail_stack.stack); \
4196 FREE_VAR (regstart); \
4197 FREE_VAR (regend); \
4198 FREE_VAR (old_regstart); \
4199 FREE_VAR (old_regend); \
4200 FREE_VAR (best_regstart); \
4201 FREE_VAR (best_regend); \
4202 FREE_VAR (reg_info); \
4203 FREE_VAR (reg_dummy); \
4204 FREE_VAR (reg_info_dummy); \
4207 #define FREE_VARIABLES() ((void)0) /* Do nothing! But inhibit gcc warning. */
4208 #endif /* not MATCH_MAY_ALLOCATE */
4210 /* These values must meet several constraints. They must not be valid
4211 register values; since we have a limit of 255 registers (because
4212 we use only one byte in the pattern for the register number), we can
4213 use numbers larger than 255. They must differ by 1, because of
4214 NUM_FAILURE_ITEMS above. And the value for the lowest register must
4215 be larger than the value for the highest register, so we do not try
4216 to actually save any registers when none are active. */
4217 #define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
4218 #define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)
4220 /* Matching routines. */
4222 #ifndef emacs /* Emacs never uses this. */
4223 /* re_match is like re_match_2 except it takes only a single string. */
4226 re_match (bufp
, string
, size
, pos
, regs
)
4227 struct re_pattern_buffer
*bufp
;
4230 struct re_registers
*regs
;
4232 int result
= re_match_2_internal (bufp
, NULL
, 0, string
, size
,
4237 #endif /* not emacs */
4240 /* In Emacs, this is the string or buffer in which we
4241 are matching. It is used for looking up syntax properties. */
4242 Lisp_Object re_match_object
;
4245 /* re_match_2 matches the compiled pattern in BUFP against the
4246 the (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
4247 and SIZE2, respectively). We start matching at POS, and stop
4250 If REGS is non-null and the `no_sub' field of BUFP is nonzero, we
4251 store offsets for the substring each group matched in REGS. See the
4252 documentation for exactly how many groups we fill.
4254 We return -1 if no match, -2 if an internal error (such as the
4255 failure stack overflowing). Otherwise, we return the length of the
4256 matched substring. */
4259 re_match_2 (bufp
, string1
, size1
, string2
, size2
, pos
, regs
, stop
)
4260 struct re_pattern_buffer
*bufp
;
4261 const char *string1
, *string2
;
4264 struct re_registers
*regs
;
4271 int adjpos
= NILP (re_match_object
) || BUFFERP (re_match_object
);
4272 gl_state
.object
= re_match_object
;
4273 charpos
= SYNTAX_TABLE_BYTE_TO_CHAR (pos
+ adjpos
);
4274 SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object
, charpos
, 1);
4277 result
= re_match_2_internal (bufp
, string1
, size1
, string2
, size2
,
4283 /* This is a separate function so that we can force an alloca cleanup
4286 re_match_2_internal (bufp
, string1
, size1
, string2
, size2
, pos
, regs
, stop
)
4287 struct re_pattern_buffer
*bufp
;
4288 const char *string1
, *string2
;
4291 struct re_registers
*regs
;
4294 /* General temporaries. */
4298 /* Just past the end of the corresponding string. */
4299 const char *end1
, *end2
;
4301 /* Pointers into string1 and string2, just past the last characters in
4302 each to consider matching. */
4303 const char *end_match_1
, *end_match_2
;
4305 /* Where we are in the data, and the end of the current string. */
4306 const char *d
, *dend
;
4308 /* Where we are in the pattern, and the end of the pattern. */
4309 unsigned char *p
= bufp
->buffer
;
4310 register unsigned char *pend
= p
+ bufp
->used
;
4312 /* Mark the opcode just after a start_memory, so we can test for an
4313 empty subpattern when we get to the stop_memory. */
4314 unsigned char *just_past_start_mem
= 0;
4316 /* We use this to map every character in the string. */
4317 RE_TRANSLATE_TYPE translate
= bufp
->translate
;
4319 /* Nonzero if we have to concern multibyte character. */
4320 int multibyte
= bufp
->multibyte
;
4322 /* Failure point stack. Each place that can handle a failure further
4323 down the line pushes a failure point on this stack. It consists of
4324 restart, regend, and reg_info for all registers corresponding to
4325 the subexpressions we're currently inside, plus the number of such
4326 registers, and, finally, two char *'s. The first char * is where
4327 to resume scanning the pattern; the second one is where to resume
4328 scanning the strings. If the latter is zero, the failure point is
4329 a ``dummy''; if a failure happens and the failure point is a dummy,
4330 it gets discarded and the next next one is tried. */
4331 #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */
4332 fail_stack_type fail_stack
;
4335 static unsigned failure_id
= 0;
4336 unsigned nfailure_points_pushed
= 0, nfailure_points_popped
= 0;
4339 /* This holds the pointer to the failure stack, when
4340 it is allocated relocatably. */
4341 fail_stack_elt_t
*failure_stack_ptr
;
4343 /* We fill all the registers internally, independent of what we
4344 return, for use in backreferences. The number here includes
4345 an element for register zero. */
4346 unsigned num_regs
= bufp
->re_nsub
+ 1;
4348 /* The currently active registers. */
4349 unsigned lowest_active_reg
= NO_LOWEST_ACTIVE_REG
;
4350 unsigned highest_active_reg
= NO_HIGHEST_ACTIVE_REG
;
4352 /* Information on the contents of registers. These are pointers into
4353 the input strings; they record just what was matched (on this
4354 attempt) by a subexpression part of the pattern, that is, the
4355 regnum-th regstart pointer points to where in the pattern we began
4356 matching and the regnum-th regend points to right after where we
4357 stopped matching the regnum-th subexpression. (The zeroth register
4358 keeps track of what the whole pattern matches.) */
4359 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */
4360 const char **regstart
, **regend
;
4363 /* If a group that's operated upon by a repetition operator fails to
4364 match anything, then the register for its start will need to be
4365 restored because it will have been set to wherever in the string we
4366 are when we last see its open-group operator. Similarly for a
4368 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */
4369 const char **old_regstart
, **old_regend
;
4372 /* The is_active field of reg_info helps us keep track of which (possibly
4373 nested) subexpressions we are currently in. The matched_something
4374 field of reg_info[reg_num] helps us tell whether or not we have
4375 matched any of the pattern so far this time through the reg_num-th
4376 subexpression. These two fields get reset each time through any
4377 loop their register is in. */
4378 #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */
4379 register_info_type
*reg_info
;
4382 /* The following record the register info as found in the above
4383 variables when we find a match better than any we've seen before.
4384 This happens as we backtrack through the failure points, which in
4385 turn happens only if we have not yet matched the entire string. */
4386 unsigned best_regs_set
= false;
4387 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */
4388 const char **best_regstart
, **best_regend
;
4391 /* Logically, this is `best_regend[0]'. But we don't want to have to
4392 allocate space for that if we're not allocating space for anything
4393 else (see below). Also, we never need info about register 0 for
4394 any of the other register vectors, and it seems rather a kludge to
4395 treat `best_regend' differently than the rest. So we keep track of
4396 the end of the best match so far in a separate variable. We
4397 initialize this to NULL so that when we backtrack the first time
4398 and need to test it, it's not garbage. */
4399 const char *match_end
= NULL
;
4401 /* This helps SET_REGS_MATCHED avoid doing redundant work. */
4402 int set_regs_matched_done
= 0;
4404 /* Used when we pop values we don't care about. */
4405 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */
4406 const char **reg_dummy
;
4407 register_info_type
*reg_info_dummy
;
4411 /* Counts the total number of registers pushed. */
4412 unsigned num_regs_pushed
= 0;
4415 DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
4419 #ifdef MATCH_MAY_ALLOCATE
4420 /* Do not bother to initialize all the register variables if there are
4421 no groups in the pattern, as it takes a fair amount of time. If
4422 there are groups, we include space for register 0 (the whole
4423 pattern), even though we never use it, since it simplifies the
4424 array indexing. We should fix this. */
4427 regstart
= REGEX_TALLOC (num_regs
, const char *);
4428 regend
= REGEX_TALLOC (num_regs
, const char *);
4429 old_regstart
= REGEX_TALLOC (num_regs
, const char *);
4430 old_regend
= REGEX_TALLOC (num_regs
, const char *);
4431 best_regstart
= REGEX_TALLOC (num_regs
, const char *);
4432 best_regend
= REGEX_TALLOC (num_regs
, const char *);
4433 reg_info
= REGEX_TALLOC (num_regs
, register_info_type
);
4434 reg_dummy
= REGEX_TALLOC (num_regs
, const char *);
4435 reg_info_dummy
= REGEX_TALLOC (num_regs
, register_info_type
);
4437 if (!(regstart
&& regend
&& old_regstart
&& old_regend
&& reg_info
4438 && best_regstart
&& best_regend
&& reg_dummy
&& reg_info_dummy
))
4446 /* We must initialize all our variables to NULL, so that
4447 `FREE_VARIABLES' doesn't try to free them. */
4448 regstart
= regend
= old_regstart
= old_regend
= best_regstart
4449 = best_regend
= reg_dummy
= NULL
;
4450 reg_info
= reg_info_dummy
= (register_info_type
*) NULL
;
4452 #endif /* MATCH_MAY_ALLOCATE */
4454 /* The starting position is bogus. */
4455 if (pos
< 0 || pos
> size1
+ size2
)
4461 /* Initialize subexpression text positions to -1 to mark ones that no
4462 start_memory/stop_memory has been seen for. Also initialize the
4463 register information struct. */
4464 for (mcnt
= 1; mcnt
< num_regs
; mcnt
++)
4466 regstart
[mcnt
] = regend
[mcnt
]
4467 = old_regstart
[mcnt
] = old_regend
[mcnt
] = REG_UNSET_VALUE
;
4469 REG_MATCH_NULL_STRING_P (reg_info
[mcnt
]) = MATCH_NULL_UNSET_VALUE
;
4470 IS_ACTIVE (reg_info
[mcnt
]) = 0;
4471 MATCHED_SOMETHING (reg_info
[mcnt
]) = 0;
4472 EVER_MATCHED_SOMETHING (reg_info
[mcnt
]) = 0;
4475 /* We move `string1' into `string2' if the latter's empty -- but not if
4476 `string1' is null. */
4477 if (size2
== 0 && string1
!= NULL
)
4484 end1
= string1
+ size1
;
4485 end2
= string2
+ size2
;
4487 /* Compute where to stop matching, within the two strings. */
4490 end_match_1
= string1
+ stop
;
4491 end_match_2
= string2
;
4496 end_match_2
= string2
+ stop
- size1
;
4499 /* `p' scans through the pattern as `d' scans through the data.
4500 `dend' is the end of the input string that `d' points within. `d'
4501 is advanced into the following input string whenever necessary, but
4502 this happens before fetching; therefore, at the beginning of the
4503 loop, `d' can be pointing at the end of a string, but it cannot
4505 if (size1
> 0 && pos
<= size1
)
4512 d
= string2
+ pos
- size1
;
4516 DEBUG_PRINT1 ("The compiled pattern is: ");
4517 DEBUG_PRINT_COMPILED_PATTERN (bufp
, p
, pend
);
4518 DEBUG_PRINT1 ("The string to match is: `");
4519 DEBUG_PRINT_DOUBLE_STRING (d
, string1
, size1
, string2
, size2
);
4520 DEBUG_PRINT1 ("'\n");
4522 /* This loops over pattern commands. It exits by returning from the
4523 function if the match is complete, or it drops through if the match
4524 fails at this starting point in the input data. */
4527 DEBUG_PRINT2 ("\n0x%x: ", p
);
4530 { /* End of pattern means we might have succeeded. */
4531 DEBUG_PRINT1 ("end of pattern ... ");
4533 /* If we haven't matched the entire string, and we want the
4534 longest match, try backtracking. */
4535 if (d
!= end_match_2
)
4537 /* 1 if this match ends in the same string (string1 or string2)
4538 as the best previous match. */
4539 boolean same_str_p
= (FIRST_STRING_P (match_end
)
4540 == MATCHING_IN_FIRST_STRING
);
4541 /* 1 if this match is the best seen so far. */
4542 boolean best_match_p
;
4544 /* AIX compiler got confused when this was combined
4545 with the previous declaration. */
4547 best_match_p
= d
> match_end
;
4549 best_match_p
= !MATCHING_IN_FIRST_STRING
;
4551 DEBUG_PRINT1 ("backtracking.\n");
4553 if (!FAIL_STACK_EMPTY ())
4554 { /* More failure points to try. */
4556 /* If exceeds best match so far, save it. */
4557 if (!best_regs_set
|| best_match_p
)
4559 best_regs_set
= true;
4562 DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
4564 for (mcnt
= 1; mcnt
< num_regs
; mcnt
++)
4566 best_regstart
[mcnt
] = regstart
[mcnt
];
4567 best_regend
[mcnt
] = regend
[mcnt
];
4573 /* If no failure points, don't restore garbage. And if
4574 last match is real best match, don't restore second
4576 else if (best_regs_set
&& !best_match_p
)
4579 /* Restore best match. It may happen that `dend ==
4580 end_match_1' while the restored d is in string2.
4581 For example, the pattern `x.*y.*z' against the
4582 strings `x-' and `y-z-', if the two strings are
4583 not consecutive in memory. */
4584 DEBUG_PRINT1 ("Restoring best registers.\n");
4587 dend
= ((d
>= string1
&& d
<= end1
)
4588 ? end_match_1
: end_match_2
);
4590 for (mcnt
= 1; mcnt
< num_regs
; mcnt
++)
4592 regstart
[mcnt
] = best_regstart
[mcnt
];
4593 regend
[mcnt
] = best_regend
[mcnt
];
4596 } /* d != end_match_2 */
4599 DEBUG_PRINT1 ("Accepting match.\n");
4601 /* If caller wants register contents data back, do it. */
4602 if (regs
&& !bufp
->no_sub
)
4604 /* Have the register data arrays been allocated? */
4605 if (bufp
->regs_allocated
== REGS_UNALLOCATED
)
4606 { /* No. So allocate them with malloc. We need one
4607 extra element beyond `num_regs' for the `-1' marker
4609 regs
->num_regs
= MAX (RE_NREGS
, num_regs
+ 1);
4610 regs
->start
= TALLOC (regs
->num_regs
, regoff_t
);
4611 regs
->end
= TALLOC (regs
->num_regs
, regoff_t
);
4612 if (regs
->start
== NULL
|| regs
->end
== NULL
)
4617 bufp
->regs_allocated
= REGS_REALLOCATE
;
4619 else if (bufp
->regs_allocated
== REGS_REALLOCATE
)
4620 { /* Yes. If we need more elements than were already
4621 allocated, reallocate them. If we need fewer, just
4623 if (regs
->num_regs
< num_regs
+ 1)
4625 regs
->num_regs
= num_regs
+ 1;
4626 RETALLOC (regs
->start
, regs
->num_regs
, regoff_t
);
4627 RETALLOC (regs
->end
, regs
->num_regs
, regoff_t
);
4628 if (regs
->start
== NULL
|| regs
->end
== NULL
)
4637 /* These braces fend off a "empty body in an else-statement"
4638 warning under GCC when assert expands to nothing. */
4639 assert (bufp
->regs_allocated
== REGS_FIXED
);
4642 /* Convert the pointer data in `regstart' and `regend' to
4643 indices. Register zero has to be set differently,
4644 since we haven't kept track of any info for it. */
4645 if (regs
->num_regs
> 0)
4647 regs
->start
[0] = pos
;
4648 regs
->end
[0] = (MATCHING_IN_FIRST_STRING
4649 ? ((regoff_t
) (d
- string1
))
4650 : ((regoff_t
) (d
- string2
+ size1
)));
4653 /* Go through the first `min (num_regs, regs->num_regs)'
4654 registers, since that is all we initialized. */
4655 for (mcnt
= 1; mcnt
< MIN (num_regs
, regs
->num_regs
); mcnt
++)
4657 if (REG_UNSET (regstart
[mcnt
]) || REG_UNSET (regend
[mcnt
]))
4658 regs
->start
[mcnt
] = regs
->end
[mcnt
] = -1;
4662 = (regoff_t
) POINTER_TO_OFFSET (regstart
[mcnt
]);
4664 = (regoff_t
) POINTER_TO_OFFSET (regend
[mcnt
]);
4668 /* If the regs structure we return has more elements than
4669 were in the pattern, set the extra elements to -1. If
4670 we (re)allocated the registers, this is the case,
4671 because we always allocate enough to have at least one
4673 for (mcnt
= num_regs
; mcnt
< regs
->num_regs
; mcnt
++)
4674 regs
->start
[mcnt
] = regs
->end
[mcnt
] = -1;
4675 } /* regs && !bufp->no_sub */
4677 DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
4678 nfailure_points_pushed
, nfailure_points_popped
,
4679 nfailure_points_pushed
- nfailure_points_popped
);
4680 DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed
);
4682 mcnt
= d
- pos
- (MATCHING_IN_FIRST_STRING
4686 DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt
);
4692 /* Otherwise match next pattern command. */
4693 switch (SWITCH_ENUM_CAST ((re_opcode_t
) *p
++))
4695 /* Ignore these. Used to ignore the n of succeed_n's which
4696 currently have n == 0. */
4698 DEBUG_PRINT1 ("EXECUTING no_op.\n");
4702 DEBUG_PRINT1 ("EXECUTING succeed.\n");
4705 /* Match the next n pattern characters exactly. The following
4706 byte in the pattern defines n, and the n bytes after that
4707 are the characters to match. */
4710 DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt
);
4712 /* This is written out as an if-else so we don't waste time
4713 testing `translate' inside the loop. */
4714 if (RE_TRANSLATE_P (translate
))
4720 int pat_charlen
, buf_charlen
;
4721 unsigned int pat_ch
, buf_ch
;
4724 pat_ch
= STRING_CHAR_AND_LENGTH (p
, pend
- p
, pat_charlen
);
4725 buf_ch
= STRING_CHAR_AND_LENGTH (d
, dend
- d
, buf_charlen
);
4727 if (RE_TRANSLATE (translate
, buf_ch
)
4733 mcnt
-= pat_charlen
;
4737 #endif /* not emacs */
4741 if ((unsigned char) RE_TRANSLATE (translate
, (unsigned char) *d
)
4742 != (unsigned char) *p
++)
4753 if (*d
++ != (char) *p
++) goto fail
;
4757 SET_REGS_MATCHED ();
4761 /* Match any character except possibly a newline or a null. */
4765 unsigned int buf_ch
;
4767 DEBUG_PRINT1 ("EXECUTING anychar.\n");
4773 buf_ch
= STRING_CHAR_AND_LENGTH (d
, dend
- d
, buf_charlen
);
4775 #endif /* not emacs */
4777 buf_ch
= (unsigned char) *d
;
4781 buf_ch
= TRANSLATE (buf_ch
);
4783 if ((!(bufp
->syntax
& RE_DOT_NEWLINE
)
4785 || ((bufp
->syntax
& RE_DOT_NOT_NULL
)
4786 && buf_ch
== '\000'))
4789 SET_REGS_MATCHED ();
4790 DEBUG_PRINT2 (" Matched `%d'.\n", *d
);
4799 register unsigned int c
;
4800 boolean
not = (re_opcode_t
) *(p
- 1) == charset_not
;
4803 /* Start of actual range_table, or end of bitmap if there is no
4805 unsigned char *range_table
;
4807 /* Nonzero if there is a range table. */
4808 int range_table_exists
;
4810 /* Number of ranges of range table. This is not included
4811 in the initial byte-length of the command. */
4814 DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
4817 c
= (unsigned char) *d
;
4819 range_table_exists
= CHARSET_RANGE_TABLE_EXISTS_P (&p
[-1]);
4822 if (range_table_exists
)
4824 range_table
= CHARSET_RANGE_TABLE (&p
[-1]); /* Past the bitmap. */
4825 EXTRACT_NUMBER_AND_INCR (count
, range_table
);
4828 if (multibyte
&& BASE_LEADING_CODE_P (c
))
4829 c
= STRING_CHAR_AND_LENGTH (d
, dend
- d
, len
);
4832 if (SINGLE_BYTE_CHAR_P (c
))
4833 { /* Lookup bitmap. */
4834 c
= TRANSLATE (c
); /* The character to match. */
4837 /* Cast to `unsigned' instead of `unsigned char' in
4838 case the bit list is a full 32 bytes long. */
4839 if (c
< (unsigned) (CHARSET_BITMAP_SIZE (&p
[-1]) * BYTEWIDTH
)
4840 && p
[1 + c
/ BYTEWIDTH
] & (1 << (c
% BYTEWIDTH
)))
4844 else if (range_table_exists
)
4846 int class_bits
= CHARSET_RANGE_TABLE_BITS (&p
[-1]);
4848 if ( (class_bits
& BIT_ALNUM
&& ISALNUM (c
))
4849 | (class_bits
& BIT_ALPHA
&& ISALPHA (c
))
4850 | (class_bits
& BIT_ASCII
&& IS_REAL_ASCII (c
))
4851 | (class_bits
& BIT_GRAPH
&& ISGRAPH (c
))
4852 | (class_bits
& BIT_LOWER
&& ISLOWER (c
))
4853 | (class_bits
& BIT_MULTIBYTE
&& !ISUNIBYTE (c
))
4854 | (class_bits
& BIT_NONASCII
&& !IS_REAL_ASCII (c
))
4855 | (class_bits
& BIT_PRINT
&& ISPRINT (c
))
4856 | (class_bits
& BIT_PUNCT
&& ISPUNCT (c
))
4857 | (class_bits
& BIT_SPACE
&& ISSPACE (c
))
4858 | (class_bits
& BIT_UNIBYTE
&& ISUNIBYTE (c
))
4859 | (class_bits
& BIT_UPPER
&& ISUPPER (c
))
4860 | (class_bits
& BIT_WORD
&& ISWORD (c
)))
4863 CHARSET_LOOKUP_RANGE_TABLE_RAW (not, c
, range_table
, count
);
4867 if (range_table_exists
)
4868 p
= CHARSET_RANGE_TABLE_END (range_table
, count
);
4870 p
+= CHARSET_BITMAP_SIZE (&p
[-1]) + 1;
4872 if (!not) goto fail
;
4874 SET_REGS_MATCHED ();
4880 /* The beginning of a group is represented by start_memory.
4881 The arguments are the register number in the next byte, and the
4882 number of groups inner to this one in the next. The text
4883 matched within the group is recorded (in the internal
4884 registers data structure) under the register number. */
4886 DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p
, p
[1]);
4888 /* Find out if this group can match the empty string. */
4889 p1
= p
; /* To send to group_match_null_string_p. */
4891 if (REG_MATCH_NULL_STRING_P (reg_info
[*p
]) == MATCH_NULL_UNSET_VALUE
)
4892 REG_MATCH_NULL_STRING_P (reg_info
[*p
])
4893 = group_match_null_string_p (&p1
, pend
, reg_info
);
4895 /* Save the position in the string where we were the last time
4896 we were at this open-group operator in case the group is
4897 operated upon by a repetition operator, e.g., with `(a*)*b'
4898 against `ab'; then we want to ignore where we are now in
4899 the string in case this attempt to match fails. */
4900 old_regstart
[*p
] = REG_MATCH_NULL_STRING_P (reg_info
[*p
])
4901 ? REG_UNSET (regstart
[*p
]) ? d
: regstart
[*p
]
4903 DEBUG_PRINT2 (" old_regstart: %d\n",
4904 POINTER_TO_OFFSET (old_regstart
[*p
]));
4907 DEBUG_PRINT2 (" regstart: %d\n", POINTER_TO_OFFSET (regstart
[*p
]));
4909 IS_ACTIVE (reg_info
[*p
]) = 1;
4910 MATCHED_SOMETHING (reg_info
[*p
]) = 0;
4912 /* Clear this whenever we change the register activity status. */
4913 set_regs_matched_done
= 0;
4915 /* This is the new highest active register. */
4916 highest_active_reg
= *p
;
4918 /* If nothing was active before, this is the new lowest active
4920 if (lowest_active_reg
== NO_LOWEST_ACTIVE_REG
)
4921 lowest_active_reg
= *p
;
4923 /* Move past the register number and inner group count. */
4925 just_past_start_mem
= p
;
4930 /* The stop_memory opcode represents the end of a group. Its
4931 arguments are the same as start_memory's: the register
4932 number, and the number of inner groups. */
4934 DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p
, p
[1]);
4936 /* We need to save the string position the last time we were at
4937 this close-group operator in case the group is operated
4938 upon by a repetition operator, e.g., with `((a*)*(b*)*)*'
4939 against `aba'; then we want to ignore where we are now in
4940 the string in case this attempt to match fails. */
4941 old_regend
[*p
] = REG_MATCH_NULL_STRING_P (reg_info
[*p
])
4942 ? REG_UNSET (regend
[*p
]) ? d
: regend
[*p
]
4944 DEBUG_PRINT2 (" old_regend: %d\n",
4945 POINTER_TO_OFFSET (old_regend
[*p
]));
4948 DEBUG_PRINT2 (" regend: %d\n", POINTER_TO_OFFSET (regend
[*p
]));
4950 /* This register isn't active anymore. */
4951 IS_ACTIVE (reg_info
[*p
]) = 0;
4953 /* Clear this whenever we change the register activity status. */
4954 set_regs_matched_done
= 0;
4956 /* If this was the only register active, nothing is active
4958 if (lowest_active_reg
== highest_active_reg
)
4960 lowest_active_reg
= NO_LOWEST_ACTIVE_REG
;
4961 highest_active_reg
= NO_HIGHEST_ACTIVE_REG
;
4964 { /* We must scan for the new highest active register, since
4965 it isn't necessarily one less than now: consider
4966 (a(b)c(d(e)f)g). When group 3 ends, after the f), the
4967 new highest active register is 1. */
4968 unsigned char r
= *p
- 1;
4969 while (r
> 0 && !IS_ACTIVE (reg_info
[r
]))
4972 /* If we end up at register zero, that means that we saved
4973 the registers as the result of an `on_failure_jump', not
4974 a `start_memory', and we jumped to past the innermost
4975 `stop_memory'. For example, in ((.)*) we save
4976 registers 1 and 2 as a result of the *, but when we pop
4977 back to the second ), we are at the stop_memory 1.
4978 Thus, nothing is active. */
4981 lowest_active_reg
= NO_LOWEST_ACTIVE_REG
;
4982 highest_active_reg
= NO_HIGHEST_ACTIVE_REG
;
4985 highest_active_reg
= r
;
4988 /* If just failed to match something this time around with a
4989 group that's operated on by a repetition operator, try to
4990 force exit from the ``loop'', and restore the register
4991 information for this group that we had before trying this
4993 if ((!MATCHED_SOMETHING (reg_info
[*p
])
4994 || just_past_start_mem
== p
- 1)
4997 boolean is_a_jump_n
= false;
5001 switch ((re_opcode_t
) *p1
++)
5005 case pop_failure_jump
:
5006 case maybe_pop_jump
:
5008 case dummy_failure_jump
:
5009 EXTRACT_NUMBER_AND_INCR (mcnt
, p1
);
5019 /* If the next operation is a jump backwards in the pattern
5020 to an on_failure_jump right before the start_memory
5021 corresponding to this stop_memory, exit from the loop
5022 by forcing a failure after pushing on the stack the
5023 on_failure_jump's jump in the pattern, and d. */
5024 if (mcnt
< 0 && (re_opcode_t
) *p1
== on_failure_jump
5025 && (re_opcode_t
) p1
[3] == start_memory
&& p1
[4] == *p
)
5027 /* If this group ever matched anything, then restore
5028 what its registers were before trying this last
5029 failed match, e.g., with `(a*)*b' against `ab' for
5030 regstart[1], and, e.g., with `((a*)*(b*)*)*'
5031 against `aba' for regend[3].
5033 Also restore the registers for inner groups for,
5034 e.g., `((a*)(b*))*' against `aba' (register 3 would
5035 otherwise get trashed). */
5037 if (EVER_MATCHED_SOMETHING (reg_info
[*p
]))
5041 EVER_MATCHED_SOMETHING (reg_info
[*p
]) = 0;
5043 /* Restore this and inner groups' (if any) registers. */
5044 for (r
= *p
; r
< *p
+ *(p
+ 1); r
++)
5046 regstart
[r
] = old_regstart
[r
];
5048 /* xx why this test? */
5049 if (old_regend
[r
] >= regstart
[r
])
5050 regend
[r
] = old_regend
[r
];
5054 EXTRACT_NUMBER_AND_INCR (mcnt
, p1
);
5055 PUSH_FAILURE_POINT (p1
+ mcnt
, d
, -2);
5061 /* Move past the register number and the inner group count. */
5066 /* \<digit> has been turned into a `duplicate' command which is
5067 followed by the numeric value of <digit> as the register number. */
5070 register const char *d2
, *dend2
;
5071 int regno
= *p
++; /* Get which register to match against. */
5072 DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno
);
5074 /* Can't back reference a group which we've never matched. */
5075 if (REG_UNSET (regstart
[regno
]) || REG_UNSET (regend
[regno
]))
5078 /* Where in input to try to start matching. */
5079 d2
= regstart
[regno
];
5081 /* Where to stop matching; if both the place to start and
5082 the place to stop matching are in the same string, then
5083 set to the place to stop, otherwise, for now have to use
5084 the end of the first string. */
5086 dend2
= ((FIRST_STRING_P (regstart
[regno
])
5087 == FIRST_STRING_P (regend
[regno
]))
5088 ? regend
[regno
] : end_match_1
);
5091 /* If necessary, advance to next segment in register
5095 if (dend2
== end_match_2
) break;
5096 if (dend2
== regend
[regno
]) break;
5098 /* End of string1 => advance to string2. */
5100 dend2
= regend
[regno
];
5102 /* At end of register contents => success */
5103 if (d2
== dend2
) break;
5105 /* If necessary, advance to next segment in data. */
5108 /* How many characters left in this segment to match. */
5111 /* Want how many consecutive characters we can match in
5112 one shot, so, if necessary, adjust the count. */
5113 if (mcnt
> dend2
- d2
)
5116 /* Compare that many; failure if mismatch, else move
5118 if (RE_TRANSLATE_P (translate
)
5119 ? bcmp_translate (d
, d2
, mcnt
, translate
)
5120 : bcmp (d
, d2
, mcnt
))
5122 d
+= mcnt
, d2
+= mcnt
;
5124 /* Do this because we've match some characters. */
5125 SET_REGS_MATCHED ();
5131 /* begline matches the empty string at the beginning of the string
5132 (unless `not_bol' is set in `bufp'), and, if
5133 `newline_anchor' is set, after newlines. */
5135 DEBUG_PRINT1 ("EXECUTING begline.\n");
5137 if (AT_STRINGS_BEG (d
))
5139 if (!bufp
->not_bol
) break;
5141 else if (d
[-1] == '\n' && bufp
->newline_anchor
)
5145 /* In all other cases, we fail. */
5149 /* endline is the dual of begline. */
5151 DEBUG_PRINT1 ("EXECUTING endline.\n");
5153 if (AT_STRINGS_END (d
))
5155 if (!bufp
->not_eol
) break;
5158 /* We have to ``prefetch'' the next character. */
5159 else if ((d
== end1
? *string2
: *d
) == '\n'
5160 && bufp
->newline_anchor
)
5167 /* Match at the very beginning of the data. */
5169 DEBUG_PRINT1 ("EXECUTING begbuf.\n");
5170 if (AT_STRINGS_BEG (d
))
5175 /* Match at the very end of the data. */
5177 DEBUG_PRINT1 ("EXECUTING endbuf.\n");
5178 if (AT_STRINGS_END (d
))
5183 /* on_failure_keep_string_jump is used to optimize `.*\n'. It
5184 pushes NULL as the value for the string on the stack. Then
5185 `pop_failure_point' will keep the current value for the
5186 string, instead of restoring it. To see why, consider
5187 matching `foo\nbar' against `.*\n'. The .* matches the foo;
5188 then the . fails against the \n. But the next thing we want
5189 to do is match the \n against the \n; if we restored the
5190 string value, we would be back at the foo.
5192 Because this is used only in specific cases, we don't need to
5193 check all the things that `on_failure_jump' does, to make
5194 sure the right things get saved on the stack. Hence we don't
5195 share its code. The only reason to push anything on the
5196 stack at all is that otherwise we would have to change
5197 `anychar's code to do something besides goto fail in this
5198 case; that seems worse than this. */
5199 case on_failure_keep_string_jump
:
5200 DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump");
5202 EXTRACT_NUMBER_AND_INCR (mcnt
, p
);
5203 DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt
, p
+ mcnt
);
5205 PUSH_FAILURE_POINT (p
+ mcnt
, NULL
, -2);
5209 /* Uses of on_failure_jump:
5211 Each alternative starts with an on_failure_jump that points
5212 to the beginning of the next alternative. Each alternative
5213 except the last ends with a jump that in effect jumps past
5214 the rest of the alternatives. (They really jump to the
5215 ending jump of the following alternative, because tensioning
5216 these jumps is a hassle.)
5218 Repeats start with an on_failure_jump that points past both
5219 the repetition text and either the following jump or
5220 pop_failure_jump back to this on_failure_jump. */
5221 case on_failure_jump
:
5223 DEBUG_PRINT1 ("EXECUTING on_failure_jump");
5225 #if defined (WINDOWSNT) && defined (emacs)
5229 EXTRACT_NUMBER_AND_INCR (mcnt
, p
);
5230 DEBUG_PRINT3 (" %d (to 0x%x)", mcnt
, p
+ mcnt
);
5232 /* If this on_failure_jump comes right before a group (i.e.,
5233 the original * applied to a group), save the information
5234 for that group and all inner ones, so that if we fail back
5235 to this point, the group's information will be correct.
5236 For example, in \(a*\)*\1, we need the preceding group,
5237 and in \(zz\(a*\)b*\)\2, we need the inner group. */
5239 /* We can't use `p' to check ahead because we push
5240 a failure point to `p + mcnt' after we do this. */
5243 /* We need to skip no_op's before we look for the
5244 start_memory in case this on_failure_jump is happening as
5245 the result of a completed succeed_n, as in \(a\)\{1,3\}b\1
5247 while (p1
< pend
&& (re_opcode_t
) *p1
== no_op
)
5250 if (p1
< pend
&& (re_opcode_t
) *p1
== start_memory
)
5252 /* We have a new highest active register now. This will
5253 get reset at the start_memory we are about to get to,
5254 but we will have saved all the registers relevant to
5255 this repetition op, as described above. */
5256 highest_active_reg
= *(p1
+ 1) + *(p1
+ 2);
5257 if (lowest_active_reg
== NO_LOWEST_ACTIVE_REG
)
5258 lowest_active_reg
= *(p1
+ 1);
5261 DEBUG_PRINT1 (":\n");
5262 PUSH_FAILURE_POINT (p
+ mcnt
, d
, -2);
5266 /* A smart repeat ends with `maybe_pop_jump'.
5267 We change it to either `pop_failure_jump' or `jump'. */
5268 case maybe_pop_jump
:
5269 #if defined (WINDOWSNT) && defined (emacs)
5272 EXTRACT_NUMBER_AND_INCR (mcnt
, p
);
5273 DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt
);
5275 register unsigned char *p2
= p
;
5277 /* Compare the beginning of the repeat with what in the
5278 pattern follows its end. If we can establish that there
5279 is nothing that they would both match, i.e., that we
5280 would have to backtrack because of (as in, e.g., `a*a')
5281 then we can change to pop_failure_jump, because we'll
5282 never have to backtrack.
5284 This is not true in the case of alternatives: in
5285 `(a|ab)*' we do need to backtrack to the `ab' alternative
5286 (e.g., if the string was `ab'). But instead of trying to
5287 detect that here, the alternative has put on a dummy
5288 failure point which is what we will end up popping. */
5290 /* Skip over open/close-group commands.
5291 If what follows this loop is a ...+ construct,
5292 look at what begins its body, since we will have to
5293 match at least one of that. */
5297 && ((re_opcode_t
) *p2
== stop_memory
5298 || (re_opcode_t
) *p2
== start_memory
))
5300 else if (p2
+ 6 < pend
5301 && (re_opcode_t
) *p2
== dummy_failure_jump
)
5308 /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
5309 to the `maybe_finalize_jump' of this case. Examine what
5312 /* If we're at the end of the pattern, we can change. */
5315 /* Consider what happens when matching ":\(.*\)"
5316 against ":/". I don't really understand this code
5318 p
[-3] = (unsigned char) pop_failure_jump
;
5320 (" End of pattern: change to `pop_failure_jump'.\n");
5323 else if ((re_opcode_t
) *p2
== exactn
5324 || (bufp
->newline_anchor
&& (re_opcode_t
) *p2
== endline
))
5326 register unsigned int c
5327 = *p2
== (unsigned char) endline
? '\n' : p2
[2];
5329 if ((re_opcode_t
) p1
[3] == exactn
)
5331 if (!(multibyte
/* && (c != '\n') */
5332 && BASE_LEADING_CODE_P (c
))
5334 : (STRING_CHAR (&p2
[2], pend
- &p2
[2])
5335 != STRING_CHAR (&p1
[5], pend
- &p1
[5])))
5337 p
[-3] = (unsigned char) pop_failure_jump
;
5338 DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n",
5343 else if ((re_opcode_t
) p1
[3] == charset
5344 || (re_opcode_t
) p1
[3] == charset_not
)
5346 int not = (re_opcode_t
) p1
[3] == charset_not
;
5348 if (multibyte
/* && (c != '\n') */
5349 && BASE_LEADING_CODE_P (c
))
5350 c
= STRING_CHAR (&p2
[2], pend
- &p2
[2]);
5352 /* Test if C is listed in charset (or charset_not)
5354 if (SINGLE_BYTE_CHAR_P (c
))
5356 if (c
< CHARSET_BITMAP_SIZE (&p1
[3]) * BYTEWIDTH
5357 && p1
[5 + c
/ BYTEWIDTH
] & (1 << (c
% BYTEWIDTH
)))
5360 else if (CHARSET_RANGE_TABLE_EXISTS_P (&p1
[3]))
5361 CHARSET_LOOKUP_RANGE_TABLE (not, c
, &p1
[3]);
5363 /* `not' is equal to 1 if c would match, which means
5364 that we can't change to pop_failure_jump. */
5367 p
[-3] = (unsigned char) pop_failure_jump
;
5368 DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
5372 else if ((re_opcode_t
) *p2
== charset
)
5374 if ((re_opcode_t
) p1
[3] == exactn
)
5376 register unsigned int c
= p1
[5];
5379 if (multibyte
&& BASE_LEADING_CODE_P (c
))
5380 c
= STRING_CHAR (&p1
[5], pend
- &p1
[5]);
5382 /* Test if C is listed in charset at `p2'. */
5383 if (SINGLE_BYTE_CHAR_P (c
))
5385 if (c
< CHARSET_BITMAP_SIZE (p2
) * BYTEWIDTH
5386 && (p2
[2 + c
/ BYTEWIDTH
]
5387 & (1 << (c
% BYTEWIDTH
))))
5390 else if (CHARSET_RANGE_TABLE_EXISTS_P (p2
))
5391 CHARSET_LOOKUP_RANGE_TABLE (not, c
, p2
);
5395 p
[-3] = (unsigned char) pop_failure_jump
;
5396 DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
5400 /* It is hard to list up all the character in charset
5401 P2 if it includes multibyte character. Give up in
5403 else if (!multibyte
|| !CHARSET_RANGE_TABLE_EXISTS_P (p2
))
5405 /* Now, we are sure that P2 has no range table.
5406 So, for the size of bitmap in P2, `p2[1]' is
5407 enough. But P1 may have range table, so the
5408 size of bitmap table of P1 is extracted by
5409 using macro `CHARSET_BITMAP_SIZE'.
5411 Since we know that all the character listed in
5412 P2 is ASCII, it is enough to test only bitmap
5415 if ((re_opcode_t
) p1
[3] == charset_not
)
5418 /* We win if the charset_not inside the loop lists
5419 every character listed in the charset after. */
5420 for (idx
= 0; idx
< (int) p2
[1]; idx
++)
5421 if (! (p2
[2 + idx
] == 0
5422 || (idx
< CHARSET_BITMAP_SIZE (&p1
[3])
5423 && ((p2
[2 + idx
] & ~ p1
[5 + idx
]) == 0))))
5428 p
[-3] = (unsigned char) pop_failure_jump
;
5429 DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
5432 else if ((re_opcode_t
) p1
[3] == charset
)
5435 /* We win if the charset inside the loop
5436 has no overlap with the one after the loop. */
5439 && idx
< CHARSET_BITMAP_SIZE (&p1
[3]));
5441 if ((p2
[2 + idx
] & p1
[5 + idx
]) != 0)
5445 || idx
== CHARSET_BITMAP_SIZE (&p1
[3]))
5447 p
[-3] = (unsigned char) pop_failure_jump
;
5448 DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
5454 p
-= 2; /* Point at relative address again. */
5455 if ((re_opcode_t
) p
[-1] != pop_failure_jump
)
5457 p
[-1] = (unsigned char) jump
;
5458 DEBUG_PRINT1 (" Match => jump.\n");
5459 goto unconditional_jump
;
5461 /* Note fall through. */
5464 /* The end of a simple repeat has a pop_failure_jump back to
5465 its matching on_failure_jump, where the latter will push a
5466 failure point. The pop_failure_jump takes off failure
5467 points put on by this pop_failure_jump's matching
5468 on_failure_jump; we got through the pattern to here from the
5469 matching on_failure_jump, so didn't fail. */
5470 case pop_failure_jump
:
5472 /* We need to pass separate storage for the lowest and
5473 highest registers, even though we don't care about the
5474 actual values. Otherwise, we will restore only one
5475 register from the stack, since lowest will == highest in
5476 `pop_failure_point'. */
5477 unsigned dummy_low_reg
, dummy_high_reg
;
5478 unsigned char *pdummy
;
5481 DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n");
5482 POP_FAILURE_POINT (sdummy
, pdummy
,
5483 dummy_low_reg
, dummy_high_reg
,
5484 reg_dummy
, reg_dummy
, reg_info_dummy
);
5486 /* Note fall through. */
5489 /* Unconditionally jump (without popping any failure points). */
5492 #if defined (WINDOWSNT) && defined (emacs)
5495 EXTRACT_NUMBER_AND_INCR (mcnt
, p
); /* Get the amount to jump. */
5496 DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt
);
5497 p
+= mcnt
; /* Do the jump. */
5498 DEBUG_PRINT2 ("(to 0x%x).\n", p
);
5502 /* We need this opcode so we can detect where alternatives end
5503 in `group_match_null_string_p' et al. */
5505 DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n");
5506 goto unconditional_jump
;
5509 /* Normally, the on_failure_jump pushes a failure point, which
5510 then gets popped at pop_failure_jump. We will end up at
5511 pop_failure_jump, also, and with a pattern of, say, `a+', we
5512 are skipping over the on_failure_jump, so we have to push
5513 something meaningless for pop_failure_jump to pop. */
5514 case dummy_failure_jump
:
5515 DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
5516 /* It doesn't matter what we push for the string here. What
5517 the code at `fail' tests is the value for the pattern. */
5518 PUSH_FAILURE_POINT (0, 0, -2);
5519 goto unconditional_jump
;
5522 /* At the end of an alternative, we need to push a dummy failure
5523 point in case we are followed by a `pop_failure_jump', because
5524 we don't want the failure point for the alternative to be
5525 popped. For example, matching `(a|ab)*' against `aab'
5526 requires that we match the `ab' alternative. */
5527 case push_dummy_failure
:
5528 DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
5529 /* See comments just above at `dummy_failure_jump' about the
5531 PUSH_FAILURE_POINT (0, 0, -2);
5534 /* Have to succeed matching what follows at least n times.
5535 After that, handle like `on_failure_jump'. */
5537 EXTRACT_NUMBER (mcnt
, p
+ 2);
5538 DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt
);
5541 /* Originally, this is how many times we HAVE to succeed. */
5546 STORE_NUMBER_AND_INCR (p
, mcnt
);
5547 DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p
, mcnt
);
5551 DEBUG_PRINT2 (" Setting two bytes from 0x%x to no_op.\n", p
+2);
5552 p
[2] = (unsigned char) no_op
;
5553 p
[3] = (unsigned char) no_op
;
5559 EXTRACT_NUMBER (mcnt
, p
+ 2);
5560 DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt
);
5562 /* Originally, this is how many times we CAN jump. */
5566 STORE_NUMBER (p
+ 2, mcnt
);
5567 goto unconditional_jump
;
5569 /* If don't have to jump any more, skip over the rest of command. */
5576 DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
5578 EXTRACT_NUMBER_AND_INCR (mcnt
, p
);
5580 EXTRACT_NUMBER_AND_INCR (mcnt
, p
);
5581 DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p1
, mcnt
);
5582 STORE_NUMBER (p1
, mcnt
);
5587 DEBUG_PRINT1 ("EXECUTING wordbound.\n");
5589 /* We SUCCEED in one of the following cases: */
5591 /* Case 1: D is at the beginning or the end of string. */
5592 if (AT_STRINGS_BEG (d
) || AT_STRINGS_END (d
))
5596 /* C1 is the character before D, S1 is the syntax of C1, C2
5597 is the character at D, and S2 is the syntax of C2. */
5599 int pos1
= PTR_TO_OFFSET (d
- 1);
5602 GET_CHAR_BEFORE_2 (c1
, d
, string1
, end1
, string2
, end2
);
5603 GET_CHAR_AFTER_2 (c2
, d
, string1
, end1
, string2
, end2
);
5605 charpos
= SYNTAX_TABLE_BYTE_TO_CHAR (pos1
);
5606 UPDATE_SYNTAX_TABLE (charpos
);
5610 UPDATE_SYNTAX_TABLE_FORWARD (charpos
+ 1);
5614 if (/* Case 2: Only one of S1 and S2 is Sword. */
5615 ((s1
== Sword
) != (s2
== Sword
))
5616 /* Case 3: Both of S1 and S2 are Sword, and macro
5617 WORD_BOUNDARY_P (C1, C2) returns nonzero. */
5618 || ((s1
== Sword
) && WORD_BOUNDARY_P (c1
, c2
)))
5624 DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
5626 /* We FAIL in one of the following cases: */
5628 /* Case 1: D is at the beginning or the end of string. */
5629 if (AT_STRINGS_BEG (d
) || AT_STRINGS_END (d
))
5633 /* C1 is the character before D, S1 is the syntax of C1, C2
5634 is the character at D, and S2 is the syntax of C2. */
5636 int pos1
= PTR_TO_OFFSET (d
- 1);
5639 GET_CHAR_BEFORE_2 (c1
, d
, string1
, end1
, string2
, end2
);
5640 GET_CHAR_AFTER_2 (c2
, d
, string1
, end1
, string2
, end2
);
5642 charpos
= SYNTAX_TABLE_BYTE_TO_CHAR (pos1
);
5643 UPDATE_SYNTAX_TABLE (charpos
);
5647 UPDATE_SYNTAX_TABLE_FORWARD (charpos
+ 1);
5651 if (/* Case 2: Only one of S1 and S2 is Sword. */
5652 ((s1
== Sword
) != (s2
== Sword
))
5653 /* Case 3: Both of S1 and S2 are Sword, and macro
5654 WORD_BOUNDARY_P (C1, C2) returns nonzero. */
5655 || ((s1
== Sword
) && WORD_BOUNDARY_P (c1
, c2
)))
5661 DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
5663 /* We FAIL in one of the following cases: */
5665 /* Case 1: D is at the end of string. */
5666 if (AT_STRINGS_END (d
))
5670 /* C1 is the character before D, S1 is the syntax of C1, C2
5671 is the character at D, and S2 is the syntax of C2. */
5673 int pos1
= PTR_TO_OFFSET (d
);
5676 GET_CHAR_AFTER_2 (c2
, d
, string1
, end1
, string2
, end2
);
5678 charpos
= SYNTAX_TABLE_BYTE_TO_CHAR (pos1
);
5679 UPDATE_SYNTAX_TABLE (charpos
);
5683 /* Case 2: S2 is not Sword. */
5687 /* Case 3: D is not at the beginning of string ... */
5688 if (!AT_STRINGS_BEG (d
))
5690 GET_CHAR_BEFORE_2 (c1
, d
, string1
, end1
, string2
, end2
);
5692 UPDATE_SYNTAX_TABLE_BACKWARD (charpos
- 1);
5696 /* ... and S1 is Sword, and WORD_BOUNDARY_P (C1, C2)
5698 if ((s1
== Sword
) && !WORD_BOUNDARY_P (c1
, c2
))
5705 DEBUG_PRINT1 ("EXECUTING wordend.\n");
5707 /* We FAIL in one of the following cases: */
5709 /* Case 1: D is at the beginning of string. */
5710 if (AT_STRINGS_BEG (d
))
5714 /* C1 is the character before D, S1 is the syntax of C1, C2
5715 is the character at D, and S2 is the syntax of C2. */
5717 int pos1
= PTR_TO_OFFSET (d
);
5720 GET_CHAR_BEFORE_2 (c1
, d
, string1
, end1
, string2
, end2
);
5722 charpos
= SYNTAX_TABLE_BYTE_TO_CHAR (pos1
- 1);
5723 UPDATE_SYNTAX_TABLE (charpos
);
5727 /* Case 2: S1 is not Sword. */
5731 /* Case 3: D is not at the end of string ... */
5732 if (!AT_STRINGS_END (d
))
5734 GET_CHAR_AFTER_2 (c2
, d
, string1
, end1
, string2
, end2
);
5736 UPDATE_SYNTAX_TABLE_FORWARD (charpos
);
5740 /* ... and S2 is Sword, and WORD_BOUNDARY_P (C1, C2)
5742 if ((s2
== Sword
) && !WORD_BOUNDARY_P (c1
, c2
))
5750 DEBUG_PRINT1 ("EXECUTING before_dot.\n");
5751 if (PTR_BYTE_POS ((unsigned char *) d
) >= PT_BYTE
)
5756 DEBUG_PRINT1 ("EXECUTING at_dot.\n");
5757 if (PTR_BYTE_POS ((unsigned char *) d
) != PT_BYTE
)
5762 DEBUG_PRINT1 ("EXECUTING after_dot.\n");
5763 if (PTR_BYTE_POS ((unsigned char *) d
) <= PT_BYTE
)
5768 DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt
);
5773 DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n");
5779 int pos1
= SYNTAX_TABLE_BYTE_TO_CHAR (PTR_TO_OFFSET (d
));
5780 UPDATE_SYNTAX_TABLE (pos1
);
5787 /* we must concern about multibyte form, ... */
5788 c
= STRING_CHAR_AND_LENGTH (d
, dend
- d
, len
);
5790 /* everything should be handled as ASCII, even though it
5791 looks like multibyte form. */
5794 if (SYNTAX (c
) != (enum syntaxcode
) mcnt
)
5798 SET_REGS_MATCHED ();
5802 DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt
);
5804 goto matchnotsyntax
;
5807 DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n");
5813 int pos1
= SYNTAX_TABLE_BYTE_TO_CHAR (PTR_TO_OFFSET (d
));
5814 UPDATE_SYNTAX_TABLE (pos1
);
5821 c
= STRING_CHAR_AND_LENGTH (d
, dend
- d
, len
);
5825 if (SYNTAX (c
) == (enum syntaxcode
) mcnt
)
5829 SET_REGS_MATCHED ();
5833 DEBUG_PRINT2 ("EXECUTING categoryspec %d.\n", *p
);
5840 c
= STRING_CHAR_AND_LENGTH (d
, dend
- d
, len
);
5844 if (!CHAR_HAS_CATEGORY (c
, mcnt
))
5848 SET_REGS_MATCHED ();
5851 case notcategoryspec
:
5852 DEBUG_PRINT2 ("EXECUTING notcategoryspec %d.\n", *p
);
5859 c
= STRING_CHAR_AND_LENGTH (d
, dend
- d
, len
);
5863 if (CHAR_HAS_CATEGORY (c
, mcnt
))
5867 SET_REGS_MATCHED ();
5870 #else /* not emacs */
5872 DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
5874 if (!WORDCHAR_P (d
))
5876 SET_REGS_MATCHED ();
5881 DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
5885 SET_REGS_MATCHED ();
5888 #endif /* not emacs */
5893 continue; /* Successfully executed one pattern command; keep going. */
5896 /* We goto here if a matching operation fails. */
5898 #if defined (WINDOWSNT) && defined (emacs)
5901 if (!FAIL_STACK_EMPTY ())
5902 { /* A restart point is known. Restore to that state. */
5903 DEBUG_PRINT1 ("\nFAIL:\n");
5904 POP_FAILURE_POINT (d
, p
,
5905 lowest_active_reg
, highest_active_reg
,
5906 regstart
, regend
, reg_info
);
5908 /* If this failure point is a dummy, try the next one. */
5912 /* If we failed to the end of the pattern, don't examine *p. */
5916 boolean is_a_jump_n
= false;
5918 /* If failed to a backwards jump that's part of a repetition
5919 loop, need to pop this failure point and use the next one. */
5920 switch ((re_opcode_t
) *p
)
5924 case maybe_pop_jump
:
5925 case pop_failure_jump
:
5928 EXTRACT_NUMBER_AND_INCR (mcnt
, p1
);
5931 if ((is_a_jump_n
&& (re_opcode_t
) *p1
== succeed_n
)
5933 && (re_opcode_t
) *p1
== on_failure_jump
))
5941 if (d
>= string1
&& d
<= end1
)
5945 break; /* Matching at this starting point really fails. */
5949 goto restore_best_regs
;
5953 return -1; /* Failure to match. */
5956 /* Subroutine definitions for re_match_2. */
5959 /* We are passed P pointing to a register number after a start_memory.
5961 Return true if the pattern up to the corresponding stop_memory can
5962 match the empty string, and false otherwise.
5964 If we find the matching stop_memory, sets P to point to one past its number.
5965 Otherwise, sets P to an undefined byte less than or equal to END.
5967 We don't handle duplicates properly (yet). */
5970 group_match_null_string_p (p
, end
, reg_info
)
5971 unsigned char **p
, *end
;
5972 register_info_type
*reg_info
;
5975 /* Point to after the args to the start_memory. */
5976 unsigned char *p1
= *p
+ 2;
5980 /* Skip over opcodes that can match nothing, and return true or
5981 false, as appropriate, when we get to one that can't, or to the
5982 matching stop_memory. */
5984 switch ((re_opcode_t
) *p1
)
5986 /* Could be either a loop or a series of alternatives. */
5987 case on_failure_jump
:
5989 EXTRACT_NUMBER_AND_INCR (mcnt
, p1
);
5991 /* If the next operation is not a jump backwards in the
5996 /* Go through the on_failure_jumps of the alternatives,
5997 seeing if any of the alternatives cannot match nothing.
5998 The last alternative starts with only a jump,
5999 whereas the rest start with on_failure_jump and end
6000 with a jump, e.g., here is the pattern for `a|b|c':
6002 /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
6003 /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
6006 So, we have to first go through the first (n-1)
6007 alternatives and then deal with the last one separately. */
6010 /* Deal with the first (n-1) alternatives, which start
6011 with an on_failure_jump (see above) that jumps to right
6012 past a jump_past_alt. */
6014 while ((re_opcode_t
) p1
[mcnt
-3] == jump_past_alt
)
6016 /* `mcnt' holds how many bytes long the alternative
6017 is, including the ending `jump_past_alt' and
6020 if (!alt_match_null_string_p (p1
, p1
+ mcnt
- 3,
6024 /* Move to right after this alternative, including the
6028 /* Break if it's the beginning of an n-th alternative
6029 that doesn't begin with an on_failure_jump. */
6030 if ((re_opcode_t
) *p1
!= on_failure_jump
)
6033 /* Still have to check that it's not an n-th
6034 alternative that starts with an on_failure_jump. */
6036 EXTRACT_NUMBER_AND_INCR (mcnt
, p1
);
6037 if ((re_opcode_t
) p1
[mcnt
-3] != jump_past_alt
)
6039 /* Get to the beginning of the n-th alternative. */
6045 /* Deal with the last alternative: go back and get number
6046 of the `jump_past_alt' just before it. `mcnt' contains
6047 the length of the alternative. */
6048 EXTRACT_NUMBER (mcnt
, p1
- 2);
6050 if (!alt_match_null_string_p (p1
, p1
+ mcnt
, reg_info
))
6053 p1
+= mcnt
; /* Get past the n-th alternative. */
6059 assert (p1
[1] == **p
);
6065 if (!common_op_match_null_string_p (&p1
, end
, reg_info
))
6068 } /* while p1 < end */
6071 } /* group_match_null_string_p */
6074 /* Similar to group_match_null_string_p, but doesn't deal with alternatives:
6075 It expects P to be the first byte of a single alternative and END one
6076 byte past the last. The alternative can contain groups. */
6079 alt_match_null_string_p (p
, end
, reg_info
)
6080 unsigned char *p
, *end
;
6081 register_info_type
*reg_info
;
6084 unsigned char *p1
= p
;
6088 /* Skip over opcodes that can match nothing, and break when we get
6089 to one that can't. */
6091 switch ((re_opcode_t
) *p1
)
6094 case on_failure_jump
:
6096 EXTRACT_NUMBER_AND_INCR (mcnt
, p1
);
6101 if (!common_op_match_null_string_p (&p1
, end
, reg_info
))
6104 } /* while p1 < end */
6107 } /* alt_match_null_string_p */
6110 /* Deals with the ops common to group_match_null_string_p and
6111 alt_match_null_string_p.
6113 Sets P to one after the op and its arguments, if any. */
6116 common_op_match_null_string_p (p
, end
, reg_info
)
6117 unsigned char **p
, *end
;
6118 register_info_type
*reg_info
;
6123 unsigned char *p1
= *p
;
6125 switch ((re_opcode_t
) *p1
++)
6145 assert (reg_no
> 0 && reg_no
<= MAX_REGNUM
);
6146 ret
= group_match_null_string_p (&p1
, end
, reg_info
);
6148 /* Have to set this here in case we're checking a group which
6149 contains a group and a back reference to it. */
6151 if (REG_MATCH_NULL_STRING_P (reg_info
[reg_no
]) == MATCH_NULL_UNSET_VALUE
)
6152 REG_MATCH_NULL_STRING_P (reg_info
[reg_no
]) = ret
;
6158 /* If this is an optimized succeed_n for zero times, make the jump. */
6160 EXTRACT_NUMBER_AND_INCR (mcnt
, p1
);
6168 /* Get to the number of times to succeed. */
6170 EXTRACT_NUMBER_AND_INCR (mcnt
, p1
);
6175 EXTRACT_NUMBER_AND_INCR (mcnt
, p1
);
6183 if (!REG_MATCH_NULL_STRING_P (reg_info
[*p1
]))
6191 /* All other opcodes mean we cannot match the empty string. */
6197 } /* common_op_match_null_string_p */
6200 /* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN
6201 bytes; nonzero otherwise. */
6204 bcmp_translate (s1
, s2
, len
, translate
)
6205 unsigned char *s1
, *s2
;
6207 RE_TRANSLATE_TYPE translate
;
6209 register unsigned char *p1
= s1
, *p2
= s2
;
6210 unsigned char *p1_end
= s1
+ len
;
6211 unsigned char *p2_end
= s2
+ len
;
6213 while (p1
!= p1_end
&& p2
!= p2_end
)
6215 int p1_charlen
, p2_charlen
;
6218 p1_ch
= STRING_CHAR_AND_LENGTH (p1
, p1_end
- p1
, p1_charlen
);
6219 p2_ch
= STRING_CHAR_AND_LENGTH (p2
, p2_end
- p2
, p2_charlen
);
6221 if (RE_TRANSLATE (translate
, p1_ch
)
6222 != RE_TRANSLATE (translate
, p2_ch
))
6225 p1
+= p1_charlen
, p2
+= p2_charlen
;
6228 if (p1
!= p1_end
|| p2
!= p2_end
)
6234 /* Entry points for GNU code. */
6236 /* re_compile_pattern is the GNU regular expression compiler: it
6237 compiles PATTERN (of length SIZE) and puts the result in BUFP.
6238 Returns 0 if the pattern was valid, otherwise an error string.
6240 Assumes the `allocated' (and perhaps `buffer') and `translate' fields
6241 are set in BUFP on entry.
6243 We call regex_compile to do the actual compilation. */
6246 re_compile_pattern (pattern
, length
, bufp
)
6247 const char *pattern
;
6249 struct re_pattern_buffer
*bufp
;
6253 /* GNU code is written to assume at least RE_NREGS registers will be set
6254 (and at least one extra will be -1). */
6255 bufp
->regs_allocated
= REGS_UNALLOCATED
;
6257 /* And GNU code determines whether or not to get register information
6258 by passing null for the REGS argument to re_match, etc., not by
6262 /* Match anchors at newline. */
6263 bufp
->newline_anchor
= 1;
6265 ret
= regex_compile (pattern
, length
, re_syntax_options
, bufp
);
6269 return gettext (re_error_msgid
[(int) ret
]);
6272 /* Entry points compatible with 4.2 BSD regex library. We don't define
6273 them unless specifically requested. */
6275 #if defined (_REGEX_RE_COMP) || defined (_LIBC)
6277 /* BSD has one and only one pattern buffer. */
6278 static struct re_pattern_buffer re_comp_buf
;
6282 /* Make these definitions weak in libc, so POSIX programs can redefine
6283 these names if they don't use our functions, and still use
6284 regcomp/regexec below without link errors. */
6294 if (!re_comp_buf
.buffer
)
6295 return gettext ("No previous regular expression");
6299 if (!re_comp_buf
.buffer
)
6301 re_comp_buf
.buffer
= (unsigned char *) malloc (200);
6302 if (re_comp_buf
.buffer
== NULL
)
6303 return gettext (re_error_msgid
[(int) REG_ESPACE
]);
6304 re_comp_buf
.allocated
= 200;
6306 re_comp_buf
.fastmap
= (char *) malloc (1 << BYTEWIDTH
);
6307 if (re_comp_buf
.fastmap
== NULL
)
6308 return gettext (re_error_msgid
[(int) REG_ESPACE
]);
6311 /* Since `re_exec' always passes NULL for the `regs' argument, we
6312 don't need to initialize the pattern buffer fields which affect it. */
6314 /* Match anchors at newlines. */
6315 re_comp_buf
.newline_anchor
= 1;
6317 ret
= regex_compile (s
, strlen (s
), re_syntax_options
, &re_comp_buf
);
6322 /* Yes, we're discarding `const' here if !HAVE_LIBINTL. */
6323 return (char *) gettext (re_error_msgid
[(int) ret
]);
6334 const int len
= strlen (s
);
6336 0 <= re_search (&re_comp_buf
, s
, len
, 0, len
, (struct re_registers
*) 0);
6338 #endif /* _REGEX_RE_COMP */
6340 /* POSIX.2 functions. Don't define these for Emacs. */
6344 /* regcomp takes a regular expression as a string and compiles it.
6346 PREG is a regex_t *. We do not expect any fields to be initialized,
6347 since POSIX says we shouldn't. Thus, we set
6349 `buffer' to the compiled pattern;
6350 `used' to the length of the compiled pattern;
6351 `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
6352 REG_EXTENDED bit in CFLAGS is set; otherwise, to
6353 RE_SYNTAX_POSIX_BASIC;
6354 `newline_anchor' to REG_NEWLINE being set in CFLAGS;
6355 `fastmap' and `fastmap_accurate' to zero;
6356 `re_nsub' to the number of subexpressions in PATTERN.
6358 PATTERN is the address of the pattern string.
6360 CFLAGS is a series of bits which affect compilation.
6362 If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
6363 use POSIX basic syntax.
6365 If REG_NEWLINE is set, then . and [^...] don't match newline.
6366 Also, regexec will try a match beginning after every newline.
6368 If REG_ICASE is set, then we considers upper- and lowercase
6369 versions of letters to be equivalent when matching.
6371 If REG_NOSUB is set, then when PREG is passed to regexec, that
6372 routine will report only success or failure, and nothing about the
6375 It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for
6376 the return codes and their meanings.) */
6379 regcomp (preg
, pattern
, cflags
)
6381 const char *pattern
;
6386 = (cflags
& REG_EXTENDED
) ?
6387 RE_SYNTAX_POSIX_EXTENDED
: RE_SYNTAX_POSIX_BASIC
;
6389 /* regex_compile will allocate the space for the compiled pattern. */
6391 preg
->allocated
= 0;
6394 /* Don't bother to use a fastmap when searching. This simplifies the
6395 REG_NEWLINE case: if we used a fastmap, we'd have to put all the
6396 characters after newlines into the fastmap. This way, we just try
6400 if (cflags
& REG_ICASE
)
6405 = (RE_TRANSLATE_TYPE
) malloc (CHAR_SET_SIZE
6406 * sizeof (*(RE_TRANSLATE_TYPE
)0));
6407 if (preg
->translate
== NULL
)
6408 return (int) REG_ESPACE
;
6410 /* Map uppercase characters to corresponding lowercase ones. */
6411 for (i
= 0; i
< CHAR_SET_SIZE
; i
++)
6412 preg
->translate
[i
] = ISUPPER (i
) ? tolower (i
) : i
;
6415 preg
->translate
= NULL
;
6417 /* If REG_NEWLINE is set, newlines are treated differently. */
6418 if (cflags
& REG_NEWLINE
)
6419 { /* REG_NEWLINE implies neither . nor [^...] match newline. */
6420 syntax
&= ~RE_DOT_NEWLINE
;
6421 syntax
|= RE_HAT_LISTS_NOT_NEWLINE
;
6422 /* It also changes the matching behavior. */
6423 preg
->newline_anchor
= 1;
6426 preg
->newline_anchor
= 0;
6428 preg
->no_sub
= !!(cflags
& REG_NOSUB
);
6430 /* POSIX says a null character in the pattern terminates it, so we
6431 can use strlen here in compiling the pattern. */
6432 ret
= regex_compile (pattern
, strlen (pattern
), syntax
, preg
);
6434 /* POSIX doesn't distinguish between an unmatched open-group and an
6435 unmatched close-group: both are REG_EPAREN. */
6436 if (ret
== REG_ERPAREN
) ret
= REG_EPAREN
;
6442 /* regexec searches for a given pattern, specified by PREG, in the
6445 If NMATCH is zero or REG_NOSUB was set in the cflags argument to
6446 `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at
6447 least NMATCH elements, and we set them to the offsets of the
6448 corresponding matched substrings.
6450 EFLAGS specifies `execution flags' which affect matching: if
6451 REG_NOTBOL is set, then ^ does not match at the beginning of the
6452 string; if REG_NOTEOL is set, then $ does not match at the end.
6454 We return 0 if we find a match and REG_NOMATCH if not. */
6457 regexec (preg
, string
, nmatch
, pmatch
, eflags
)
6458 const regex_t
*preg
;
6461 regmatch_t pmatch
[];
6465 struct re_registers regs
;
6466 regex_t private_preg
;
6467 int len
= strlen (string
);
6468 boolean want_reg_info
= !preg
->no_sub
&& nmatch
> 0;
6470 private_preg
= *preg
;
6472 private_preg
.not_bol
= !!(eflags
& REG_NOTBOL
);
6473 private_preg
.not_eol
= !!(eflags
& REG_NOTEOL
);
6475 /* The user has told us exactly how many registers to return
6476 information about, via `nmatch'. We have to pass that on to the
6477 matching routines. */
6478 private_preg
.regs_allocated
= REGS_FIXED
;
6482 regs
.num_regs
= nmatch
;
6483 regs
.start
= TALLOC (nmatch
, regoff_t
);
6484 regs
.end
= TALLOC (nmatch
, regoff_t
);
6485 if (regs
.start
== NULL
|| regs
.end
== NULL
)
6486 return (int) REG_NOMATCH
;
6489 /* Perform the searching operation. */
6490 ret
= re_search (&private_preg
, string
, len
,
6491 /* start: */ 0, /* range: */ len
,
6492 want_reg_info
? ®s
: (struct re_registers
*) 0);
6494 /* Copy the register information to the POSIX structure. */
6501 for (r
= 0; r
< nmatch
; r
++)
6503 pmatch
[r
].rm_so
= regs
.start
[r
];
6504 pmatch
[r
].rm_eo
= regs
.end
[r
];
6508 /* If we needed the temporary register info, free the space now. */
6513 /* We want zero return to mean success, unlike `re_search'. */
6514 return ret
>= 0 ? (int) REG_NOERROR
: (int) REG_NOMATCH
;
6518 /* Returns a message corresponding to an error code, ERRCODE, returned
6519 from either regcomp or regexec. We don't use PREG here. */
6522 regerror (errcode
, preg
, errbuf
, errbuf_size
)
6524 const regex_t
*preg
;
6532 || errcode
>= (sizeof (re_error_msgid
) / sizeof (re_error_msgid
[0])))
6533 /* Only error codes returned by the rest of the code should be passed
6534 to this routine. If we are given anything else, or if other regex
6535 code generates an invalid error code, then the program has a bug.
6536 Dump core so we can fix it. */
6539 msg
= gettext (re_error_msgid
[errcode
]);
6541 msg_size
= strlen (msg
) + 1; /* Includes the null. */
6543 if (errbuf_size
!= 0)
6545 if (msg_size
> errbuf_size
)
6547 strncpy (errbuf
, msg
, errbuf_size
- 1);
6548 errbuf
[errbuf_size
- 1] = 0;
6551 strcpy (errbuf
, msg
);
6558 /* Free dynamically allocated space used by PREG. */
6564 if (preg
->buffer
!= NULL
)
6565 free (preg
->buffer
);
6566 preg
->buffer
= NULL
;
6568 preg
->allocated
= 0;
6571 if (preg
->fastmap
!= NULL
)
6572 free (preg
->fastmap
);
6573 preg
->fastmap
= NULL
;
6574 preg
->fastmap_accurate
= 0;
6576 if (preg
->translate
!= NULL
)
6577 free (preg
->translate
);
6578 preg
->translate
= NULL
;
6581 #endif /* not emacs */