1 /* String search routines for GNU Emacs.
2 Copyright (C) 1985, 1986, 1987, 1993, 1994 Free Software Foundation, Inc.
4 This file is part of GNU Emacs.
6 GNU Emacs is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option) any later version.
11 GNU Emacs is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GNU Emacs; see the file COPYING. If not, write to
18 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
28 #include "region-cache.h"
30 #include "blockinput.h"
31 #include "intervals.h"
33 #include <sys/types.h>
36 #define REGEXP_CACHE_SIZE 20
38 /* If the regexp is non-nil, then the buffer contains the compiled form
39 of that regexp, suitable for searching. */
42 struct regexp_cache
*next
;
44 struct re_pattern_buffer buf
;
46 /* Nonzero means regexp was compiled to do full POSIX backtracking. */
50 /* The instances of that struct. */
51 struct regexp_cache searchbufs
[REGEXP_CACHE_SIZE
];
53 /* The head of the linked list; points to the most recently used buffer. */
54 struct regexp_cache
*searchbuf_head
;
57 /* Every call to re_match, etc., must pass &search_regs as the regs
58 argument unless you can show it is unnecessary (i.e., if re_match
59 is certainly going to be called again before region-around-match can be called).
62 Since the registers are now dynamically allocated, we need to make
63 sure not to refer to the Nth register before checking that it has
64 been allocated by checking search_regs.num_regs.
66 The regex code keeps track of whether it has allocated the search
67 buffer using bits in the re_pattern_buffer. This means that whenever
68 you compile a new pattern, it completely forgets whether it has
69 allocated any registers, and will allocate new registers the next
70 time you call a searching or matching function. Therefore, we need
71 to call re_set_registers after compiling a new pattern or after
72 setting the match registers, so that the regex functions will be
73 able to free or re-allocate it properly. */
74 static struct re_registers search_regs
;
76 /* The buffer in which the last search was performed, or
77 Qt if the last search was done in a string;
78 Qnil if no searching has been done yet. */
79 static Lisp_Object last_thing_searched
;
81 /* error condition signaled when regexp compile_pattern fails */
83 Lisp_Object Qinvalid_regexp
;
85 static void set_search_regs ();
86 static void save_search_regs ();
88 static int search_buffer ();
93 error ("Stack overflow in regexp matcher");
102 /* Compile a regexp and signal a Lisp error if anything goes wrong.
103 PATTERN is the pattern to compile.
104 CP is the place to put the result.
105 TRANSLATE is a translation table for ignoring case, or NULL for none.
106 REGP is the structure that says where to store the "register"
107 values that will result from matching this pattern.
108 If it is 0, we should compile the pattern not to record any
109 subexpression bounds.
110 POSIX is nonzero if we want full backtracking (POSIX style)
111 for this pattern. 0 means backtrack only enough to get a valid match.
112 MULTIBYTE is nonzero if we want to handle multibyte characters in
113 PATTERN. 0 means all multibyte characters are recognized just as
114 sequences of binary data. */
117 compile_pattern_1 (cp
, pattern
, translate
, regp
, posix
, multibyte
)
118 struct regexp_cache
*cp
;
120 Lisp_Object
*translate
;
121 struct re_registers
*regp
;
129 cp
->buf
.translate
= translate
;
131 cp
->buf
.multibyte
= multibyte
;
133 old
= re_set_syntax (RE_SYNTAX_EMACS
134 | (posix
? 0 : RE_NO_POSIX_BACKTRACKING
));
135 val
= (char *) re_compile_pattern ((char *) XSTRING (pattern
)->data
,
136 XSTRING (pattern
)->size
, &cp
->buf
);
140 Fsignal (Qinvalid_regexp
, Fcons (build_string (val
), Qnil
));
142 cp
->regexp
= Fcopy_sequence (pattern
);
145 /* Compile a regexp if necessary, but first check to see if there's one in the cache.
147 PATTERN is the pattern to compile.
148 TRANSLATE is a translation table for ignoring case, or NULL for none.
149 REGP is the structure that says where to store the "register"
150 values that will result from matching this pattern.
151 If it is 0, we should compile the pattern not to record any
152 subexpression bounds.
153 POSIX is nonzero if we want full backtracking (POSIX style)
154 for this pattern. 0 means backtrack only enough to get a valid match. */
156 struct re_pattern_buffer
*
157 compile_pattern (pattern
, regp
, translate
, posix
)
159 struct re_registers
*regp
;
160 Lisp_Object
*translate
;
163 struct regexp_cache
*cp
, **cpp
;
164 /* Should we check it here, or add an argument `multibyte' to this function? */
166 int multibyte
= !NILP (current_buffer
->enable_multibyte_characters
);
168 for (cpp
= &searchbuf_head
; ; cpp
= &cp
->next
)
171 if (XSTRING (cp
->regexp
)->size
== XSTRING (pattern
)->size
172 && !NILP (Fstring_equal (cp
->regexp
, pattern
))
173 && cp
->buf
.translate
== translate
174 && cp
->posix
== posix
175 && cp
->buf
.multibyte
== multibyte
)
178 /* If we're at the end of the cache, compile into the last cell. */
181 compile_pattern_1 (cp
, pattern
, translate
, regp
, posix
, multibyte
);
186 /* When we get here, cp (aka *cpp) contains the compiled pattern,
187 either because we found it in the cache or because we just compiled it.
188 Move it to the front of the queue to mark it as most recently used. */
190 cp
->next
= searchbuf_head
;
193 /* Advise the searching functions about the space we have allocated
194 for register data. */
196 re_set_registers (&cp
->buf
, regp
, regp
->num_regs
, regp
->start
, regp
->end
);
201 /* Error condition used for failing searches */
202 Lisp_Object Qsearch_failed
;
208 Fsignal (Qsearch_failed
, Fcons (arg
, Qnil
));
213 looking_at_1 (string
, posix
)
218 unsigned char *p1
, *p2
;
221 struct re_pattern_buffer
*bufp
;
223 if (running_asynch_code
)
226 CHECK_STRING (string
, 0);
227 bufp
= compile_pattern (string
, &search_regs
,
228 (!NILP (current_buffer
->case_fold_search
)
229 ? XCHAR_TABLE (DOWNCASE_TABLE
)->contents
: 0),
233 QUIT
; /* Do a pending quit right away, to avoid paradoxical behavior */
235 /* Get pointers and sizes of the two strings
236 that make up the visible portion of the buffer. */
254 re_match_object
= Qnil
;
256 i
= re_match_2 (bufp
, (char *) p1
, s1
, (char *) p2
, s2
,
257 PT
- BEGV
, &search_regs
,
262 val
= (0 <= i
? Qt
: Qnil
);
263 for (i
= 0; i
< search_regs
.num_regs
; i
++)
264 if (search_regs
.start
[i
] >= 0)
266 search_regs
.start
[i
] += BEGV
;
267 search_regs
.end
[i
] += BEGV
;
269 XSETBUFFER (last_thing_searched
, current_buffer
);
274 DEFUN ("looking-at", Flooking_at
, Slooking_at
, 1, 1, 0,
275 "Return t if text after point matches regular expression REGEXP.\n\
276 This function modifies the match data that `match-beginning',\n\
277 `match-end' and `match-data' access; save and restore the match\n\
278 data if you want to preserve them.")
282 return looking_at_1 (regexp
, 0);
285 DEFUN ("posix-looking-at", Fposix_looking_at
, Sposix_looking_at
, 1, 1, 0,
286 "Return t if text after point matches regular expression REGEXP.\n\
287 Find the longest match, in accord with Posix regular expression rules.\n\
288 This function modifies the match data that `match-beginning',\n\
289 `match-end' and `match-data' access; save and restore the match\n\
290 data if you want to preserve them.")
294 return looking_at_1 (regexp
, 1);
298 string_match_1 (regexp
, string
, start
, posix
)
299 Lisp_Object regexp
, string
, start
;
304 struct re_pattern_buffer
*bufp
;
306 if (running_asynch_code
)
309 CHECK_STRING (regexp
, 0);
310 CHECK_STRING (string
, 1);
316 int len
= XSTRING (string
)->size
;
318 CHECK_NUMBER (start
, 2);
320 if (s
< 0 && -s
<= len
)
322 else if (0 > s
|| s
> len
)
323 args_out_of_range (string
, start
);
326 bufp
= compile_pattern (regexp
, &search_regs
,
327 (!NILP (current_buffer
->case_fold_search
)
328 ? XCHAR_TABLE (DOWNCASE_TABLE
)->contents
: 0),
331 re_match_object
= string
;
333 val
= re_search (bufp
, (char *) XSTRING (string
)->data
,
334 XSTRING (string
)->size
, s
, XSTRING (string
)->size
- s
,
337 last_thing_searched
= Qt
;
340 if (val
< 0) return Qnil
;
341 return make_number (val
);
344 DEFUN ("string-match", Fstring_match
, Sstring_match
, 2, 3, 0,
345 "Return index of start of first match for REGEXP in STRING, or nil.\n\
346 If third arg START is non-nil, start search at that index in STRING.\n\
347 For index of first char beyond the match, do (match-end 0).\n\
348 `match-end' and `match-beginning' also give indices of substrings\n\
349 matched by parenthesis constructs in the pattern.")
350 (regexp
, string
, start
)
351 Lisp_Object regexp
, string
, start
;
353 return string_match_1 (regexp
, string
, start
, 0);
356 DEFUN ("posix-string-match", Fposix_string_match
, Sposix_string_match
, 2, 3, 0,
357 "Return index of start of first match for REGEXP in STRING, or nil.\n\
358 Find the longest match, in accord with Posix regular expression rules.\n\
359 If third arg START is non-nil, start search at that index in STRING.\n\
360 For index of first char beyond the match, do (match-end 0).\n\
361 `match-end' and `match-beginning' also give indices of substrings\n\
362 matched by parenthesis constructs in the pattern.")
363 (regexp
, string
, start
)
364 Lisp_Object regexp
, string
, start
;
366 return string_match_1 (regexp
, string
, start
, 1);
369 /* Match REGEXP against STRING, searching all of STRING,
370 and return the index of the match, or negative on failure.
371 This does not clobber the match data. */
374 fast_string_match (regexp
, string
)
375 Lisp_Object regexp
, string
;
378 struct re_pattern_buffer
*bufp
;
380 bufp
= compile_pattern (regexp
, 0, 0, 0);
382 re_match_object
= string
;
384 val
= re_search (bufp
, (char *) XSTRING (string
)->data
,
385 XSTRING (string
)->size
, 0, XSTRING (string
)->size
,
391 /* Match REGEXP against STRING, searching all of STRING ignoring case,
392 and return the index of the match, or negative on failure.
393 This does not clobber the match data. */
395 extern Lisp_Object Vascii_downcase_table
;
398 fast_c_string_match_ignore_case (regexp
, string
)
403 struct re_pattern_buffer
*bufp
;
404 int len
= strlen (string
);
406 re_match_object
= Qt
;
407 bufp
= compile_pattern (regexp
, 0,
408 XCHAR_TABLE (Vascii_downcase_table
)->contents
, 0);
410 val
= re_search (bufp
, string
, len
, 0, len
, 0);
421 return ((a
> b
) ? a
: b
);
428 return ((a
< b
) ? a
: b
);
432 /* The newline cache: remembering which sections of text have no newlines. */
434 /* If the user has requested newline caching, make sure it's on.
435 Otherwise, make sure it's off.
436 This is our cheesy way of associating an action with the change of
437 state of a buffer-local variable. */
439 newline_cache_on_off (buf
)
442 if (NILP (buf
->cache_long_line_scans
))
444 /* It should be off. */
445 if (buf
->newline_cache
)
447 free_region_cache (buf
->newline_cache
);
448 buf
->newline_cache
= 0;
453 /* It should be on. */
454 if (buf
->newline_cache
== 0)
455 buf
->newline_cache
= new_region_cache ();
460 /* Search for COUNT instances of the character TARGET between START and END.
462 If COUNT is positive, search forwards; END must be >= START.
463 If COUNT is negative, search backwards for the -COUNTth instance;
464 END must be <= START.
465 If COUNT is zero, do anything you please; run rogue, for all I care.
467 If END is zero, use BEGV or ZV instead, as appropriate for the
468 direction indicated by COUNT.
470 If we find COUNT instances, set *SHORTAGE to zero, and return the
471 position after the COUNTth match. Note that for reverse motion
472 this is not the same as the usual convention for Emacs motion commands.
474 If we don't find COUNT instances before reaching END, set *SHORTAGE
475 to the number of TARGETs left unfound, and return END.
477 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
478 except when inside redisplay. */
480 scan_buffer (target
, start
, end
, count
, shortage
, allow_quit
)
487 struct region_cache
*newline_cache
;
498 if (! end
) end
= BEGV
;
501 newline_cache_on_off (current_buffer
);
502 newline_cache
= current_buffer
->newline_cache
;
507 immediate_quit
= allow_quit
;
512 /* Our innermost scanning loop is very simple; it doesn't know
513 about gaps, buffer ends, or the newline cache. ceiling is
514 the position of the last character before the next such
515 obstacle --- the last character the dumb search loop should examine. */
517 register int ceiling
= end
- 1;
519 /* If we're looking for a newline, consult the newline cache
520 to see where we can avoid some scanning. */
521 if (target
== '\n' && newline_cache
)
525 while (region_cache_forward
526 (current_buffer
, newline_cache
, start
, &next_change
))
528 immediate_quit
= allow_quit
;
530 /* start should never be after end. */
534 /* Now the text after start is an unknown region, and
535 next_change is the position of the next known region. */
536 ceiling
= min (next_change
- 1, ceiling
);
539 /* The dumb loop can only scan text stored in contiguous
540 bytes. BUFFER_CEILING_OF returns the last character
541 position that is contiguous, so the ceiling is the
542 position after that. */
543 ceiling
= min (BUFFER_CEILING_OF (start
), ceiling
);
546 /* The termination address of the dumb loop. */
547 register unsigned char *ceiling_addr
= POS_ADDR (ceiling
) + 1;
548 register unsigned char *cursor
= POS_ADDR (start
);
549 unsigned char *base
= cursor
;
551 while (cursor
< ceiling_addr
)
553 unsigned char *scan_start
= cursor
;
556 while (*cursor
!= target
&& ++cursor
< ceiling_addr
)
559 /* If we're looking for newlines, cache the fact that
560 the region from start to cursor is free of them. */
561 if (target
== '\n' && newline_cache
)
562 know_region_cache (current_buffer
, newline_cache
,
563 start
+ scan_start
- base
,
564 start
+ cursor
- base
);
566 /* Did we find the target character? */
567 if (cursor
< ceiling_addr
)
572 return (start
+ cursor
- base
+ 1);
578 start
+= cursor
- base
;
584 /* The last character to check before the next obstacle. */
585 register int ceiling
= end
;
587 /* Consult the newline cache, if appropriate. */
588 if (target
== '\n' && newline_cache
)
592 while (region_cache_backward
593 (current_buffer
, newline_cache
, start
, &next_change
))
595 immediate_quit
= allow_quit
;
597 /* Start should never be at or before end. */
601 /* Now the text before start is an unknown region, and
602 next_change is the position of the next known region. */
603 ceiling
= max (next_change
, ceiling
);
606 /* Stop scanning before the gap. */
607 ceiling
= max (BUFFER_FLOOR_OF (start
- 1), ceiling
);
610 /* The termination address of the dumb loop. */
611 register unsigned char *ceiling_addr
= POS_ADDR (ceiling
);
612 register unsigned char *cursor
= POS_ADDR (start
- 1);
613 unsigned char *base
= cursor
;
615 while (cursor
>= ceiling_addr
)
617 unsigned char *scan_start
= cursor
;
619 while (*cursor
!= target
&& --cursor
>= ceiling_addr
)
622 /* If we're looking for newlines, cache the fact that
623 the region from after the cursor to start is free of them. */
624 if (target
== '\n' && newline_cache
)
625 know_region_cache (current_buffer
, newline_cache
,
626 start
+ cursor
- base
,
627 start
+ scan_start
- base
);
629 /* Did we find the target character? */
630 if (cursor
>= ceiling_addr
)
635 return (start
+ cursor
- base
);
641 start
+= cursor
- base
;
647 *shortage
= count
* direction
;
652 find_next_newline_no_quit (from
, cnt
)
653 register int from
, cnt
;
655 return scan_buffer ('\n', from
, 0, cnt
, (int *) 0, 0);
659 find_next_newline (from
, cnt
)
660 register int from
, cnt
;
662 return scan_buffer ('\n', from
, 0, cnt
, (int *) 0, 1);
666 /* Like find_next_newline, but returns position before the newline,
667 not after, and only searches up to TO. This isn't just
668 find_next_newline (...)-1, because you might hit TO. */
670 find_before_next_newline (from
, to
, cnt
)
674 int pos
= scan_buffer ('\n', from
, to
, cnt
, &shortage
, 1);
682 /* Subroutines of Lisp buffer search functions. */
685 search_command (string
, bound
, noerror
, count
, direction
, RE
, posix
)
686 Lisp_Object string
, bound
, noerror
, count
;
697 CHECK_NUMBER (count
, 3);
701 CHECK_STRING (string
, 0);
703 lim
= n
> 0 ? ZV
: BEGV
;
706 CHECK_NUMBER_COERCE_MARKER (bound
, 1);
708 if (n
> 0 ? lim
< PT
: lim
> PT
)
709 error ("Invalid search bound (wrong side of point)");
716 np
= search_buffer (string
, PT
, lim
, n
, RE
,
717 (!NILP (current_buffer
->case_fold_search
)
718 ? XCHAR_TABLE (current_buffer
->case_canon_table
)->contents
720 (!NILP (current_buffer
->case_fold_search
)
721 ? XCHAR_TABLE (current_buffer
->case_eqv_table
)->contents
727 return signal_failure (string
);
728 if (!EQ (noerror
, Qt
))
730 if (lim
< BEGV
|| lim
> ZV
)
734 #if 0 /* This would be clean, but maybe programs depend on
735 a value of nil here. */
743 if (np
< BEGV
|| np
> ZV
)
748 return make_number (np
);
752 trivial_regexp_p (regexp
)
755 int len
= XSTRING (regexp
)->size
;
756 unsigned char *s
= XSTRING (regexp
)->data
;
762 case '.': case '*': case '+': case '?': case '[': case '^': case '$':
769 case '|': case '(': case ')': case '`': case '\'': case 'b':
770 case 'B': case '<': case '>': case 'w': case 'W': case 's':
772 case 'c': case 'C': /* for categoryspec and notcategoryspec */
773 case '1': case '2': case '3': case '4': case '5':
774 case '6': case '7': case '8': case '9':
782 /* Search for the n'th occurrence of STRING in the current buffer,
783 starting at position POS and stopping at position LIM,
784 treating STRING as a literal string if RE is false or as
785 a regular expression if RE is true.
787 If N is positive, searching is forward and LIM must be greater than POS.
788 If N is negative, searching is backward and LIM must be less than POS.
790 Returns -x if only N-x occurrences found (x > 0),
791 or else the position at the beginning of the Nth occurrence
792 (if searching backward) or the end (if searching forward).
794 POSIX is nonzero if we want full backtracking (POSIX style)
795 for this pattern. 0 means backtrack only enough to get a valid match. */
798 search_buffer (string
, pos
, lim
, n
, RE
, trt
, inverse_trt
, posix
)
805 Lisp_Object
*inverse_trt
;
808 int len
= XSTRING (string
)->size
;
809 unsigned char *base_pat
= XSTRING (string
)->data
;
810 register int *BM_tab
;
812 register int direction
= ((n
> 0) ? 1 : -1);
814 int infinity
, limit
, k
, stride_for_teases
;
815 register unsigned char *pat
, *cursor
, *p_limit
;
817 unsigned char *p1
, *p2
;
820 if (running_asynch_code
)
823 /* Null string is found at starting position. */
826 set_search_regs (pos
, 0);
830 /* Searching 0 times means don't move. */
834 if (RE
&& !trivial_regexp_p (string
))
836 struct re_pattern_buffer
*bufp
;
838 bufp
= compile_pattern (string
, &search_regs
, trt
, posix
);
840 immediate_quit
= 1; /* Quit immediately if user types ^G,
841 because letting this function finish
842 can take too long. */
843 QUIT
; /* Do a pending quit right away,
844 to avoid paradoxical behavior */
845 /* Get pointers and sizes of the two strings
846 that make up the visible portion of the buffer. */
863 re_match_object
= Qnil
;
868 val
= re_search_2 (bufp
, (char *) p1
, s1
, (char *) p2
, s2
,
869 pos
- BEGV
, lim
- pos
, &search_regs
,
870 /* Don't allow match past current point */
879 for (i
= 0; i
< search_regs
.num_regs
; i
++)
880 if (search_regs
.start
[i
] >= 0)
882 search_regs
.start
[i
] += j
;
883 search_regs
.end
[i
] += j
;
885 XSETBUFFER (last_thing_searched
, current_buffer
);
886 /* Set pos to the new position. */
887 pos
= search_regs
.start
[0];
899 val
= re_search_2 (bufp
, (char *) p1
, s1
, (char *) p2
, s2
,
900 pos
- BEGV
, lim
- pos
, &search_regs
,
909 for (i
= 0; i
< search_regs
.num_regs
; i
++)
910 if (search_regs
.start
[i
] >= 0)
912 search_regs
.start
[i
] += j
;
913 search_regs
.end
[i
] += j
;
915 XSETBUFFER (last_thing_searched
, current_buffer
);
916 pos
= search_regs
.end
[0];
928 else /* non-RE case */
931 int BM_tab_space
[0400];
932 BM_tab
= &BM_tab_space
[0];
934 BM_tab
= (int *) alloca (0400 * sizeof (int));
937 unsigned char *patbuf
= (unsigned char *) alloca (len
);
941 /* If we got here and the RE flag is set, it's because we're
942 dealing with a regexp known to be trivial, so the backslash
943 just quotes the next character. */
944 if (RE
&& *base_pat
== '\\')
949 *pat
++ = (trt
? XINT (trt
[*base_pat
++]) : *base_pat
++);
952 pat
= base_pat
= patbuf
;
954 /* The general approach is that we are going to maintain that we know */
955 /* the first (closest to the present position, in whatever direction */
956 /* we're searching) character that could possibly be the last */
957 /* (furthest from present position) character of a valid match. We */
958 /* advance the state of our knowledge by looking at that character */
959 /* and seeing whether it indeed matches the last character of the */
960 /* pattern. If it does, we take a closer look. If it does not, we */
961 /* move our pointer (to putative last characters) as far as is */
962 /* logically possible. This amount of movement, which I call a */
963 /* stride, will be the length of the pattern if the actual character */
964 /* appears nowhere in the pattern, otherwise it will be the distance */
965 /* from the last occurrence of that character to the end of the pattern. */
967 /* As a coding trick, an enormous stride is coded into the table for */
968 /* characters that match the last character. This allows use of only */
969 /* a single test, a test for having gone past the end of the */
970 /* permissible match region, to test for both possible matches (when */
971 /* the stride goes past the end immediately) and failure to */
972 /* match (where you get nudged past the end one stride at a time). */
974 /* Here we make a "mickey mouse" BM table. The stride of the search */
975 /* is determined only by the last character of the putative match. */
976 /* If that character does not match, we will stride the proper */
977 /* distance to propose a match that superimposes it on the last */
978 /* instance of a character that matches it (per trt), or misses */
979 /* it entirely if there is none. */
981 dirlen
= len
* direction
;
982 infinity
= dirlen
- (lim
+ pos
+ len
+ len
) * direction
;
984 pat
= (base_pat
+= len
- 1);
985 BM_tab_base
= BM_tab
;
987 j
= dirlen
; /* to get it in a register */
988 /* A character that does not appear in the pattern induces a */
989 /* stride equal to the pattern length. */
990 while (BM_tab_base
!= BM_tab
)
998 while (i
!= infinity
)
1000 j
= pat
[i
]; i
+= direction
;
1001 if (i
== dirlen
) i
= infinity
;
1004 k
= (j
= XINT (trt
[j
]));
1006 stride_for_teases
= BM_tab
[j
];
1007 BM_tab
[j
] = dirlen
- i
;
1008 /* A translation table is accompanied by its inverse -- see */
1009 /* comment following downcase_table for details */
1010 while ((j
= (unsigned char) XINT (inverse_trt
[j
])) != k
)
1011 BM_tab
[j
] = dirlen
- i
;
1016 stride_for_teases
= BM_tab
[j
];
1017 BM_tab
[j
] = dirlen
- i
;
1019 /* stride_for_teases tells how much to stride if we get a */
1020 /* match on the far character but are subsequently */
1021 /* disappointed, by recording what the stride would have been */
1022 /* for that character if the last character had been different. */
1025 infinity
= dirlen
- infinity
;
1026 pos
+= dirlen
- ((direction
> 0) ? direction
: 0);
1027 /* loop invariant - pos points at where last char (first char if reverse)
1028 of pattern would align in a possible match. */
1031 /* It's been reported that some (broken) compiler thinks that
1032 Boolean expressions in an arithmetic context are unsigned.
1033 Using an explicit ?1:0 prevents this. */
1034 if ((lim
- pos
- ((direction
> 0) ? 1 : 0)) * direction
< 0)
1035 return (n
* (0 - direction
));
1036 /* First we do the part we can by pointers (maybe nothing) */
1039 limit
= pos
- dirlen
+ direction
;
1040 limit
= ((direction
> 0)
1041 ? BUFFER_CEILING_OF (limit
)
1042 : BUFFER_FLOOR_OF (limit
));
1043 /* LIMIT is now the last (not beyond-last!) value
1044 POS can take on without hitting edge of buffer or the gap. */
1045 limit
= ((direction
> 0)
1046 ? min (lim
- 1, min (limit
, pos
+ 20000))
1047 : max (lim
, max (limit
, pos
- 20000)));
1048 if ((limit
- pos
) * direction
> 20)
1050 p_limit
= POS_ADDR (limit
);
1051 p2
= (cursor
= POS_ADDR (pos
));
1052 /* In this loop, pos + cursor - p2 is the surrogate for pos */
1053 while (1) /* use one cursor setting as long as i can */
1055 if (direction
> 0) /* worth duplicating */
1057 /* Use signed comparison if appropriate
1058 to make cursor+infinity sure to be > p_limit.
1059 Assuming that the buffer lies in a range of addresses
1060 that are all "positive" (as ints) or all "negative",
1061 either kind of comparison will work as long
1062 as we don't step by infinity. So pick the kind
1063 that works when we do step by infinity. */
1064 if ((EMACS_INT
) (p_limit
+ infinity
) > (EMACS_INT
) p_limit
)
1065 while ((EMACS_INT
) cursor
<= (EMACS_INT
) p_limit
)
1066 cursor
+= BM_tab
[*cursor
];
1068 while ((EMACS_UINT
) cursor
<= (EMACS_UINT
) p_limit
)
1069 cursor
+= BM_tab
[*cursor
];
1073 if ((EMACS_INT
) (p_limit
+ infinity
) < (EMACS_INT
) p_limit
)
1074 while ((EMACS_INT
) cursor
>= (EMACS_INT
) p_limit
)
1075 cursor
+= BM_tab
[*cursor
];
1077 while ((EMACS_UINT
) cursor
>= (EMACS_UINT
) p_limit
)
1078 cursor
+= BM_tab
[*cursor
];
1080 /* If you are here, cursor is beyond the end of the searched region. */
1081 /* This can happen if you match on the far character of the pattern, */
1082 /* because the "stride" of that character is infinity, a number able */
1083 /* to throw you well beyond the end of the search. It can also */
1084 /* happen if you fail to match within the permitted region and would */
1085 /* otherwise try a character beyond that region */
1086 if ((cursor
- p_limit
) * direction
<= len
)
1087 break; /* a small overrun is genuine */
1088 cursor
-= infinity
; /* large overrun = hit */
1089 i
= dirlen
- direction
;
1092 while ((i
-= direction
) + direction
!= 0)
1093 if (pat
[i
] != XINT (trt
[*(cursor
-= direction
)]))
1098 while ((i
-= direction
) + direction
!= 0)
1099 if (pat
[i
] != *(cursor
-= direction
))
1102 cursor
+= dirlen
- i
- direction
; /* fix cursor */
1103 if (i
+ direction
== 0)
1105 cursor
-= direction
;
1107 set_search_regs (pos
+ cursor
- p2
+ ((direction
> 0)
1111 if ((n
-= direction
) != 0)
1112 cursor
+= dirlen
; /* to resume search */
1114 return ((direction
> 0)
1115 ? search_regs
.end
[0] : search_regs
.start
[0]);
1118 cursor
+= stride_for_teases
; /* <sigh> we lose - */
1123 /* Now we'll pick up a clump that has to be done the hard */
1124 /* way because it covers a discontinuity */
1126 limit
= ((direction
> 0)
1127 ? BUFFER_CEILING_OF (pos
- dirlen
+ 1)
1128 : BUFFER_FLOOR_OF (pos
- dirlen
- 1));
1129 limit
= ((direction
> 0)
1130 ? min (limit
+ len
, lim
- 1)
1131 : max (limit
- len
, lim
));
1132 /* LIMIT is now the last value POS can have
1133 and still be valid for a possible match. */
1136 /* This loop can be coded for space rather than */
1137 /* speed because it will usually run only once. */
1138 /* (the reach is at most len + 21, and typically */
1139 /* does not exceed len) */
1140 while ((limit
- pos
) * direction
>= 0)
1141 pos
+= BM_tab
[FETCH_BYTE (pos
)];
1142 /* now run the same tests to distinguish going off the */
1143 /* end, a match or a phony match. */
1144 if ((pos
- limit
) * direction
<= len
)
1145 break; /* ran off the end */
1146 /* Found what might be a match.
1147 Set POS back to last (first if reverse) char pos. */
1149 i
= dirlen
- direction
;
1150 while ((i
-= direction
) + direction
!= 0)
1153 if (pat
[i
] != (trt
!= 0
1154 ? XINT (trt
[FETCH_BYTE (pos
)])
1155 : FETCH_BYTE (pos
)))
1158 /* Above loop has moved POS part or all the way
1159 back to the first char pos (last char pos if reverse).
1160 Set it once again at the last (first if reverse) char. */
1161 pos
+= dirlen
- i
- direction
;
1162 if (i
+ direction
== 0)
1166 set_search_regs (pos
+ ((direction
> 0) ? 1 - len
: 0),
1169 if ((n
-= direction
) != 0)
1170 pos
+= dirlen
; /* to resume search */
1172 return ((direction
> 0)
1173 ? search_regs
.end
[0] : search_regs
.start
[0]);
1176 pos
+= stride_for_teases
;
1179 /* We have done one clump. Can we continue? */
1180 if ((lim
- pos
) * direction
< 0)
1181 return ((0 - n
) * direction
);
1187 /* Record beginning BEG and end BEG + LEN
1188 for a match just found in the current buffer. */
1191 set_search_regs (beg
, len
)
1194 /* Make sure we have registers in which to store
1195 the match position. */
1196 if (search_regs
.num_regs
== 0)
1198 search_regs
.start
= (regoff_t
*) xmalloc (2 * sizeof (regoff_t
));
1199 search_regs
.end
= (regoff_t
*) xmalloc (2 * sizeof (regoff_t
));
1200 search_regs
.num_regs
= 2;
1203 search_regs
.start
[0] = beg
;
1204 search_regs
.end
[0] = beg
+ len
;
1205 XSETBUFFER (last_thing_searched
, current_buffer
);
1208 /* Given a string of words separated by word delimiters,
1209 compute a regexp that matches those exact words
1210 separated by arbitrary punctuation. */
1216 register unsigned char *p
, *o
;
1217 register int i
, len
, punct_count
= 0, word_count
= 0;
1220 CHECK_STRING (string
, 0);
1221 p
= XSTRING (string
)->data
;
1222 len
= XSTRING (string
)->size
;
1224 for (i
= 0; i
< len
; i
++)
1225 if (SYNTAX (p
[i
]) != Sword
)
1228 if (i
> 0 && SYNTAX (p
[i
-1]) == Sword
) word_count
++;
1230 if (SYNTAX (p
[len
-1]) == Sword
) word_count
++;
1231 if (!word_count
) return build_string ("");
1233 val
= make_string (p
, len
- punct_count
+ 5 * (word_count
- 1) + 4);
1235 o
= XSTRING (val
)->data
;
1239 for (i
= 0; i
< len
; i
++)
1240 if (SYNTAX (p
[i
]) == Sword
)
1242 else if (i
> 0 && SYNTAX (p
[i
-1]) == Sword
&& --word_count
)
1257 DEFUN ("search-backward", Fsearch_backward
, Ssearch_backward
, 1, 4,
1258 "MSearch backward: ",
1259 "Search backward from point for STRING.\n\
1260 Set point to the beginning of the occurrence found, and return point.\n\
1261 An optional second argument bounds the search; it is a buffer position.\n\
1262 The match found must not extend before that position.\n\
1263 Optional third argument, if t, means if fail just return nil (no error).\n\
1264 If not nil and not t, position at limit of search and return nil.\n\
1265 Optional fourth argument is repeat count--search for successive occurrences.\n\
1266 See also the functions `match-beginning', `match-end' and `replace-match'.")
1267 (string
, bound
, noerror
, count
)
1268 Lisp_Object string
, bound
, noerror
, count
;
1270 return search_command (string
, bound
, noerror
, count
, -1, 0, 0);
1273 DEFUN ("search-forward", Fsearch_forward
, Ssearch_forward
, 1, 4, "MSearch: ",
1274 "Search forward from point for STRING.\n\
1275 Set point to the end of the occurrence found, and return point.\n\
1276 An optional second argument bounds the search; it is a buffer position.\n\
1277 The match found must not extend after that position. nil is equivalent\n\
1279 Optional third argument, if t, means if fail just return nil (no error).\n\
1280 If not nil and not t, move to limit of search and return nil.\n\
1281 Optional fourth argument is repeat count--search for successive occurrences.\n\
1282 See also the functions `match-beginning', `match-end' and `replace-match'.")
1283 (string
, bound
, noerror
, count
)
1284 Lisp_Object string
, bound
, noerror
, count
;
1286 return search_command (string
, bound
, noerror
, count
, 1, 0, 0);
1289 DEFUN ("word-search-backward", Fword_search_backward
, Sword_search_backward
, 1, 4,
1290 "sWord search backward: ",
1291 "Search backward from point for STRING, ignoring differences in punctuation.\n\
1292 Set point to the beginning of the occurrence found, and return point.\n\
1293 An optional second argument bounds the search; it is a buffer position.\n\
1294 The match found must not extend before that position.\n\
1295 Optional third argument, if t, means if fail just return nil (no error).\n\
1296 If not nil and not t, move to limit of search and return nil.\n\
1297 Optional fourth argument is repeat count--search for successive occurrences.")
1298 (string
, bound
, noerror
, count
)
1299 Lisp_Object string
, bound
, noerror
, count
;
1301 return search_command (wordify (string
), bound
, noerror
, count
, -1, 1, 0);
1304 DEFUN ("word-search-forward", Fword_search_forward
, Sword_search_forward
, 1, 4,
1306 "Search forward from point for STRING, ignoring differences in punctuation.\n\
1307 Set point to the end of the occurrence found, and return point.\n\
1308 An optional second argument bounds the search; it is a buffer position.\n\
1309 The match found must not extend after that position.\n\
1310 Optional third argument, if t, means if fail just return nil (no error).\n\
1311 If not nil and not t, move to limit of search and return nil.\n\
1312 Optional fourth argument is repeat count--search for successive occurrences.")
1313 (string
, bound
, noerror
, count
)
1314 Lisp_Object string
, bound
, noerror
, count
;
1316 return search_command (wordify (string
), bound
, noerror
, count
, 1, 1, 0);
1319 DEFUN ("re-search-backward", Fre_search_backward
, Sre_search_backward
, 1, 4,
1320 "sRE search backward: ",
1321 "Search backward from point for match for regular expression REGEXP.\n\
1322 Set point to the beginning of the match, and return point.\n\
1323 The match found is the one starting last in the buffer\n\
1324 and yet ending before the origin of the search.\n\
1325 An optional second argument bounds the search; it is a buffer position.\n\
1326 The match found must start at or after that position.\n\
1327 Optional third argument, if t, means if fail just return nil (no error).\n\
1328 If not nil and not t, move to limit of search and return nil.\n\
1329 Optional fourth argument is repeat count--search for successive occurrences.\n\
1330 See also the functions `match-beginning', `match-end' and `replace-match'.")
1331 (regexp
, bound
, noerror
, count
)
1332 Lisp_Object regexp
, bound
, noerror
, count
;
1334 return search_command (regexp
, bound
, noerror
, count
, -1, 1, 0);
1337 DEFUN ("re-search-forward", Fre_search_forward
, Sre_search_forward
, 1, 4,
1339 "Search forward from point for regular expression REGEXP.\n\
1340 Set point to the end of the occurrence found, and return point.\n\
1341 An optional second argument bounds the search; it is a buffer position.\n\
1342 The match found must not extend after that position.\n\
1343 Optional third argument, if t, means if fail just return nil (no error).\n\
1344 If not nil and not t, move to limit of search and return nil.\n\
1345 Optional fourth argument is repeat count--search for successive occurrences.\n\
1346 See also the functions `match-beginning', `match-end' and `replace-match'.")
1347 (regexp
, bound
, noerror
, count
)
1348 Lisp_Object regexp
, bound
, noerror
, count
;
1350 return search_command (regexp
, bound
, noerror
, count
, 1, 1, 0);
1353 DEFUN ("posix-search-backward", Fposix_search_backward
, Sposix_search_backward
, 1, 4,
1354 "sPosix search backward: ",
1355 "Search backward from point for match for regular expression REGEXP.\n\
1356 Find the longest match in accord with Posix regular expression rules.\n\
1357 Set point to the beginning of the match, and return point.\n\
1358 The match found is the one starting last in the buffer\n\
1359 and yet ending before the origin of the search.\n\
1360 An optional second argument bounds the search; it is a buffer position.\n\
1361 The match found must start at or after that position.\n\
1362 Optional third argument, if t, means if fail just return nil (no error).\n\
1363 If not nil and not t, move to limit of search and return nil.\n\
1364 Optional fourth argument is repeat count--search for successive occurrences.\n\
1365 See also the functions `match-beginning', `match-end' and `replace-match'.")
1366 (regexp
, bound
, noerror
, count
)
1367 Lisp_Object regexp
, bound
, noerror
, count
;
1369 return search_command (regexp
, bound
, noerror
, count
, -1, 1, 1);
1372 DEFUN ("posix-search-forward", Fposix_search_forward
, Sposix_search_forward
, 1, 4,
1374 "Search forward from point for regular expression REGEXP.\n\
1375 Find the longest match in accord with Posix regular expression rules.\n\
1376 Set point to the end of the occurrence found, and return point.\n\
1377 An optional second argument bounds the search; it is a buffer position.\n\
1378 The match found must not extend after that position.\n\
1379 Optional third argument, if t, means if fail just return nil (no error).\n\
1380 If not nil and not t, move to limit of search and return nil.\n\
1381 Optional fourth argument is repeat count--search for successive occurrences.\n\
1382 See also the functions `match-beginning', `match-end' and `replace-match'.")
1383 (regexp
, bound
, noerror
, count
)
1384 Lisp_Object regexp
, bound
, noerror
, count
;
1386 return search_command (regexp
, bound
, noerror
, count
, 1, 1, 1);
1389 DEFUN ("replace-match", Freplace_match
, Sreplace_match
, 1, 5, 0,
1390 "Replace text matched by last search with NEWTEXT.\n\
1391 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.\n\
1392 Otherwise maybe capitalize the whole text, or maybe just word initials,\n\
1393 based on the replaced text.\n\
1394 If the replaced text has only capital letters\n\
1395 and has at least one multiletter word, convert NEWTEXT to all caps.\n\
1396 If the replaced text has at least one word starting with a capital letter,\n\
1397 then capitalize each word in NEWTEXT.\n\n\
1398 If third arg LITERAL is non-nil, insert NEWTEXT literally.\n\
1399 Otherwise treat `\\' as special:\n\
1400 `\\&' in NEWTEXT means substitute original matched text.\n\
1401 `\\N' means substitute what matched the Nth `\\(...\\)'.\n\
1402 If Nth parens didn't match, substitute nothing.\n\
1403 `\\\\' means insert one `\\'.\n\
1404 FIXEDCASE and LITERAL are optional arguments.\n\
1405 Leaves point at end of replacement text.\n\
1407 The optional fourth argument STRING can be a string to modify.\n\
1408 In that case, this function creates and returns a new string\n\
1409 which is made by replacing the part of STRING that was matched.\n\
1411 The optional fifth argument SUBEXP specifies a subexpression of the match.\n\
1412 It says to replace just that subexpression instead of the whole match.\n\
1413 This is useful only after a regular expression search or match\n\
1414 since only regular expressions have distinguished subexpressions.")
1415 (newtext
, fixedcase
, literal
, string
, subexp
)
1416 Lisp_Object newtext
, fixedcase
, literal
, string
, subexp
;
1418 enum { nochange
, all_caps
, cap_initial
} case_action
;
1419 register int pos
, last
;
1420 int some_multiletter_word
;
1423 int some_nonuppercase_initial
;
1424 register int c
, prevc
;
1427 int opoint
, newpoint
;
1429 CHECK_STRING (newtext
, 0);
1431 if (! NILP (string
))
1432 CHECK_STRING (string
, 4);
1434 case_action
= nochange
; /* We tried an initialization */
1435 /* but some C compilers blew it */
1437 if (search_regs
.num_regs
<= 0)
1438 error ("replace-match called before any match found");
1444 CHECK_NUMBER (subexp
, 3);
1445 sub
= XINT (subexp
);
1446 if (sub
< 0 || sub
>= search_regs
.num_regs
)
1447 args_out_of_range (subexp
, make_number (search_regs
.num_regs
));
1452 if (search_regs
.start
[sub
] < BEGV
1453 || search_regs
.start
[sub
] > search_regs
.end
[sub
]
1454 || search_regs
.end
[sub
] > ZV
)
1455 args_out_of_range (make_number (search_regs
.start
[sub
]),
1456 make_number (search_regs
.end
[sub
]));
1460 if (search_regs
.start
[sub
] < 0
1461 || search_regs
.start
[sub
] > search_regs
.end
[sub
]
1462 || search_regs
.end
[sub
] > XSTRING (string
)->size
)
1463 args_out_of_range (make_number (search_regs
.start
[sub
]),
1464 make_number (search_regs
.end
[sub
]));
1467 if (NILP (fixedcase
))
1469 /* Decide how to casify by examining the matched text. */
1471 last
= search_regs
.end
[sub
];
1473 case_action
= all_caps
;
1475 /* some_multiletter_word is set nonzero if any original word
1476 is more than one letter long. */
1477 some_multiletter_word
= 0;
1479 some_nonuppercase_initial
= 0;
1482 for (pos
= search_regs
.start
[sub
]; pos
< last
; pos
++)
1485 c
= FETCH_BYTE (pos
);
1487 c
= XSTRING (string
)->data
[pos
];
1491 /* Cannot be all caps if any original char is lower case */
1494 if (SYNTAX (prevc
) != Sword
)
1495 some_nonuppercase_initial
= 1;
1497 some_multiletter_word
= 1;
1499 else if (!NOCASEP (c
))
1502 if (SYNTAX (prevc
) != Sword
)
1505 some_multiletter_word
= 1;
1509 /* If the initial is a caseless word constituent,
1510 treat that like a lowercase initial. */
1511 if (SYNTAX (prevc
) != Sword
)
1512 some_nonuppercase_initial
= 1;
1518 /* Convert to all caps if the old text is all caps
1519 and has at least one multiletter word. */
1520 if (! some_lowercase
&& some_multiletter_word
)
1521 case_action
= all_caps
;
1522 /* Capitalize each word, if the old text has all capitalized words. */
1523 else if (!some_nonuppercase_initial
&& some_multiletter_word
)
1524 case_action
= cap_initial
;
1525 else if (!some_nonuppercase_initial
&& some_uppercase
)
1526 /* Should x -> yz, operating on X, give Yz or YZ?
1527 We'll assume the latter. */
1528 case_action
= all_caps
;
1530 case_action
= nochange
;
1533 /* Do replacement in a string. */
1536 Lisp_Object before
, after
;
1538 before
= Fsubstring (string
, make_number (0),
1539 make_number (search_regs
.start
[sub
]));
1540 after
= Fsubstring (string
, make_number (search_regs
.end
[sub
]), Qnil
);
1542 /* Substitute parts of the match into NEWTEXT
1547 /* We build up the substituted string in ACCUM. */
1553 for (pos
= 0; pos
< XSTRING (newtext
)->size
; pos
++)
1557 int delbackslash
= 0;
1559 c
= XSTRING (newtext
)->data
[pos
];
1562 c
= XSTRING (newtext
)->data
[++pos
];
1565 substart
= search_regs
.start
[sub
];
1566 subend
= search_regs
.end
[sub
];
1568 else if (c
>= '1' && c
<= '9' && c
<= search_regs
.num_regs
+ '0')
1570 if (search_regs
.start
[c
- '0'] >= 0)
1572 substart
= search_regs
.start
[c
- '0'];
1573 subend
= search_regs
.end
[c
- '0'];
1579 error ("Invalid use of `\\' in replacement text");
1583 if (pos
- 1 != lastpos
+ 1)
1584 middle
= Fsubstring (newtext
,
1585 make_number (lastpos
+ 1),
1586 make_number (pos
- 1));
1589 accum
= concat3 (accum
, middle
,
1590 Fsubstring (string
, make_number (substart
),
1591 make_number (subend
)));
1594 else if (delbackslash
)
1596 middle
= Fsubstring (newtext
, make_number (lastpos
+ 1),
1598 accum
= concat2 (accum
, middle
);
1603 if (pos
!= lastpos
+ 1)
1604 middle
= Fsubstring (newtext
, make_number (lastpos
+ 1),
1609 newtext
= concat2 (accum
, middle
);
1612 /* Do case substitution in NEWTEXT if desired. */
1613 if (case_action
== all_caps
)
1614 newtext
= Fupcase (newtext
);
1615 else if (case_action
== cap_initial
)
1616 newtext
= Fupcase_initials (newtext
);
1618 return concat3 (before
, newtext
, after
);
1621 /* Record point, the move (quietly) to the start of the match. */
1622 if (PT
> search_regs
.start
[sub
])
1627 temp_set_point (search_regs
.start
[sub
], current_buffer
);
1629 /* We insert the replacement text before the old text, and then
1630 delete the original text. This means that markers at the
1631 beginning or end of the original will float to the corresponding
1632 position in the replacement. */
1633 if (!NILP (literal
))
1634 Finsert_and_inherit (1, &newtext
);
1637 struct gcpro gcpro1
;
1640 for (pos
= 0; pos
< XSTRING (newtext
)->size
; pos
++)
1642 int offset
= PT
- search_regs
.start
[sub
];
1644 c
= XSTRING (newtext
)->data
[pos
];
1647 c
= XSTRING (newtext
)->data
[++pos
];
1649 Finsert_buffer_substring
1650 (Fcurrent_buffer (),
1651 make_number (search_regs
.start
[sub
] + offset
),
1652 make_number (search_regs
.end
[sub
] + offset
));
1653 else if (c
>= '1' && c
<= '9' && c
<= search_regs
.num_regs
+ '0')
1655 if (search_regs
.start
[c
- '0'] >= 1)
1656 Finsert_buffer_substring
1657 (Fcurrent_buffer (),
1658 make_number (search_regs
.start
[c
- '0'] + offset
),
1659 make_number (search_regs
.end
[c
- '0'] + offset
));
1664 error ("Invalid use of `\\' in replacement text");
1672 inslen
= PT
- (search_regs
.start
[sub
]);
1673 del_range (search_regs
.start
[sub
] + inslen
, search_regs
.end
[sub
] + inslen
);
1675 if (case_action
== all_caps
)
1676 Fupcase_region (make_number (PT
- inslen
), make_number (PT
));
1677 else if (case_action
== cap_initial
)
1678 Fupcase_initials_region (make_number (PT
- inslen
), make_number (PT
));
1682 /* Put point back where it was in the text. */
1684 temp_set_point (opoint
+ ZV
, current_buffer
);
1686 temp_set_point (opoint
, current_buffer
);
1688 /* Now move point "officially" to the start of the inserted replacement. */
1689 move_if_not_intangible (newpoint
);
1695 match_limit (num
, beginningp
)
1701 CHECK_NUMBER (num
, 0);
1703 if (n
< 0 || n
>= search_regs
.num_regs
)
1704 args_out_of_range (num
, make_number (search_regs
.num_regs
));
1705 if (search_regs
.num_regs
<= 0
1706 || search_regs
.start
[n
] < 0)
1708 return (make_number ((beginningp
) ? search_regs
.start
[n
]
1709 : search_regs
.end
[n
]));
1712 DEFUN ("match-beginning", Fmatch_beginning
, Smatch_beginning
, 1, 1, 0,
1713 "Return position of start of text matched by last search.\n\
1714 SUBEXP, a number, specifies which parenthesized expression in the last\n\
1716 Value is nil if SUBEXPth pair didn't match, or there were less than\n\
1718 Zero means the entire text matched by the whole regexp or whole string.")
1722 return match_limit (subexp
, 1);
1725 DEFUN ("match-end", Fmatch_end
, Smatch_end
, 1, 1, 0,
1726 "Return position of end of text matched by last search.\n\
1727 SUBEXP, a number, specifies which parenthesized expression in the last\n\
1729 Value is nil if SUBEXPth pair didn't match, or there were less than\n\
1731 Zero means the entire text matched by the whole regexp or whole string.")
1735 return match_limit (subexp
, 0);
1738 DEFUN ("match-data", Fmatch_data
, Smatch_data
, 0, 2, 0,
1739 "Return a list containing all info on what the last search matched.\n\
1740 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.\n\
1741 All the elements are markers or nil (nil if the Nth pair didn't match)\n\
1742 if the last match was on a buffer; integers or nil if a string was matched.\n\
1743 Use `store-match-data' to reinstate the data in this list.\n\
1745 If INTEGERS (the optional first argument) is non-nil, always use integers\n\
1746 \(rather than markers) to represent buffer positions.\n\
1747 If REUSE is a list, reuse it as part of the value. If REUSE is long enough\n\
1748 to hold all the values, and if INTEGERS is non-nil, no consing is done.")
1750 Lisp_Object integers
, reuse
;
1752 Lisp_Object tail
, prev
;
1756 if (NILP (last_thing_searched
))
1759 data
= (Lisp_Object
*) alloca ((2 * search_regs
.num_regs
)
1760 * sizeof (Lisp_Object
));
1763 for (i
= 0; i
< search_regs
.num_regs
; i
++)
1765 int start
= search_regs
.start
[i
];
1768 if (EQ (last_thing_searched
, Qt
)
1769 || ! NILP (integers
))
1771 XSETFASTINT (data
[2 * i
], start
);
1772 XSETFASTINT (data
[2 * i
+ 1], search_regs
.end
[i
]);
1774 else if (BUFFERP (last_thing_searched
))
1776 data
[2 * i
] = Fmake_marker ();
1777 Fset_marker (data
[2 * i
],
1778 make_number (start
),
1779 last_thing_searched
);
1780 data
[2 * i
+ 1] = Fmake_marker ();
1781 Fset_marker (data
[2 * i
+ 1],
1782 make_number (search_regs
.end
[i
]),
1783 last_thing_searched
);
1786 /* last_thing_searched must always be Qt, a buffer, or Qnil. */
1792 data
[2 * i
] = data
[2 * i
+ 1] = Qnil
;
1795 /* If REUSE is not usable, cons up the values and return them. */
1796 if (! CONSP (reuse
))
1797 return Flist (2 * len
+ 2, data
);
1799 /* If REUSE is a list, store as many value elements as will fit
1800 into the elements of REUSE. */
1801 for (i
= 0, tail
= reuse
; CONSP (tail
);
1802 i
++, tail
= XCONS (tail
)->cdr
)
1804 if (i
< 2 * len
+ 2)
1805 XCONS (tail
)->car
= data
[i
];
1807 XCONS (tail
)->car
= Qnil
;
1811 /* If we couldn't fit all value elements into REUSE,
1812 cons up the rest of them and add them to the end of REUSE. */
1813 if (i
< 2 * len
+ 2)
1814 XCONS (prev
)->cdr
= Flist (2 * len
+ 2 - i
, data
+ i
);
1820 DEFUN ("store-match-data", Fstore_match_data
, Sstore_match_data
, 1, 1, 0,
1821 "Set internal data on last search match from elements of LIST.\n\
1822 LIST should have been created by calling `match-data' previously.")
1824 register Lisp_Object list
;
1827 register Lisp_Object marker
;
1829 if (running_asynch_code
)
1830 save_search_regs ();
1832 if (!CONSP (list
) && !NILP (list
))
1833 list
= wrong_type_argument (Qconsp
, list
);
1835 /* Unless we find a marker with a buffer in LIST, assume that this
1836 match data came from a string. */
1837 last_thing_searched
= Qt
;
1839 /* Allocate registers if they don't already exist. */
1841 int length
= XFASTINT (Flength (list
)) / 2;
1843 if (length
> search_regs
.num_regs
)
1845 if (search_regs
.num_regs
== 0)
1848 = (regoff_t
*) xmalloc (length
* sizeof (regoff_t
));
1850 = (regoff_t
*) xmalloc (length
* sizeof (regoff_t
));
1855 = (regoff_t
*) xrealloc (search_regs
.start
,
1856 length
* sizeof (regoff_t
));
1858 = (regoff_t
*) xrealloc (search_regs
.end
,
1859 length
* sizeof (regoff_t
));
1862 search_regs
.num_regs
= length
;
1866 for (i
= 0; i
< search_regs
.num_regs
; i
++)
1868 marker
= Fcar (list
);
1871 search_regs
.start
[i
] = -1;
1876 if (MARKERP (marker
))
1878 if (XMARKER (marker
)->buffer
== 0)
1879 XSETFASTINT (marker
, 0);
1881 XSETBUFFER (last_thing_searched
, XMARKER (marker
)->buffer
);
1884 CHECK_NUMBER_COERCE_MARKER (marker
, 0);
1885 search_regs
.start
[i
] = XINT (marker
);
1888 marker
= Fcar (list
);
1889 if (MARKERP (marker
) && XMARKER (marker
)->buffer
== 0)
1890 XSETFASTINT (marker
, 0);
1892 CHECK_NUMBER_COERCE_MARKER (marker
, 0);
1893 search_regs
.end
[i
] = XINT (marker
);
1901 /* If non-zero the match data have been saved in saved_search_regs
1902 during the execution of a sentinel or filter. */
1903 static int search_regs_saved
;
1904 static struct re_registers saved_search_regs
;
1906 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
1907 if asynchronous code (filter or sentinel) is running. */
1911 if (!search_regs_saved
)
1913 saved_search_regs
.num_regs
= search_regs
.num_regs
;
1914 saved_search_regs
.start
= search_regs
.start
;
1915 saved_search_regs
.end
= search_regs
.end
;
1916 search_regs
.num_regs
= 0;
1917 search_regs
.start
= 0;
1918 search_regs
.end
= 0;
1920 search_regs_saved
= 1;
1924 /* Called upon exit from filters and sentinels. */
1926 restore_match_data ()
1928 if (search_regs_saved
)
1930 if (search_regs
.num_regs
> 0)
1932 xfree (search_regs
.start
);
1933 xfree (search_regs
.end
);
1935 search_regs
.num_regs
= saved_search_regs
.num_regs
;
1936 search_regs
.start
= saved_search_regs
.start
;
1937 search_regs
.end
= saved_search_regs
.end
;
1939 search_regs_saved
= 0;
1943 /* Quote a string to inactivate reg-expr chars */
1945 DEFUN ("regexp-quote", Fregexp_quote
, Sregexp_quote
, 1, 1, 0,
1946 "Return a regexp string which matches exactly STRING and nothing else.")
1950 register unsigned char *in
, *out
, *end
;
1951 register unsigned char *temp
;
1953 CHECK_STRING (string
, 0);
1955 temp
= (unsigned char *) alloca (XSTRING (string
)->size
* 2);
1957 /* Now copy the data into the new string, inserting escapes. */
1959 in
= XSTRING (string
)->data
;
1960 end
= in
+ XSTRING (string
)->size
;
1963 for (; in
!= end
; in
++)
1965 if (*in
== '[' || *in
== ']'
1966 || *in
== '*' || *in
== '.' || *in
== '\\'
1967 || *in
== '?' || *in
== '+'
1968 || *in
== '^' || *in
== '$')
1973 return make_string (temp
, out
- temp
);
1980 for (i
= 0; i
< REGEXP_CACHE_SIZE
; ++i
)
1982 searchbufs
[i
].buf
.allocated
= 100;
1983 searchbufs
[i
].buf
.buffer
= (unsigned char *) malloc (100);
1984 searchbufs
[i
].buf
.fastmap
= searchbufs
[i
].fastmap
;
1985 searchbufs
[i
].regexp
= Qnil
;
1986 staticpro (&searchbufs
[i
].regexp
);
1987 searchbufs
[i
].next
= (i
== REGEXP_CACHE_SIZE
-1 ? 0 : &searchbufs
[i
+1]);
1989 searchbuf_head
= &searchbufs
[0];
1991 Qsearch_failed
= intern ("search-failed");
1992 staticpro (&Qsearch_failed
);
1993 Qinvalid_regexp
= intern ("invalid-regexp");
1994 staticpro (&Qinvalid_regexp
);
1996 Fput (Qsearch_failed
, Qerror_conditions
,
1997 Fcons (Qsearch_failed
, Fcons (Qerror
, Qnil
)));
1998 Fput (Qsearch_failed
, Qerror_message
,
1999 build_string ("Search failed"));
2001 Fput (Qinvalid_regexp
, Qerror_conditions
,
2002 Fcons (Qinvalid_regexp
, Fcons (Qerror
, Qnil
)));
2003 Fput (Qinvalid_regexp
, Qerror_message
,
2004 build_string ("Invalid regexp"));
2006 last_thing_searched
= Qnil
;
2007 staticpro (&last_thing_searched
);
2009 defsubr (&Slooking_at
);
2010 defsubr (&Sposix_looking_at
);
2011 defsubr (&Sstring_match
);
2012 defsubr (&Sposix_string_match
);
2013 defsubr (&Ssearch_forward
);
2014 defsubr (&Ssearch_backward
);
2015 defsubr (&Sword_search_forward
);
2016 defsubr (&Sword_search_backward
);
2017 defsubr (&Sre_search_forward
);
2018 defsubr (&Sre_search_backward
);
2019 defsubr (&Sposix_search_forward
);
2020 defsubr (&Sposix_search_backward
);
2021 defsubr (&Sreplace_match
);
2022 defsubr (&Smatch_beginning
);
2023 defsubr (&Smatch_end
);
2024 defsubr (&Smatch_data
);
2025 defsubr (&Sstore_match_data
);
2026 defsubr (&Sregexp_quote
);