1 /* String search routines for GNU Emacs.
2 Copyright (C) 1985, 1986, 1987, 1993 Free Software Foundation, Inc.
4 This file is part of GNU Emacs.
6 GNU Emacs is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 1, or (at your option)
11 GNU Emacs is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GNU Emacs; see the file COPYING. If not, write to
18 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
26 #include "blockinput.h"
28 #include <sys/types.h>
31 #define max(a, b) ((a) > (b) ? (a) : (b))
32 #define min(a, b) ((a) < (b) ? (a) : (b))
34 /* We compile regexps into this buffer and then use it for searching. */
36 struct re_pattern_buffer searchbuf
;
38 char search_fastmap
[0400];
40 /* Last regexp we compiled */
42 Lisp_Object last_regexp
;
44 /* Every call to re_match, etc., must pass &search_regs as the regs
45 argument unless you can show it is unnecessary (i.e., if re_match
46 is certainly going to be called again before region-around-match
49 Since the registers are now dynamically allocated, we need to make
50 sure not to refer to the Nth register before checking that it has
51 been allocated by checking search_regs.num_regs.
53 The regex code keeps track of whether it has allocated the search
54 buffer using bits in searchbuf. This means that whenever you
55 compile a new pattern, it completely forgets whether it has
56 allocated any registers, and will allocate new registers the next
57 time you call a searching or matching function. Therefore, we need
58 to call re_set_registers after compiling a new pattern or after
59 setting the match registers, so that the regex functions will be
60 able to free or re-allocate it properly. */
61 static struct re_registers search_regs
;
63 /* The buffer in which the last search was performed, or
64 Qt if the last search was done in a string;
65 Qnil if no searching has been done yet. */
66 static Lisp_Object last_thing_searched
;
68 /* error condition signalled when regexp compile_pattern fails */
70 Lisp_Object Qinvalid_regexp
;
72 static void set_search_regs ();
77 error ("Stack overflow in regexp matcher");
86 /* Compile a regexp and signal a Lisp error if anything goes wrong. */
88 compile_pattern (pattern
, bufp
, regp
, translate
)
90 struct re_pattern_buffer
*bufp
;
91 struct re_registers
*regp
;
97 if (EQ (pattern
, last_regexp
)
98 && translate
== bufp
->translate
)
102 bufp
->translate
= translate
;
104 val
= (CONST
char *) re_compile_pattern ((char *) XSTRING (pattern
)->data
,
105 XSTRING (pattern
)->size
, bufp
);
109 dummy
= build_string (val
);
111 Fsignal (Qinvalid_regexp
, Fcons (dummy
, Qnil
));
114 last_regexp
= pattern
;
116 /* Advise the searching functions about the space we have allocated
117 for register data. */
120 re_set_registers (bufp
, regp
, regp
->num_regs
, regp
->start
, regp
->end
);
126 /* Error condition used for failing searches */
127 Lisp_Object Qsearch_failed
;
133 Fsignal (Qsearch_failed
, Fcons (arg
, Qnil
));
137 DEFUN ("looking-at", Flooking_at
, Slooking_at
, 1, 1, 0,
138 "Return t if text after point matches regular expression PAT.\n\
139 This function modifies the match data that `match-beginning',\n\
140 `match-end' and `match-data' access; save and restore the match\n\
141 data if you want to preserve them.")
146 unsigned char *p1
, *p2
;
150 CHECK_STRING (string
, 0);
151 compile_pattern (string
, &searchbuf
, &search_regs
,
152 !NILP (current_buffer
->case_fold_search
) ? DOWNCASE_TABLE
: 0);
155 QUIT
; /* Do a pending quit right away, to avoid paradoxical behavior */
157 /* Get pointers and sizes of the two strings
158 that make up the visible portion of the buffer. */
176 i
= re_match_2 (&searchbuf
, (char *) p1
, s1
, (char *) p2
, s2
,
177 point
- BEGV
, &search_regs
,
182 val
= (0 <= i
? Qt
: Qnil
);
183 for (i
= 0; i
< search_regs
.num_regs
; i
++)
184 if (search_regs
.start
[i
] >= 0)
186 search_regs
.start
[i
] += BEGV
;
187 search_regs
.end
[i
] += BEGV
;
189 XSET (last_thing_searched
, Lisp_Buffer
, current_buffer
);
194 DEFUN ("string-match", Fstring_match
, Sstring_match
, 2, 3, 0,
195 "Return index of start of first match for REGEXP in STRING, or nil.\n\
196 If third arg START is non-nil, start search at that index in STRING.\n\
197 For index of first char beyond the match, do (match-end 0).\n\
198 `match-end' and `match-beginning' also give indices of substrings\n\
199 matched by parenthesis constructs in the pattern.")
200 (regexp
, string
, start
)
201 Lisp_Object regexp
, string
, start
;
206 CHECK_STRING (regexp
, 0);
207 CHECK_STRING (string
, 1);
213 int len
= XSTRING (string
)->size
;
215 CHECK_NUMBER (start
, 2);
217 if (s
< 0 && -s
<= len
)
219 else if (0 > s
|| s
> len
)
220 args_out_of_range (string
, start
);
223 compile_pattern (regexp
, &searchbuf
, &search_regs
,
224 !NILP (current_buffer
->case_fold_search
) ? DOWNCASE_TABLE
: 0);
226 val
= re_search (&searchbuf
, (char *) XSTRING (string
)->data
,
227 XSTRING (string
)->size
, s
, XSTRING (string
)->size
- s
,
230 last_thing_searched
= Qt
;
233 if (val
< 0) return Qnil
;
234 return make_number (val
);
237 /* Match REGEXP against STRING, searching all of STRING,
238 and return the index of the match, or negative on failure.
239 This does not clobber the match data. */
242 fast_string_match (regexp
, string
)
243 Lisp_Object regexp
, string
;
247 compile_pattern (regexp
, &searchbuf
, 0, 0);
249 val
= re_search (&searchbuf
, (char *) XSTRING (string
)->data
,
250 XSTRING (string
)->size
, 0, XSTRING (string
)->size
,
256 /* Search for COUNT instances of the character TARGET, starting at START.
257 If COUNT is negative, search backwards.
259 If we find COUNT instances, set *SHORTAGE to zero, and return the
260 position after the COUNTth match. Note that for reverse motion
261 this is not the same as the usual convention for Emacs motion commands.
263 If we don't find COUNT instances before reaching the end of the
264 buffer (or the beginning, if scanning backwards), set *SHORTAGE to
265 the number of TARGETs left unfound, and return the end of the
266 buffer we bumped up against.
268 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
269 except when inside redisplay. */
271 scan_buffer (target
, start
, count
, shortage
, allow_quit
)
272 int *shortage
, start
;
273 register int count
, target
;
276 int limit
= ((count
> 0) ? ZV
- 1 : BEGV
);
277 int direction
= ((count
> 0) ? 1 : -1);
279 register unsigned char *cursor
;
282 register int ceiling
;
283 register unsigned char *ceiling_addr
;
288 immediate_quit
= allow_quit
;
291 while (start
!= limit
+ 1)
293 ceiling
= BUFFER_CEILING_OF (start
);
294 ceiling
= min (limit
, ceiling
);
295 ceiling_addr
= &FETCH_CHAR (ceiling
) + 1;
296 base
= (cursor
= &FETCH_CHAR (start
));
299 while (*cursor
!= target
&& ++cursor
!= ceiling_addr
)
301 if (cursor
!= ceiling_addr
)
306 return (start
+ cursor
- base
+ 1);
309 if (++cursor
== ceiling_addr
)
315 start
+= cursor
- base
;
319 start
--; /* first character we scan */
320 while (start
> limit
- 1)
321 { /* we WILL scan under start */
322 ceiling
= BUFFER_FLOOR_OF (start
);
323 ceiling
= max (limit
, ceiling
);
324 ceiling_addr
= &FETCH_CHAR (ceiling
) - 1;
325 base
= (cursor
= &FETCH_CHAR (start
));
329 while (--cursor
!= ceiling_addr
&& *cursor
!= target
)
331 if (cursor
!= ceiling_addr
)
336 return (start
+ cursor
- base
+ 1);
342 start
+= cursor
- base
;
347 *shortage
= count
* direction
;
348 return (start
+ ((direction
== 1 ? 0 : 1)));
352 find_next_newline (from
, cnt
)
353 register int from
, cnt
;
355 return scan_buffer ('\n', from
, cnt
, (int *) 0, 1);
358 Lisp_Object
skip_chars ();
360 DEFUN ("skip-chars-forward", Fskip_chars_forward
, Sskip_chars_forward
, 1, 2, 0,
361 "Move point forward, stopping before a char not in STRING, or at pos LIM.\n\
362 STRING is like the inside of a `[...]' in a regular expression\n\
363 except that `]' is never special and `\\' quotes `^', `-' or `\\'.\n\
364 Thus, with arg \"a-zA-Z\", this skips letters stopping before first nonletter.\n\
365 With arg \"^a-zA-Z\", skips nonletters stopping before first letter.\n\
366 Returns the distance traveled, either zero or positive.")
368 Lisp_Object string
, lim
;
370 return skip_chars (1, 0, string
, lim
);
373 DEFUN ("skip-chars-backward", Fskip_chars_backward
, Sskip_chars_backward
, 1, 2, 0,
374 "Move point backward, stopping after a char not in STRING, or at pos LIM.\n\
375 See `skip-chars-forward' for details.\n\
376 Returns the distance traveled, either zero or negative.")
378 Lisp_Object string
, lim
;
380 return skip_chars (0, 0, string
, lim
);
383 DEFUN ("skip-syntax-forward", Fskip_syntax_forward
, Sskip_syntax_forward
, 1, 2, 0,
384 "Move point forward across chars in specified syntax classes.\n\
385 SYNTAX is a string of syntax code characters.\n\
386 Stop before a char whose syntax is not in SYNTAX, or at position LIM.\n\
387 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.\n\
388 This function returns the distance traveled, either zero or positive.")
390 Lisp_Object syntax
, lim
;
392 return skip_chars (1, 1, syntax
, lim
);
395 DEFUN ("skip-syntax-backward", Fskip_syntax_backward
, Sskip_syntax_backward
, 1, 2, 0,
396 "Move point backward across chars in specified syntax classes.\n\
397 SYNTAX is a string of syntax code characters.\n\
398 Stop on reaching a char whose syntax is not in SYNTAX, or at position LIM.\n\
399 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.\n\
400 This function returns the distance traveled, either zero or negative.")
402 Lisp_Object syntax
, lim
;
404 return skip_chars (0, 1, syntax
, lim
);
408 skip_chars (forwardp
, syntaxp
, string
, lim
)
409 int forwardp
, syntaxp
;
410 Lisp_Object string
, lim
;
412 register unsigned char *p
, *pend
;
413 register unsigned char c
;
414 unsigned char fastmap
[0400];
418 CHECK_STRING (string
, 0);
421 XSET (lim
, Lisp_Int
, forwardp
? ZV
: BEGV
);
423 CHECK_NUMBER_COERCE_MARKER (lim
, 1);
425 /* In any case, don't allow scan outside bounds of buffer. */
426 /* jla turned this off, for no known reason.
427 bfox turned the ZV part on, and rms turned the
428 BEGV part back on. */
431 if (XINT (lim
) < BEGV
)
432 XFASTINT (lim
) = BEGV
;
434 p
= XSTRING (string
)->data
;
435 pend
= p
+ XSTRING (string
)->size
;
436 bzero (fastmap
, sizeof fastmap
);
438 if (p
!= pend
&& *p
== '^')
443 /* Find the characters specified and set their elements of fastmap.
444 If syntaxp, each character counts as itself.
445 Otherwise, handle backslashes and ranges specially */
456 if (p
== pend
) break;
459 if (p
!= pend
&& *p
== '-')
462 if (p
== pend
) break;
475 if (syntaxp
&& fastmap
['-'] != 0)
478 /* If ^ was the first character, complement the fastmap. */
481 for (i
= 0; i
< sizeof fastmap
; i
++)
485 int start_point
= point
;
493 while (point
< XINT (lim
)
494 && fastmap
[(unsigned char) syntax_code_spec
[(int) SYNTAX (FETCH_CHAR (point
))]])
499 while (point
> XINT (lim
)
500 && fastmap
[(unsigned char) syntax_code_spec
[(int) SYNTAX (FETCH_CHAR (point
- 1))]])
508 while (point
< XINT (lim
) && fastmap
[FETCH_CHAR (point
)])
513 while (point
> XINT (lim
) && fastmap
[FETCH_CHAR (point
- 1)])
519 return make_number (point
- start_point
);
523 /* Subroutines of Lisp buffer search functions. */
526 search_command (string
, bound
, noerror
, count
, direction
, RE
)
527 Lisp_Object string
, bound
, noerror
, count
;
537 CHECK_NUMBER (count
, 3);
541 CHECK_STRING (string
, 0);
543 lim
= n
> 0 ? ZV
: BEGV
;
546 CHECK_NUMBER_COERCE_MARKER (bound
, 1);
548 if (n
> 0 ? lim
< point
: lim
> point
)
549 error ("Invalid search bound (wrong side of point)");
556 np
= search_buffer (string
, point
, lim
, n
, RE
,
557 (!NILP (current_buffer
->case_fold_search
)
558 ? XSTRING (current_buffer
->case_canon_table
)->data
: 0),
559 (!NILP (current_buffer
->case_fold_search
)
560 ? XSTRING (current_buffer
->case_eqv_table
)->data
: 0));
564 return signal_failure (string
);
565 if (!EQ (noerror
, Qt
))
567 if (lim
< BEGV
|| lim
> ZV
)
571 #if 0 /* This would be clean, but maybe programs depend on
572 a value of nil here. */
580 if (np
< BEGV
|| np
> ZV
)
585 return make_number (np
);
588 /* Search for the n'th occurrence of STRING in the current buffer,
589 starting at position POS and stopping at position LIM,
590 treating PAT as a literal string if RE is false or as
591 a regular expression if RE is true.
593 If N is positive, searching is forward and LIM must be greater than POS.
594 If N is negative, searching is backward and LIM must be less than POS.
596 Returns -x if only N-x occurrences found (x > 0),
597 or else the position at the beginning of the Nth occurrence
598 (if searching backward) or the end (if searching forward). */
600 search_buffer (string
, pos
, lim
, n
, RE
, trt
, inverse_trt
)
606 register unsigned char *trt
;
607 register unsigned char *inverse_trt
;
609 int len
= XSTRING (string
)->size
;
610 unsigned char *base_pat
= XSTRING (string
)->data
;
611 register int *BM_tab
;
613 register int direction
= ((n
> 0) ? 1 : -1);
615 int infinity
, limit
, k
, stride_for_teases
;
616 register unsigned char *pat
, *cursor
, *p_limit
;
618 unsigned char *p1
, *p2
;
621 /* Null string is found at starting position. */
624 set_search_regs (pos
, 0);
628 /* Searching 0 times means don't move. */
633 compile_pattern (string
, &searchbuf
, &search_regs
, (char *) trt
);
635 if (RE
/* Here we detect whether the */
636 /* generality of an RE search is */
638 /* first item is "exact match" */
639 && *(searchbuf
.buffer
) == (char) RE_EXACTN_VALUE
640 && searchbuf
.buffer
[1] + 2 == searchbuf
.used
) /*first is ONLY item */
642 RE
= 0; /* can do straight (non RE) search */
643 pat
= (base_pat
= (unsigned char *) searchbuf
.buffer
+ 2);
644 /* trt already applied */
645 len
= searchbuf
.used
- 2;
649 pat
= (unsigned char *) alloca (len
);
651 for (i
= len
; i
--;) /* Copy the pattern; apply trt */
652 *pat
++ = (((int) trt
) ? trt
[*base_pat
++] : *base_pat
++);
653 pat
-= len
; base_pat
= pat
;
658 immediate_quit
= 1; /* Quit immediately if user types ^G,
659 because letting this function finish
660 can take too long. */
661 QUIT
; /* Do a pending quit right away,
662 to avoid paradoxical behavior */
663 /* Get pointers and sizes of the two strings
664 that make up the visible portion of the buffer. */
684 val
= re_search_2 (&searchbuf
, (char *) p1
, s1
, (char *) p2
, s2
,
685 pos
- BEGV
, lim
- pos
, &search_regs
,
686 /* Don't allow match past current point */
693 for (i
= 0; i
< search_regs
.num_regs
; i
++)
694 if (search_regs
.start
[i
] >= 0)
696 search_regs
.start
[i
] += j
;
697 search_regs
.end
[i
] += j
;
699 XSET (last_thing_searched
, Lisp_Buffer
, current_buffer
);
700 /* Set pos to the new position. */
701 pos
= search_regs
.start
[0];
713 val
= re_search_2 (&searchbuf
, (char *) p1
, s1
, (char *) p2
, s2
,
714 pos
- BEGV
, lim
- pos
, &search_regs
,
721 for (i
= 0; i
< search_regs
.num_regs
; i
++)
722 if (search_regs
.start
[i
] >= 0)
724 search_regs
.start
[i
] += j
;
725 search_regs
.end
[i
] += j
;
727 XSET (last_thing_searched
, Lisp_Buffer
, current_buffer
);
728 pos
= search_regs
.end
[0];
740 else /* non-RE case */
743 int BM_tab_space
[0400];
744 BM_tab
= &BM_tab_space
[0];
746 BM_tab
= (int *) alloca (0400 * sizeof (int));
748 /* The general approach is that we are going to maintain that we know */
749 /* the first (closest to the present position, in whatever direction */
750 /* we're searching) character that could possibly be the last */
751 /* (furthest from present position) character of a valid match. We */
752 /* advance the state of our knowledge by looking at that character */
753 /* and seeing whether it indeed matches the last character of the */
754 /* pattern. If it does, we take a closer look. If it does not, we */
755 /* move our pointer (to putative last characters) as far as is */
756 /* logically possible. This amount of movement, which I call a */
757 /* stride, will be the length of the pattern if the actual character */
758 /* appears nowhere in the pattern, otherwise it will be the distance */
759 /* from the last occurrence of that character to the end of the */
761 /* As a coding trick, an enormous stride is coded into the table for */
762 /* characters that match the last character. This allows use of only */
763 /* a single test, a test for having gone past the end of the */
764 /* permissible match region, to test for both possible matches (when */
765 /* the stride goes past the end immediately) and failure to */
766 /* match (where you get nudged past the end one stride at a time). */
768 /* Here we make a "mickey mouse" BM table. The stride of the search */
769 /* is determined only by the last character of the putative match. */
770 /* If that character does not match, we will stride the proper */
771 /* distance to propose a match that superimposes it on the last */
772 /* instance of a character that matches it (per trt), or misses */
773 /* it entirely if there is none. */
775 dirlen
= len
* direction
;
776 infinity
= dirlen
- (lim
+ pos
+ len
+ len
) * direction
;
778 pat
= (base_pat
+= len
- 1);
779 BM_tab_base
= BM_tab
;
781 j
= dirlen
; /* to get it in a register */
782 /* A character that does not appear in the pattern induces a */
783 /* stride equal to the pattern length. */
784 while (BM_tab_base
!= BM_tab
)
792 while (i
!= infinity
)
794 j
= pat
[i
]; i
+= direction
;
795 if (i
== dirlen
) i
= infinity
;
800 stride_for_teases
= BM_tab
[j
];
801 BM_tab
[j
] = dirlen
- i
;
802 /* A translation table is accompanied by its inverse -- see */
803 /* comment following downcase_table for details */
804 while ((j
= inverse_trt
[j
]) != k
)
805 BM_tab
[j
] = dirlen
- i
;
810 stride_for_teases
= BM_tab
[j
];
811 BM_tab
[j
] = dirlen
- i
;
813 /* stride_for_teases tells how much to stride if we get a */
814 /* match on the far character but are subsequently */
815 /* disappointed, by recording what the stride would have been */
816 /* for that character if the last character had been */
819 infinity
= dirlen
- infinity
;
820 pos
+= dirlen
- ((direction
> 0) ? direction
: 0);
821 /* loop invariant - pos points at where last char (first char if reverse)
822 of pattern would align in a possible match. */
825 /* It's been reported that some (broken) compiler thinks that
826 Boolean expressions in an arithmetic context are unsigned.
827 Using an explicit ?1:0 prevents this. */
828 if ((lim
- pos
- ((direction
> 0) ? 1 : 0)) * direction
< 0)
829 return (n
* (0 - direction
));
830 /* First we do the part we can by pointers (maybe nothing) */
833 limit
= pos
- dirlen
+ direction
;
834 limit
= ((direction
> 0)
835 ? BUFFER_CEILING_OF (limit
)
836 : BUFFER_FLOOR_OF (limit
));
837 /* LIMIT is now the last (not beyond-last!) value
838 POS can take on without hitting edge of buffer or the gap. */
839 limit
= ((direction
> 0)
840 ? min (lim
- 1, min (limit
, pos
+ 20000))
841 : max (lim
, max (limit
, pos
- 20000)));
842 if ((limit
- pos
) * direction
> 20)
844 p_limit
= &FETCH_CHAR (limit
);
845 p2
= (cursor
= &FETCH_CHAR (pos
));
846 /* In this loop, pos + cursor - p2 is the surrogate for pos */
847 while (1) /* use one cursor setting as long as i can */
849 if (direction
> 0) /* worth duplicating */
851 /* Use signed comparison if appropriate
852 to make cursor+infinity sure to be > p_limit.
853 Assuming that the buffer lies in a range of addresses
854 that are all "positive" (as ints) or all "negative",
855 either kind of comparison will work as long
856 as we don't step by infinity. So pick the kind
857 that works when we do step by infinity. */
858 if ((int) (p_limit
+ infinity
) > (int) p_limit
)
859 while ((int) cursor
<= (int) p_limit
)
860 cursor
+= BM_tab
[*cursor
];
862 while ((unsigned int) cursor
<= (unsigned int) p_limit
)
863 cursor
+= BM_tab
[*cursor
];
867 if ((int) (p_limit
+ infinity
) < (int) p_limit
)
868 while ((int) cursor
>= (int) p_limit
)
869 cursor
+= BM_tab
[*cursor
];
871 while ((unsigned int) cursor
>= (unsigned int) p_limit
)
872 cursor
+= BM_tab
[*cursor
];
874 /* If you are here, cursor is beyond the end of the searched region. */
875 /* This can happen if you match on the far character of the pattern, */
876 /* because the "stride" of that character is infinity, a number able */
877 /* to throw you well beyond the end of the search. It can also */
878 /* happen if you fail to match within the permitted region and would */
879 /* otherwise try a character beyond that region */
880 if ((cursor
- p_limit
) * direction
<= len
)
881 break; /* a small overrun is genuine */
882 cursor
-= infinity
; /* large overrun = hit */
883 i
= dirlen
- direction
;
886 while ((i
-= direction
) + direction
!= 0)
887 if (pat
[i
] != trt
[*(cursor
-= direction
)])
892 while ((i
-= direction
) + direction
!= 0)
893 if (pat
[i
] != *(cursor
-= direction
))
896 cursor
+= dirlen
- i
- direction
; /* fix cursor */
897 if (i
+ direction
== 0)
901 set_search_regs (pos
+ cursor
- p2
+ ((direction
> 0)
905 if ((n
-= direction
) != 0)
906 cursor
+= dirlen
; /* to resume search */
908 return ((direction
> 0)
909 ? search_regs
.end
[0] : search_regs
.start
[0]);
912 cursor
+= stride_for_teases
; /* <sigh> we lose - */
917 /* Now we'll pick up a clump that has to be done the hard */
918 /* way because it covers a discontinuity */
920 limit
= ((direction
> 0)
921 ? BUFFER_CEILING_OF (pos
- dirlen
+ 1)
922 : BUFFER_FLOOR_OF (pos
- dirlen
- 1));
923 limit
= ((direction
> 0)
924 ? min (limit
+ len
, lim
- 1)
925 : max (limit
- len
, lim
));
926 /* LIMIT is now the last value POS can have
927 and still be valid for a possible match. */
930 /* This loop can be coded for space rather than */
931 /* speed because it will usually run only once. */
932 /* (the reach is at most len + 21, and typically */
933 /* does not exceed len) */
934 while ((limit
- pos
) * direction
>= 0)
935 pos
+= BM_tab
[FETCH_CHAR(pos
)];
936 /* now run the same tests to distinguish going off the */
937 /* end, a match or a phony match. */
938 if ((pos
- limit
) * direction
<= len
)
939 break; /* ran off the end */
940 /* Found what might be a match.
941 Set POS back to last (first if reverse) char pos. */
943 i
= dirlen
- direction
;
944 while ((i
-= direction
) + direction
!= 0)
947 if (pat
[i
] != (((int) trt
)
948 ? trt
[FETCH_CHAR(pos
)]
952 /* Above loop has moved POS part or all the way
953 back to the first char pos (last char pos if reverse).
954 Set it once again at the last (first if reverse) char. */
955 pos
+= dirlen
- i
- direction
;
956 if (i
+ direction
== 0)
960 set_search_regs (pos
+ ((direction
> 0) ? 1 - len
: 0),
963 if ((n
-= direction
) != 0)
964 pos
+= dirlen
; /* to resume search */
966 return ((direction
> 0)
967 ? search_regs
.end
[0] : search_regs
.start
[0]);
970 pos
+= stride_for_teases
;
973 /* We have done one clump. Can we continue? */
974 if ((lim
- pos
) * direction
< 0)
975 return ((0 - n
) * direction
);
981 /* Record beginning BEG and end BEG + LEN
982 for a match just found in the current buffer. */
985 set_search_regs (beg
, len
)
988 /* Make sure we have registers in which to store
989 the match position. */
990 if (search_regs
.num_regs
== 0)
992 regoff_t
*starts
, *ends
;
994 starts
= (regoff_t
*) xmalloc (2 * sizeof (regoff_t
));
995 ends
= (regoff_t
*) xmalloc (2 * sizeof (regoff_t
));
997 re_set_registers (&searchbuf
,
1003 search_regs
.start
[0] = beg
;
1004 search_regs
.end
[0] = beg
+ len
;
1005 XSET (last_thing_searched
, Lisp_Buffer
, current_buffer
);
1008 /* Given a string of words separated by word delimiters,
1009 compute a regexp that matches those exact words
1010 separated by arbitrary punctuation. */
1016 register unsigned char *p
, *o
;
1017 register int i
, len
, punct_count
= 0, word_count
= 0;
1020 CHECK_STRING (string
, 0);
1021 p
= XSTRING (string
)->data
;
1022 len
= XSTRING (string
)->size
;
1024 for (i
= 0; i
< len
; i
++)
1025 if (SYNTAX (p
[i
]) != Sword
)
1028 if (i
> 0 && SYNTAX (p
[i
-1]) == Sword
) word_count
++;
1030 if (SYNTAX (p
[len
-1]) == Sword
) word_count
++;
1031 if (!word_count
) return build_string ("");
1033 val
= make_string (p
, len
- punct_count
+ 5 * (word_count
- 1) + 4);
1035 o
= XSTRING (val
)->data
;
1039 for (i
= 0; i
< len
; i
++)
1040 if (SYNTAX (p
[i
]) == Sword
)
1042 else if (i
> 0 && SYNTAX (p
[i
-1]) == Sword
&& --word_count
)
1057 DEFUN ("search-backward", Fsearch_backward
, Ssearch_backward
, 1, 4,
1058 "sSearch backward: ",
1059 "Search backward from point for STRING.\n\
1060 Set point to the beginning of the occurrence found, and return point.\n\
1061 An optional second argument bounds the search; it is a buffer position.\n\
1062 The match found must not extend before that position.\n\
1063 Optional third argument, if t, means if fail just return nil (no error).\n\
1064 If not nil and not t, position at limit of search and return nil.\n\
1065 Optional fourth argument is repeat count--search for successive occurrences.\n\
1066 See also the functions `match-beginning', `match-end' and `replace-match'.")
1067 (string
, bound
, noerror
, count
)
1068 Lisp_Object string
, bound
, noerror
, count
;
1070 return search_command (string
, bound
, noerror
, count
, -1, 0);
1073 DEFUN ("search-forward", Fsearch_forward
, Ssearch_forward
, 1, 4, "sSearch: ",
1074 "Search forward from point for STRING.\n\
1075 Set point to the end of the occurrence found, and return point.\n\
1076 An optional second argument bounds the search; it is a buffer position.\n\
1077 The match found must not extend after that position. nil is equivalent\n\
1079 Optional third argument, if t, means if fail just return nil (no error).\n\
1080 If not nil and not t, move to limit of search and return nil.\n\
1081 Optional fourth argument is repeat count--search for successive occurrences.\n\
1082 See also the functions `match-beginning', `match-end' and `replace-match'.")
1083 (string
, bound
, noerror
, count
)
1084 Lisp_Object string
, bound
, noerror
, count
;
1086 return search_command (string
, bound
, noerror
, count
, 1, 0);
1089 DEFUN ("word-search-backward", Fword_search_backward
, Sword_search_backward
, 1, 4,
1090 "sWord search backward: ",
1091 "Search backward from point for STRING, ignoring differences in punctuation.\n\
1092 Set point to the beginning of the occurrence found, and return point.\n\
1093 An optional second argument bounds the search; it is a buffer position.\n\
1094 The match found must not extend before that position.\n\
1095 Optional third argument, if t, means if fail just return nil (no error).\n\
1096 If not nil and not t, move to limit of search and return nil.\n\
1097 Optional fourth argument is repeat count--search for successive occurrences.")
1098 (string
, bound
, noerror
, count
)
1099 Lisp_Object string
, bound
, noerror
, count
;
1101 return search_command (wordify (string
), bound
, noerror
, count
, -1, 1);
1104 DEFUN ("word-search-forward", Fword_search_forward
, Sword_search_forward
, 1, 4,
1106 "Search forward from point for STRING, ignoring differences in punctuation.\n\
1107 Set point to the end of the occurrence found, and return point.\n\
1108 An optional second argument bounds the search; it is a buffer position.\n\
1109 The match found must not extend after that position.\n\
1110 Optional third argument, if t, means if fail just return nil (no error).\n\
1111 If not nil and not t, move to limit of search and return nil.\n\
1112 Optional fourth argument is repeat count--search for successive occurrences.")
1113 (string
, bound
, noerror
, count
)
1114 Lisp_Object string
, bound
, noerror
, count
;
1116 return search_command (wordify (string
), bound
, noerror
, count
, 1, 1);
1119 DEFUN ("re-search-backward", Fre_search_backward
, Sre_search_backward
, 1, 4,
1120 "sRE search backward: ",
1121 "Search backward from point for match for regular expression REGEXP.\n\
1122 Set point to the beginning of the match, and return point.\n\
1123 The match found is the one starting last in the buffer\n\
1124 and yet ending before the origin of the search.\n\
1125 An optional second argument bounds the search; it is a buffer position.\n\
1126 The match found must start at or after that position.\n\
1127 Optional third argument, if t, means if fail just return nil (no error).\n\
1128 If not nil and not t, move to limit of search and return nil.\n\
1129 Optional fourth argument is repeat count--search for successive occurrences.\n\
1130 See also the functions `match-beginning', `match-end' and `replace-match'.")
1131 (regexp
, bound
, noerror
, count
)
1132 Lisp_Object regexp
, bound
, noerror
, count
;
1134 return search_command (regexp
, bound
, noerror
, count
, -1, 1);
1137 DEFUN ("re-search-forward", Fre_search_forward
, Sre_search_forward
, 1, 4,
1139 "Search forward from point for regular expression REGEXP.\n\
1140 Set point to the end of the occurrence found, and return point.\n\
1141 An optional second argument bounds the search; it is a buffer position.\n\
1142 The match found must not extend after that position.\n\
1143 Optional third argument, if t, means if fail just return nil (no error).\n\
1144 If not nil and not t, move to limit of search and return nil.\n\
1145 Optional fourth argument is repeat count--search for successive occurrences.\n\
1146 See also the functions `match-beginning', `match-end' and `replace-match'.")
1147 (regexp
, bound
, noerror
, count
)
1148 Lisp_Object regexp
, bound
, noerror
, count
;
1150 return search_command (regexp
, bound
, noerror
, count
, 1, 1);
1153 DEFUN ("replace-match", Freplace_match
, Sreplace_match
, 1, 3, 0,
1154 "Replace text matched by last search with NEWTEXT.\n\
1155 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.\n\
1156 Otherwise maybe capitalize the whole text, or maybe just word initials,\n\
1157 based on the replaced text.\n\
1158 If the replaced text has only capital letters\n\
1159 and has at least one multiletter word, convert NEWTEXT to all caps.\n\
1160 If the replaced text has at least one word starting with a capital letter,\n\
1161 then capitalize each word in NEWTEXT.\n\n\
1162 If third arg LITERAL is non-nil, insert NEWTEXT literally.\n\
1163 Otherwise treat `\\' as special:\n\
1164 `\\&' in NEWTEXT means substitute original matched text.\n\
1165 `\\N' means substitute what matched the Nth `\\(...\\)'.\n\
1166 If Nth parens didn't match, substitute nothing.\n\
1167 `\\\\' means insert one `\\'.\n\
1168 FIXEDCASE and LITERAL are optional arguments.\n\
1169 Leaves point at end of replacement text.")
1170 (newtext
, fixedcase
, literal
)
1171 Lisp_Object newtext
, fixedcase
, literal
;
1173 enum { nochange
, all_caps
, cap_initial
} case_action
;
1174 register int pos
, last
;
1175 int some_multiletter_word
;
1177 int some_lowercase_initial
;
1178 register int c
, prevc
;
1181 CHECK_STRING (newtext
, 0);
1183 case_action
= nochange
; /* We tried an initialization */
1184 /* but some C compilers blew it */
1186 if (search_regs
.num_regs
<= 0)
1187 error ("replace-match called before any match found");
1189 if (search_regs
.start
[0] < BEGV
1190 || search_regs
.start
[0] > search_regs
.end
[0]
1191 || search_regs
.end
[0] > ZV
)
1192 args_out_of_range (make_number (search_regs
.start
[0]),
1193 make_number (search_regs
.end
[0]));
1195 if (NILP (fixedcase
))
1197 /* Decide how to casify by examining the matched text. */
1199 last
= search_regs
.end
[0];
1201 case_action
= all_caps
;
1203 /* some_multiletter_word is set nonzero if any original word
1204 is more than one letter long. */
1205 some_multiletter_word
= 0;
1207 some_lowercase_initial
= 0;
1209 for (pos
= search_regs
.start
[0]; pos
< last
; pos
++)
1211 c
= FETCH_CHAR (pos
);
1214 /* Cannot be all caps if any original char is lower case */
1217 if (SYNTAX (prevc
) != Sword
)
1218 some_lowercase_initial
= 1;
1220 some_multiletter_word
= 1;
1222 else if (!NOCASEP (c
))
1224 if (SYNTAX (prevc
) != Sword
)
1227 some_multiletter_word
= 1;
1233 /* Convert to all caps if the old text is all caps
1234 and has at least one multiletter word. */
1235 if (! some_lowercase
&& some_multiletter_word
)
1236 case_action
= all_caps
;
1237 /* Capitalize each word, if the old text has all capitalized words. */
1238 else if (!some_lowercase_initial
&& some_multiletter_word
)
1239 case_action
= cap_initial
;
1241 case_action
= nochange
;
1244 /* We insert the replacement text before the old text, and then
1245 delete the original text. This means that markers at the
1246 beginning or end of the original will float to the corresponding
1247 position in the replacement. */
1248 SET_PT (search_regs
.start
[0]);
1249 if (!NILP (literal
))
1250 Finsert_and_inherit (1, &newtext
);
1253 struct gcpro gcpro1
;
1256 for (pos
= 0; pos
< XSTRING (newtext
)->size
; pos
++)
1258 int offset
= point
- search_regs
.start
[0];
1260 c
= XSTRING (newtext
)->data
[pos
];
1263 c
= XSTRING (newtext
)->data
[++pos
];
1265 Finsert_buffer_substring
1266 (Fcurrent_buffer (),
1267 make_number (search_regs
.start
[0] + offset
),
1268 make_number (search_regs
.end
[0] + offset
));
1269 else if (c
>= '1' && c
<= search_regs
.num_regs
+ '0')
1271 if (search_regs
.start
[c
- '0'] >= 1)
1272 Finsert_buffer_substring
1273 (Fcurrent_buffer (),
1274 make_number (search_regs
.start
[c
- '0'] + offset
),
1275 make_number (search_regs
.end
[c
- '0'] + offset
));
1286 inslen
= point
- (search_regs
.start
[0]);
1287 del_range (search_regs
.start
[0] + inslen
, search_regs
.end
[0] + inslen
);
1289 if (case_action
== all_caps
)
1290 Fupcase_region (make_number (point
- inslen
), make_number (point
));
1291 else if (case_action
== cap_initial
)
1292 upcase_initials_region (make_number (point
- inslen
), make_number (point
));
1297 match_limit (num
, beginningp
)
1303 CHECK_NUMBER (num
, 0);
1305 if (n
< 0 || n
>= search_regs
.num_regs
)
1306 args_out_of_range (num
, make_number (search_regs
.num_regs
));
1307 if (search_regs
.num_regs
<= 0
1308 || search_regs
.start
[n
] < 0)
1310 return (make_number ((beginningp
) ? search_regs
.start
[n
]
1311 : search_regs
.end
[n
]));
1314 DEFUN ("match-beginning", Fmatch_beginning
, Smatch_beginning
, 1, 1, 0,
1315 "Return position of start of text matched by last search.\n\
1316 NUM specifies which parenthesized expression in the last regexp.\n\
1317 Value is nil if NUMth pair didn't match, or there were less than NUM pairs.\n\
1318 Zero means the entire text matched by the whole regexp or whole string.")
1322 return match_limit (num
, 1);
1325 DEFUN ("match-end", Fmatch_end
, Smatch_end
, 1, 1, 0,
1326 "Return position of end of text matched by last search.\n\
1327 ARG, a number, specifies which parenthesized expression in the last regexp.\n\
1328 Value is nil if ARGth pair didn't match, or there were less than ARG pairs.\n\
1329 Zero means the entire text matched by the whole regexp or whole string.")
1333 return match_limit (num
, 0);
1336 DEFUN ("match-data", Fmatch_data
, Smatch_data
, 0, 0, 0,
1337 "Return a list containing all info on what the last search matched.\n\
1338 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.\n\
1339 All the elements are markers or nil (nil if the Nth pair didn't match)\n\
1340 if the last match was on a buffer; integers or nil if a string was matched.\n\
1341 Use `store-match-data' to reinstate the data in this list.")
1347 if (NILP (last_thing_searched
))
1348 error ("match-data called before any match found");
1350 data
= (Lisp_Object
*) alloca ((2 * search_regs
.num_regs
)
1351 * sizeof (Lisp_Object
));
1354 for (i
= 0; i
< search_regs
.num_regs
; i
++)
1356 int start
= search_regs
.start
[i
];
1359 if (EQ (last_thing_searched
, Qt
))
1361 XFASTINT (data
[2 * i
]) = start
;
1362 XFASTINT (data
[2 * i
+ 1]) = search_regs
.end
[i
];
1364 else if (XTYPE (last_thing_searched
) == Lisp_Buffer
)
1366 data
[2 * i
] = Fmake_marker ();
1367 Fset_marker (data
[2 * i
],
1368 make_number (start
),
1369 last_thing_searched
);
1370 data
[2 * i
+ 1] = Fmake_marker ();
1371 Fset_marker (data
[2 * i
+ 1],
1372 make_number (search_regs
.end
[i
]),
1373 last_thing_searched
);
1376 /* last_thing_searched must always be Qt, a buffer, or Qnil. */
1382 data
[2 * i
] = data
[2 * i
+ 1] = Qnil
;
1384 return Flist (2 * len
+ 2, data
);
1388 DEFUN ("store-match-data", Fstore_match_data
, Sstore_match_data
, 1, 1, 0,
1389 "Set internal data on last search match from elements of LIST.\n\
1390 LIST should have been created by calling `match-data' previously.")
1392 register Lisp_Object list
;
1395 register Lisp_Object marker
;
1397 if (!CONSP (list
) && !NILP (list
))
1398 list
= wrong_type_argument (Qconsp
, list
);
1400 /* Unless we find a marker with a buffer in LIST, assume that this
1401 match data came from a string. */
1402 last_thing_searched
= Qt
;
1404 /* Allocate registers if they don't already exist. */
1406 int length
= XFASTINT (Flength (list
)) / 2;
1408 if (length
> search_regs
.num_regs
)
1410 if (search_regs
.num_regs
== 0)
1413 = (regoff_t
*) xmalloc (length
* sizeof (regoff_t
));
1415 = (regoff_t
*) xmalloc (length
* sizeof (regoff_t
));
1420 = (regoff_t
*) xrealloc (search_regs
.start
,
1421 length
* sizeof (regoff_t
));
1423 = (regoff_t
*) xrealloc (search_regs
.end
,
1424 length
* sizeof (regoff_t
));
1428 re_set_registers (&searchbuf
, &search_regs
, length
,
1429 search_regs
.start
, search_regs
.end
);
1434 for (i
= 0; i
< search_regs
.num_regs
; i
++)
1436 marker
= Fcar (list
);
1439 search_regs
.start
[i
] = -1;
1444 if (XTYPE (marker
) == Lisp_Marker
)
1446 if (XMARKER (marker
)->buffer
== 0)
1447 XFASTINT (marker
) = 0;
1449 XSET (last_thing_searched
, Lisp_Buffer
,
1450 XMARKER (marker
)->buffer
);
1453 CHECK_NUMBER_COERCE_MARKER (marker
, 0);
1454 search_regs
.start
[i
] = XINT (marker
);
1457 marker
= Fcar (list
);
1458 if (XTYPE (marker
) == Lisp_Marker
1459 && XMARKER (marker
)->buffer
== 0)
1460 XFASTINT (marker
) = 0;
1462 CHECK_NUMBER_COERCE_MARKER (marker
, 0);
1463 search_regs
.end
[i
] = XINT (marker
);
1471 /* Quote a string to inactivate reg-expr chars */
1473 DEFUN ("regexp-quote", Fregexp_quote
, Sregexp_quote
, 1, 1, 0,
1474 "Return a regexp string which matches exactly STRING and nothing else.")
1478 register unsigned char *in
, *out
, *end
;
1479 register unsigned char *temp
;
1481 CHECK_STRING (str
, 0);
1483 temp
= (unsigned char *) alloca (XSTRING (str
)->size
* 2);
1485 /* Now copy the data into the new string, inserting escapes. */
1487 in
= XSTRING (str
)->data
;
1488 end
= in
+ XSTRING (str
)->size
;
1491 for (; in
!= end
; in
++)
1493 if (*in
== '[' || *in
== ']'
1494 || *in
== '*' || *in
== '.' || *in
== '\\'
1495 || *in
== '?' || *in
== '+'
1496 || *in
== '^' || *in
== '$')
1501 return make_string (temp
, out
- temp
);
1508 searchbuf
.allocated
= 100;
1509 searchbuf
.buffer
= (unsigned char *) malloc (searchbuf
.allocated
);
1510 searchbuf
.fastmap
= search_fastmap
;
1512 Qsearch_failed
= intern ("search-failed");
1513 staticpro (&Qsearch_failed
);
1514 Qinvalid_regexp
= intern ("invalid-regexp");
1515 staticpro (&Qinvalid_regexp
);
1517 Fput (Qsearch_failed
, Qerror_conditions
,
1518 Fcons (Qsearch_failed
, Fcons (Qerror
, Qnil
)));
1519 Fput (Qsearch_failed
, Qerror_message
,
1520 build_string ("Search failed"));
1522 Fput (Qinvalid_regexp
, Qerror_conditions
,
1523 Fcons (Qinvalid_regexp
, Fcons (Qerror
, Qnil
)));
1524 Fput (Qinvalid_regexp
, Qerror_message
,
1525 build_string ("Invalid regexp"));
1528 staticpro (&last_regexp
);
1530 last_thing_searched
= Qnil
;
1531 staticpro (&last_thing_searched
);
1533 defsubr (&Sstring_match
);
1534 defsubr (&Slooking_at
);
1535 defsubr (&Sskip_chars_forward
);
1536 defsubr (&Sskip_chars_backward
);
1537 defsubr (&Sskip_syntax_forward
);
1538 defsubr (&Sskip_syntax_backward
);
1539 defsubr (&Ssearch_forward
);
1540 defsubr (&Ssearch_backward
);
1541 defsubr (&Sword_search_forward
);
1542 defsubr (&Sword_search_backward
);
1543 defsubr (&Sre_search_forward
);
1544 defsubr (&Sre_search_backward
);
1545 defsubr (&Sreplace_match
);
1546 defsubr (&Smatch_beginning
);
1547 defsubr (&Smatch_end
);
1548 defsubr (&Smatch_data
);
1549 defsubr (&Sstore_match_data
);
1550 defsubr (&Sregexp_quote
);