(FORWARD_SIGNAL_TO_MAIN_THREAD): New define.
[emacs.git] / src / search.c
blob456d3d46a73ca1acff106d0678d4a211959c1519
1 /* String search routines for GNU Emacs.
2 Copyright (C) 1985, 1986, 1987, 1993, 1994, 1997, 1998, 1999, 2001, 2002,
3 2003, 2004, 2005, 2006, 2007, 2008
4 Free Software Foundation, Inc.
6 This file is part of GNU Emacs.
8 GNU Emacs is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
13 GNU Emacs is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GNU Emacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
21 Boston, MA 02110-1301, USA. */
24 #include <config.h>
25 #include "lisp.h"
26 #include "syntax.h"
27 #include "category.h"
28 #include "buffer.h"
29 #include "charset.h"
30 #include "region-cache.h"
31 #include "commands.h"
32 #include "blockinput.h"
33 #include "intervals.h"
35 #include <sys/types.h>
36 #include "regex.h"
38 #define REGEXP_CACHE_SIZE 20
40 /* If the regexp is non-nil, then the buffer contains the compiled form
41 of that regexp, suitable for searching. */
42 struct regexp_cache
44 struct regexp_cache *next;
45 Lisp_Object regexp, whitespace_regexp;
46 /* Syntax table for which the regexp applies. We need this because
47 of character classes. If this is t, then the compiled pattern is valid
48 for any syntax-table. */
49 Lisp_Object syntax_table;
50 struct re_pattern_buffer buf;
51 char fastmap[0400];
52 /* Nonzero means regexp was compiled to do full POSIX backtracking. */
53 char posix;
56 /* The instances of that struct. */
57 struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
59 /* The head of the linked list; points to the most recently used buffer. */
60 struct regexp_cache *searchbuf_head;
63 /* Every call to re_match, etc., must pass &search_regs as the regs
64 argument unless you can show it is unnecessary (i.e., if re_match
65 is certainly going to be called again before region-around-match
66 can be called).
68 Since the registers are now dynamically allocated, we need to make
69 sure not to refer to the Nth register before checking that it has
70 been allocated by checking search_regs.num_regs.
72 The regex code keeps track of whether it has allocated the search
73 buffer using bits in the re_pattern_buffer. This means that whenever
74 you compile a new pattern, it completely forgets whether it has
75 allocated any registers, and will allocate new registers the next
76 time you call a searching or matching function. Therefore, we need
77 to call re_set_registers after compiling a new pattern or after
78 setting the match registers, so that the regex functions will be
79 able to free or re-allocate it properly. */
80 static struct re_registers search_regs;
82 /* The buffer in which the last search was performed, or
83 Qt if the last search was done in a string;
84 Qnil if no searching has been done yet. */
85 static Lisp_Object last_thing_searched;
87 /* error condition signaled when regexp compile_pattern fails */
89 Lisp_Object Qinvalid_regexp;
91 /* Error condition used for failing searches */
92 Lisp_Object Qsearch_failed;
94 Lisp_Object Vsearch_spaces_regexp;
96 static void set_search_regs ();
97 static void save_search_regs ();
98 static int simple_search ();
99 static int boyer_moore ();
100 static int search_buffer ();
101 static void matcher_overflow () NO_RETURN;
103 static void
104 matcher_overflow ()
106 error ("Stack overflow in regexp matcher");
109 /* Compile a regexp and signal a Lisp error if anything goes wrong.
110 PATTERN is the pattern to compile.
111 CP is the place to put the result.
112 TRANSLATE is a translation table for ignoring case, or nil for none.
113 REGP is the structure that says where to store the "register"
114 values that will result from matching this pattern.
115 If it is 0, we should compile the pattern not to record any
116 subexpression bounds.
117 POSIX is nonzero if we want full backtracking (POSIX style)
118 for this pattern. 0 means backtrack only enough to get a valid match.
119 MULTIBYTE is nonzero if we want to handle multibyte characters in
120 PATTERN. 0 means all multibyte characters are recognized just as
121 sequences of binary data.
123 The behavior also depends on Vsearch_spaces_regexp. */
125 static void
126 compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte)
127 struct regexp_cache *cp;
128 Lisp_Object pattern;
129 Lisp_Object translate;
130 struct re_registers *regp;
131 int posix;
132 int multibyte;
134 unsigned char *raw_pattern;
135 int raw_pattern_size;
136 char *val;
137 reg_syntax_t old;
139 /* MULTIBYTE says whether the text to be searched is multibyte.
140 We must convert PATTERN to match that, or we will not really
141 find things right. */
143 if (multibyte == STRING_MULTIBYTE (pattern))
145 raw_pattern = (unsigned char *) SDATA (pattern);
146 raw_pattern_size = SBYTES (pattern);
148 else if (multibyte)
150 raw_pattern_size = count_size_as_multibyte (SDATA (pattern),
151 SCHARS (pattern));
152 raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
153 copy_text (SDATA (pattern), raw_pattern,
154 SCHARS (pattern), 0, 1);
156 else
158 /* Converting multibyte to single-byte.
160 ??? Perhaps this conversion should be done in a special way
161 by subtracting nonascii-insert-offset from each non-ASCII char,
162 so that only the multibyte chars which really correspond to
163 the chosen single-byte character set can possibly match. */
164 raw_pattern_size = SCHARS (pattern);
165 raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
166 copy_text (SDATA (pattern), raw_pattern,
167 SBYTES (pattern), 1, 0);
170 cp->regexp = Qnil;
171 cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
172 cp->posix = posix;
173 cp->buf.multibyte = multibyte;
174 if (STRINGP (Vsearch_spaces_regexp))
175 cp->whitespace_regexp = Vsearch_spaces_regexp;
176 else
177 cp->whitespace_regexp = Qnil;
179 /* rms: I think BLOCK_INPUT is not needed here any more,
180 because regex.c defines malloc to call xmalloc.
181 Using BLOCK_INPUT here means the debugger won't run if an error occurs.
182 So let's turn it off. */
183 /* BLOCK_INPUT; */
184 old = re_set_syntax (RE_SYNTAX_EMACS
185 | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
187 if (STRINGP (Vsearch_spaces_regexp))
188 re_set_whitespace_regexp (SDATA (Vsearch_spaces_regexp));
189 else
190 re_set_whitespace_regexp (NULL);
192 val = (char *) re_compile_pattern ((char *)raw_pattern,
193 raw_pattern_size, &cp->buf);
195 /* If the compiled pattern hard codes some of the contents of the
196 syntax-table, it can only be reused with *this* syntax table. */
197 cp->syntax_table = cp->buf.used_syntax ? current_buffer->syntax_table : Qt;
199 re_set_whitespace_regexp (NULL);
201 re_set_syntax (old);
202 /* UNBLOCK_INPUT; */
203 if (val)
204 xsignal1 (Qinvalid_regexp, build_string (val));
206 cp->regexp = Fcopy_sequence (pattern);
209 /* Shrink each compiled regexp buffer in the cache
210 to the size actually used right now.
211 This is called from garbage collection. */
213 void
214 shrink_regexp_cache ()
216 struct regexp_cache *cp;
218 for (cp = searchbuf_head; cp != 0; cp = cp->next)
220 cp->buf.allocated = cp->buf.used;
221 cp->buf.buffer
222 = (unsigned char *) xrealloc (cp->buf.buffer, cp->buf.used);
226 /* Clear the regexp cache w.r.t. a particular syntax table,
227 because it was changed.
228 There is no danger of memory leak here because re_compile_pattern
229 automagically manages the memory in each re_pattern_buffer struct,
230 based on its `allocated' and `buffer' values. */
231 void
232 clear_regexp_cache ()
234 int i;
236 for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
237 /* It's tempting to compare with the syntax-table we've actually changd,
238 but it's not sufficient because char-table inheritance mewans that
239 modifying one syntax-table can change others at the same time. */
240 if (!EQ (searchbufs[i].syntax_table, Qt))
241 searchbufs[i].regexp = Qnil;
244 /* Compile a regexp if necessary, but first check to see if there's one in
245 the cache.
246 PATTERN is the pattern to compile.
247 TRANSLATE is a translation table for ignoring case, or nil for none.
248 REGP is the structure that says where to store the "register"
249 values that will result from matching this pattern.
250 If it is 0, we should compile the pattern not to record any
251 subexpression bounds.
252 POSIX is nonzero if we want full backtracking (POSIX style)
253 for this pattern. 0 means backtrack only enough to get a valid match. */
255 struct re_pattern_buffer *
256 compile_pattern (pattern, regp, translate, posix, multibyte)
257 Lisp_Object pattern;
258 struct re_registers *regp;
259 Lisp_Object translate;
260 int posix, multibyte;
262 struct regexp_cache *cp, **cpp;
264 for (cpp = &searchbuf_head; ; cpp = &cp->next)
266 cp = *cpp;
267 /* Entries are initialized to nil, and may be set to nil by
268 compile_pattern_1 if the pattern isn't valid. Don't apply
269 string accessors in those cases. However, compile_pattern_1
270 is only applied to the cache entry we pick here to reuse. So
271 nil should never appear before a non-nil entry. */
272 if (NILP (cp->regexp))
273 goto compile_it;
274 if (SCHARS (cp->regexp) == SCHARS (pattern)
275 && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
276 && !NILP (Fstring_equal (cp->regexp, pattern))
277 && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
278 && cp->posix == posix
279 && cp->buf.multibyte == multibyte
280 && (EQ (cp->syntax_table, Qt)
281 || EQ (cp->syntax_table, current_buffer->syntax_table))
282 && !NILP (Fequal (cp->whitespace_regexp, Vsearch_spaces_regexp)))
283 break;
285 /* If we're at the end of the cache, compile into the nil cell
286 we found, or the last (least recently used) cell with a
287 string value. */
288 if (cp->next == 0)
290 compile_it:
291 compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte);
292 break;
296 /* When we get here, cp (aka *cpp) contains the compiled pattern,
297 either because we found it in the cache or because we just compiled it.
298 Move it to the front of the queue to mark it as most recently used. */
299 *cpp = cp->next;
300 cp->next = searchbuf_head;
301 searchbuf_head = cp;
303 /* Advise the searching functions about the space we have allocated
304 for register data. */
305 if (regp)
306 re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
308 return &cp->buf;
312 static Lisp_Object
313 looking_at_1 (string, posix)
314 Lisp_Object string;
315 int posix;
317 Lisp_Object val;
318 unsigned char *p1, *p2;
319 int s1, s2;
320 register int i;
321 struct re_pattern_buffer *bufp;
323 if (running_asynch_code)
324 save_search_regs ();
326 /* This is so set_image_of_range_1 in regex.c can find the EQV table. */
327 XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
328 = current_buffer->case_eqv_table;
330 CHECK_STRING (string);
331 bufp = compile_pattern (string, &search_regs,
332 (!NILP (current_buffer->case_fold_search)
333 ? current_buffer->case_canon_table : Qnil),
334 posix,
335 !NILP (current_buffer->enable_multibyte_characters));
337 immediate_quit = 1;
338 QUIT; /* Do a pending quit right away, to avoid paradoxical behavior */
340 /* Get pointers and sizes of the two strings
341 that make up the visible portion of the buffer. */
343 p1 = BEGV_ADDR;
344 s1 = GPT_BYTE - BEGV_BYTE;
345 p2 = GAP_END_ADDR;
346 s2 = ZV_BYTE - GPT_BYTE;
347 if (s1 < 0)
349 p2 = p1;
350 s2 = ZV_BYTE - BEGV_BYTE;
351 s1 = 0;
353 if (s2 < 0)
355 s1 = ZV_BYTE - BEGV_BYTE;
356 s2 = 0;
359 re_match_object = Qnil;
361 i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
362 PT_BYTE - BEGV_BYTE, &search_regs,
363 ZV_BYTE - BEGV_BYTE);
364 immediate_quit = 0;
366 if (i == -2)
367 matcher_overflow ();
369 val = (0 <= i ? Qt : Qnil);
370 if (i >= 0)
371 for (i = 0; i < search_regs.num_regs; i++)
372 if (search_regs.start[i] >= 0)
374 search_regs.start[i]
375 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
376 search_regs.end[i]
377 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
379 XSETBUFFER (last_thing_searched, current_buffer);
380 return val;
383 DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
384 doc: /* Return t if text after point matches regular expression REGEXP.
385 This function modifies the match data that `match-beginning',
386 `match-end' and `match-data' access; save and restore the match
387 data if you want to preserve them. */)
388 (regexp)
389 Lisp_Object regexp;
391 return looking_at_1 (regexp, 0);
394 DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
395 doc: /* Return t if text after point matches regular expression REGEXP.
396 Find the longest match, in accord with Posix regular expression rules.
397 This function modifies the match data that `match-beginning',
398 `match-end' and `match-data' access; save and restore the match
399 data if you want to preserve them. */)
400 (regexp)
401 Lisp_Object regexp;
403 return looking_at_1 (regexp, 1);
406 static Lisp_Object
407 string_match_1 (regexp, string, start, posix)
408 Lisp_Object regexp, string, start;
409 int posix;
411 int val;
412 struct re_pattern_buffer *bufp;
413 int pos, pos_byte;
414 int i;
416 if (running_asynch_code)
417 save_search_regs ();
419 CHECK_STRING (regexp);
420 CHECK_STRING (string);
422 if (NILP (start))
423 pos = 0, pos_byte = 0;
424 else
426 int len = SCHARS (string);
428 CHECK_NUMBER (start);
429 pos = XINT (start);
430 if (pos < 0 && -pos <= len)
431 pos = len + pos;
432 else if (0 > pos || pos > len)
433 args_out_of_range (string, start);
434 pos_byte = string_char_to_byte (string, pos);
437 /* This is so set_image_of_range_1 in regex.c can find the EQV table. */
438 XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
439 = current_buffer->case_eqv_table;
441 bufp = compile_pattern (regexp, &search_regs,
442 (!NILP (current_buffer->case_fold_search)
443 ? current_buffer->case_canon_table : Qnil),
444 posix,
445 STRING_MULTIBYTE (string));
446 immediate_quit = 1;
447 re_match_object = string;
449 val = re_search (bufp, (char *) SDATA (string),
450 SBYTES (string), pos_byte,
451 SBYTES (string) - pos_byte,
452 &search_regs);
453 immediate_quit = 0;
454 last_thing_searched = Qt;
455 if (val == -2)
456 matcher_overflow ();
457 if (val < 0) return Qnil;
459 for (i = 0; i < search_regs.num_regs; i++)
460 if (search_regs.start[i] >= 0)
462 search_regs.start[i]
463 = string_byte_to_char (string, search_regs.start[i]);
464 search_regs.end[i]
465 = string_byte_to_char (string, search_regs.end[i]);
468 return make_number (string_byte_to_char (string, val));
471 DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
472 doc: /* Return index of start of first match for REGEXP in STRING, or nil.
473 Matching ignores case if `case-fold-search' is non-nil.
474 If third arg START is non-nil, start search at that index in STRING.
475 For index of first char beyond the match, do (match-end 0).
476 `match-end' and `match-beginning' also give indices of substrings
477 matched by parenthesis constructs in the pattern.
479 You can use the function `match-string' to extract the substrings
480 matched by the parenthesis constructions in REGEXP. */)
481 (regexp, string, start)
482 Lisp_Object regexp, string, start;
484 return string_match_1 (regexp, string, start, 0);
487 DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
488 doc: /* Return index of start of first match for REGEXP in STRING, or nil.
489 Find the longest match, in accord with Posix regular expression rules.
490 Case is ignored if `case-fold-search' is non-nil in the current buffer.
491 If third arg START is non-nil, start search at that index in STRING.
492 For index of first char beyond the match, do (match-end 0).
493 `match-end' and `match-beginning' also give indices of substrings
494 matched by parenthesis constructs in the pattern. */)
495 (regexp, string, start)
496 Lisp_Object regexp, string, start;
498 return string_match_1 (regexp, string, start, 1);
501 /* Match REGEXP against STRING, searching all of STRING,
502 and return the index of the match, or negative on failure.
503 This does not clobber the match data. */
506 fast_string_match (regexp, string)
507 Lisp_Object regexp, string;
509 int val;
510 struct re_pattern_buffer *bufp;
512 bufp = compile_pattern (regexp, 0, Qnil,
513 0, STRING_MULTIBYTE (string));
514 immediate_quit = 1;
515 re_match_object = string;
517 val = re_search (bufp, (char *) SDATA (string),
518 SBYTES (string), 0,
519 SBYTES (string), 0);
520 immediate_quit = 0;
521 return val;
524 /* Match REGEXP against STRING, searching all of STRING ignoring case,
525 and return the index of the match, or negative on failure.
526 This does not clobber the match data.
527 We assume that STRING contains single-byte characters. */
529 extern Lisp_Object Vascii_downcase_table;
532 fast_c_string_match_ignore_case (regexp, string)
533 Lisp_Object regexp;
534 const char *string;
536 int val;
537 struct re_pattern_buffer *bufp;
538 int len = strlen (string);
540 regexp = string_make_unibyte (regexp);
541 re_match_object = Qt;
542 bufp = compile_pattern (regexp, 0,
543 Vascii_canon_table, 0,
545 immediate_quit = 1;
546 val = re_search (bufp, string, len, 0, len, 0);
547 immediate_quit = 0;
548 return val;
551 /* Like fast_string_match but ignore case. */
554 fast_string_match_ignore_case (regexp, string)
555 Lisp_Object regexp, string;
557 int val;
558 struct re_pattern_buffer *bufp;
560 bufp = compile_pattern (regexp, 0, Vascii_canon_table,
561 0, STRING_MULTIBYTE (string));
562 immediate_quit = 1;
563 re_match_object = string;
565 val = re_search (bufp, (char *) SDATA (string),
566 SBYTES (string), 0,
567 SBYTES (string), 0);
568 immediate_quit = 0;
569 return val;
572 /* The newline cache: remembering which sections of text have no newlines. */
574 /* If the user has requested newline caching, make sure it's on.
575 Otherwise, make sure it's off.
576 This is our cheezy way of associating an action with the change of
577 state of a buffer-local variable. */
578 static void
579 newline_cache_on_off (buf)
580 struct buffer *buf;
582 if (NILP (buf->cache_long_line_scans))
584 /* It should be off. */
585 if (buf->newline_cache)
587 free_region_cache (buf->newline_cache);
588 buf->newline_cache = 0;
591 else
593 /* It should be on. */
594 if (buf->newline_cache == 0)
595 buf->newline_cache = new_region_cache ();
600 /* Search for COUNT instances of the character TARGET between START and END.
602 If COUNT is positive, search forwards; END must be >= START.
603 If COUNT is negative, search backwards for the -COUNTth instance;
604 END must be <= START.
605 If COUNT is zero, do anything you please; run rogue, for all I care.
607 If END is zero, use BEGV or ZV instead, as appropriate for the
608 direction indicated by COUNT.
610 If we find COUNT instances, set *SHORTAGE to zero, and return the
611 position past the COUNTth match. Note that for reverse motion
612 this is not the same as the usual convention for Emacs motion commands.
614 If we don't find COUNT instances before reaching END, set *SHORTAGE
615 to the number of TARGETs left unfound, and return END.
617 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
618 except when inside redisplay. */
621 scan_buffer (target, start, end, count, shortage, allow_quit)
622 register int target;
623 int start, end;
624 int count;
625 int *shortage;
626 int allow_quit;
628 struct region_cache *newline_cache;
629 int direction;
631 if (count > 0)
633 direction = 1;
634 if (! end) end = ZV;
636 else
638 direction = -1;
639 if (! end) end = BEGV;
642 newline_cache_on_off (current_buffer);
643 newline_cache = current_buffer->newline_cache;
645 if (shortage != 0)
646 *shortage = 0;
648 immediate_quit = allow_quit;
650 if (count > 0)
651 while (start != end)
653 /* Our innermost scanning loop is very simple; it doesn't know
654 about gaps, buffer ends, or the newline cache. ceiling is
655 the position of the last character before the next such
656 obstacle --- the last character the dumb search loop should
657 examine. */
658 int ceiling_byte = CHAR_TO_BYTE (end) - 1;
659 int start_byte = CHAR_TO_BYTE (start);
660 int tem;
662 /* If we're looking for a newline, consult the newline cache
663 to see where we can avoid some scanning. */
664 if (target == '\n' && newline_cache)
666 int next_change;
667 immediate_quit = 0;
668 while (region_cache_forward
669 (current_buffer, newline_cache, start_byte, &next_change))
670 start_byte = next_change;
671 immediate_quit = allow_quit;
673 /* START should never be after END. */
674 if (start_byte > ceiling_byte)
675 start_byte = ceiling_byte;
677 /* Now the text after start is an unknown region, and
678 next_change is the position of the next known region. */
679 ceiling_byte = min (next_change - 1, ceiling_byte);
682 /* The dumb loop can only scan text stored in contiguous
683 bytes. BUFFER_CEILING_OF returns the last character
684 position that is contiguous, so the ceiling is the
685 position after that. */
686 tem = BUFFER_CEILING_OF (start_byte);
687 ceiling_byte = min (tem, ceiling_byte);
690 /* The termination address of the dumb loop. */
691 register unsigned char *ceiling_addr
692 = BYTE_POS_ADDR (ceiling_byte) + 1;
693 register unsigned char *cursor
694 = BYTE_POS_ADDR (start_byte);
695 unsigned char *base = cursor;
697 while (cursor < ceiling_addr)
699 unsigned char *scan_start = cursor;
701 /* The dumb loop. */
702 while (*cursor != target && ++cursor < ceiling_addr)
705 /* If we're looking for newlines, cache the fact that
706 the region from start to cursor is free of them. */
707 if (target == '\n' && newline_cache)
708 know_region_cache (current_buffer, newline_cache,
709 start_byte + scan_start - base,
710 start_byte + cursor - base);
712 /* Did we find the target character? */
713 if (cursor < ceiling_addr)
715 if (--count == 0)
717 immediate_quit = 0;
718 return BYTE_TO_CHAR (start_byte + cursor - base + 1);
720 cursor++;
724 start = BYTE_TO_CHAR (start_byte + cursor - base);
727 else
728 while (start > end)
730 /* The last character to check before the next obstacle. */
731 int ceiling_byte = CHAR_TO_BYTE (end);
732 int start_byte = CHAR_TO_BYTE (start);
733 int tem;
735 /* Consult the newline cache, if appropriate. */
736 if (target == '\n' && newline_cache)
738 int next_change;
739 immediate_quit = 0;
740 while (region_cache_backward
741 (current_buffer, newline_cache, start_byte, &next_change))
742 start_byte = next_change;
743 immediate_quit = allow_quit;
745 /* Start should never be at or before end. */
746 if (start_byte <= ceiling_byte)
747 start_byte = ceiling_byte + 1;
749 /* Now the text before start is an unknown region, and
750 next_change is the position of the next known region. */
751 ceiling_byte = max (next_change, ceiling_byte);
754 /* Stop scanning before the gap. */
755 tem = BUFFER_FLOOR_OF (start_byte - 1);
756 ceiling_byte = max (tem, ceiling_byte);
759 /* The termination address of the dumb loop. */
760 register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
761 register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
762 unsigned char *base = cursor;
764 while (cursor >= ceiling_addr)
766 unsigned char *scan_start = cursor;
768 while (*cursor != target && --cursor >= ceiling_addr)
771 /* If we're looking for newlines, cache the fact that
772 the region from after the cursor to start is free of them. */
773 if (target == '\n' && newline_cache)
774 know_region_cache (current_buffer, newline_cache,
775 start_byte + cursor - base,
776 start_byte + scan_start - base);
778 /* Did we find the target character? */
779 if (cursor >= ceiling_addr)
781 if (++count >= 0)
783 immediate_quit = 0;
784 return BYTE_TO_CHAR (start_byte + cursor - base);
786 cursor--;
790 start = BYTE_TO_CHAR (start_byte + cursor - base);
794 immediate_quit = 0;
795 if (shortage != 0)
796 *shortage = count * direction;
797 return start;
800 /* Search for COUNT instances of a line boundary, which means either a
801 newline or (if selective display enabled) a carriage return.
802 Start at START. If COUNT is negative, search backwards.
804 We report the resulting position by calling TEMP_SET_PT_BOTH.
806 If we find COUNT instances. we position after (always after,
807 even if scanning backwards) the COUNTth match, and return 0.
809 If we don't find COUNT instances before reaching the end of the
810 buffer (or the beginning, if scanning backwards), we return
811 the number of line boundaries left unfound, and position at
812 the limit we bumped up against.
814 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
815 except in special cases. */
818 scan_newline (start, start_byte, limit, limit_byte, count, allow_quit)
819 int start, start_byte;
820 int limit, limit_byte;
821 register int count;
822 int allow_quit;
824 int direction = ((count > 0) ? 1 : -1);
826 register unsigned char *cursor;
827 unsigned char *base;
829 register int ceiling;
830 register unsigned char *ceiling_addr;
832 int old_immediate_quit = immediate_quit;
834 /* The code that follows is like scan_buffer
835 but checks for either newline or carriage return. */
837 if (allow_quit)
838 immediate_quit++;
840 start_byte = CHAR_TO_BYTE (start);
842 if (count > 0)
844 while (start_byte < limit_byte)
846 ceiling = BUFFER_CEILING_OF (start_byte);
847 ceiling = min (limit_byte - 1, ceiling);
848 ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
849 base = (cursor = BYTE_POS_ADDR (start_byte));
850 while (1)
852 while (*cursor != '\n' && ++cursor != ceiling_addr)
855 if (cursor != ceiling_addr)
857 if (--count == 0)
859 immediate_quit = old_immediate_quit;
860 start_byte = start_byte + cursor - base + 1;
861 start = BYTE_TO_CHAR (start_byte);
862 TEMP_SET_PT_BOTH (start, start_byte);
863 return 0;
865 else
866 if (++cursor == ceiling_addr)
867 break;
869 else
870 break;
872 start_byte += cursor - base;
875 else
877 while (start_byte > limit_byte)
879 ceiling = BUFFER_FLOOR_OF (start_byte - 1);
880 ceiling = max (limit_byte, ceiling);
881 ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
882 base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
883 while (1)
885 while (--cursor != ceiling_addr && *cursor != '\n')
888 if (cursor != ceiling_addr)
890 if (++count == 0)
892 immediate_quit = old_immediate_quit;
893 /* Return the position AFTER the match we found. */
894 start_byte = start_byte + cursor - base + 1;
895 start = BYTE_TO_CHAR (start_byte);
896 TEMP_SET_PT_BOTH (start, start_byte);
897 return 0;
900 else
901 break;
903 /* Here we add 1 to compensate for the last decrement
904 of CURSOR, which took it past the valid range. */
905 start_byte += cursor - base + 1;
909 TEMP_SET_PT_BOTH (limit, limit_byte);
910 immediate_quit = old_immediate_quit;
912 return count * direction;
916 find_next_newline_no_quit (from, cnt)
917 register int from, cnt;
919 return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0);
922 /* Like find_next_newline, but returns position before the newline,
923 not after, and only search up to TO. This isn't just
924 find_next_newline (...)-1, because you might hit TO. */
927 find_before_next_newline (from, to, cnt)
928 int from, to, cnt;
930 int shortage;
931 int pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);
933 if (shortage == 0)
934 pos--;
936 return pos;
939 /* Subroutines of Lisp buffer search functions. */
941 static Lisp_Object
942 search_command (string, bound, noerror, count, direction, RE, posix)
943 Lisp_Object string, bound, noerror, count;
944 int direction;
945 int RE;
946 int posix;
948 register int np;
949 int lim, lim_byte;
950 int n = direction;
952 if (!NILP (count))
954 CHECK_NUMBER (count);
955 n *= XINT (count);
958 CHECK_STRING (string);
959 if (NILP (bound))
961 if (n > 0)
962 lim = ZV, lim_byte = ZV_BYTE;
963 else
964 lim = BEGV, lim_byte = BEGV_BYTE;
966 else
968 CHECK_NUMBER_COERCE_MARKER (bound);
969 lim = XINT (bound);
970 if (n > 0 ? lim < PT : lim > PT)
971 error ("Invalid search bound (wrong side of point)");
972 if (lim > ZV)
973 lim = ZV, lim_byte = ZV_BYTE;
974 else if (lim < BEGV)
975 lim = BEGV, lim_byte = BEGV_BYTE;
976 else
977 lim_byte = CHAR_TO_BYTE (lim);
980 /* This is so set_image_of_range_1 in regex.c can find the EQV table. */
981 XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
982 = current_buffer->case_eqv_table;
984 np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
985 (!NILP (current_buffer->case_fold_search)
986 ? current_buffer->case_canon_table
987 : Qnil),
988 (!NILP (current_buffer->case_fold_search)
989 ? current_buffer->case_eqv_table
990 : Qnil),
991 posix);
992 if (np <= 0)
994 if (NILP (noerror))
995 xsignal1 (Qsearch_failed, string);
997 if (!EQ (noerror, Qt))
999 if (lim < BEGV || lim > ZV)
1000 abort ();
1001 SET_PT_BOTH (lim, lim_byte);
1002 return Qnil;
1003 #if 0 /* This would be clean, but maybe programs depend on
1004 a value of nil here. */
1005 np = lim;
1006 #endif
1008 else
1009 return Qnil;
1012 if (np < BEGV || np > ZV)
1013 abort ();
1015 SET_PT (np);
1017 return make_number (np);
1020 /* Return 1 if REGEXP it matches just one constant string. */
1022 static int
1023 trivial_regexp_p (regexp)
1024 Lisp_Object regexp;
1026 int len = SBYTES (regexp);
1027 unsigned char *s = SDATA (regexp);
1028 while (--len >= 0)
1030 switch (*s++)
1032 case '.': case '*': case '+': case '?': case '[': case '^': case '$':
1033 return 0;
1034 case '\\':
1035 if (--len < 0)
1036 return 0;
1037 switch (*s++)
1039 case '|': case '(': case ')': case '`': case '\'': case 'b':
1040 case 'B': case '<': case '>': case 'w': case 'W': case 's':
1041 case 'S': case '=': case '{': case '}': case '_':
1042 case 'c': case 'C': /* for categoryspec and notcategoryspec */
1043 case '1': case '2': case '3': case '4': case '5':
1044 case '6': case '7': case '8': case '9':
1045 return 0;
1049 return 1;
1052 /* Search for the n'th occurrence of STRING in the current buffer,
1053 starting at position POS and stopping at position LIM,
1054 treating STRING as a literal string if RE is false or as
1055 a regular expression if RE is true.
1057 If N is positive, searching is forward and LIM must be greater than POS.
1058 If N is negative, searching is backward and LIM must be less than POS.
1060 Returns -x if x occurrences remain to be found (x > 0),
1061 or else the position at the beginning of the Nth occurrence
1062 (if searching backward) or the end (if searching forward).
1064 POSIX is nonzero if we want full backtracking (POSIX style)
1065 for this pattern. 0 means backtrack only enough to get a valid match. */
1067 #define TRANSLATE(out, trt, d) \
1068 do \
1070 if (! NILP (trt)) \
1072 Lisp_Object temp; \
1073 temp = Faref (trt, make_number (d)); \
1074 if (INTEGERP (temp)) \
1075 out = XINT (temp); \
1076 else \
1077 out = d; \
1079 else \
1080 out = d; \
1082 while (0)
1084 static int
1085 search_buffer (string, pos, pos_byte, lim, lim_byte, n,
1086 RE, trt, inverse_trt, posix)
1087 Lisp_Object string;
1088 int pos;
1089 int pos_byte;
1090 int lim;
1091 int lim_byte;
1092 int n;
1093 int RE;
1094 Lisp_Object trt;
1095 Lisp_Object inverse_trt;
1096 int posix;
1098 int len = SCHARS (string);
1099 int len_byte = SBYTES (string);
1100 register int i;
1102 if (running_asynch_code)
1103 save_search_regs ();
1105 /* Searching 0 times means don't move. */
1106 /* Null string is found at starting position. */
1107 if (len == 0 || n == 0)
1109 set_search_regs (pos_byte, 0);
1110 return pos;
1113 if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp)))
1115 unsigned char *p1, *p2;
1116 int s1, s2;
1117 struct re_pattern_buffer *bufp;
1119 bufp = compile_pattern (string, &search_regs, trt, posix,
1120 !NILP (current_buffer->enable_multibyte_characters));
1122 immediate_quit = 1; /* Quit immediately if user types ^G,
1123 because letting this function finish
1124 can take too long. */
1125 QUIT; /* Do a pending quit right away,
1126 to avoid paradoxical behavior */
1127 /* Get pointers and sizes of the two strings
1128 that make up the visible portion of the buffer. */
1130 p1 = BEGV_ADDR;
1131 s1 = GPT_BYTE - BEGV_BYTE;
1132 p2 = GAP_END_ADDR;
1133 s2 = ZV_BYTE - GPT_BYTE;
1134 if (s1 < 0)
1136 p2 = p1;
1137 s2 = ZV_BYTE - BEGV_BYTE;
1138 s1 = 0;
1140 if (s2 < 0)
1142 s1 = ZV_BYTE - BEGV_BYTE;
1143 s2 = 0;
1145 re_match_object = Qnil;
1147 while (n < 0)
1149 int val;
1150 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1151 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1152 &search_regs,
1153 /* Don't allow match past current point */
1154 pos_byte - BEGV_BYTE);
1155 if (val == -2)
1157 matcher_overflow ();
1159 if (val >= 0)
1161 pos_byte = search_regs.start[0] + BEGV_BYTE;
1162 for (i = 0; i < search_regs.num_regs; i++)
1163 if (search_regs.start[i] >= 0)
1165 search_regs.start[i]
1166 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1167 search_regs.end[i]
1168 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1170 XSETBUFFER (last_thing_searched, current_buffer);
1171 /* Set pos to the new position. */
1172 pos = search_regs.start[0];
1174 else
1176 immediate_quit = 0;
1177 return (n);
1179 n++;
1181 while (n > 0)
1183 int val;
1184 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1185 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1186 &search_regs,
1187 lim_byte - BEGV_BYTE);
1188 if (val == -2)
1190 matcher_overflow ();
1192 if (val >= 0)
1194 pos_byte = search_regs.end[0] + BEGV_BYTE;
1195 for (i = 0; i < search_regs.num_regs; i++)
1196 if (search_regs.start[i] >= 0)
1198 search_regs.start[i]
1199 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1200 search_regs.end[i]
1201 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1203 XSETBUFFER (last_thing_searched, current_buffer);
1204 pos = search_regs.end[0];
1206 else
1208 immediate_quit = 0;
1209 return (0 - n);
1211 n--;
1213 immediate_quit = 0;
1214 return (pos);
1216 else /* non-RE case */
1218 unsigned char *raw_pattern, *pat;
1219 int raw_pattern_size;
1220 int raw_pattern_size_byte;
1221 unsigned char *patbuf;
1222 int multibyte = !NILP (current_buffer->enable_multibyte_characters);
1223 unsigned char *base_pat;
1224 /* Set to positive if we find a non-ASCII char that need
1225 translation. Otherwise set to zero later. */
1226 int charset_base = -1;
1227 int boyer_moore_ok = 1;
1229 /* MULTIBYTE says whether the text to be searched is multibyte.
1230 We must convert PATTERN to match that, or we will not really
1231 find things right. */
1233 if (multibyte == STRING_MULTIBYTE (string))
1235 raw_pattern = (unsigned char *) SDATA (string);
1236 raw_pattern_size = SCHARS (string);
1237 raw_pattern_size_byte = SBYTES (string);
1239 else if (multibyte)
1241 raw_pattern_size = SCHARS (string);
1242 raw_pattern_size_byte
1243 = count_size_as_multibyte (SDATA (string),
1244 raw_pattern_size);
1245 raw_pattern = (unsigned char *) alloca (raw_pattern_size_byte + 1);
1246 copy_text (SDATA (string), raw_pattern,
1247 SCHARS (string), 0, 1);
1249 else
1251 /* Converting multibyte to single-byte.
1253 ??? Perhaps this conversion should be done in a special way
1254 by subtracting nonascii-insert-offset from each non-ASCII char,
1255 so that only the multibyte chars which really correspond to
1256 the chosen single-byte character set can possibly match. */
1257 raw_pattern_size = SCHARS (string);
1258 raw_pattern_size_byte = SCHARS (string);
1259 raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
1260 copy_text (SDATA (string), raw_pattern,
1261 SBYTES (string), 1, 0);
1264 /* Copy and optionally translate the pattern. */
1265 len = raw_pattern_size;
1266 len_byte = raw_pattern_size_byte;
1267 patbuf = (unsigned char *) alloca (len_byte);
1268 pat = patbuf;
1269 base_pat = raw_pattern;
1270 if (multibyte)
1272 /* Fill patbuf by translated characters in STRING while
1273 checking if we can use boyer-moore search. If TRT is
1274 non-nil, we can use boyer-moore search only if TRT can be
1275 represented by the byte array of 256 elements. For that,
1276 all non-ASCII case-equivalents of all case-senstive
1277 characters in STRING must belong to the same charset and
1278 row. */
1280 while (--len >= 0)
1282 unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str;
1283 int c, translated, inverse;
1284 int in_charlen, charlen;
1286 /* If we got here and the RE flag is set, it's because we're
1287 dealing with a regexp known to be trivial, so the backslash
1288 just quotes the next character. */
1289 if (RE && *base_pat == '\\')
1291 len--;
1292 raw_pattern_size--;
1293 len_byte--;
1294 base_pat++;
1297 c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen);
1299 if (NILP (trt))
1301 str = base_pat;
1302 charlen = in_charlen;
1304 else
1306 /* Translate the character. */
1307 TRANSLATE (translated, trt, c);
1308 charlen = CHAR_STRING (translated, str_base);
1309 str = str_base;
1311 /* Check if C has any other case-equivalents. */
1312 TRANSLATE (inverse, inverse_trt, c);
1313 /* If so, check if we can use boyer-moore. */
1314 if (c != inverse && boyer_moore_ok)
1316 /* Check if all equivalents belong to the same
1317 charset & row. Note that the check of C
1318 itself is done by the last iteration. Note
1319 also that we don't have to check ASCII
1320 characters because boyer-moore search can
1321 always handle their translation. */
1322 while (1)
1324 if (ASCII_BYTE_P (inverse))
1326 if (charset_base > 0)
1328 boyer_moore_ok = 0;
1329 break;
1331 charset_base = 0;
1333 else if (SINGLE_BYTE_CHAR_P (inverse))
1335 /* Boyer-moore search can't handle a
1336 translation of an eight-bit
1337 character. */
1338 boyer_moore_ok = 0;
1339 break;
1341 else if (charset_base < 0)
1342 charset_base = inverse & ~CHAR_FIELD3_MASK;
1343 else if ((inverse & ~CHAR_FIELD3_MASK)
1344 != charset_base)
1346 boyer_moore_ok = 0;
1347 break;
1349 if (c == inverse)
1350 break;
1351 TRANSLATE (inverse, inverse_trt, inverse);
1355 if (charset_base < 0)
1356 charset_base = 0;
1358 /* Store this character into the translated pattern. */
1359 bcopy (str, pat, charlen);
1360 pat += charlen;
1361 base_pat += in_charlen;
1362 len_byte -= in_charlen;
1365 else
1367 /* Unibyte buffer. */
1368 charset_base = 0;
1369 while (--len >= 0)
1371 int c, translated;
1373 /* If we got here and the RE flag is set, it's because we're
1374 dealing with a regexp known to be trivial, so the backslash
1375 just quotes the next character. */
1376 if (RE && *base_pat == '\\')
1378 len--;
1379 raw_pattern_size--;
1380 base_pat++;
1382 c = *base_pat++;
1383 TRANSLATE (translated, trt, c);
1384 *pat++ = translated;
1388 len_byte = pat - patbuf;
1389 len = raw_pattern_size;
1390 pat = base_pat = patbuf;
1392 if (boyer_moore_ok)
1393 return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
1394 pos, pos_byte, lim, lim_byte,
1395 charset_base);
1396 else
1397 return simple_search (n, pat, len, len_byte, trt,
1398 pos, pos_byte, lim, lim_byte);
1402 /* Do a simple string search N times for the string PAT,
1403 whose length is LEN/LEN_BYTE,
1404 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1405 TRT is the translation table.
1407 Return the character position where the match is found.
1408 Otherwise, if M matches remained to be found, return -M.
1410 This kind of search works regardless of what is in PAT and
1411 regardless of what is in TRT. It is used in cases where
1412 boyer_moore cannot work. */
1414 static int
1415 simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
1416 int n;
1417 unsigned char *pat;
1418 int len, len_byte;
1419 Lisp_Object trt;
1420 int pos, pos_byte;
1421 int lim, lim_byte;
1423 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1424 int forward = n > 0;
1426 if (lim > pos && multibyte)
1427 while (n > 0)
1429 while (1)
1431 /* Try matching at position POS. */
1432 int this_pos = pos;
1433 int this_pos_byte = pos_byte;
1434 int this_len = len;
1435 int this_len_byte = len_byte;
1436 unsigned char *p = pat;
1437 if (pos + len > lim)
1438 goto stop;
1440 while (this_len > 0)
1442 int charlen, buf_charlen;
1443 int pat_ch, buf_ch;
1445 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1446 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1447 ZV_BYTE - this_pos_byte,
1448 buf_charlen);
1449 TRANSLATE (buf_ch, trt, buf_ch);
1451 if (buf_ch != pat_ch)
1452 break;
1454 this_len_byte -= charlen;
1455 this_len--;
1456 p += charlen;
1458 this_pos_byte += buf_charlen;
1459 this_pos++;
1462 if (this_len == 0)
1464 pos += len;
1465 pos_byte += len_byte;
1466 break;
1469 INC_BOTH (pos, pos_byte);
1472 n--;
1474 else if (lim > pos)
1475 while (n > 0)
1477 while (1)
1479 /* Try matching at position POS. */
1480 int this_pos = pos;
1481 int this_len = len;
1482 unsigned char *p = pat;
1484 if (pos + len > lim)
1485 goto stop;
1487 while (this_len > 0)
1489 int pat_ch = *p++;
1490 int buf_ch = FETCH_BYTE (this_pos);
1491 TRANSLATE (buf_ch, trt, buf_ch);
1493 if (buf_ch != pat_ch)
1494 break;
1496 this_len--;
1497 this_pos++;
1500 if (this_len == 0)
1502 pos += len;
1503 break;
1506 pos++;
1509 n--;
1511 /* Backwards search. */
1512 else if (lim < pos && multibyte)
1513 while (n < 0)
1515 while (1)
1517 /* Try matching at position POS. */
1518 int this_pos = pos - len;
1519 int this_pos_byte = pos_byte - len_byte;
1520 int this_len = len;
1521 int this_len_byte = len_byte;
1522 unsigned char *p = pat;
1524 if (this_pos < lim || this_pos_byte < lim_byte)
1525 goto stop;
1527 while (this_len > 0)
1529 int charlen, buf_charlen;
1530 int pat_ch, buf_ch;
1532 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1533 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1534 ZV_BYTE - this_pos_byte,
1535 buf_charlen);
1536 TRANSLATE (buf_ch, trt, buf_ch);
1538 if (buf_ch != pat_ch)
1539 break;
1541 this_len_byte -= charlen;
1542 this_len--;
1543 p += charlen;
1544 this_pos_byte += buf_charlen;
1545 this_pos++;
1548 if (this_len == 0)
1550 pos -= len;
1551 pos_byte -= len_byte;
1552 break;
1555 DEC_BOTH (pos, pos_byte);
1558 n++;
1560 else if (lim < pos)
1561 while (n < 0)
1563 while (1)
1565 /* Try matching at position POS. */
1566 int this_pos = pos - len;
1567 int this_len = len;
1568 unsigned char *p = pat;
1570 if (pos - len < lim)
1571 goto stop;
1573 while (this_len > 0)
1575 int pat_ch = *p++;
1576 int buf_ch = FETCH_BYTE (this_pos);
1577 TRANSLATE (buf_ch, trt, buf_ch);
1579 if (buf_ch != pat_ch)
1580 break;
1581 this_len--;
1582 this_pos++;
1585 if (this_len == 0)
1587 pos -= len;
1588 break;
1591 pos--;
1594 n++;
1597 stop:
1598 if (n == 0)
1600 if (forward)
1601 set_search_regs ((multibyte ? pos_byte : pos) - len_byte, len_byte);
1602 else
1603 set_search_regs (multibyte ? pos_byte : pos, len_byte);
1605 return pos;
1607 else if (n > 0)
1608 return -n;
1609 else
1610 return n;
1613 /* Do Boyer-Moore search N times for the string BASE_PAT,
1614 whose length is LEN/LEN_BYTE,
1615 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1616 DIRECTION says which direction we search in.
1617 TRT and INVERSE_TRT are translation tables.
1618 Characters in PAT are already translated by TRT.
1620 This kind of search works if all the characters in BASE_PAT that
1621 have nontrivial translation are the same aside from the last byte.
1622 This makes it possible to translate just the last byte of a
1623 character, and do so after just a simple test of the context.
1624 CHARSET_BASE is nonzero if there is such a non-ASCII character.
1626 If that criterion is not satisfied, do not call this function. */
1628 static int
1629 boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
1630 pos, pos_byte, lim, lim_byte, charset_base)
1631 int n;
1632 unsigned char *base_pat;
1633 int len, len_byte;
1634 Lisp_Object trt;
1635 Lisp_Object inverse_trt;
1636 int pos, pos_byte;
1637 int lim, lim_byte;
1638 int charset_base;
1640 int direction = ((n > 0) ? 1 : -1);
1641 register int dirlen;
1642 int infinity, limit, stride_for_teases = 0;
1643 register int *BM_tab;
1644 int *BM_tab_base;
1645 register unsigned char *cursor, *p_limit;
1646 register int i, j;
1647 unsigned char *pat, *pat_end;
1648 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1650 unsigned char simple_translate[0400];
1651 /* These are set to the preceding bytes of a byte to be translated
1652 if charset_base is nonzero. As the maximum byte length of a
1653 multibyte character is 4, we have to check at most three previous
1654 bytes. */
1655 int translate_prev_byte1 = 0;
1656 int translate_prev_byte2 = 0;
1657 int translate_prev_byte3 = 0;
1659 #ifdef C_ALLOCA
1660 int BM_tab_space[0400];
1661 BM_tab = &BM_tab_space[0];
1662 #else
1663 BM_tab = (int *) alloca (0400 * sizeof (int));
1664 #endif
1665 /* The general approach is that we are going to maintain that we know */
1666 /* the first (closest to the present position, in whatever direction */
1667 /* we're searching) character that could possibly be the last */
1668 /* (furthest from present position) character of a valid match. We */
1669 /* advance the state of our knowledge by looking at that character */
1670 /* and seeing whether it indeed matches the last character of the */
1671 /* pattern. If it does, we take a closer look. If it does not, we */
1672 /* move our pointer (to putative last characters) as far as is */
1673 /* logically possible. This amount of movement, which I call a */
1674 /* stride, will be the length of the pattern if the actual character */
1675 /* appears nowhere in the pattern, otherwise it will be the distance */
1676 /* from the last occurrence of that character to the end of the */
1677 /* pattern. */
1678 /* As a coding trick, an enormous stride is coded into the table for */
1679 /* characters that match the last character. This allows use of only */
1680 /* a single test, a test for having gone past the end of the */
1681 /* permissible match region, to test for both possible matches (when */
1682 /* the stride goes past the end immediately) and failure to */
1683 /* match (where you get nudged past the end one stride at a time). */
1685 /* Here we make a "mickey mouse" BM table. The stride of the search */
1686 /* is determined only by the last character of the putative match. */
1687 /* If that character does not match, we will stride the proper */
1688 /* distance to propose a match that superimposes it on the last */
1689 /* instance of a character that matches it (per trt), or misses */
1690 /* it entirely if there is none. */
1692 dirlen = len_byte * direction;
1693 infinity = dirlen - (lim_byte + pos_byte + len_byte + len_byte) * direction;
1695 /* Record position after the end of the pattern. */
1696 pat_end = base_pat + len_byte;
1697 /* BASE_PAT points to a character that we start scanning from.
1698 It is the first character in a forward search,
1699 the last character in a backward search. */
1700 if (direction < 0)
1701 base_pat = pat_end - 1;
1703 BM_tab_base = BM_tab;
1704 BM_tab += 0400;
1705 j = dirlen; /* to get it in a register */
1706 /* A character that does not appear in the pattern induces a */
1707 /* stride equal to the pattern length. */
1708 while (BM_tab_base != BM_tab)
1710 *--BM_tab = j;
1711 *--BM_tab = j;
1712 *--BM_tab = j;
1713 *--BM_tab = j;
1716 /* We use this for translation, instead of TRT itself.
1717 We fill this in to handle the characters that actually
1718 occur in the pattern. Others don't matter anyway! */
1719 bzero (simple_translate, sizeof simple_translate);
1720 for (i = 0; i < 0400; i++)
1721 simple_translate[i] = i;
1723 if (charset_base)
1725 /* Setup translate_prev_byte1/2/3 from CHARSET_BASE. Only a
1726 byte following them are the target of translation. */
1727 int sample_char = charset_base | 0x20;
1728 unsigned char str[MAX_MULTIBYTE_LENGTH];
1729 int len = CHAR_STRING (sample_char, str);
1731 translate_prev_byte1 = str[len - 2];
1732 if (len > 2)
1734 translate_prev_byte2 = str[len - 3];
1735 if (len > 3)
1736 translate_prev_byte3 = str[len - 4];
1740 i = 0;
1741 while (i != infinity)
1743 unsigned char *ptr = base_pat + i;
1744 i += direction;
1745 if (i == dirlen)
1746 i = infinity;
1747 if (! NILP (trt))
1749 /* If the byte currently looking at is the last of a
1750 character to check case-equivalents, set CH to that
1751 character. An ASCII character and a non-ASCII character
1752 matching with CHARSET_BASE are to be checked. */
1753 int ch = -1;
1755 if (ASCII_BYTE_P (*ptr) || ! multibyte)
1756 ch = *ptr;
1757 else if (charset_base
1758 && ((pat_end - ptr) == 1 || CHAR_HEAD_P (ptr[1])))
1760 unsigned char *charstart = ptr - 1;
1762 while (! (CHAR_HEAD_P (*charstart)))
1763 charstart--;
1764 ch = STRING_CHAR (charstart, ptr - charstart + 1);
1765 if (charset_base != (ch & ~CHAR_FIELD3_MASK))
1766 ch = -1;
1769 if (ch >= 0400)
1770 j = ((unsigned char) ch) | 0200;
1771 else
1772 j = *ptr;
1774 if (i == infinity)
1775 stride_for_teases = BM_tab[j];
1777 BM_tab[j] = dirlen - i;
1778 /* A translation table is accompanied by its inverse -- see */
1779 /* comment following downcase_table for details */
1780 if (ch >= 0)
1782 int starting_ch = ch;
1783 int starting_j = j;
1785 while (1)
1787 TRANSLATE (ch, inverse_trt, ch);
1788 if (ch >= 0400)
1789 j = ((unsigned char) ch) | 0200;
1790 else
1791 j = (unsigned char) ch;
1793 /* For all the characters that map into CH,
1794 set up simple_translate to map the last byte
1795 into STARTING_J. */
1796 simple_translate[j] = starting_j;
1797 if (ch == starting_ch)
1798 break;
1799 BM_tab[j] = dirlen - i;
1803 else
1805 j = *ptr;
1807 if (i == infinity)
1808 stride_for_teases = BM_tab[j];
1809 BM_tab[j] = dirlen - i;
1811 /* stride_for_teases tells how much to stride if we get a */
1812 /* match on the far character but are subsequently */
1813 /* disappointed, by recording what the stride would have been */
1814 /* for that character if the last character had been */
1815 /* different. */
1817 infinity = dirlen - infinity;
1818 pos_byte += dirlen - ((direction > 0) ? direction : 0);
1819 /* loop invariant - POS_BYTE points at where last char (first
1820 char if reverse) of pattern would align in a possible match. */
1821 while (n != 0)
1823 int tail_end;
1824 unsigned char *tail_end_ptr;
1826 /* It's been reported that some (broken) compiler thinks that
1827 Boolean expressions in an arithmetic context are unsigned.
1828 Using an explicit ?1:0 prevents this. */
1829 if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1830 < 0)
1831 return (n * (0 - direction));
1832 /* First we do the part we can by pointers (maybe nothing) */
1833 QUIT;
1834 pat = base_pat;
1835 limit = pos_byte - dirlen + direction;
1836 if (direction > 0)
1838 limit = BUFFER_CEILING_OF (limit);
1839 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1840 can take on without hitting edge of buffer or the gap. */
1841 limit = min (limit, pos_byte + 20000);
1842 limit = min (limit, lim_byte - 1);
1844 else
1846 limit = BUFFER_FLOOR_OF (limit);
1847 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1848 can take on without hitting edge of buffer or the gap. */
1849 limit = max (limit, pos_byte - 20000);
1850 limit = max (limit, lim_byte);
1852 tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1853 tail_end_ptr = BYTE_POS_ADDR (tail_end);
1855 if ((limit - pos_byte) * direction > 20)
1857 unsigned char *p2;
1859 p_limit = BYTE_POS_ADDR (limit);
1860 p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1861 /* In this loop, pos + cursor - p2 is the surrogate for pos */
1862 while (1) /* use one cursor setting as long as i can */
1864 if (direction > 0) /* worth duplicating */
1866 /* Use signed comparison if appropriate
1867 to make cursor+infinity sure to be > p_limit.
1868 Assuming that the buffer lies in a range of addresses
1869 that are all "positive" (as ints) or all "negative",
1870 either kind of comparison will work as long
1871 as we don't step by infinity. So pick the kind
1872 that works when we do step by infinity. */
1873 if ((EMACS_INT) (p_limit + infinity) > (EMACS_INT) p_limit)
1874 while ((EMACS_INT) cursor <= (EMACS_INT) p_limit)
1875 cursor += BM_tab[*cursor];
1876 else
1877 while ((EMACS_UINT) cursor <= (EMACS_UINT) p_limit)
1878 cursor += BM_tab[*cursor];
1880 else
1882 if ((EMACS_INT) (p_limit + infinity) < (EMACS_INT) p_limit)
1883 while ((EMACS_INT) cursor >= (EMACS_INT) p_limit)
1884 cursor += BM_tab[*cursor];
1885 else
1886 while ((EMACS_UINT) cursor >= (EMACS_UINT) p_limit)
1887 cursor += BM_tab[*cursor];
1889 /* If you are here, cursor is beyond the end of the searched region. */
1890 /* This can happen if you match on the far character of the pattern, */
1891 /* because the "stride" of that character is infinity, a number able */
1892 /* to throw you well beyond the end of the search. It can also */
1893 /* happen if you fail to match within the permitted region and would */
1894 /* otherwise try a character beyond that region */
1895 if ((cursor - p_limit) * direction <= len_byte)
1896 break; /* a small overrun is genuine */
1897 cursor -= infinity; /* large overrun = hit */
1898 i = dirlen - direction;
1899 if (! NILP (trt))
1901 while ((i -= direction) + direction != 0)
1903 int ch;
1904 cursor -= direction;
1905 /* Translate only the last byte of a character. */
1906 if (! multibyte
1907 || ((cursor == tail_end_ptr
1908 || CHAR_HEAD_P (cursor[1]))
1909 && (CHAR_HEAD_P (cursor[0])
1910 /* Check if this is the last byte of
1911 a translable character. */
1912 || (translate_prev_byte1 == cursor[-1]
1913 && (CHAR_HEAD_P (translate_prev_byte1)
1914 || (translate_prev_byte2 == cursor[-2]
1915 && (CHAR_HEAD_P (translate_prev_byte2)
1916 || (translate_prev_byte3 == cursor[-3]))))))))
1917 ch = simple_translate[*cursor];
1918 else
1919 ch = *cursor;
1920 if (pat[i] != ch)
1921 break;
1924 else
1926 while ((i -= direction) + direction != 0)
1928 cursor -= direction;
1929 if (pat[i] != *cursor)
1930 break;
1933 cursor += dirlen - i - direction; /* fix cursor */
1934 if (i + direction == 0)
1936 int position;
1938 cursor -= direction;
1940 position = pos_byte + cursor - p2 + ((direction > 0)
1941 ? 1 - len_byte : 0);
1942 set_search_regs (position, len_byte);
1944 if ((n -= direction) != 0)
1945 cursor += dirlen; /* to resume search */
1946 else
1947 return ((direction > 0)
1948 ? search_regs.end[0] : search_regs.start[0]);
1950 else
1951 cursor += stride_for_teases; /* <sigh> we lose - */
1953 pos_byte += cursor - p2;
1955 else
1956 /* Now we'll pick up a clump that has to be done the hard */
1957 /* way because it covers a discontinuity */
1959 limit = ((direction > 0)
1960 ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
1961 : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
1962 limit = ((direction > 0)
1963 ? min (limit + len_byte, lim_byte - 1)
1964 : max (limit - len_byte, lim_byte));
1965 /* LIMIT is now the last value POS_BYTE can have
1966 and still be valid for a possible match. */
1967 while (1)
1969 /* This loop can be coded for space rather than */
1970 /* speed because it will usually run only once. */
1971 /* (the reach is at most len + 21, and typically */
1972 /* does not exceed len) */
1973 while ((limit - pos_byte) * direction >= 0)
1974 pos_byte += BM_tab[FETCH_BYTE (pos_byte)];
1975 /* now run the same tests to distinguish going off the */
1976 /* end, a match or a phony match. */
1977 if ((pos_byte - limit) * direction <= len_byte)
1978 break; /* ran off the end */
1979 /* Found what might be a match.
1980 Set POS_BYTE back to last (first if reverse) pos. */
1981 pos_byte -= infinity;
1982 i = dirlen - direction;
1983 while ((i -= direction) + direction != 0)
1985 int ch;
1986 unsigned char *ptr;
1987 pos_byte -= direction;
1988 ptr = BYTE_POS_ADDR (pos_byte);
1989 /* Translate only the last byte of a character. */
1990 if (! multibyte
1991 || ((ptr == tail_end_ptr
1992 || CHAR_HEAD_P (ptr[1]))
1993 && (CHAR_HEAD_P (ptr[0])
1994 /* Check if this is the last byte of a
1995 translable character. */
1996 || (translate_prev_byte1 == ptr[-1]
1997 && (CHAR_HEAD_P (translate_prev_byte1)
1998 || (translate_prev_byte2 == ptr[-2]
1999 && (CHAR_HEAD_P (translate_prev_byte2)
2000 || translate_prev_byte3 == ptr[-3])))))))
2001 ch = simple_translate[*ptr];
2002 else
2003 ch = *ptr;
2004 if (pat[i] != ch)
2005 break;
2007 /* Above loop has moved POS_BYTE part or all the way
2008 back to the first pos (last pos if reverse).
2009 Set it once again at the last (first if reverse) char. */
2010 pos_byte += dirlen - i- direction;
2011 if (i + direction == 0)
2013 int position;
2014 pos_byte -= direction;
2016 position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
2018 set_search_regs (position, len_byte);
2020 if ((n -= direction) != 0)
2021 pos_byte += dirlen; /* to resume search */
2022 else
2023 return ((direction > 0)
2024 ? search_regs.end[0] : search_regs.start[0]);
2026 else
2027 pos_byte += stride_for_teases;
2030 /* We have done one clump. Can we continue? */
2031 if ((lim_byte - pos_byte) * direction < 0)
2032 return ((0 - n) * direction);
2034 return BYTE_TO_CHAR (pos_byte);
2037 /* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
2038 for the overall match just found in the current buffer.
2039 Also clear out the match data for registers 1 and up. */
2041 static void
2042 set_search_regs (beg_byte, nbytes)
2043 int beg_byte, nbytes;
2045 int i;
2047 /* Make sure we have registers in which to store
2048 the match position. */
2049 if (search_regs.num_regs == 0)
2051 search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
2052 search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
2053 search_regs.num_regs = 2;
2056 /* Clear out the other registers. */
2057 for (i = 1; i < search_regs.num_regs; i++)
2059 search_regs.start[i] = -1;
2060 search_regs.end[i] = -1;
2063 search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
2064 search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
2065 XSETBUFFER (last_thing_searched, current_buffer);
2068 /* Given a string of words separated by word delimiters,
2069 compute a regexp that matches those exact words
2070 separated by arbitrary punctuation. */
2072 static Lisp_Object
2073 wordify (string)
2074 Lisp_Object string;
2076 register unsigned char *p, *o;
2077 register int i, i_byte, len, punct_count = 0, word_count = 0;
2078 Lisp_Object val;
2079 int prev_c = 0;
2080 int adjust;
2082 CHECK_STRING (string);
2083 p = SDATA (string);
2084 len = SCHARS (string);
2086 for (i = 0, i_byte = 0; i < len; )
2088 int c;
2090 FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
2092 if (SYNTAX (c) != Sword)
2094 punct_count++;
2095 if (i > 0 && SYNTAX (prev_c) == Sword)
2096 word_count++;
2099 prev_c = c;
2102 if (SYNTAX (prev_c) == Sword)
2103 word_count++;
2104 if (!word_count)
2105 return empty_string;
2107 adjust = - punct_count + 5 * (word_count - 1) + 4;
2108 if (STRING_MULTIBYTE (string))
2109 val = make_uninit_multibyte_string (len + adjust,
2110 SBYTES (string)
2111 + adjust);
2112 else
2113 val = make_uninit_string (len + adjust);
2115 o = SDATA (val);
2116 *o++ = '\\';
2117 *o++ = 'b';
2118 prev_c = 0;
2120 for (i = 0, i_byte = 0; i < len; )
2122 int c;
2123 int i_byte_orig = i_byte;
2125 FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
2127 if (SYNTAX (c) == Sword)
2129 bcopy (SDATA (string) + i_byte_orig, o,
2130 i_byte - i_byte_orig);
2131 o += i_byte - i_byte_orig;
2133 else if (i > 0 && SYNTAX (prev_c) == Sword && --word_count)
2135 *o++ = '\\';
2136 *o++ = 'W';
2137 *o++ = '\\';
2138 *o++ = 'W';
2139 *o++ = '*';
2142 prev_c = c;
2145 *o++ = '\\';
2146 *o++ = 'b';
2148 return val;
2151 DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
2152 "MSearch backward: ",
2153 doc: /* Search backward from point for STRING.
2154 Set point to the beginning of the occurrence found, and return point.
2155 An optional second argument bounds the search; it is a buffer position.
2156 The match found must not extend before that position.
2157 Optional third argument, if t, means if fail just return nil (no error).
2158 If not nil and not t, position at limit of search and return nil.
2159 Optional fourth argument is repeat count--search for successive occurrences.
2161 Search case-sensitivity is determined by the value of the variable
2162 `case-fold-search', which see.
2164 See also the functions `match-beginning', `match-end' and `replace-match'. */)
2165 (string, bound, noerror, count)
2166 Lisp_Object string, bound, noerror, count;
2168 return search_command (string, bound, noerror, count, -1, 0, 0);
2171 DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
2172 doc: /* Search forward from point for STRING.
2173 Set point to the end of the occurrence found, and return point.
2174 An optional second argument bounds the search; it is a buffer position.
2175 The match found must not extend after that position. A value of nil is
2176 equivalent to (point-max).
2177 Optional third argument, if t, means if fail just return nil (no error).
2178 If not nil and not t, move to limit of search and return nil.
2179 Optional fourth argument is repeat count--search for successive occurrences.
2181 Search case-sensitivity is determined by the value of the variable
2182 `case-fold-search', which see.
2184 See also the functions `match-beginning', `match-end' and `replace-match'. */)
2185 (string, bound, noerror, count)
2186 Lisp_Object string, bound, noerror, count;
2188 return search_command (string, bound, noerror, count, 1, 0, 0);
2191 DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
2192 "sWord search backward: ",
2193 doc: /* Search backward from point for STRING, ignoring differences in punctuation.
2194 Set point to the beginning of the occurrence found, and return point.
2195 An optional second argument bounds the search; it is a buffer position.
2196 The match found must not extend before that position.
2197 Optional third argument, if t, means if fail just return nil (no error).
2198 If not nil and not t, move to limit of search and return nil.
2199 Optional fourth argument is repeat count--search for successive occurrences. */)
2200 (string, bound, noerror, count)
2201 Lisp_Object string, bound, noerror, count;
2203 return search_command (wordify (string), bound, noerror, count, -1, 1, 0);
2206 DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
2207 "sWord search: ",
2208 doc: /* Search forward from point for STRING, ignoring differences in punctuation.
2209 Set point to the end of the occurrence found, and return point.
2210 An optional second argument bounds the search; it is a buffer position.
2211 The match found must not extend after that position.
2212 Optional third argument, if t, means if fail just return nil (no error).
2213 If not nil and not t, move to limit of search and return nil.
2214 Optional fourth argument is repeat count--search for successive occurrences. */)
2215 (string, bound, noerror, count)
2216 Lisp_Object string, bound, noerror, count;
2218 return search_command (wordify (string), bound, noerror, count, 1, 1, 0);
2221 DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
2222 "sRE search backward: ",
2223 doc: /* Search backward from point for match for regular expression REGEXP.
2224 Set point to the beginning of the match, and return point.
2225 The match found is the one starting last in the buffer
2226 and yet ending before the origin of the search.
2227 An optional second argument bounds the search; it is a buffer position.
2228 The match found must start at or after that position.
2229 Optional third argument, if t, means if fail just return nil (no error).
2230 If not nil and not t, move to limit of search and return nil.
2231 Optional fourth argument is repeat count--search for successive occurrences.
2232 See also the functions `match-beginning', `match-end', `match-string',
2233 and `replace-match'. */)
2234 (regexp, bound, noerror, count)
2235 Lisp_Object regexp, bound, noerror, count;
2237 return search_command (regexp, bound, noerror, count, -1, 1, 0);
2240 DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
2241 "sRE search: ",
2242 doc: /* Search forward from point for regular expression REGEXP.
2243 Set point to the end of the occurrence found, and return point.
2244 An optional second argument bounds the search; it is a buffer position.
2245 The match found must not extend after that position.
2246 Optional third argument, if t, means if fail just return nil (no error).
2247 If not nil and not t, move to limit of search and return nil.
2248 Optional fourth argument is repeat count--search for successive occurrences.
2249 See also the functions `match-beginning', `match-end', `match-string',
2250 and `replace-match'. */)
2251 (regexp, bound, noerror, count)
2252 Lisp_Object regexp, bound, noerror, count;
2254 return search_command (regexp, bound, noerror, count, 1, 1, 0);
2257 DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
2258 "sPosix search backward: ",
2259 doc: /* Search backward from point for match for regular expression REGEXP.
2260 Find the longest match in accord with Posix regular expression rules.
2261 Set point to the beginning of the match, and return point.
2262 The match found is the one starting last in the buffer
2263 and yet ending before the origin of the search.
2264 An optional second argument bounds the search; it is a buffer position.
2265 The match found must start at or after that position.
2266 Optional third argument, if t, means if fail just return nil (no error).
2267 If not nil and not t, move to limit of search and return nil.
2268 Optional fourth argument is repeat count--search for successive occurrences.
2269 See also the functions `match-beginning', `match-end', `match-string',
2270 and `replace-match'. */)
2271 (regexp, bound, noerror, count)
2272 Lisp_Object regexp, bound, noerror, count;
2274 return search_command (regexp, bound, noerror, count, -1, 1, 1);
2277 DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
2278 "sPosix search: ",
2279 doc: /* Search forward from point for regular expression REGEXP.
2280 Find the longest match in accord with Posix regular expression rules.
2281 Set point to the end of the occurrence found, and return point.
2282 An optional second argument bounds the search; it is a buffer position.
2283 The match found must not extend after that position.
2284 Optional third argument, if t, means if fail just return nil (no error).
2285 If not nil and not t, move to limit of search and return nil.
2286 Optional fourth argument is repeat count--search for successive occurrences.
2287 See also the functions `match-beginning', `match-end', `match-string',
2288 and `replace-match'. */)
2289 (regexp, bound, noerror, count)
2290 Lisp_Object regexp, bound, noerror, count;
2292 return search_command (regexp, bound, noerror, count, 1, 1, 1);
2295 DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
2296 doc: /* Replace text matched by last search with NEWTEXT.
2297 Leave point at the end of the replacement text.
2299 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
2300 Otherwise maybe capitalize the whole text, or maybe just word initials,
2301 based on the replaced text.
2302 If the replaced text has only capital letters
2303 and has at least one multiletter word, convert NEWTEXT to all caps.
2304 Otherwise if all words are capitalized in the replaced text,
2305 capitalize each word in NEWTEXT.
2307 If third arg LITERAL is non-nil, insert NEWTEXT literally.
2308 Otherwise treat `\\' as special:
2309 `\\&' in NEWTEXT means substitute original matched text.
2310 `\\N' means substitute what matched the Nth `\\(...\\)'.
2311 If Nth parens didn't match, substitute nothing.
2312 `\\\\' means insert one `\\'.
2313 Case conversion does not apply to these substitutions.
2315 FIXEDCASE and LITERAL are optional arguments.
2317 The optional fourth argument STRING can be a string to modify.
2318 This is meaningful when the previous match was done against STRING,
2319 using `string-match'. When used this way, `replace-match'
2320 creates and returns a new string made by copying STRING and replacing
2321 the part of STRING that was matched.
2323 The optional fifth argument SUBEXP specifies a subexpression;
2324 it says to replace just that subexpression with NEWTEXT,
2325 rather than replacing the entire matched text.
2326 This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
2327 `\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
2328 NEWTEXT in place of subexp N.
2329 This is useful only after a regular expression search or match,
2330 since only regular expressions have distinguished subexpressions. */)
2331 (newtext, fixedcase, literal, string, subexp)
2332 Lisp_Object newtext, fixedcase, literal, string, subexp;
2334 enum { nochange, all_caps, cap_initial } case_action;
2335 register int pos, pos_byte;
2336 int some_multiletter_word;
2337 int some_lowercase;
2338 int some_uppercase;
2339 int some_nonuppercase_initial;
2340 register int c, prevc;
2341 int sub;
2342 int opoint, newpoint;
2344 CHECK_STRING (newtext);
2346 if (! NILP (string))
2347 CHECK_STRING (string);
2349 case_action = nochange; /* We tried an initialization */
2350 /* but some C compilers blew it */
2352 if (search_regs.num_regs <= 0)
2353 error ("`replace-match' called before any match found");
2355 if (NILP (subexp))
2356 sub = 0;
2357 else
2359 CHECK_NUMBER (subexp);
2360 sub = XINT (subexp);
2361 if (sub < 0 || sub >= search_regs.num_regs)
2362 args_out_of_range (subexp, make_number (search_regs.num_regs));
2365 if (NILP (string))
2367 if (search_regs.start[sub] < BEGV
2368 || search_regs.start[sub] > search_regs.end[sub]
2369 || search_regs.end[sub] > ZV)
2370 args_out_of_range (make_number (search_regs.start[sub]),
2371 make_number (search_regs.end[sub]));
2373 else
2375 if (search_regs.start[sub] < 0
2376 || search_regs.start[sub] > search_regs.end[sub]
2377 || search_regs.end[sub] > SCHARS (string))
2378 args_out_of_range (make_number (search_regs.start[sub]),
2379 make_number (search_regs.end[sub]));
2382 if (NILP (fixedcase))
2384 /* Decide how to casify by examining the matched text. */
2385 int last;
2387 pos = search_regs.start[sub];
2388 last = search_regs.end[sub];
2390 if (NILP (string))
2391 pos_byte = CHAR_TO_BYTE (pos);
2392 else
2393 pos_byte = string_char_to_byte (string, pos);
2395 prevc = '\n';
2396 case_action = all_caps;
2398 /* some_multiletter_word is set nonzero if any original word
2399 is more than one letter long. */
2400 some_multiletter_word = 0;
2401 some_lowercase = 0;
2402 some_nonuppercase_initial = 0;
2403 some_uppercase = 0;
2405 while (pos < last)
2407 if (NILP (string))
2409 c = FETCH_CHAR (pos_byte);
2410 INC_BOTH (pos, pos_byte);
2412 else
2413 FETCH_STRING_CHAR_ADVANCE (c, string, pos, pos_byte);
2415 if (LOWERCASEP (c))
2417 /* Cannot be all caps if any original char is lower case */
2419 some_lowercase = 1;
2420 if (SYNTAX (prevc) != Sword)
2421 some_nonuppercase_initial = 1;
2422 else
2423 some_multiletter_word = 1;
2425 else if (UPPERCASEP (c))
2427 some_uppercase = 1;
2428 if (SYNTAX (prevc) != Sword)
2430 else
2431 some_multiletter_word = 1;
2433 else
2435 /* If the initial is a caseless word constituent,
2436 treat that like a lowercase initial. */
2437 if (SYNTAX (prevc) != Sword)
2438 some_nonuppercase_initial = 1;
2441 prevc = c;
2444 /* Convert to all caps if the old text is all caps
2445 and has at least one multiletter word. */
2446 if (! some_lowercase && some_multiletter_word)
2447 case_action = all_caps;
2448 /* Capitalize each word, if the old text has all capitalized words. */
2449 else if (!some_nonuppercase_initial && some_multiletter_word)
2450 case_action = cap_initial;
2451 else if (!some_nonuppercase_initial && some_uppercase)
2452 /* Should x -> yz, operating on X, give Yz or YZ?
2453 We'll assume the latter. */
2454 case_action = all_caps;
2455 else
2456 case_action = nochange;
2459 /* Do replacement in a string. */
2460 if (!NILP (string))
2462 Lisp_Object before, after;
2464 before = Fsubstring (string, make_number (0),
2465 make_number (search_regs.start[sub]));
2466 after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
2468 /* Substitute parts of the match into NEWTEXT
2469 if desired. */
2470 if (NILP (literal))
2472 int lastpos = 0;
2473 int lastpos_byte = 0;
2474 /* We build up the substituted string in ACCUM. */
2475 Lisp_Object accum;
2476 Lisp_Object middle;
2477 int length = SBYTES (newtext);
2479 accum = Qnil;
2481 for (pos_byte = 0, pos = 0; pos_byte < length;)
2483 int substart = -1;
2484 int subend = 0;
2485 int delbackslash = 0;
2487 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2489 if (c == '\\')
2491 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2493 if (c == '&')
2495 substart = search_regs.start[sub];
2496 subend = search_regs.end[sub];
2498 else if (c >= '1' && c <= '9')
2500 if (search_regs.start[c - '0'] >= 0
2501 && c <= search_regs.num_regs + '0')
2503 substart = search_regs.start[c - '0'];
2504 subend = search_regs.end[c - '0'];
2506 else
2508 /* If that subexp did not match,
2509 replace \\N with nothing. */
2510 substart = 0;
2511 subend = 0;
2514 else if (c == '\\')
2515 delbackslash = 1;
2516 else
2517 error ("Invalid use of `\\' in replacement text");
2519 if (substart >= 0)
2521 if (pos - 2 != lastpos)
2522 middle = substring_both (newtext, lastpos,
2523 lastpos_byte,
2524 pos - 2, pos_byte - 2);
2525 else
2526 middle = Qnil;
2527 accum = concat3 (accum, middle,
2528 Fsubstring (string,
2529 make_number (substart),
2530 make_number (subend)));
2531 lastpos = pos;
2532 lastpos_byte = pos_byte;
2534 else if (delbackslash)
2536 middle = substring_both (newtext, lastpos,
2537 lastpos_byte,
2538 pos - 1, pos_byte - 1);
2540 accum = concat2 (accum, middle);
2541 lastpos = pos;
2542 lastpos_byte = pos_byte;
2546 if (pos != lastpos)
2547 middle = substring_both (newtext, lastpos,
2548 lastpos_byte,
2549 pos, pos_byte);
2550 else
2551 middle = Qnil;
2553 newtext = concat2 (accum, middle);
2556 /* Do case substitution in NEWTEXT if desired. */
2557 if (case_action == all_caps)
2558 newtext = Fupcase (newtext);
2559 else if (case_action == cap_initial)
2560 newtext = Fupcase_initials (newtext);
2562 return concat3 (before, newtext, after);
2565 /* Record point, then move (quietly) to the start of the match. */
2566 if (PT >= search_regs.end[sub])
2567 opoint = PT - ZV;
2568 else if (PT > search_regs.start[sub])
2569 opoint = search_regs.end[sub] - ZV;
2570 else
2571 opoint = PT;
2573 /* If we want non-literal replacement,
2574 perform substitution on the replacement string. */
2575 if (NILP (literal))
2577 int length = SBYTES (newtext);
2578 unsigned char *substed;
2579 int substed_alloc_size, substed_len;
2580 int buf_multibyte = !NILP (current_buffer->enable_multibyte_characters);
2581 int str_multibyte = STRING_MULTIBYTE (newtext);
2582 Lisp_Object rev_tbl;
2583 int really_changed = 0;
2585 rev_tbl= (!buf_multibyte && CHAR_TABLE_P (Vnonascii_translation_table)
2586 ? Fchar_table_extra_slot (Vnonascii_translation_table,
2587 make_number (0))
2588 : Qnil);
2590 substed_alloc_size = length * 2 + 100;
2591 substed = (unsigned char *) xmalloc (substed_alloc_size + 1);
2592 substed_len = 0;
2594 /* Go thru NEWTEXT, producing the actual text to insert in
2595 SUBSTED while adjusting multibyteness to that of the current
2596 buffer. */
2598 for (pos_byte = 0, pos = 0; pos_byte < length;)
2600 unsigned char str[MAX_MULTIBYTE_LENGTH];
2601 unsigned char *add_stuff = NULL;
2602 int add_len = 0;
2603 int idx = -1;
2605 if (str_multibyte)
2607 FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte);
2608 if (!buf_multibyte)
2609 c = multibyte_char_to_unibyte (c, rev_tbl);
2611 else
2613 /* Note that we don't have to increment POS. */
2614 c = SREF (newtext, pos_byte++);
2615 if (buf_multibyte)
2616 c = unibyte_char_to_multibyte (c);
2619 /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2620 or set IDX to a match index, which means put that part
2621 of the buffer text into SUBSTED. */
2623 if (c == '\\')
2625 really_changed = 1;
2627 if (str_multibyte)
2629 FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext,
2630 pos, pos_byte);
2631 if (!buf_multibyte && !SINGLE_BYTE_CHAR_P (c))
2632 c = multibyte_char_to_unibyte (c, rev_tbl);
2634 else
2636 c = SREF (newtext, pos_byte++);
2637 if (buf_multibyte)
2638 c = unibyte_char_to_multibyte (c);
2641 if (c == '&')
2642 idx = sub;
2643 else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
2645 if (search_regs.start[c - '0'] >= 1)
2646 idx = c - '0';
2648 else if (c == '\\')
2649 add_len = 1, add_stuff = "\\";
2650 else
2652 xfree (substed);
2653 error ("Invalid use of `\\' in replacement text");
2656 else
2658 add_len = CHAR_STRING (c, str);
2659 add_stuff = str;
2662 /* If we want to copy part of a previous match,
2663 set up ADD_STUFF and ADD_LEN to point to it. */
2664 if (idx >= 0)
2666 int begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
2667 add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
2668 if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
2669 move_gap (search_regs.start[idx]);
2670 add_stuff = BYTE_POS_ADDR (begbyte);
2673 /* Now the stuff we want to add to SUBSTED
2674 is invariably ADD_LEN bytes starting at ADD_STUFF. */
2676 /* Make sure SUBSTED is big enough. */
2677 if (substed_len + add_len >= substed_alloc_size)
2679 substed_alloc_size = substed_len + add_len + 500;
2680 substed = (unsigned char *) xrealloc (substed,
2681 substed_alloc_size + 1);
2684 /* Now add to the end of SUBSTED. */
2685 if (add_stuff)
2687 bcopy (add_stuff, substed + substed_len, add_len);
2688 substed_len += add_len;
2692 if (really_changed)
2694 if (buf_multibyte)
2696 int nchars = multibyte_chars_in_text (substed, substed_len);
2698 newtext = make_multibyte_string (substed, nchars, substed_len);
2700 else
2701 newtext = make_unibyte_string (substed, substed_len);
2703 xfree (substed);
2706 /* Replace the old text with the new in the cleanest possible way. */
2707 replace_range (search_regs.start[sub], search_regs.end[sub],
2708 newtext, 1, 0, 1);
2709 newpoint = search_regs.start[sub] + SCHARS (newtext);
2711 if (case_action == all_caps)
2712 Fupcase_region (make_number (search_regs.start[sub]),
2713 make_number (newpoint));
2714 else if (case_action == cap_initial)
2715 Fupcase_initials_region (make_number (search_regs.start[sub]),
2716 make_number (newpoint));
2718 /* Adjust search data for this change. */
2720 int oldend = search_regs.end[sub];
2721 int oldstart = search_regs.start[sub];
2722 int change = newpoint - search_regs.end[sub];
2723 int i;
2725 for (i = 0; i < search_regs.num_regs; i++)
2727 if (search_regs.start[i] >= oldend)
2728 search_regs.start[i] += change;
2729 else if (search_regs.start[i] > oldstart)
2730 search_regs.start[i] = oldstart;
2731 if (search_regs.end[i] >= oldend)
2732 search_regs.end[i] += change;
2733 else if (search_regs.end[i] > oldstart)
2734 search_regs.end[i] = oldstart;
2738 /* Put point back where it was in the text. */
2739 if (opoint <= 0)
2740 TEMP_SET_PT (opoint + ZV);
2741 else
2742 TEMP_SET_PT (opoint);
2744 /* Now move point "officially" to the start of the inserted replacement. */
2745 move_if_not_intangible (newpoint);
2747 return Qnil;
2750 static Lisp_Object
2751 match_limit (num, beginningp)
2752 Lisp_Object num;
2753 int beginningp;
2755 register int n;
2757 CHECK_NUMBER (num);
2758 n = XINT (num);
2759 if (n < 0)
2760 args_out_of_range (num, make_number (0));
2761 if (search_regs.num_regs <= 0)
2762 error ("No match data, because no search succeeded");
2763 if (n >= search_regs.num_regs
2764 || search_regs.start[n] < 0)
2765 return Qnil;
2766 return (make_number ((beginningp) ? search_regs.start[n]
2767 : search_regs.end[n]));
2770 DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
2771 doc: /* Return position of start of text matched by last search.
2772 SUBEXP, a number, specifies which parenthesized expression in the last
2773 regexp.
2774 Value is nil if SUBEXPth pair didn't match, or there were less than
2775 SUBEXP pairs.
2776 Zero means the entire text matched by the whole regexp or whole string. */)
2777 (subexp)
2778 Lisp_Object subexp;
2780 return match_limit (subexp, 1);
2783 DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
2784 doc: /* Return position of end of text matched by last search.
2785 SUBEXP, a number, specifies which parenthesized expression in the last
2786 regexp.
2787 Value is nil if SUBEXPth pair didn't match, or there were less than
2788 SUBEXP pairs.
2789 Zero means the entire text matched by the whole regexp or whole string. */)
2790 (subexp)
2791 Lisp_Object subexp;
2793 return match_limit (subexp, 0);
2796 DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 3, 0,
2797 doc: /* Return a list containing all info on what the last search matched.
2798 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2799 All the elements are markers or nil (nil if the Nth pair didn't match)
2800 if the last match was on a buffer; integers or nil if a string was matched.
2801 Use `store-match-data' to reinstate the data in this list.
2803 If INTEGERS (the optional first argument) is non-nil, always use
2804 integers \(rather than markers) to represent buffer positions. In
2805 this case, and if the last match was in a buffer, the buffer will get
2806 stored as one additional element at the end of the list.
2808 If REUSE is a list, reuse it as part of the value. If REUSE is long
2809 enough to hold all the values, and if INTEGERS is non-nil, no consing
2810 is done.
2812 If optional third arg RESEAT is non-nil, any previous markers on the
2813 REUSE list will be modified to point to nowhere.
2815 Return value is undefined if the last search failed. */)
2816 (integers, reuse, reseat)
2817 Lisp_Object integers, reuse, reseat;
2819 Lisp_Object tail, prev;
2820 Lisp_Object *data;
2821 int i, len;
2823 if (!NILP (reseat))
2824 for (tail = reuse; CONSP (tail); tail = XCDR (tail))
2825 if (MARKERP (XCAR (tail)))
2827 unchain_marker (XMARKER (XCAR (tail)));
2828 XSETCAR (tail, Qnil);
2831 if (NILP (last_thing_searched))
2832 return Qnil;
2834 prev = Qnil;
2836 data = (Lisp_Object *) alloca ((2 * search_regs.num_regs + 1)
2837 * sizeof (Lisp_Object));
2839 len = 0;
2840 for (i = 0; i < search_regs.num_regs; i++)
2842 int start = search_regs.start[i];
2843 if (start >= 0)
2845 if (EQ (last_thing_searched, Qt)
2846 || ! NILP (integers))
2848 XSETFASTINT (data[2 * i], start);
2849 XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
2851 else if (BUFFERP (last_thing_searched))
2853 data[2 * i] = Fmake_marker ();
2854 Fset_marker (data[2 * i],
2855 make_number (start),
2856 last_thing_searched);
2857 data[2 * i + 1] = Fmake_marker ();
2858 Fset_marker (data[2 * i + 1],
2859 make_number (search_regs.end[i]),
2860 last_thing_searched);
2862 else
2863 /* last_thing_searched must always be Qt, a buffer, or Qnil. */
2864 abort ();
2866 len = 2 * i + 2;
2868 else
2869 data[2 * i] = data[2 * i + 1] = Qnil;
2872 if (BUFFERP (last_thing_searched) && !NILP (integers))
2874 data[len] = last_thing_searched;
2875 len++;
2878 /* If REUSE is not usable, cons up the values and return them. */
2879 if (! CONSP (reuse))
2880 return Flist (len, data);
2882 /* If REUSE is a list, store as many value elements as will fit
2883 into the elements of REUSE. */
2884 for (i = 0, tail = reuse; CONSP (tail);
2885 i++, tail = XCDR (tail))
2887 if (i < len)
2888 XSETCAR (tail, data[i]);
2889 else
2890 XSETCAR (tail, Qnil);
2891 prev = tail;
2894 /* If we couldn't fit all value elements into REUSE,
2895 cons up the rest of them and add them to the end of REUSE. */
2896 if (i < len)
2897 XSETCDR (prev, Flist (len - i, data + i));
2899 return reuse;
2902 /* We used to have an internal use variant of `reseat' described as:
2904 If RESEAT is `evaporate', put the markers back on the free list
2905 immediately. No other references to the markers must exist in this
2906 case, so it is used only internally on the unwind stack and
2907 save-match-data from Lisp.
2909 But it was ill-conceived: those supposedly-internal markers get exposed via
2910 the undo-list, so freeing them here is unsafe. */
2912 DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 2, 0,
2913 doc: /* Set internal data on last search match from elements of LIST.
2914 LIST should have been created by calling `match-data' previously.
2916 If optional arg RESEAT is non-nil, make markers on LIST point nowhere. */)
2917 (list, reseat)
2918 register Lisp_Object list, reseat;
2920 register int i;
2921 register Lisp_Object marker;
2923 if (running_asynch_code)
2924 save_search_regs ();
2926 CHECK_LIST (list);
2928 /* Unless we find a marker with a buffer or an explicit buffer
2929 in LIST, assume that this match data came from a string. */
2930 last_thing_searched = Qt;
2932 /* Allocate registers if they don't already exist. */
2934 int length = XFASTINT (Flength (list)) / 2;
2936 if (length > search_regs.num_regs)
2938 if (search_regs.num_regs == 0)
2940 search_regs.start
2941 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2942 search_regs.end
2943 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2945 else
2947 search_regs.start
2948 = (regoff_t *) xrealloc (search_regs.start,
2949 length * sizeof (regoff_t));
2950 search_regs.end
2951 = (regoff_t *) xrealloc (search_regs.end,
2952 length * sizeof (regoff_t));
2955 for (i = search_regs.num_regs; i < length; i++)
2956 search_regs.start[i] = -1;
2958 search_regs.num_regs = length;
2961 for (i = 0; CONSP (list); i++)
2963 marker = XCAR (list);
2964 if (BUFFERP (marker))
2966 last_thing_searched = marker;
2967 break;
2969 if (i >= length)
2970 break;
2971 if (NILP (marker))
2973 search_regs.start[i] = -1;
2974 list = XCDR (list);
2976 else
2978 int from;
2979 Lisp_Object m;
2981 m = marker;
2982 if (MARKERP (marker))
2984 if (XMARKER (marker)->buffer == 0)
2985 XSETFASTINT (marker, 0);
2986 else
2987 XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
2990 CHECK_NUMBER_COERCE_MARKER (marker);
2991 from = XINT (marker);
2993 if (!NILP (reseat) && MARKERP (m))
2995 unchain_marker (XMARKER (m));
2996 XSETCAR (list, Qnil);
2999 if ((list = XCDR (list), !CONSP (list)))
3000 break;
3002 m = marker = XCAR (list);
3004 if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
3005 XSETFASTINT (marker, 0);
3007 CHECK_NUMBER_COERCE_MARKER (marker);
3008 search_regs.start[i] = from;
3009 search_regs.end[i] = XINT (marker);
3011 if (!NILP (reseat) && MARKERP (m))
3013 unchain_marker (XMARKER (m));
3014 XSETCAR (list, Qnil);
3017 list = XCDR (list);
3020 for (; i < search_regs.num_regs; i++)
3021 search_regs.start[i] = -1;
3024 return Qnil;
3027 /* If non-zero the match data have been saved in saved_search_regs
3028 during the execution of a sentinel or filter. */
3029 static int search_regs_saved;
3030 static struct re_registers saved_search_regs;
3031 static Lisp_Object saved_last_thing_searched;
3033 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
3034 if asynchronous code (filter or sentinel) is running. */
3035 static void
3036 save_search_regs ()
3038 if (!search_regs_saved)
3040 saved_search_regs.num_regs = search_regs.num_regs;
3041 saved_search_regs.start = search_regs.start;
3042 saved_search_regs.end = search_regs.end;
3043 saved_last_thing_searched = last_thing_searched;
3044 last_thing_searched = Qnil;
3045 search_regs.num_regs = 0;
3046 search_regs.start = 0;
3047 search_regs.end = 0;
3049 search_regs_saved = 1;
3053 /* Called upon exit from filters and sentinels. */
3054 void
3055 restore_search_regs ()
3057 if (search_regs_saved)
3059 if (search_regs.num_regs > 0)
3061 xfree (search_regs.start);
3062 xfree (search_regs.end);
3064 search_regs.num_regs = saved_search_regs.num_regs;
3065 search_regs.start = saved_search_regs.start;
3066 search_regs.end = saved_search_regs.end;
3067 last_thing_searched = saved_last_thing_searched;
3068 saved_last_thing_searched = Qnil;
3069 search_regs_saved = 0;
3073 static Lisp_Object
3074 unwind_set_match_data (list)
3075 Lisp_Object list;
3077 /* It is NOT ALWAYS safe to free (evaporate) the markers immediately. */
3078 return Fset_match_data (list, Qt);
3081 /* Called to unwind protect the match data. */
3082 void
3083 record_unwind_save_match_data ()
3085 record_unwind_protect (unwind_set_match_data,
3086 Fmatch_data (Qnil, Qnil, Qnil));
3089 /* Quote a string to inactivate reg-expr chars */
3091 DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
3092 doc: /* Return a regexp string which matches exactly STRING and nothing else. */)
3093 (string)
3094 Lisp_Object string;
3096 register unsigned char *in, *out, *end;
3097 register unsigned char *temp;
3098 int backslashes_added = 0;
3100 CHECK_STRING (string);
3102 temp = (unsigned char *) alloca (SBYTES (string) * 2);
3104 /* Now copy the data into the new string, inserting escapes. */
3106 in = SDATA (string);
3107 end = in + SBYTES (string);
3108 out = temp;
3110 for (; in != end; in++)
3112 if (*in == '['
3113 || *in == '*' || *in == '.' || *in == '\\'
3114 || *in == '?' || *in == '+'
3115 || *in == '^' || *in == '$')
3116 *out++ = '\\', backslashes_added++;
3117 *out++ = *in;
3120 return make_specified_string (temp,
3121 SCHARS (string) + backslashes_added,
3122 out - temp,
3123 STRING_MULTIBYTE (string));
3126 void
3127 syms_of_search ()
3129 register int i;
3131 for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
3133 searchbufs[i].buf.allocated = 100;
3134 searchbufs[i].buf.buffer = (unsigned char *) xmalloc (100);
3135 searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
3136 searchbufs[i].regexp = Qnil;
3137 searchbufs[i].whitespace_regexp = Qnil;
3138 searchbufs[i].syntax_table = Qnil;
3139 staticpro (&searchbufs[i].regexp);
3140 staticpro (&searchbufs[i].whitespace_regexp);
3141 staticpro (&searchbufs[i].syntax_table);
3142 searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
3144 searchbuf_head = &searchbufs[0];
3146 Qsearch_failed = intern ("search-failed");
3147 staticpro (&Qsearch_failed);
3148 Qinvalid_regexp = intern ("invalid-regexp");
3149 staticpro (&Qinvalid_regexp);
3151 Fput (Qsearch_failed, Qerror_conditions,
3152 Fcons (Qsearch_failed, Fcons (Qerror, Qnil)));
3153 Fput (Qsearch_failed, Qerror_message,
3154 build_string ("Search failed"));
3156 Fput (Qinvalid_regexp, Qerror_conditions,
3157 Fcons (Qinvalid_regexp, Fcons (Qerror, Qnil)));
3158 Fput (Qinvalid_regexp, Qerror_message,
3159 build_string ("Invalid regexp"));
3161 last_thing_searched = Qnil;
3162 staticpro (&last_thing_searched);
3164 saved_last_thing_searched = Qnil;
3165 staticpro (&saved_last_thing_searched);
3167 DEFVAR_LISP ("search-spaces-regexp", &Vsearch_spaces_regexp,
3168 doc: /* Regexp to substitute for bunches of spaces in regexp search.
3169 Some commands use this for user-specified regexps.
3170 Spaces that occur inside character classes or repetition operators
3171 or other such regexp constructs are not replaced with this.
3172 A value of nil (which is the normal value) means treat spaces literally. */);
3173 Vsearch_spaces_regexp = Qnil;
3175 defsubr (&Slooking_at);
3176 defsubr (&Sposix_looking_at);
3177 defsubr (&Sstring_match);
3178 defsubr (&Sposix_string_match);
3179 defsubr (&Ssearch_forward);
3180 defsubr (&Ssearch_backward);
3181 defsubr (&Sword_search_forward);
3182 defsubr (&Sword_search_backward);
3183 defsubr (&Sre_search_forward);
3184 defsubr (&Sre_search_backward);
3185 defsubr (&Sposix_search_forward);
3186 defsubr (&Sposix_search_backward);
3187 defsubr (&Sreplace_match);
3188 defsubr (&Smatch_beginning);
3189 defsubr (&Smatch_end);
3190 defsubr (&Smatch_data);
3191 defsubr (&Sset_match_data);
3192 defsubr (&Sregexp_quote);
3195 /* arch-tag: a6059d79-0552-4f14-a2cb-d379a4e3c78f
3196 (do not change this comment) */