Use proper configure option `--without-file-notification'.
[emacs.git] / src / search.c
blob40ab5db495ad85948d071407bc2528b5f570332f
1 /* String search routines for GNU Emacs.
3 Copyright (C) 1985-1987, 1993-1994, 1997-1999, 2001-2014 Free Software
4 Foundation, Inc.
6 This file is part of GNU Emacs.
8 GNU Emacs is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or
11 (at your option) any later version.
13 GNU Emacs is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
22 #include <config.h>
24 #include "lisp.h"
25 #include "category.h"
26 #include "character.h"
27 #include "buffer.h"
28 #include "syntax.h"
29 #include "charset.h"
30 #include "region-cache.h"
31 #include "commands.h"
32 #include "blockinput.h"
33 #include "intervals.h"
35 #include <sys/types.h>
36 #include "regex.h"
38 #define REGEXP_CACHE_SIZE 20
40 /* If the regexp is non-nil, then the buffer contains the compiled form
41 of that regexp, suitable for searching. */
42 struct regexp_cache
44 struct regexp_cache *next;
45 Lisp_Object regexp, whitespace_regexp;
46 /* Syntax table for which the regexp applies. We need this because
47 of character classes. If this is t, then the compiled pattern is valid
48 for any syntax-table. */
49 Lisp_Object syntax_table;
50 struct re_pattern_buffer buf;
51 char fastmap[0400];
52 /* True means regexp was compiled to do full POSIX backtracking. */
53 bool posix;
56 /* The instances of that struct. */
57 static struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
59 /* The head of the linked list; points to the most recently used buffer. */
60 static struct regexp_cache *searchbuf_head;
63 /* Every call to re_match, etc., must pass &search_regs as the regs
64 argument unless you can show it is unnecessary (i.e., if re_match
65 is certainly going to be called again before region-around-match
66 can be called).
68 Since the registers are now dynamically allocated, we need to make
69 sure not to refer to the Nth register before checking that it has
70 been allocated by checking search_regs.num_regs.
72 The regex code keeps track of whether it has allocated the search
73 buffer using bits in the re_pattern_buffer. This means that whenever
74 you compile a new pattern, it completely forgets whether it has
75 allocated any registers, and will allocate new registers the next
76 time you call a searching or matching function. Therefore, we need
77 to call re_set_registers after compiling a new pattern or after
78 setting the match registers, so that the regex functions will be
79 able to free or re-allocate it properly. */
80 static struct re_registers search_regs;
82 /* The buffer in which the last search was performed, or
83 Qt if the last search was done in a string;
84 Qnil if no searching has been done yet. */
85 static Lisp_Object last_thing_searched;
87 /* Error condition signaled when regexp compile_pattern fails. */
88 static Lisp_Object Qinvalid_regexp;
90 /* Error condition used for failing searches. */
91 static Lisp_Object Qsearch_failed;
93 static void set_search_regs (ptrdiff_t, ptrdiff_t);
94 static void save_search_regs (void);
95 static EMACS_INT simple_search (EMACS_INT, unsigned char *, ptrdiff_t,
96 ptrdiff_t, Lisp_Object, ptrdiff_t, ptrdiff_t,
97 ptrdiff_t, ptrdiff_t);
98 static EMACS_INT boyer_moore (EMACS_INT, unsigned char *, ptrdiff_t,
99 Lisp_Object, Lisp_Object, ptrdiff_t,
100 ptrdiff_t, int);
101 static EMACS_INT search_buffer (Lisp_Object, ptrdiff_t, ptrdiff_t,
102 ptrdiff_t, ptrdiff_t, EMACS_INT, int,
103 Lisp_Object, Lisp_Object, bool);
105 static _Noreturn void
106 matcher_overflow (void)
108 error ("Stack overflow in regexp matcher");
111 /* Compile a regexp and signal a Lisp error if anything goes wrong.
112 PATTERN is the pattern to compile.
113 CP is the place to put the result.
114 TRANSLATE is a translation table for ignoring case, or nil for none.
115 POSIX is true if we want full backtracking (POSIX style) for this pattern.
116 False means backtrack only enough to get a valid match.
118 The behavior also depends on Vsearch_spaces_regexp. */
120 static void
121 compile_pattern_1 (struct regexp_cache *cp, Lisp_Object pattern,
122 Lisp_Object translate, bool posix)
124 char *val;
125 reg_syntax_t old;
127 cp->regexp = Qnil;
128 cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
129 cp->posix = posix;
130 cp->buf.multibyte = STRING_MULTIBYTE (pattern);
131 cp->buf.charset_unibyte = charset_unibyte;
132 if (STRINGP (Vsearch_spaces_regexp))
133 cp->whitespace_regexp = Vsearch_spaces_regexp;
134 else
135 cp->whitespace_regexp = Qnil;
137 /* rms: I think BLOCK_INPUT is not needed here any more,
138 because regex.c defines malloc to call xmalloc.
139 Using BLOCK_INPUT here means the debugger won't run if an error occurs.
140 So let's turn it off. */
141 /* BLOCK_INPUT; */
142 old = re_set_syntax (RE_SYNTAX_EMACS
143 | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
145 if (STRINGP (Vsearch_spaces_regexp))
146 re_set_whitespace_regexp (SSDATA (Vsearch_spaces_regexp));
147 else
148 re_set_whitespace_regexp (NULL);
150 val = (char *) re_compile_pattern (SSDATA (pattern),
151 SBYTES (pattern), &cp->buf);
153 /* If the compiled pattern hard codes some of the contents of the
154 syntax-table, it can only be reused with *this* syntax table. */
155 cp->syntax_table = cp->buf.used_syntax ? BVAR (current_buffer, syntax_table) : Qt;
157 re_set_whitespace_regexp (NULL);
159 re_set_syntax (old);
160 /* unblock_input (); */
161 if (val)
162 xsignal1 (Qinvalid_regexp, build_string (val));
164 cp->regexp = Fcopy_sequence (pattern);
167 /* Shrink each compiled regexp buffer in the cache
168 to the size actually used right now.
169 This is called from garbage collection. */
171 void
172 shrink_regexp_cache (void)
174 struct regexp_cache *cp;
176 for (cp = searchbuf_head; cp != 0; cp = cp->next)
178 cp->buf.allocated = cp->buf.used;
179 cp->buf.buffer = xrealloc (cp->buf.buffer, cp->buf.used);
183 /* Clear the regexp cache w.r.t. a particular syntax table,
184 because it was changed.
185 There is no danger of memory leak here because re_compile_pattern
186 automagically manages the memory in each re_pattern_buffer struct,
187 based on its `allocated' and `buffer' values. */
188 void
189 clear_regexp_cache (void)
191 int i;
193 for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
194 /* It's tempting to compare with the syntax-table we've actually changed,
195 but it's not sufficient because char-table inheritance means that
196 modifying one syntax-table can change others at the same time. */
197 if (!EQ (searchbufs[i].syntax_table, Qt))
198 searchbufs[i].regexp = Qnil;
201 /* Compile a regexp if necessary, but first check to see if there's one in
202 the cache.
203 PATTERN is the pattern to compile.
204 TRANSLATE is a translation table for ignoring case, or nil for none.
205 REGP is the structure that says where to store the "register"
206 values that will result from matching this pattern.
207 If it is 0, we should compile the pattern not to record any
208 subexpression bounds.
209 POSIX is true if we want full backtracking (POSIX style) for this pattern.
210 False means backtrack only enough to get a valid match. */
212 struct re_pattern_buffer *
213 compile_pattern (Lisp_Object pattern, struct re_registers *regp,
214 Lisp_Object translate, bool posix, bool multibyte)
216 struct regexp_cache *cp, **cpp;
218 for (cpp = &searchbuf_head; ; cpp = &cp->next)
220 cp = *cpp;
221 /* Entries are initialized to nil, and may be set to nil by
222 compile_pattern_1 if the pattern isn't valid. Don't apply
223 string accessors in those cases. However, compile_pattern_1
224 is only applied to the cache entry we pick here to reuse. So
225 nil should never appear before a non-nil entry. */
226 if (NILP (cp->regexp))
227 goto compile_it;
228 if (SCHARS (cp->regexp) == SCHARS (pattern)
229 && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
230 && !NILP (Fstring_equal (cp->regexp, pattern))
231 && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
232 && cp->posix == posix
233 && (EQ (cp->syntax_table, Qt)
234 || EQ (cp->syntax_table, BVAR (current_buffer, syntax_table)))
235 && !NILP (Fequal (cp->whitespace_regexp, Vsearch_spaces_regexp))
236 && cp->buf.charset_unibyte == charset_unibyte)
237 break;
239 /* If we're at the end of the cache, compile into the nil cell
240 we found, or the last (least recently used) cell with a
241 string value. */
242 if (cp->next == 0)
244 compile_it:
245 compile_pattern_1 (cp, pattern, translate, posix);
246 break;
250 /* When we get here, cp (aka *cpp) contains the compiled pattern,
251 either because we found it in the cache or because we just compiled it.
252 Move it to the front of the queue to mark it as most recently used. */
253 *cpp = cp->next;
254 cp->next = searchbuf_head;
255 searchbuf_head = cp;
257 /* Advise the searching functions about the space we have allocated
258 for register data. */
259 if (regp)
260 re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
262 /* The compiled pattern can be used both for multibyte and unibyte
263 target. But, we have to tell which the pattern is used for. */
264 cp->buf.target_multibyte = multibyte;
266 return &cp->buf;
270 static Lisp_Object
271 looking_at_1 (Lisp_Object string, bool posix)
273 Lisp_Object val;
274 unsigned char *p1, *p2;
275 ptrdiff_t s1, s2;
276 register ptrdiff_t i;
277 struct re_pattern_buffer *bufp;
279 if (running_asynch_code)
280 save_search_regs ();
282 /* This is so set_image_of_range_1 in regex.c can find the EQV table. */
283 set_char_table_extras (BVAR (current_buffer, case_canon_table), 2,
284 BVAR (current_buffer, case_eqv_table));
286 CHECK_STRING (string);
287 bufp = compile_pattern (string,
288 (NILP (Vinhibit_changing_match_data)
289 ? &search_regs : NULL),
290 (!NILP (BVAR (current_buffer, case_fold_search))
291 ? BVAR (current_buffer, case_canon_table) : Qnil),
292 posix,
293 !NILP (BVAR (current_buffer, enable_multibyte_characters)));
295 immediate_quit = 1;
296 QUIT; /* Do a pending quit right away, to avoid paradoxical behavior */
298 /* Get pointers and sizes of the two strings
299 that make up the visible portion of the buffer. */
301 p1 = BEGV_ADDR;
302 s1 = GPT_BYTE - BEGV_BYTE;
303 p2 = GAP_END_ADDR;
304 s2 = ZV_BYTE - GPT_BYTE;
305 if (s1 < 0)
307 p2 = p1;
308 s2 = ZV_BYTE - BEGV_BYTE;
309 s1 = 0;
311 if (s2 < 0)
313 s1 = ZV_BYTE - BEGV_BYTE;
314 s2 = 0;
317 re_match_object = Qnil;
319 i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
320 PT_BYTE - BEGV_BYTE,
321 (NILP (Vinhibit_changing_match_data)
322 ? &search_regs : NULL),
323 ZV_BYTE - BEGV_BYTE);
324 immediate_quit = 0;
326 if (i == -2)
327 matcher_overflow ();
329 val = (i >= 0 ? Qt : Qnil);
330 if (NILP (Vinhibit_changing_match_data) && i >= 0)
332 for (i = 0; i < search_regs.num_regs; i++)
333 if (search_regs.start[i] >= 0)
335 search_regs.start[i]
336 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
337 search_regs.end[i]
338 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
340 /* Set last_thing_searched only when match data is changed. */
341 XSETBUFFER (last_thing_searched, current_buffer);
344 return val;
347 DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
348 doc: /* Return t if text after point matches regular expression REGEXP.
349 This function modifies the match data that `match-beginning',
350 `match-end' and `match-data' access; save and restore the match
351 data if you want to preserve them. */)
352 (Lisp_Object regexp)
354 return looking_at_1 (regexp, 0);
357 DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
358 doc: /* Return t if text after point matches regular expression REGEXP.
359 Find the longest match, in accord with Posix regular expression rules.
360 This function modifies the match data that `match-beginning',
361 `match-end' and `match-data' access; save and restore the match
362 data if you want to preserve them. */)
363 (Lisp_Object regexp)
365 return looking_at_1 (regexp, 1);
368 static Lisp_Object
369 string_match_1 (Lisp_Object regexp, Lisp_Object string, Lisp_Object start,
370 bool posix)
372 ptrdiff_t val;
373 struct re_pattern_buffer *bufp;
374 EMACS_INT pos;
375 ptrdiff_t pos_byte, i;
377 if (running_asynch_code)
378 save_search_regs ();
380 CHECK_STRING (regexp);
381 CHECK_STRING (string);
383 if (NILP (start))
384 pos = 0, pos_byte = 0;
385 else
387 ptrdiff_t len = SCHARS (string);
389 CHECK_NUMBER (start);
390 pos = XINT (start);
391 if (pos < 0 && -pos <= len)
392 pos = len + pos;
393 else if (0 > pos || pos > len)
394 args_out_of_range (string, start);
395 pos_byte = string_char_to_byte (string, pos);
398 /* This is so set_image_of_range_1 in regex.c can find the EQV table. */
399 set_char_table_extras (BVAR (current_buffer, case_canon_table), 2,
400 BVAR (current_buffer, case_eqv_table));
402 bufp = compile_pattern (regexp,
403 (NILP (Vinhibit_changing_match_data)
404 ? &search_regs : NULL),
405 (!NILP (BVAR (current_buffer, case_fold_search))
406 ? BVAR (current_buffer, case_canon_table) : Qnil),
407 posix,
408 STRING_MULTIBYTE (string));
409 immediate_quit = 1;
410 re_match_object = string;
412 val = re_search (bufp, SSDATA (string),
413 SBYTES (string), pos_byte,
414 SBYTES (string) - pos_byte,
415 (NILP (Vinhibit_changing_match_data)
416 ? &search_regs : NULL));
417 immediate_quit = 0;
419 /* Set last_thing_searched only when match data is changed. */
420 if (NILP (Vinhibit_changing_match_data))
421 last_thing_searched = Qt;
423 if (val == -2)
424 matcher_overflow ();
425 if (val < 0) return Qnil;
427 if (NILP (Vinhibit_changing_match_data))
428 for (i = 0; i < search_regs.num_regs; i++)
429 if (search_regs.start[i] >= 0)
431 search_regs.start[i]
432 = string_byte_to_char (string, search_regs.start[i]);
433 search_regs.end[i]
434 = string_byte_to_char (string, search_regs.end[i]);
437 return make_number (string_byte_to_char (string, val));
440 DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
441 doc: /* Return index of start of first match for REGEXP in STRING, or nil.
442 Matching ignores case if `case-fold-search' is non-nil.
443 If third arg START is non-nil, start search at that index in STRING.
444 For index of first char beyond the match, do (match-end 0).
445 `match-end' and `match-beginning' also give indices of substrings
446 matched by parenthesis constructs in the pattern.
448 You can use the function `match-string' to extract the substrings
449 matched by the parenthesis constructions in REGEXP. */)
450 (Lisp_Object regexp, Lisp_Object string, Lisp_Object start)
452 return string_match_1 (regexp, string, start, 0);
455 DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
456 doc: /* Return index of start of first match for REGEXP in STRING, or nil.
457 Find the longest match, in accord with Posix regular expression rules.
458 Case is ignored if `case-fold-search' is non-nil in the current buffer.
459 If third arg START is non-nil, start search at that index in STRING.
460 For index of first char beyond the match, do (match-end 0).
461 `match-end' and `match-beginning' also give indices of substrings
462 matched by parenthesis constructs in the pattern. */)
463 (Lisp_Object regexp, Lisp_Object string, Lisp_Object start)
465 return string_match_1 (regexp, string, start, 1);
468 /* Match REGEXP against STRING, searching all of STRING,
469 and return the index of the match, or negative on failure.
470 This does not clobber the match data. */
472 ptrdiff_t
473 fast_string_match (Lisp_Object regexp, Lisp_Object string)
475 ptrdiff_t val;
476 struct re_pattern_buffer *bufp;
478 bufp = compile_pattern (regexp, 0, Qnil,
479 0, STRING_MULTIBYTE (string));
480 immediate_quit = 1;
481 re_match_object = string;
483 val = re_search (bufp, SSDATA (string),
484 SBYTES (string), 0,
485 SBYTES (string), 0);
486 immediate_quit = 0;
487 return val;
490 /* Match REGEXP against STRING, searching all of STRING ignoring case,
491 and return the index of the match, or negative on failure.
492 This does not clobber the match data.
493 We assume that STRING contains single-byte characters. */
495 ptrdiff_t
496 fast_c_string_match_ignore_case (Lisp_Object regexp,
497 const char *string, ptrdiff_t len)
499 ptrdiff_t val;
500 struct re_pattern_buffer *bufp;
502 regexp = string_make_unibyte (regexp);
503 re_match_object = Qt;
504 bufp = compile_pattern (regexp, 0,
505 Vascii_canon_table, 0,
507 immediate_quit = 1;
508 val = re_search (bufp, string, len, 0, len, 0);
509 immediate_quit = 0;
510 return val;
513 /* Like fast_string_match but ignore case. */
515 ptrdiff_t
516 fast_string_match_ignore_case (Lisp_Object regexp, Lisp_Object string)
518 ptrdiff_t val;
519 struct re_pattern_buffer *bufp;
521 bufp = compile_pattern (regexp, 0, Vascii_canon_table,
522 0, STRING_MULTIBYTE (string));
523 immediate_quit = 1;
524 re_match_object = string;
526 val = re_search (bufp, SSDATA (string),
527 SBYTES (string), 0,
528 SBYTES (string), 0);
529 immediate_quit = 0;
530 return val;
533 /* Match REGEXP against the characters after POS to LIMIT, and return
534 the number of matched characters. If STRING is non-nil, match
535 against the characters in it. In that case, POS and LIMIT are
536 indices into the string. This function doesn't modify the match
537 data. */
539 ptrdiff_t
540 fast_looking_at (Lisp_Object regexp, ptrdiff_t pos, ptrdiff_t pos_byte,
541 ptrdiff_t limit, ptrdiff_t limit_byte, Lisp_Object string)
543 bool multibyte;
544 struct re_pattern_buffer *buf;
545 unsigned char *p1, *p2;
546 ptrdiff_t s1, s2;
547 ptrdiff_t len;
549 if (STRINGP (string))
551 if (pos_byte < 0)
552 pos_byte = string_char_to_byte (string, pos);
553 if (limit_byte < 0)
554 limit_byte = string_char_to_byte (string, limit);
555 p1 = NULL;
556 s1 = 0;
557 p2 = SDATA (string);
558 s2 = SBYTES (string);
559 re_match_object = string;
560 multibyte = STRING_MULTIBYTE (string);
562 else
564 if (pos_byte < 0)
565 pos_byte = CHAR_TO_BYTE (pos);
566 if (limit_byte < 0)
567 limit_byte = CHAR_TO_BYTE (limit);
568 pos_byte -= BEGV_BYTE;
569 limit_byte -= BEGV_BYTE;
570 p1 = BEGV_ADDR;
571 s1 = GPT_BYTE - BEGV_BYTE;
572 p2 = GAP_END_ADDR;
573 s2 = ZV_BYTE - GPT_BYTE;
574 if (s1 < 0)
576 p2 = p1;
577 s2 = ZV_BYTE - BEGV_BYTE;
578 s1 = 0;
580 if (s2 < 0)
582 s1 = ZV_BYTE - BEGV_BYTE;
583 s2 = 0;
585 re_match_object = Qnil;
586 multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
589 buf = compile_pattern (regexp, 0, Qnil, 0, multibyte);
590 immediate_quit = 1;
591 len = re_match_2 (buf, (char *) p1, s1, (char *) p2, s2,
592 pos_byte, NULL, limit_byte);
593 immediate_quit = 0;
595 return len;
599 /* The newline cache: remembering which sections of text have no newlines. */
601 /* If the user has requested the long scans caching, make sure it's on.
602 Otherwise, make sure it's off.
603 This is our cheezy way of associating an action with the change of
604 state of a buffer-local variable. */
605 static struct region_cache *
606 newline_cache_on_off (struct buffer *buf)
608 struct buffer *base_buf = buf;
609 bool indirect_p = false;
611 if (buf->base_buffer)
613 base_buf = buf->base_buffer;
614 indirect_p = true;
617 /* Don't turn on or off the cache in the base buffer, if the value
618 of cache-long-scans of the base buffer is inconsistent with that.
619 This is because doing so will just make the cache pure overhead,
620 since if we turn it on via indirect buffer, it will be
621 immediately turned off by its base buffer. */
622 if (NILP (BVAR (buf, cache_long_scans)))
624 if (!indirect_p
625 || NILP (BVAR (base_buf, cache_long_scans)))
627 /* It should be off. */
628 if (base_buf->newline_cache)
630 free_region_cache (base_buf->newline_cache);
631 base_buf->newline_cache = 0;
634 return NULL;
636 else
638 if (!indirect_p
639 || !NILP (BVAR (base_buf, cache_long_scans)))
641 /* It should be on. */
642 if (base_buf->newline_cache == 0)
643 base_buf->newline_cache = new_region_cache ();
645 return base_buf->newline_cache;
650 /* Search for COUNT newlines between START/START_BYTE and END/END_BYTE.
652 If COUNT is positive, search forwards; END must be >= START.
653 If COUNT is negative, search backwards for the -COUNTth instance;
654 END must be <= START.
655 If COUNT is zero, do anything you please; run rogue, for all I care.
657 If END is zero, use BEGV or ZV instead, as appropriate for the
658 direction indicated by COUNT.
660 If we find COUNT instances, set *SHORTAGE to zero, and return the
661 position past the COUNTth match. Note that for reverse motion
662 this is not the same as the usual convention for Emacs motion commands.
664 If we don't find COUNT instances before reaching END, set *SHORTAGE
665 to the number of newlines left unfound, and return END.
667 If BYTEPOS is not NULL, set *BYTEPOS to the byte position corresponding
668 to the returned character position.
670 If ALLOW_QUIT, set immediate_quit. That's good to do
671 except when inside redisplay. */
673 ptrdiff_t
674 find_newline (ptrdiff_t start, ptrdiff_t start_byte, ptrdiff_t end,
675 ptrdiff_t end_byte, ptrdiff_t count, ptrdiff_t *shortage,
676 ptrdiff_t *bytepos, bool allow_quit)
678 struct region_cache *newline_cache;
679 int direction;
680 struct buffer *cache_buffer;
682 if (count > 0)
684 direction = 1;
685 if (!end)
686 end = ZV, end_byte = ZV_BYTE;
688 else
690 direction = -1;
691 if (!end)
692 end = BEGV, end_byte = BEGV_BYTE;
694 if (end_byte == -1)
695 end_byte = CHAR_TO_BYTE (end);
697 newline_cache = newline_cache_on_off (current_buffer);
698 if (current_buffer->base_buffer)
699 cache_buffer = current_buffer->base_buffer;
700 else
701 cache_buffer = current_buffer;
703 if (shortage != 0)
704 *shortage = 0;
706 immediate_quit = allow_quit;
708 if (count > 0)
709 while (start != end)
711 /* Our innermost scanning loop is very simple; it doesn't know
712 about gaps, buffer ends, or the newline cache. ceiling is
713 the position of the last character before the next such
714 obstacle --- the last character the dumb search loop should
715 examine. */
716 ptrdiff_t tem, ceiling_byte = end_byte - 1;
718 /* If we're looking for a newline, consult the newline cache
719 to see where we can avoid some scanning. */
720 if (newline_cache)
722 ptrdiff_t next_change;
723 immediate_quit = 0;
724 while (region_cache_forward
725 (cache_buffer, newline_cache, start, &next_change))
726 start = next_change;
727 immediate_quit = allow_quit;
729 start_byte = CHAR_TO_BYTE (start);
731 /* START should never be after END. */
732 if (start_byte > ceiling_byte)
733 start_byte = ceiling_byte;
735 /* Now the text after start is an unknown region, and
736 next_change is the position of the next known region. */
737 ceiling_byte = min (CHAR_TO_BYTE (next_change) - 1, ceiling_byte);
739 else if (start_byte == -1)
740 start_byte = CHAR_TO_BYTE (start);
742 /* The dumb loop can only scan text stored in contiguous
743 bytes. BUFFER_CEILING_OF returns the last character
744 position that is contiguous, so the ceiling is the
745 position after that. */
746 tem = BUFFER_CEILING_OF (start_byte);
747 ceiling_byte = min (tem, ceiling_byte);
750 /* The termination address of the dumb loop. */
751 unsigned char *lim_addr = BYTE_POS_ADDR (ceiling_byte) + 1;
752 ptrdiff_t lim_byte = ceiling_byte + 1;
754 /* Nonpositive offsets (relative to LIM_ADDR and LIM_BYTE)
755 of the base, the cursor, and the next line. */
756 ptrdiff_t base = start_byte - lim_byte;
757 ptrdiff_t cursor, next;
759 for (cursor = base; cursor < 0; cursor = next)
761 /* The dumb loop. */
762 unsigned char *nl = memchr (lim_addr + cursor, '\n', - cursor);
763 next = nl ? nl - lim_addr : 0;
765 /* If we're looking for newlines, cache the fact that
766 this line's region is free of them. */
767 if (newline_cache)
769 know_region_cache (cache_buffer, newline_cache,
770 BYTE_TO_CHAR (lim_byte + cursor),
771 BYTE_TO_CHAR (lim_byte + next));
772 /* know_region_cache can relocate buffer text. */
773 lim_addr = BYTE_POS_ADDR (ceiling_byte) + 1;
776 if (! nl)
777 break;
778 next++;
780 if (--count == 0)
782 immediate_quit = 0;
783 if (bytepos)
784 *bytepos = lim_byte + next;
785 return BYTE_TO_CHAR (lim_byte + next);
789 start_byte = lim_byte;
790 start = BYTE_TO_CHAR (start_byte);
793 else
794 while (start > end)
796 /* The last character to check before the next obstacle. */
797 ptrdiff_t tem, ceiling_byte = end_byte;
799 /* Consult the newline cache, if appropriate. */
800 if (newline_cache)
802 ptrdiff_t next_change;
803 immediate_quit = 0;
804 while (region_cache_backward
805 (cache_buffer, newline_cache, start, &next_change))
806 start = next_change;
807 immediate_quit = allow_quit;
809 start_byte = CHAR_TO_BYTE (start);
811 /* Start should never be at or before end. */
812 if (start_byte <= ceiling_byte)
813 start_byte = ceiling_byte + 1;
815 /* Now the text before start is an unknown region, and
816 next_change is the position of the next known region. */
817 ceiling_byte = max (CHAR_TO_BYTE (next_change), ceiling_byte);
819 else if (start_byte == -1)
820 start_byte = CHAR_TO_BYTE (start);
822 /* Stop scanning before the gap. */
823 tem = BUFFER_FLOOR_OF (start_byte - 1);
824 ceiling_byte = max (tem, ceiling_byte);
827 /* The termination address of the dumb loop. */
828 unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
830 /* Offsets (relative to CEILING_ADDR and CEILING_BYTE) of
831 the base, the cursor, and the previous line. These
832 offsets are at least -1. */
833 ptrdiff_t base = start_byte - ceiling_byte;
834 ptrdiff_t cursor, prev;
836 for (cursor = base; 0 < cursor; cursor = prev)
838 unsigned char *nl = memrchr (ceiling_addr, '\n', cursor);
839 prev = nl ? nl - ceiling_addr : -1;
841 /* If we're looking for newlines, cache the fact that
842 this line's region is free of them. */
843 if (newline_cache)
845 know_region_cache (cache_buffer, newline_cache,
846 BYTE_TO_CHAR (ceiling_byte + prev + 1),
847 BYTE_TO_CHAR (ceiling_byte + cursor));
848 /* know_region_cache can relocate buffer text. */
849 ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
852 if (! nl)
853 break;
855 if (++count >= 0)
857 immediate_quit = 0;
858 if (bytepos)
859 *bytepos = ceiling_byte + prev + 1;
860 return BYTE_TO_CHAR (ceiling_byte + prev + 1);
864 start_byte = ceiling_byte;
865 start = BYTE_TO_CHAR (start_byte);
869 immediate_quit = 0;
870 if (shortage)
871 *shortage = count * direction;
872 if (bytepos)
874 *bytepos = start_byte == -1 ? CHAR_TO_BYTE (start) : start_byte;
875 eassert (*bytepos == CHAR_TO_BYTE (start));
877 return start;
880 /* Search for COUNT instances of a line boundary.
881 Start at START. If COUNT is negative, search backwards.
883 We report the resulting position by calling TEMP_SET_PT_BOTH.
885 If we find COUNT instances. we position after (always after,
886 even if scanning backwards) the COUNTth match, and return 0.
888 If we don't find COUNT instances before reaching the end of the
889 buffer (or the beginning, if scanning backwards), we return
890 the number of line boundaries left unfound, and position at
891 the limit we bumped up against.
893 If ALLOW_QUIT, set immediate_quit. That's good to do
894 except in special cases. */
896 ptrdiff_t
897 scan_newline (ptrdiff_t start, ptrdiff_t start_byte,
898 ptrdiff_t limit, ptrdiff_t limit_byte,
899 ptrdiff_t count, bool allow_quit)
901 ptrdiff_t charpos, bytepos, shortage;
903 charpos = find_newline (start, start_byte, limit, limit_byte,
904 count, &shortage, &bytepos, allow_quit);
905 if (shortage)
906 TEMP_SET_PT_BOTH (limit, limit_byte);
907 else
908 TEMP_SET_PT_BOTH (charpos, bytepos);
909 return shortage;
912 /* Like find_newline, but doesn't allow QUITting and doesn't return
913 SHORTAGE. */
914 ptrdiff_t
915 find_newline_no_quit (ptrdiff_t from, ptrdiff_t frombyte,
916 ptrdiff_t cnt, ptrdiff_t *bytepos)
918 return find_newline (from, frombyte, 0, -1, cnt, NULL, bytepos, 0);
921 /* Like find_newline, but returns position before the newline, not
922 after, and only search up to TO.
923 This isn't just find_newline_no_quit (...)-1, because you might hit TO. */
925 ptrdiff_t
926 find_before_next_newline (ptrdiff_t from, ptrdiff_t to,
927 ptrdiff_t cnt, ptrdiff_t *bytepos)
929 ptrdiff_t shortage;
930 ptrdiff_t pos = find_newline (from, -1, to, -1, cnt, &shortage, bytepos, 1);
932 if (shortage == 0)
934 if (bytepos)
935 DEC_BOTH (pos, *bytepos);
936 else
937 pos--;
939 return pos;
942 /* Subroutines of Lisp buffer search functions. */
944 static Lisp_Object
945 search_command (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror,
946 Lisp_Object count, int direction, int RE, bool posix)
948 EMACS_INT np;
949 EMACS_INT lim;
950 ptrdiff_t lim_byte;
951 EMACS_INT n = direction;
953 if (!NILP (count))
955 CHECK_NUMBER (count);
956 n *= XINT (count);
959 CHECK_STRING (string);
960 if (NILP (bound))
962 if (n > 0)
963 lim = ZV, lim_byte = ZV_BYTE;
964 else
965 lim = BEGV, lim_byte = BEGV_BYTE;
967 else
969 CHECK_NUMBER_COERCE_MARKER (bound);
970 lim = XINT (bound);
971 if (n > 0 ? lim < PT : lim > PT)
972 error ("Invalid search bound (wrong side of point)");
973 if (lim > ZV)
974 lim = ZV, lim_byte = ZV_BYTE;
975 else if (lim < BEGV)
976 lim = BEGV, lim_byte = BEGV_BYTE;
977 else
978 lim_byte = CHAR_TO_BYTE (lim);
981 /* This is so set_image_of_range_1 in regex.c can find the EQV table. */
982 set_char_table_extras (BVAR (current_buffer, case_canon_table), 2,
983 BVAR (current_buffer, case_eqv_table));
985 np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
986 (!NILP (BVAR (current_buffer, case_fold_search))
987 ? BVAR (current_buffer, case_canon_table)
988 : Qnil),
989 (!NILP (BVAR (current_buffer, case_fold_search))
990 ? BVAR (current_buffer, case_eqv_table)
991 : Qnil),
992 posix);
993 if (np <= 0)
995 if (NILP (noerror))
996 xsignal1 (Qsearch_failed, string);
998 if (!EQ (noerror, Qt))
1000 eassert (BEGV <= lim && lim <= ZV);
1001 SET_PT_BOTH (lim, lim_byte);
1002 return Qnil;
1003 #if 0 /* This would be clean, but maybe programs depend on
1004 a value of nil here. */
1005 np = lim;
1006 #endif
1008 else
1009 return Qnil;
1012 eassert (BEGV <= np && np <= ZV);
1013 SET_PT (np);
1015 return make_number (np);
1018 /* Return true if REGEXP it matches just one constant string. */
1020 static bool
1021 trivial_regexp_p (Lisp_Object regexp)
1023 ptrdiff_t len = SBYTES (regexp);
1024 unsigned char *s = SDATA (regexp);
1025 while (--len >= 0)
1027 switch (*s++)
1029 case '.': case '*': case '+': case '?': case '[': case '^': case '$':
1030 return 0;
1031 case '\\':
1032 if (--len < 0)
1033 return 0;
1034 switch (*s++)
1036 case '|': case '(': case ')': case '`': case '\'': case 'b':
1037 case 'B': case '<': case '>': case 'w': case 'W': case 's':
1038 case 'S': case '=': case '{': case '}': case '_':
1039 case 'c': case 'C': /* for categoryspec and notcategoryspec */
1040 case '1': case '2': case '3': case '4': case '5':
1041 case '6': case '7': case '8': case '9':
1042 return 0;
1046 return 1;
1049 /* Search for the n'th occurrence of STRING in the current buffer,
1050 starting at position POS and stopping at position LIM,
1051 treating STRING as a literal string if RE is false or as
1052 a regular expression if RE is true.
1054 If N is positive, searching is forward and LIM must be greater than POS.
1055 If N is negative, searching is backward and LIM must be less than POS.
1057 Returns -x if x occurrences remain to be found (x > 0),
1058 or else the position at the beginning of the Nth occurrence
1059 (if searching backward) or the end (if searching forward).
1061 POSIX is nonzero if we want full backtracking (POSIX style)
1062 for this pattern. 0 means backtrack only enough to get a valid match. */
1064 #define TRANSLATE(out, trt, d) \
1065 do \
1067 if (! NILP (trt)) \
1069 Lisp_Object temp; \
1070 temp = Faref (trt, make_number (d)); \
1071 if (INTEGERP (temp)) \
1072 out = XINT (temp); \
1073 else \
1074 out = d; \
1076 else \
1077 out = d; \
1079 while (0)
1081 /* Only used in search_buffer, to record the end position of the match
1082 when searching regexps and SEARCH_REGS should not be changed
1083 (i.e. Vinhibit_changing_match_data is non-nil). */
1084 static struct re_registers search_regs_1;
1086 static EMACS_INT
1087 search_buffer (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte,
1088 ptrdiff_t lim, ptrdiff_t lim_byte, EMACS_INT n,
1089 int RE, Lisp_Object trt, Lisp_Object inverse_trt, bool posix)
1091 ptrdiff_t len = SCHARS (string);
1092 ptrdiff_t len_byte = SBYTES (string);
1093 register ptrdiff_t i;
1095 if (running_asynch_code)
1096 save_search_regs ();
1098 /* Searching 0 times means don't move. */
1099 /* Null string is found at starting position. */
1100 if (len == 0 || n == 0)
1102 set_search_regs (pos_byte, 0);
1103 return pos;
1106 if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp)))
1108 unsigned char *p1, *p2;
1109 ptrdiff_t s1, s2;
1110 struct re_pattern_buffer *bufp;
1112 bufp = compile_pattern (string,
1113 (NILP (Vinhibit_changing_match_data)
1114 ? &search_regs : &search_regs_1),
1115 trt, posix,
1116 !NILP (BVAR (current_buffer, enable_multibyte_characters)));
1118 immediate_quit = 1; /* Quit immediately if user types ^G,
1119 because letting this function finish
1120 can take too long. */
1121 QUIT; /* Do a pending quit right away,
1122 to avoid paradoxical behavior */
1123 /* Get pointers and sizes of the two strings
1124 that make up the visible portion of the buffer. */
1126 p1 = BEGV_ADDR;
1127 s1 = GPT_BYTE - BEGV_BYTE;
1128 p2 = GAP_END_ADDR;
1129 s2 = ZV_BYTE - GPT_BYTE;
1130 if (s1 < 0)
1132 p2 = p1;
1133 s2 = ZV_BYTE - BEGV_BYTE;
1134 s1 = 0;
1136 if (s2 < 0)
1138 s1 = ZV_BYTE - BEGV_BYTE;
1139 s2 = 0;
1141 re_match_object = Qnil;
1143 while (n < 0)
1145 ptrdiff_t val;
1147 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1148 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1149 (NILP (Vinhibit_changing_match_data)
1150 ? &search_regs : &search_regs_1),
1151 /* Don't allow match past current point */
1152 pos_byte - BEGV_BYTE);
1153 if (val == -2)
1155 matcher_overflow ();
1157 if (val >= 0)
1159 if (NILP (Vinhibit_changing_match_data))
1161 pos_byte = search_regs.start[0] + BEGV_BYTE;
1162 for (i = 0; i < search_regs.num_regs; i++)
1163 if (search_regs.start[i] >= 0)
1165 search_regs.start[i]
1166 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1167 search_regs.end[i]
1168 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1170 XSETBUFFER (last_thing_searched, current_buffer);
1171 /* Set pos to the new position. */
1172 pos = search_regs.start[0];
1174 else
1176 pos_byte = search_regs_1.start[0] + BEGV_BYTE;
1177 /* Set pos to the new position. */
1178 pos = BYTE_TO_CHAR (search_regs_1.start[0] + BEGV_BYTE);
1181 else
1183 immediate_quit = 0;
1184 return (n);
1186 n++;
1188 while (n > 0)
1190 ptrdiff_t val;
1192 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1193 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1194 (NILP (Vinhibit_changing_match_data)
1195 ? &search_regs : &search_regs_1),
1196 lim_byte - BEGV_BYTE);
1197 if (val == -2)
1199 matcher_overflow ();
1201 if (val >= 0)
1203 if (NILP (Vinhibit_changing_match_data))
1205 pos_byte = search_regs.end[0] + BEGV_BYTE;
1206 for (i = 0; i < search_regs.num_regs; i++)
1207 if (search_regs.start[i] >= 0)
1209 search_regs.start[i]
1210 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1211 search_regs.end[i]
1212 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1214 XSETBUFFER (last_thing_searched, current_buffer);
1215 pos = search_regs.end[0];
1217 else
1219 pos_byte = search_regs_1.end[0] + BEGV_BYTE;
1220 pos = BYTE_TO_CHAR (search_regs_1.end[0] + BEGV_BYTE);
1223 else
1225 immediate_quit = 0;
1226 return (0 - n);
1228 n--;
1230 immediate_quit = 0;
1231 return (pos);
1233 else /* non-RE case */
1235 unsigned char *raw_pattern, *pat;
1236 ptrdiff_t raw_pattern_size;
1237 ptrdiff_t raw_pattern_size_byte;
1238 unsigned char *patbuf;
1239 bool multibyte = !NILP (BVAR (current_buffer, enable_multibyte_characters));
1240 unsigned char *base_pat;
1241 /* Set to positive if we find a non-ASCII char that need
1242 translation. Otherwise set to zero later. */
1243 int char_base = -1;
1244 bool boyer_moore_ok = 1;
1246 /* MULTIBYTE says whether the text to be searched is multibyte.
1247 We must convert PATTERN to match that, or we will not really
1248 find things right. */
1250 if (multibyte == STRING_MULTIBYTE (string))
1252 raw_pattern = SDATA (string);
1253 raw_pattern_size = SCHARS (string);
1254 raw_pattern_size_byte = SBYTES (string);
1256 else if (multibyte)
1258 raw_pattern_size = SCHARS (string);
1259 raw_pattern_size_byte
1260 = count_size_as_multibyte (SDATA (string),
1261 raw_pattern_size);
1262 raw_pattern = alloca (raw_pattern_size_byte + 1);
1263 copy_text (SDATA (string), raw_pattern,
1264 SCHARS (string), 0, 1);
1266 else
1268 /* Converting multibyte to single-byte.
1270 ??? Perhaps this conversion should be done in a special way
1271 by subtracting nonascii-insert-offset from each non-ASCII char,
1272 so that only the multibyte chars which really correspond to
1273 the chosen single-byte character set can possibly match. */
1274 raw_pattern_size = SCHARS (string);
1275 raw_pattern_size_byte = SCHARS (string);
1276 raw_pattern = alloca (raw_pattern_size + 1);
1277 copy_text (SDATA (string), raw_pattern,
1278 SBYTES (string), 1, 0);
1281 /* Copy and optionally translate the pattern. */
1282 len = raw_pattern_size;
1283 len_byte = raw_pattern_size_byte;
1284 patbuf = alloca (len * MAX_MULTIBYTE_LENGTH);
1285 pat = patbuf;
1286 base_pat = raw_pattern;
1287 if (multibyte)
1289 /* Fill patbuf by translated characters in STRING while
1290 checking if we can use boyer-moore search. If TRT is
1291 non-nil, we can use boyer-moore search only if TRT can be
1292 represented by the byte array of 256 elements. For that,
1293 all non-ASCII case-equivalents of all case-sensitive
1294 characters in STRING must belong to the same character
1295 group (two characters belong to the same group iff their
1296 multibyte forms are the same except for the last byte;
1297 i.e. every 64 characters form a group; U+0000..U+003F,
1298 U+0040..U+007F, U+0080..U+00BF, ...). */
1300 while (--len >= 0)
1302 unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str;
1303 int c, translated, inverse;
1304 int in_charlen, charlen;
1306 /* If we got here and the RE flag is set, it's because we're
1307 dealing with a regexp known to be trivial, so the backslash
1308 just quotes the next character. */
1309 if (RE && *base_pat == '\\')
1311 len--;
1312 raw_pattern_size--;
1313 len_byte--;
1314 base_pat++;
1317 c = STRING_CHAR_AND_LENGTH (base_pat, in_charlen);
1319 if (NILP (trt))
1321 str = base_pat;
1322 charlen = in_charlen;
1324 else
1326 /* Translate the character. */
1327 TRANSLATE (translated, trt, c);
1328 charlen = CHAR_STRING (translated, str_base);
1329 str = str_base;
1331 /* Check if C has any other case-equivalents. */
1332 TRANSLATE (inverse, inverse_trt, c);
1333 /* If so, check if we can use boyer-moore. */
1334 if (c != inverse && boyer_moore_ok)
1336 /* Check if all equivalents belong to the same
1337 group of characters. Note that the check of C
1338 itself is done by the last iteration. */
1339 int this_char_base = -1;
1341 while (boyer_moore_ok)
1343 if (ASCII_BYTE_P (inverse))
1345 if (this_char_base > 0)
1346 boyer_moore_ok = 0;
1347 else
1348 this_char_base = 0;
1350 else if (CHAR_BYTE8_P (inverse))
1351 /* Boyer-moore search can't handle a
1352 translation of an eight-bit
1353 character. */
1354 boyer_moore_ok = 0;
1355 else if (this_char_base < 0)
1357 this_char_base = inverse & ~0x3F;
1358 if (char_base < 0)
1359 char_base = this_char_base;
1360 else if (this_char_base != char_base)
1361 boyer_moore_ok = 0;
1363 else if ((inverse & ~0x3F) != this_char_base)
1364 boyer_moore_ok = 0;
1365 if (c == inverse)
1366 break;
1367 TRANSLATE (inverse, inverse_trt, inverse);
1372 /* Store this character into the translated pattern. */
1373 memcpy (pat, str, charlen);
1374 pat += charlen;
1375 base_pat += in_charlen;
1376 len_byte -= in_charlen;
1379 /* If char_base is still negative we didn't find any translated
1380 non-ASCII characters. */
1381 if (char_base < 0)
1382 char_base = 0;
1384 else
1386 /* Unibyte buffer. */
1387 char_base = 0;
1388 while (--len >= 0)
1390 int c, translated, inverse;
1392 /* If we got here and the RE flag is set, it's because we're
1393 dealing with a regexp known to be trivial, so the backslash
1394 just quotes the next character. */
1395 if (RE && *base_pat == '\\')
1397 len--;
1398 raw_pattern_size--;
1399 base_pat++;
1401 c = *base_pat++;
1402 TRANSLATE (translated, trt, c);
1403 *pat++ = translated;
1404 /* Check that none of C's equivalents violates the
1405 assumptions of boyer_moore. */
1406 TRANSLATE (inverse, inverse_trt, c);
1407 while (1)
1409 if (inverse >= 0200)
1411 boyer_moore_ok = 0;
1412 break;
1414 if (c == inverse)
1415 break;
1416 TRANSLATE (inverse, inverse_trt, inverse);
1421 len_byte = pat - patbuf;
1422 pat = base_pat = patbuf;
1424 if (boyer_moore_ok)
1425 return boyer_moore (n, pat, len_byte, trt, inverse_trt,
1426 pos_byte, lim_byte,
1427 char_base);
1428 else
1429 return simple_search (n, pat, raw_pattern_size, len_byte, trt,
1430 pos, pos_byte, lim, lim_byte);
1434 /* Do a simple string search N times for the string PAT,
1435 whose length is LEN/LEN_BYTE,
1436 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1437 TRT is the translation table.
1439 Return the character position where the match is found.
1440 Otherwise, if M matches remained to be found, return -M.
1442 This kind of search works regardless of what is in PAT and
1443 regardless of what is in TRT. It is used in cases where
1444 boyer_moore cannot work. */
1446 static EMACS_INT
1447 simple_search (EMACS_INT n, unsigned char *pat,
1448 ptrdiff_t len, ptrdiff_t len_byte, Lisp_Object trt,
1449 ptrdiff_t pos, ptrdiff_t pos_byte,
1450 ptrdiff_t lim, ptrdiff_t lim_byte)
1452 bool multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
1453 bool forward = n > 0;
1454 /* Number of buffer bytes matched. Note that this may be different
1455 from len_byte in a multibyte buffer. */
1456 ptrdiff_t match_byte = PTRDIFF_MIN;
1458 if (lim > pos && multibyte)
1459 while (n > 0)
1461 while (1)
1463 /* Try matching at position POS. */
1464 ptrdiff_t this_pos = pos;
1465 ptrdiff_t this_pos_byte = pos_byte;
1466 ptrdiff_t this_len = len;
1467 unsigned char *p = pat;
1468 if (pos + len > lim || pos_byte + len_byte > lim_byte)
1469 goto stop;
1471 while (this_len > 0)
1473 int charlen, buf_charlen;
1474 int pat_ch, buf_ch;
1476 pat_ch = STRING_CHAR_AND_LENGTH (p, charlen);
1477 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1478 buf_charlen);
1479 TRANSLATE (buf_ch, trt, buf_ch);
1481 if (buf_ch != pat_ch)
1482 break;
1484 this_len--;
1485 p += charlen;
1487 this_pos_byte += buf_charlen;
1488 this_pos++;
1491 if (this_len == 0)
1493 match_byte = this_pos_byte - pos_byte;
1494 pos += len;
1495 pos_byte += match_byte;
1496 break;
1499 INC_BOTH (pos, pos_byte);
1502 n--;
1504 else if (lim > pos)
1505 while (n > 0)
1507 while (1)
1509 /* Try matching at position POS. */
1510 ptrdiff_t this_pos = pos;
1511 ptrdiff_t this_len = len;
1512 unsigned char *p = pat;
1514 if (pos + len > lim)
1515 goto stop;
1517 while (this_len > 0)
1519 int pat_ch = *p++;
1520 int buf_ch = FETCH_BYTE (this_pos);
1521 TRANSLATE (buf_ch, trt, buf_ch);
1523 if (buf_ch != pat_ch)
1524 break;
1526 this_len--;
1527 this_pos++;
1530 if (this_len == 0)
1532 match_byte = len;
1533 pos += len;
1534 break;
1537 pos++;
1540 n--;
1542 /* Backwards search. */
1543 else if (lim < pos && multibyte)
1544 while (n < 0)
1546 while (1)
1548 /* Try matching at position POS. */
1549 ptrdiff_t this_pos = pos;
1550 ptrdiff_t this_pos_byte = pos_byte;
1551 ptrdiff_t this_len = len;
1552 const unsigned char *p = pat + len_byte;
1554 if (this_pos - len < lim || (pos_byte - len_byte) < lim_byte)
1555 goto stop;
1557 while (this_len > 0)
1559 int pat_ch, buf_ch;
1561 DEC_BOTH (this_pos, this_pos_byte);
1562 PREV_CHAR_BOUNDARY (p, pat);
1563 pat_ch = STRING_CHAR (p);
1564 buf_ch = STRING_CHAR (BYTE_POS_ADDR (this_pos_byte));
1565 TRANSLATE (buf_ch, trt, buf_ch);
1567 if (buf_ch != pat_ch)
1568 break;
1570 this_len--;
1573 if (this_len == 0)
1575 match_byte = pos_byte - this_pos_byte;
1576 pos = this_pos;
1577 pos_byte = this_pos_byte;
1578 break;
1581 DEC_BOTH (pos, pos_byte);
1584 n++;
1586 else if (lim < pos)
1587 while (n < 0)
1589 while (1)
1591 /* Try matching at position POS. */
1592 ptrdiff_t this_pos = pos - len;
1593 ptrdiff_t this_len = len;
1594 unsigned char *p = pat;
1596 if (this_pos < lim)
1597 goto stop;
1599 while (this_len > 0)
1601 int pat_ch = *p++;
1602 int buf_ch = FETCH_BYTE (this_pos);
1603 TRANSLATE (buf_ch, trt, buf_ch);
1605 if (buf_ch != pat_ch)
1606 break;
1607 this_len--;
1608 this_pos++;
1611 if (this_len == 0)
1613 match_byte = len;
1614 pos -= len;
1615 break;
1618 pos--;
1621 n++;
1624 stop:
1625 if (n == 0)
1627 eassert (match_byte != PTRDIFF_MIN);
1628 if (forward)
1629 set_search_regs ((multibyte ? pos_byte : pos) - match_byte, match_byte);
1630 else
1631 set_search_regs (multibyte ? pos_byte : pos, match_byte);
1633 return pos;
1635 else if (n > 0)
1636 return -n;
1637 else
1638 return n;
1641 /* Do Boyer-Moore search N times for the string BASE_PAT,
1642 whose length is LEN_BYTE,
1643 from buffer position POS_BYTE until LIM_BYTE.
1644 DIRECTION says which direction we search in.
1645 TRT and INVERSE_TRT are translation tables.
1646 Characters in PAT are already translated by TRT.
1648 This kind of search works if all the characters in BASE_PAT that
1649 have nontrivial translation are the same aside from the last byte.
1650 This makes it possible to translate just the last byte of a
1651 character, and do so after just a simple test of the context.
1652 CHAR_BASE is nonzero if there is such a non-ASCII character.
1654 If that criterion is not satisfied, do not call this function. */
1656 static EMACS_INT
1657 boyer_moore (EMACS_INT n, unsigned char *base_pat,
1658 ptrdiff_t len_byte,
1659 Lisp_Object trt, Lisp_Object inverse_trt,
1660 ptrdiff_t pos_byte, ptrdiff_t lim_byte,
1661 int char_base)
1663 int direction = ((n > 0) ? 1 : -1);
1664 register ptrdiff_t dirlen;
1665 ptrdiff_t limit;
1666 int stride_for_teases = 0;
1667 int BM_tab[0400];
1668 register unsigned char *cursor, *p_limit;
1669 register ptrdiff_t i;
1670 register int j;
1671 unsigned char *pat, *pat_end;
1672 bool multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
1674 unsigned char simple_translate[0400];
1675 /* These are set to the preceding bytes of a byte to be translated
1676 if char_base is nonzero. As the maximum byte length of a
1677 multibyte character is 5, we have to check at most four previous
1678 bytes. */
1679 int translate_prev_byte1 = 0;
1680 int translate_prev_byte2 = 0;
1681 int translate_prev_byte3 = 0;
1683 /* The general approach is that we are going to maintain that we know
1684 the first (closest to the present position, in whatever direction
1685 we're searching) character that could possibly be the last
1686 (furthest from present position) character of a valid match. We
1687 advance the state of our knowledge by looking at that character
1688 and seeing whether it indeed matches the last character of the
1689 pattern. If it does, we take a closer look. If it does not, we
1690 move our pointer (to putative last characters) as far as is
1691 logically possible. This amount of movement, which I call a
1692 stride, will be the length of the pattern if the actual character
1693 appears nowhere in the pattern, otherwise it will be the distance
1694 from the last occurrence of that character to the end of the
1695 pattern. If the amount is zero we have a possible match. */
1697 /* Here we make a "mickey mouse" BM table. The stride of the search
1698 is determined only by the last character of the putative match.
1699 If that character does not match, we will stride the proper
1700 distance to propose a match that superimposes it on the last
1701 instance of a character that matches it (per trt), or misses
1702 it entirely if there is none. */
1704 dirlen = len_byte * direction;
1706 /* Record position after the end of the pattern. */
1707 pat_end = base_pat + len_byte;
1708 /* BASE_PAT points to a character that we start scanning from.
1709 It is the first character in a forward search,
1710 the last character in a backward search. */
1711 if (direction < 0)
1712 base_pat = pat_end - 1;
1714 /* A character that does not appear in the pattern induces a
1715 stride equal to the pattern length. */
1716 for (i = 0; i < 0400; i++)
1717 BM_tab[i] = dirlen;
1719 /* We use this for translation, instead of TRT itself.
1720 We fill this in to handle the characters that actually
1721 occur in the pattern. Others don't matter anyway! */
1722 for (i = 0; i < 0400; i++)
1723 simple_translate[i] = i;
1725 if (char_base)
1727 /* Setup translate_prev_byte1/2/3/4 from CHAR_BASE. Only a
1728 byte following them are the target of translation. */
1729 unsigned char str[MAX_MULTIBYTE_LENGTH];
1730 int cblen = CHAR_STRING (char_base, str);
1732 translate_prev_byte1 = str[cblen - 2];
1733 if (cblen > 2)
1735 translate_prev_byte2 = str[cblen - 3];
1736 if (cblen > 3)
1737 translate_prev_byte3 = str[cblen - 4];
1741 i = 0;
1742 while (i != dirlen)
1744 unsigned char *ptr = base_pat + i;
1745 i += direction;
1746 if (! NILP (trt))
1748 /* If the byte currently looking at is the last of a
1749 character to check case-equivalents, set CH to that
1750 character. An ASCII character and a non-ASCII character
1751 matching with CHAR_BASE are to be checked. */
1752 int ch = -1;
1754 if (ASCII_BYTE_P (*ptr) || ! multibyte)
1755 ch = *ptr;
1756 else if (char_base
1757 && ((pat_end - ptr) == 1 || CHAR_HEAD_P (ptr[1])))
1759 unsigned char *charstart = ptr - 1;
1761 while (! (CHAR_HEAD_P (*charstart)))
1762 charstart--;
1763 ch = STRING_CHAR (charstart);
1764 if (char_base != (ch & ~0x3F))
1765 ch = -1;
1768 if (ch >= 0200 && multibyte)
1769 j = (ch & 0x3F) | 0200;
1770 else
1771 j = *ptr;
1773 if (i == dirlen)
1774 stride_for_teases = BM_tab[j];
1776 BM_tab[j] = dirlen - i;
1777 /* A translation table is accompanied by its inverse -- see
1778 comment following downcase_table for details. */
1779 if (ch >= 0)
1781 int starting_ch = ch;
1782 int starting_j = j;
1784 while (1)
1786 TRANSLATE (ch, inverse_trt, ch);
1787 if (ch >= 0200 && multibyte)
1788 j = (ch & 0x3F) | 0200;
1789 else
1790 j = ch;
1792 /* For all the characters that map into CH,
1793 set up simple_translate to map the last byte
1794 into STARTING_J. */
1795 simple_translate[j] = starting_j;
1796 if (ch == starting_ch)
1797 break;
1798 BM_tab[j] = dirlen - i;
1802 else
1804 j = *ptr;
1806 if (i == dirlen)
1807 stride_for_teases = BM_tab[j];
1808 BM_tab[j] = dirlen - i;
1810 /* stride_for_teases tells how much to stride if we get a
1811 match on the far character but are subsequently
1812 disappointed, by recording what the stride would have been
1813 for that character if the last character had been
1814 different. */
1816 pos_byte += dirlen - ((direction > 0) ? direction : 0);
1817 /* loop invariant - POS_BYTE points at where last char (first
1818 char if reverse) of pattern would align in a possible match. */
1819 while (n != 0)
1821 ptrdiff_t tail_end;
1822 unsigned char *tail_end_ptr;
1824 /* It's been reported that some (broken) compiler thinks that
1825 Boolean expressions in an arithmetic context are unsigned.
1826 Using an explicit ?1:0 prevents this. */
1827 if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1828 < 0)
1829 return (n * (0 - direction));
1830 /* First we do the part we can by pointers (maybe nothing) */
1831 QUIT;
1832 pat = base_pat;
1833 limit = pos_byte - dirlen + direction;
1834 if (direction > 0)
1836 limit = BUFFER_CEILING_OF (limit);
1837 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1838 can take on without hitting edge of buffer or the gap. */
1839 limit = min (limit, pos_byte + 20000);
1840 limit = min (limit, lim_byte - 1);
1842 else
1844 limit = BUFFER_FLOOR_OF (limit);
1845 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1846 can take on without hitting edge of buffer or the gap. */
1847 limit = max (limit, pos_byte - 20000);
1848 limit = max (limit, lim_byte);
1850 tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1851 tail_end_ptr = BYTE_POS_ADDR (tail_end);
1853 if ((limit - pos_byte) * direction > 20)
1855 unsigned char *p2;
1857 p_limit = BYTE_POS_ADDR (limit);
1858 p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1859 /* In this loop, pos + cursor - p2 is the surrogate for pos. */
1860 while (1) /* use one cursor setting as long as i can */
1862 if (direction > 0) /* worth duplicating */
1864 while (cursor <= p_limit)
1866 if (BM_tab[*cursor] == 0)
1867 goto hit;
1868 cursor += BM_tab[*cursor];
1871 else
1873 while (cursor >= p_limit)
1875 if (BM_tab[*cursor] == 0)
1876 goto hit;
1877 cursor += BM_tab[*cursor];
1880 /* If you are here, cursor is beyond the end of the
1881 searched region. You fail to match within the
1882 permitted region and would otherwise try a character
1883 beyond that region. */
1884 break;
1886 hit:
1887 i = dirlen - direction;
1888 if (! NILP (trt))
1890 while ((i -= direction) + direction != 0)
1892 int ch;
1893 cursor -= direction;
1894 /* Translate only the last byte of a character. */
1895 if (! multibyte
1896 || ((cursor == tail_end_ptr
1897 || CHAR_HEAD_P (cursor[1]))
1898 && (CHAR_HEAD_P (cursor[0])
1899 /* Check if this is the last byte of
1900 a translatable character. */
1901 || (translate_prev_byte1 == cursor[-1]
1902 && (CHAR_HEAD_P (translate_prev_byte1)
1903 || (translate_prev_byte2 == cursor[-2]
1904 && (CHAR_HEAD_P (translate_prev_byte2)
1905 || (translate_prev_byte3 == cursor[-3]))))))))
1906 ch = simple_translate[*cursor];
1907 else
1908 ch = *cursor;
1909 if (pat[i] != ch)
1910 break;
1913 else
1915 while ((i -= direction) + direction != 0)
1917 cursor -= direction;
1918 if (pat[i] != *cursor)
1919 break;
1922 cursor += dirlen - i - direction; /* fix cursor */
1923 if (i + direction == 0)
1925 ptrdiff_t position, start, end;
1927 cursor -= direction;
1929 position = pos_byte + cursor - p2 + ((direction > 0)
1930 ? 1 - len_byte : 0);
1931 set_search_regs (position, len_byte);
1933 if (NILP (Vinhibit_changing_match_data))
1935 start = search_regs.start[0];
1936 end = search_regs.end[0];
1938 else
1939 /* If Vinhibit_changing_match_data is non-nil,
1940 search_regs will not be changed. So let's
1941 compute start and end here. */
1943 start = BYTE_TO_CHAR (position);
1944 end = BYTE_TO_CHAR (position + len_byte);
1947 if ((n -= direction) != 0)
1948 cursor += dirlen; /* to resume search */
1949 else
1950 return direction > 0 ? end : start;
1952 else
1953 cursor += stride_for_teases; /* <sigh> we lose - */
1955 pos_byte += cursor - p2;
1957 else
1958 /* Now we'll pick up a clump that has to be done the hard
1959 way because it covers a discontinuity. */
1961 limit = ((direction > 0)
1962 ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
1963 : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
1964 limit = ((direction > 0)
1965 ? min (limit + len_byte, lim_byte - 1)
1966 : max (limit - len_byte, lim_byte));
1967 /* LIMIT is now the last value POS_BYTE can have
1968 and still be valid for a possible match. */
1969 while (1)
1971 /* This loop can be coded for space rather than
1972 speed because it will usually run only once.
1973 (the reach is at most len + 21, and typically
1974 does not exceed len). */
1975 while ((limit - pos_byte) * direction >= 0)
1977 int ch = FETCH_BYTE (pos_byte);
1978 if (BM_tab[ch] == 0)
1979 goto hit2;
1980 pos_byte += BM_tab[ch];
1982 break; /* ran off the end */
1984 hit2:
1985 /* Found what might be a match. */
1986 i = dirlen - direction;
1987 while ((i -= direction) + direction != 0)
1989 int ch;
1990 unsigned char *ptr;
1991 pos_byte -= direction;
1992 ptr = BYTE_POS_ADDR (pos_byte);
1993 /* Translate only the last byte of a character. */
1994 if (! multibyte
1995 || ((ptr == tail_end_ptr
1996 || CHAR_HEAD_P (ptr[1]))
1997 && (CHAR_HEAD_P (ptr[0])
1998 /* Check if this is the last byte of a
1999 translatable character. */
2000 || (translate_prev_byte1 == ptr[-1]
2001 && (CHAR_HEAD_P (translate_prev_byte1)
2002 || (translate_prev_byte2 == ptr[-2]
2003 && (CHAR_HEAD_P (translate_prev_byte2)
2004 || translate_prev_byte3 == ptr[-3])))))))
2005 ch = simple_translate[*ptr];
2006 else
2007 ch = *ptr;
2008 if (pat[i] != ch)
2009 break;
2011 /* Above loop has moved POS_BYTE part or all the way
2012 back to the first pos (last pos if reverse).
2013 Set it once again at the last (first if reverse) char. */
2014 pos_byte += dirlen - i - direction;
2015 if (i + direction == 0)
2017 ptrdiff_t position, start, end;
2018 pos_byte -= direction;
2020 position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
2021 set_search_regs (position, len_byte);
2023 if (NILP (Vinhibit_changing_match_data))
2025 start = search_regs.start[0];
2026 end = search_regs.end[0];
2028 else
2029 /* If Vinhibit_changing_match_data is non-nil,
2030 search_regs will not be changed. So let's
2031 compute start and end here. */
2033 start = BYTE_TO_CHAR (position);
2034 end = BYTE_TO_CHAR (position + len_byte);
2037 if ((n -= direction) != 0)
2038 pos_byte += dirlen; /* to resume search */
2039 else
2040 return direction > 0 ? end : start;
2042 else
2043 pos_byte += stride_for_teases;
2046 /* We have done one clump. Can we continue? */
2047 if ((lim_byte - pos_byte) * direction < 0)
2048 return ((0 - n) * direction);
2050 return BYTE_TO_CHAR (pos_byte);
2053 /* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
2054 for the overall match just found in the current buffer.
2055 Also clear out the match data for registers 1 and up. */
2057 static void
2058 set_search_regs (ptrdiff_t beg_byte, ptrdiff_t nbytes)
2060 ptrdiff_t i;
2062 if (!NILP (Vinhibit_changing_match_data))
2063 return;
2065 /* Make sure we have registers in which to store
2066 the match position. */
2067 if (search_regs.num_regs == 0)
2069 search_regs.start = xmalloc (2 * sizeof (regoff_t));
2070 search_regs.end = xmalloc (2 * sizeof (regoff_t));
2071 search_regs.num_regs = 2;
2074 /* Clear out the other registers. */
2075 for (i = 1; i < search_regs.num_regs; i++)
2077 search_regs.start[i] = -1;
2078 search_regs.end[i] = -1;
2081 search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
2082 search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
2083 XSETBUFFER (last_thing_searched, current_buffer);
2086 DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
2087 "MSearch backward: ",
2088 doc: /* Search backward from point for STRING.
2089 Set point to the beginning of the occurrence found, and return point.
2090 An optional second argument bounds the search; it is a buffer position.
2091 The match found must not extend before that position.
2092 Optional third argument, if t, means if fail just return nil (no error).
2093 If not nil and not t, position at limit of search and return nil.
2094 Optional fourth argument COUNT, if non-nil, means to search for COUNT
2095 successive occurrences. If COUNT is negative, search forward,
2096 instead of backward, for -COUNT occurrences.
2098 Search case-sensitivity is determined by the value of the variable
2099 `case-fold-search', which see.
2101 See also the functions `match-beginning', `match-end' and `replace-match'. */)
2102 (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2104 return search_command (string, bound, noerror, count, -1, 0, 0);
2107 DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
2108 doc: /* Search forward from point for STRING.
2109 Set point to the end of the occurrence found, and return point.
2110 An optional second argument bounds the search; it is a buffer position.
2111 The match found must not extend after that position. A value of nil is
2112 equivalent to (point-max).
2113 Optional third argument, if t, means if fail just return nil (no error).
2114 If not nil and not t, move to limit of search and return nil.
2115 Optional fourth argument COUNT, if non-nil, means to search for COUNT
2116 successive occurrences. If COUNT is negative, search backward,
2117 instead of forward, for -COUNT occurrences.
2119 Search case-sensitivity is determined by the value of the variable
2120 `case-fold-search', which see.
2122 See also the functions `match-beginning', `match-end' and `replace-match'. */)
2123 (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2125 return search_command (string, bound, noerror, count, 1, 0, 0);
2128 DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
2129 "sRE search backward: ",
2130 doc: /* Search backward from point for match for regular expression REGEXP.
2131 Set point to the beginning of the match, and return point.
2132 The match found is the one starting last in the buffer
2133 and yet ending before the origin of the search.
2134 An optional second argument bounds the search; it is a buffer position.
2135 The match found must start at or after that position.
2136 Optional third argument, if t, means if fail just return nil (no error).
2137 If not nil and not t, move to limit of search and return nil.
2138 Optional fourth argument is repeat count--search for successive occurrences.
2140 Search case-sensitivity is determined by the value of the variable
2141 `case-fold-search', which see.
2143 See also the functions `match-beginning', `match-end', `match-string',
2144 and `replace-match'. */)
2145 (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2147 return search_command (regexp, bound, noerror, count, -1, 1, 0);
2150 DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
2151 "sRE search: ",
2152 doc: /* Search forward from point for regular expression REGEXP.
2153 Set point to the end of the occurrence found, and return point.
2154 An optional second argument bounds the search; it is a buffer position.
2155 The match found must not extend after that position.
2156 Optional third argument, if t, means if fail just return nil (no error).
2157 If not nil and not t, move to limit of search and return nil.
2158 Optional fourth argument is repeat count--search for successive occurrences.
2160 Search case-sensitivity is determined by the value of the variable
2161 `case-fold-search', which see.
2163 See also the functions `match-beginning', `match-end', `match-string',
2164 and `replace-match'. */)
2165 (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2167 return search_command (regexp, bound, noerror, count, 1, 1, 0);
2170 DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
2171 "sPosix search backward: ",
2172 doc: /* Search backward from point for match for regular expression REGEXP.
2173 Find the longest match in accord with Posix regular expression rules.
2174 Set point to the beginning of the match, and return point.
2175 The match found is the one starting last in the buffer
2176 and yet ending before the origin of the search.
2177 An optional second argument bounds the search; it is a buffer position.
2178 The match found must start at or after that position.
2179 Optional third argument, if t, means if fail just return nil (no error).
2180 If not nil and not t, move to limit of search and return nil.
2181 Optional fourth argument is repeat count--search for successive occurrences.
2183 Search case-sensitivity is determined by the value of the variable
2184 `case-fold-search', which see.
2186 See also the functions `match-beginning', `match-end', `match-string',
2187 and `replace-match'. */)
2188 (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2190 return search_command (regexp, bound, noerror, count, -1, 1, 1);
2193 DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
2194 "sPosix search: ",
2195 doc: /* Search forward from point for regular expression REGEXP.
2196 Find the longest match in accord with Posix regular expression rules.
2197 Set point to the end of the occurrence found, and return point.
2198 An optional second argument bounds the search; it is a buffer position.
2199 The match found must not extend after that position.
2200 Optional third argument, if t, means if fail just return nil (no error).
2201 If not nil and not t, move to limit of search and return nil.
2202 Optional fourth argument is repeat count--search for successive occurrences.
2204 Search case-sensitivity is determined by the value of the variable
2205 `case-fold-search', which see.
2207 See also the functions `match-beginning', `match-end', `match-string',
2208 and `replace-match'. */)
2209 (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2211 return search_command (regexp, bound, noerror, count, 1, 1, 1);
2214 DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
2215 doc: /* Replace text matched by last search with NEWTEXT.
2216 Leave point at the end of the replacement text.
2218 If optional second arg FIXEDCASE is non-nil, do not alter the case of
2219 the replacement text. Otherwise, maybe capitalize the whole text, or
2220 maybe just word initials, based on the replaced text. If the replaced
2221 text has only capital letters and has at least one multiletter word,
2222 convert NEWTEXT to all caps. Otherwise if all words are capitalized
2223 in the replaced text, capitalize each word in NEWTEXT.
2225 If optional third arg LITERAL is non-nil, insert NEWTEXT literally.
2226 Otherwise treat `\\' as special:
2227 `\\&' in NEWTEXT means substitute original matched text.
2228 `\\N' means substitute what matched the Nth `\\(...\\)'.
2229 If Nth parens didn't match, substitute nothing.
2230 `\\\\' means insert one `\\'.
2231 `\\?' is treated literally
2232 (for compatibility with `query-replace-regexp').
2233 Any other character following `\\' signals an error.
2234 Case conversion does not apply to these substitutions.
2236 If optional fourth argument STRING is non-nil, it should be a string
2237 to act on; this should be the string on which the previous match was
2238 done via `string-match'. In this case, `replace-match' creates and
2239 returns a new string, made by copying STRING and replacing the part of
2240 STRING that was matched (the original STRING itself is not altered).
2242 The optional fifth argument SUBEXP specifies a subexpression;
2243 it says to replace just that subexpression with NEWTEXT,
2244 rather than replacing the entire matched text.
2245 This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
2246 `\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
2247 NEWTEXT in place of subexp N.
2248 This is useful only after a regular expression search or match,
2249 since only regular expressions have distinguished subexpressions. */)
2250 (Lisp_Object newtext, Lisp_Object fixedcase, Lisp_Object literal, Lisp_Object string, Lisp_Object subexp)
2252 enum { nochange, all_caps, cap_initial } case_action;
2253 ptrdiff_t pos, pos_byte;
2254 bool some_multiletter_word;
2255 bool some_lowercase;
2256 bool some_uppercase;
2257 bool some_nonuppercase_initial;
2258 int c, prevc;
2259 ptrdiff_t sub;
2260 ptrdiff_t opoint, newpoint;
2262 CHECK_STRING (newtext);
2264 if (! NILP (string))
2265 CHECK_STRING (string);
2267 case_action = nochange; /* We tried an initialization */
2268 /* but some C compilers blew it */
2270 if (search_regs.num_regs <= 0)
2271 error ("`replace-match' called before any match found");
2273 if (NILP (subexp))
2274 sub = 0;
2275 else
2277 CHECK_NUMBER (subexp);
2278 if (! (0 <= XINT (subexp) && XINT (subexp) < search_regs.num_regs))
2279 args_out_of_range (subexp, make_number (search_regs.num_regs));
2280 sub = XINT (subexp);
2283 if (NILP (string))
2285 if (search_regs.start[sub] < BEGV
2286 || search_regs.start[sub] > search_regs.end[sub]
2287 || search_regs.end[sub] > ZV)
2288 args_out_of_range (make_number (search_regs.start[sub]),
2289 make_number (search_regs.end[sub]));
2291 else
2293 if (search_regs.start[sub] < 0
2294 || search_regs.start[sub] > search_regs.end[sub]
2295 || search_regs.end[sub] > SCHARS (string))
2296 args_out_of_range (make_number (search_regs.start[sub]),
2297 make_number (search_regs.end[sub]));
2300 if (NILP (fixedcase))
2302 /* Decide how to casify by examining the matched text. */
2303 ptrdiff_t last;
2305 pos = search_regs.start[sub];
2306 last = search_regs.end[sub];
2308 if (NILP (string))
2309 pos_byte = CHAR_TO_BYTE (pos);
2310 else
2311 pos_byte = string_char_to_byte (string, pos);
2313 prevc = '\n';
2314 case_action = all_caps;
2316 /* some_multiletter_word is set nonzero if any original word
2317 is more than one letter long. */
2318 some_multiletter_word = 0;
2319 some_lowercase = 0;
2320 some_nonuppercase_initial = 0;
2321 some_uppercase = 0;
2323 while (pos < last)
2325 if (NILP (string))
2327 c = FETCH_CHAR_AS_MULTIBYTE (pos_byte);
2328 INC_BOTH (pos, pos_byte);
2330 else
2331 FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, pos, pos_byte);
2333 if (lowercasep (c))
2335 /* Cannot be all caps if any original char is lower case */
2337 some_lowercase = 1;
2338 if (SYNTAX (prevc) != Sword)
2339 some_nonuppercase_initial = 1;
2340 else
2341 some_multiletter_word = 1;
2343 else if (uppercasep (c))
2345 some_uppercase = 1;
2346 if (SYNTAX (prevc) != Sword)
2348 else
2349 some_multiletter_word = 1;
2351 else
2353 /* If the initial is a caseless word constituent,
2354 treat that like a lowercase initial. */
2355 if (SYNTAX (prevc) != Sword)
2356 some_nonuppercase_initial = 1;
2359 prevc = c;
2362 /* Convert to all caps if the old text is all caps
2363 and has at least one multiletter word. */
2364 if (! some_lowercase && some_multiletter_word)
2365 case_action = all_caps;
2366 /* Capitalize each word, if the old text has all capitalized words. */
2367 else if (!some_nonuppercase_initial && some_multiletter_word)
2368 case_action = cap_initial;
2369 else if (!some_nonuppercase_initial && some_uppercase)
2370 /* Should x -> yz, operating on X, give Yz or YZ?
2371 We'll assume the latter. */
2372 case_action = all_caps;
2373 else
2374 case_action = nochange;
2377 /* Do replacement in a string. */
2378 if (!NILP (string))
2380 Lisp_Object before, after;
2382 before = Fsubstring (string, make_number (0),
2383 make_number (search_regs.start[sub]));
2384 after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
2386 /* Substitute parts of the match into NEWTEXT
2387 if desired. */
2388 if (NILP (literal))
2390 ptrdiff_t lastpos = 0;
2391 ptrdiff_t lastpos_byte = 0;
2392 /* We build up the substituted string in ACCUM. */
2393 Lisp_Object accum;
2394 Lisp_Object middle;
2395 ptrdiff_t length = SBYTES (newtext);
2397 accum = Qnil;
2399 for (pos_byte = 0, pos = 0; pos_byte < length;)
2401 ptrdiff_t substart = -1;
2402 ptrdiff_t subend = 0;
2403 bool delbackslash = 0;
2405 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2407 if (c == '\\')
2409 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2411 if (c == '&')
2413 substart = search_regs.start[sub];
2414 subend = search_regs.end[sub];
2416 else if (c >= '1' && c <= '9')
2418 if (c - '0' < search_regs.num_regs
2419 && search_regs.start[c - '0'] >= 0)
2421 substart = search_regs.start[c - '0'];
2422 subend = search_regs.end[c - '0'];
2424 else
2426 /* If that subexp did not match,
2427 replace \\N with nothing. */
2428 substart = 0;
2429 subend = 0;
2432 else if (c == '\\')
2433 delbackslash = 1;
2434 else if (c != '?')
2435 error ("Invalid use of `\\' in replacement text");
2437 if (substart >= 0)
2439 if (pos - 2 != lastpos)
2440 middle = substring_both (newtext, lastpos,
2441 lastpos_byte,
2442 pos - 2, pos_byte - 2);
2443 else
2444 middle = Qnil;
2445 accum = concat3 (accum, middle,
2446 Fsubstring (string,
2447 make_number (substart),
2448 make_number (subend)));
2449 lastpos = pos;
2450 lastpos_byte = pos_byte;
2452 else if (delbackslash)
2454 middle = substring_both (newtext, lastpos,
2455 lastpos_byte,
2456 pos - 1, pos_byte - 1);
2458 accum = concat2 (accum, middle);
2459 lastpos = pos;
2460 lastpos_byte = pos_byte;
2464 if (pos != lastpos)
2465 middle = substring_both (newtext, lastpos,
2466 lastpos_byte,
2467 pos, pos_byte);
2468 else
2469 middle = Qnil;
2471 newtext = concat2 (accum, middle);
2474 /* Do case substitution in NEWTEXT if desired. */
2475 if (case_action == all_caps)
2476 newtext = Fupcase (newtext);
2477 else if (case_action == cap_initial)
2478 newtext = Fupcase_initials (newtext);
2480 return concat3 (before, newtext, after);
2483 /* Record point, then move (quietly) to the start of the match. */
2484 if (PT >= search_regs.end[sub])
2485 opoint = PT - ZV;
2486 else if (PT > search_regs.start[sub])
2487 opoint = search_regs.end[sub] - ZV;
2488 else
2489 opoint = PT;
2491 /* If we want non-literal replacement,
2492 perform substitution on the replacement string. */
2493 if (NILP (literal))
2495 ptrdiff_t length = SBYTES (newtext);
2496 unsigned char *substed;
2497 ptrdiff_t substed_alloc_size, substed_len;
2498 bool buf_multibyte = !NILP (BVAR (current_buffer, enable_multibyte_characters));
2499 bool str_multibyte = STRING_MULTIBYTE (newtext);
2500 bool really_changed = 0;
2502 substed_alloc_size = (length <= (STRING_BYTES_BOUND - 100) / 2
2503 ? length * 2 + 100
2504 : STRING_BYTES_BOUND);
2505 substed = xmalloc (substed_alloc_size);
2506 substed_len = 0;
2508 /* Go thru NEWTEXT, producing the actual text to insert in
2509 SUBSTED while adjusting multibyteness to that of the current
2510 buffer. */
2512 for (pos_byte = 0, pos = 0; pos_byte < length;)
2514 unsigned char str[MAX_MULTIBYTE_LENGTH];
2515 const unsigned char *add_stuff = NULL;
2516 ptrdiff_t add_len = 0;
2517 ptrdiff_t idx = -1;
2519 if (str_multibyte)
2521 FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte);
2522 if (!buf_multibyte)
2523 c = multibyte_char_to_unibyte (c);
2525 else
2527 /* Note that we don't have to increment POS. */
2528 c = SREF (newtext, pos_byte++);
2529 if (buf_multibyte)
2530 MAKE_CHAR_MULTIBYTE (c);
2533 /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2534 or set IDX to a match index, which means put that part
2535 of the buffer text into SUBSTED. */
2537 if (c == '\\')
2539 really_changed = 1;
2541 if (str_multibyte)
2543 FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext,
2544 pos, pos_byte);
2545 if (!buf_multibyte && !ASCII_CHAR_P (c))
2546 c = multibyte_char_to_unibyte (c);
2548 else
2550 c = SREF (newtext, pos_byte++);
2551 if (buf_multibyte)
2552 MAKE_CHAR_MULTIBYTE (c);
2555 if (c == '&')
2556 idx = sub;
2557 else if (c >= '1' && c <= '9' && c - '0' < search_regs.num_regs)
2559 if (search_regs.start[c - '0'] >= 1)
2560 idx = c - '0';
2562 else if (c == '\\')
2563 add_len = 1, add_stuff = (unsigned char *) "\\";
2564 else
2566 xfree (substed);
2567 error ("Invalid use of `\\' in replacement text");
2570 else
2572 add_len = CHAR_STRING (c, str);
2573 add_stuff = str;
2576 /* If we want to copy part of a previous match,
2577 set up ADD_STUFF and ADD_LEN to point to it. */
2578 if (idx >= 0)
2580 ptrdiff_t begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
2581 add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
2582 if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
2583 move_gap_both (search_regs.start[idx], begbyte);
2584 add_stuff = BYTE_POS_ADDR (begbyte);
2587 /* Now the stuff we want to add to SUBSTED
2588 is invariably ADD_LEN bytes starting at ADD_STUFF. */
2590 /* Make sure SUBSTED is big enough. */
2591 if (substed_alloc_size - substed_len < add_len)
2592 substed =
2593 xpalloc (substed, &substed_alloc_size,
2594 add_len - (substed_alloc_size - substed_len),
2595 STRING_BYTES_BOUND, 1);
2597 /* Now add to the end of SUBSTED. */
2598 if (add_stuff)
2600 memcpy (substed + substed_len, add_stuff, add_len);
2601 substed_len += add_len;
2605 if (really_changed)
2607 if (buf_multibyte)
2609 ptrdiff_t nchars =
2610 multibyte_chars_in_text (substed, substed_len);
2612 newtext = make_multibyte_string ((char *) substed, nchars,
2613 substed_len);
2615 else
2616 newtext = make_unibyte_string ((char *) substed, substed_len);
2618 xfree (substed);
2621 /* Replace the old text with the new in the cleanest possible way. */
2622 replace_range (search_regs.start[sub], search_regs.end[sub],
2623 newtext, 1, 0, 1);
2624 newpoint = search_regs.start[sub] + SCHARS (newtext);
2626 if (case_action == all_caps)
2627 Fupcase_region (make_number (search_regs.start[sub]),
2628 make_number (newpoint));
2629 else if (case_action == cap_initial)
2630 Fupcase_initials_region (make_number (search_regs.start[sub]),
2631 make_number (newpoint));
2633 /* Adjust search data for this change. */
2635 ptrdiff_t oldend = search_regs.end[sub];
2636 ptrdiff_t oldstart = search_regs.start[sub];
2637 ptrdiff_t change = newpoint - search_regs.end[sub];
2638 ptrdiff_t i;
2640 for (i = 0; i < search_regs.num_regs; i++)
2642 if (search_regs.start[i] >= oldend)
2643 search_regs.start[i] += change;
2644 else if (search_regs.start[i] > oldstart)
2645 search_regs.start[i] = oldstart;
2646 if (search_regs.end[i] >= oldend)
2647 search_regs.end[i] += change;
2648 else if (search_regs.end[i] > oldstart)
2649 search_regs.end[i] = oldstart;
2653 /* Put point back where it was in the text. */
2654 if (opoint <= 0)
2655 TEMP_SET_PT (opoint + ZV);
2656 else
2657 TEMP_SET_PT (opoint);
2659 /* Now move point "officially" to the start of the inserted replacement. */
2660 move_if_not_intangible (newpoint);
2662 return Qnil;
2665 static Lisp_Object
2666 match_limit (Lisp_Object num, bool beginningp)
2668 EMACS_INT n;
2670 CHECK_NUMBER (num);
2671 n = XINT (num);
2672 if (n < 0)
2673 args_out_of_range (num, make_number (0));
2674 if (search_regs.num_regs <= 0)
2675 error ("No match data, because no search succeeded");
2676 if (n >= search_regs.num_regs
2677 || search_regs.start[n] < 0)
2678 return Qnil;
2679 return (make_number ((beginningp) ? search_regs.start[n]
2680 : search_regs.end[n]));
2683 DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
2684 doc: /* Return position of start of text matched by last search.
2685 SUBEXP, a number, specifies which parenthesized expression in the last
2686 regexp.
2687 Value is nil if SUBEXPth pair didn't match, or there were less than
2688 SUBEXP pairs.
2689 Zero means the entire text matched by the whole regexp or whole string. */)
2690 (Lisp_Object subexp)
2692 return match_limit (subexp, 1);
2695 DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
2696 doc: /* Return position of end of text matched by last search.
2697 SUBEXP, a number, specifies which parenthesized expression in the last
2698 regexp.
2699 Value is nil if SUBEXPth pair didn't match, or there were less than
2700 SUBEXP pairs.
2701 Zero means the entire text matched by the whole regexp or whole string. */)
2702 (Lisp_Object subexp)
2704 return match_limit (subexp, 0);
2707 DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 3, 0,
2708 doc: /* Return a list containing all info on what the last search matched.
2709 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2710 All the elements are markers or nil (nil if the Nth pair didn't match)
2711 if the last match was on a buffer; integers or nil if a string was matched.
2712 Use `set-match-data' to reinstate the data in this list.
2714 If INTEGERS (the optional first argument) is non-nil, always use
2715 integers \(rather than markers) to represent buffer positions. In
2716 this case, and if the last match was in a buffer, the buffer will get
2717 stored as one additional element at the end of the list.
2719 If REUSE is a list, reuse it as part of the value. If REUSE is long
2720 enough to hold all the values, and if INTEGERS is non-nil, no consing
2721 is done.
2723 If optional third arg RESEAT is non-nil, any previous markers on the
2724 REUSE list will be modified to point to nowhere.
2726 Return value is undefined if the last search failed. */)
2727 (Lisp_Object integers, Lisp_Object reuse, Lisp_Object reseat)
2729 Lisp_Object tail, prev;
2730 Lisp_Object *data;
2731 ptrdiff_t i, len;
2733 if (!NILP (reseat))
2734 for (tail = reuse; CONSP (tail); tail = XCDR (tail))
2735 if (MARKERP (XCAR (tail)))
2737 unchain_marker (XMARKER (XCAR (tail)));
2738 XSETCAR (tail, Qnil);
2741 if (NILP (last_thing_searched))
2742 return Qnil;
2744 prev = Qnil;
2746 data = alloca ((2 * search_regs.num_regs + 1) * sizeof *data);
2748 len = 0;
2749 for (i = 0; i < search_regs.num_regs; i++)
2751 ptrdiff_t start = search_regs.start[i];
2752 if (start >= 0)
2754 if (EQ (last_thing_searched, Qt)
2755 || ! NILP (integers))
2757 XSETFASTINT (data[2 * i], start);
2758 XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
2760 else if (BUFFERP (last_thing_searched))
2762 data[2 * i] = Fmake_marker ();
2763 Fset_marker (data[2 * i],
2764 make_number (start),
2765 last_thing_searched);
2766 data[2 * i + 1] = Fmake_marker ();
2767 Fset_marker (data[2 * i + 1],
2768 make_number (search_regs.end[i]),
2769 last_thing_searched);
2771 else
2772 /* last_thing_searched must always be Qt, a buffer, or Qnil. */
2773 emacs_abort ();
2775 len = 2 * i + 2;
2777 else
2778 data[2 * i] = data[2 * i + 1] = Qnil;
2781 if (BUFFERP (last_thing_searched) && !NILP (integers))
2783 data[len] = last_thing_searched;
2784 len++;
2787 /* If REUSE is not usable, cons up the values and return them. */
2788 if (! CONSP (reuse))
2789 return Flist (len, data);
2791 /* If REUSE is a list, store as many value elements as will fit
2792 into the elements of REUSE. */
2793 for (i = 0, tail = reuse; CONSP (tail);
2794 i++, tail = XCDR (tail))
2796 if (i < len)
2797 XSETCAR (tail, data[i]);
2798 else
2799 XSETCAR (tail, Qnil);
2800 prev = tail;
2803 /* If we couldn't fit all value elements into REUSE,
2804 cons up the rest of them and add them to the end of REUSE. */
2805 if (i < len)
2806 XSETCDR (prev, Flist (len - i, data + i));
2808 return reuse;
2811 /* We used to have an internal use variant of `reseat' described as:
2813 If RESEAT is `evaporate', put the markers back on the free list
2814 immediately. No other references to the markers must exist in this
2815 case, so it is used only internally on the unwind stack and
2816 save-match-data from Lisp.
2818 But it was ill-conceived: those supposedly-internal markers get exposed via
2819 the undo-list, so freeing them here is unsafe. */
2821 DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 2, 0,
2822 doc: /* Set internal data on last search match from elements of LIST.
2823 LIST should have been created by calling `match-data' previously.
2825 If optional arg RESEAT is non-nil, make markers on LIST point nowhere. */)
2826 (register Lisp_Object list, Lisp_Object reseat)
2828 ptrdiff_t i;
2829 register Lisp_Object marker;
2831 if (running_asynch_code)
2832 save_search_regs ();
2834 CHECK_LIST (list);
2836 /* Unless we find a marker with a buffer or an explicit buffer
2837 in LIST, assume that this match data came from a string. */
2838 last_thing_searched = Qt;
2840 /* Allocate registers if they don't already exist. */
2842 EMACS_INT length = XFASTINT (Flength (list)) / 2;
2844 if (length > search_regs.num_regs)
2846 ptrdiff_t num_regs = search_regs.num_regs;
2847 if (PTRDIFF_MAX < length)
2848 memory_full (SIZE_MAX);
2849 search_regs.start =
2850 xpalloc (search_regs.start, &num_regs, length - num_regs,
2851 min (PTRDIFF_MAX, UINT_MAX), sizeof (regoff_t));
2852 search_regs.end =
2853 xrealloc (search_regs.end, num_regs * sizeof (regoff_t));
2855 for (i = search_regs.num_regs; i < num_regs; i++)
2856 search_regs.start[i] = -1;
2858 search_regs.num_regs = num_regs;
2861 for (i = 0; CONSP (list); i++)
2863 marker = XCAR (list);
2864 if (BUFFERP (marker))
2866 last_thing_searched = marker;
2867 break;
2869 if (i >= length)
2870 break;
2871 if (NILP (marker))
2873 search_regs.start[i] = -1;
2874 list = XCDR (list);
2876 else
2878 Lisp_Object from;
2879 Lisp_Object m;
2881 m = marker;
2882 if (MARKERP (marker))
2884 if (XMARKER (marker)->buffer == 0)
2885 XSETFASTINT (marker, 0);
2886 else
2887 XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
2890 CHECK_NUMBER_COERCE_MARKER (marker);
2891 from = marker;
2893 if (!NILP (reseat) && MARKERP (m))
2895 unchain_marker (XMARKER (m));
2896 XSETCAR (list, Qnil);
2899 if ((list = XCDR (list), !CONSP (list)))
2900 break;
2902 m = marker = XCAR (list);
2904 if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
2905 XSETFASTINT (marker, 0);
2907 CHECK_NUMBER_COERCE_MARKER (marker);
2908 if ((XINT (from) < 0
2909 ? TYPE_MINIMUM (regoff_t) <= XINT (from)
2910 : XINT (from) <= TYPE_MAXIMUM (regoff_t))
2911 && (XINT (marker) < 0
2912 ? TYPE_MINIMUM (regoff_t) <= XINT (marker)
2913 : XINT (marker) <= TYPE_MAXIMUM (regoff_t)))
2915 search_regs.start[i] = XINT (from);
2916 search_regs.end[i] = XINT (marker);
2918 else
2920 search_regs.start[i] = -1;
2923 if (!NILP (reseat) && MARKERP (m))
2925 unchain_marker (XMARKER (m));
2926 XSETCAR (list, Qnil);
2929 list = XCDR (list);
2932 for (; i < search_regs.num_regs; i++)
2933 search_regs.start[i] = -1;
2936 return Qnil;
2939 /* If true the match data have been saved in saved_search_regs
2940 during the execution of a sentinel or filter. */
2941 static bool search_regs_saved;
2942 static struct re_registers saved_search_regs;
2943 static Lisp_Object saved_last_thing_searched;
2945 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
2946 if asynchronous code (filter or sentinel) is running. */
2947 static void
2948 save_search_regs (void)
2950 if (!search_regs_saved)
2952 saved_search_regs.num_regs = search_regs.num_regs;
2953 saved_search_regs.start = search_regs.start;
2954 saved_search_regs.end = search_regs.end;
2955 saved_last_thing_searched = last_thing_searched;
2956 last_thing_searched = Qnil;
2957 search_regs.num_regs = 0;
2958 search_regs.start = 0;
2959 search_regs.end = 0;
2961 search_regs_saved = 1;
2965 /* Called upon exit from filters and sentinels. */
2966 void
2967 restore_search_regs (void)
2969 if (search_regs_saved)
2971 if (search_regs.num_regs > 0)
2973 xfree (search_regs.start);
2974 xfree (search_regs.end);
2976 search_regs.num_regs = saved_search_regs.num_regs;
2977 search_regs.start = saved_search_regs.start;
2978 search_regs.end = saved_search_regs.end;
2979 last_thing_searched = saved_last_thing_searched;
2980 saved_last_thing_searched = Qnil;
2981 search_regs_saved = 0;
2985 static void
2986 unwind_set_match_data (Lisp_Object list)
2988 /* It is NOT ALWAYS safe to free (evaporate) the markers immediately. */
2989 Fset_match_data (list, Qt);
2992 /* Called to unwind protect the match data. */
2993 void
2994 record_unwind_save_match_data (void)
2996 record_unwind_protect (unwind_set_match_data,
2997 Fmatch_data (Qnil, Qnil, Qnil));
3000 /* Quote a string to deactivate reg-expr chars */
3002 DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
3003 doc: /* Return a regexp string which matches exactly STRING and nothing else. */)
3004 (Lisp_Object string)
3006 char *in, *out, *end;
3007 char *temp;
3008 ptrdiff_t backslashes_added = 0;
3010 CHECK_STRING (string);
3012 temp = alloca (SBYTES (string) * 2);
3014 /* Now copy the data into the new string, inserting escapes. */
3016 in = SSDATA (string);
3017 end = in + SBYTES (string);
3018 out = temp;
3020 for (; in != end; in++)
3022 if (*in == '['
3023 || *in == '*' || *in == '.' || *in == '\\'
3024 || *in == '?' || *in == '+'
3025 || *in == '^' || *in == '$')
3026 *out++ = '\\', backslashes_added++;
3027 *out++ = *in;
3030 return make_specified_string (temp,
3031 SCHARS (string) + backslashes_added,
3032 out - temp,
3033 STRING_MULTIBYTE (string));
3036 void
3037 syms_of_search (void)
3039 register int i;
3041 for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
3043 searchbufs[i].buf.allocated = 100;
3044 searchbufs[i].buf.buffer = xmalloc (100);
3045 searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
3046 searchbufs[i].regexp = Qnil;
3047 searchbufs[i].whitespace_regexp = Qnil;
3048 searchbufs[i].syntax_table = Qnil;
3049 staticpro (&searchbufs[i].regexp);
3050 staticpro (&searchbufs[i].whitespace_regexp);
3051 staticpro (&searchbufs[i].syntax_table);
3052 searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
3054 searchbuf_head = &searchbufs[0];
3056 DEFSYM (Qsearch_failed, "search-failed");
3057 DEFSYM (Qinvalid_regexp, "invalid-regexp");
3059 Fput (Qsearch_failed, Qerror_conditions,
3060 listn (CONSTYPE_PURE, 2, Qsearch_failed, Qerror));
3061 Fput (Qsearch_failed, Qerror_message,
3062 build_pure_c_string ("Search failed"));
3064 Fput (Qinvalid_regexp, Qerror_conditions,
3065 listn (CONSTYPE_PURE, 2, Qinvalid_regexp, Qerror));
3066 Fput (Qinvalid_regexp, Qerror_message,
3067 build_pure_c_string ("Invalid regexp"));
3069 last_thing_searched = Qnil;
3070 staticpro (&last_thing_searched);
3072 saved_last_thing_searched = Qnil;
3073 staticpro (&saved_last_thing_searched);
3075 DEFVAR_LISP ("search-spaces-regexp", Vsearch_spaces_regexp,
3076 doc: /* Regexp to substitute for bunches of spaces in regexp search.
3077 Some commands use this for user-specified regexps.
3078 Spaces that occur inside character classes or repetition operators
3079 or other such regexp constructs are not replaced with this.
3080 A value of nil (which is the normal value) means treat spaces literally. */);
3081 Vsearch_spaces_regexp = Qnil;
3083 DEFVAR_LISP ("inhibit-changing-match-data", Vinhibit_changing_match_data,
3084 doc: /* Internal use only.
3085 If non-nil, the primitive searching and matching functions
3086 such as `looking-at', `string-match', `re-search-forward', etc.,
3087 do not set the match data. The proper way to use this variable
3088 is to bind it with `let' around a small expression. */);
3089 Vinhibit_changing_match_data = Qnil;
3091 defsubr (&Slooking_at);
3092 defsubr (&Sposix_looking_at);
3093 defsubr (&Sstring_match);
3094 defsubr (&Sposix_string_match);
3095 defsubr (&Ssearch_forward);
3096 defsubr (&Ssearch_backward);
3097 defsubr (&Sre_search_forward);
3098 defsubr (&Sre_search_backward);
3099 defsubr (&Sposix_search_forward);
3100 defsubr (&Sposix_search_backward);
3101 defsubr (&Sreplace_match);
3102 defsubr (&Smatch_beginning);
3103 defsubr (&Smatch_end);
3104 defsubr (&Smatch_data);
3105 defsubr (&Sset_match_data);
3106 defsubr (&Sregexp_quote);