* src/search.c (compile_pattern_1): Remove unused argument.
[emacs.git] / src / search.c
blobdceff94da72ecabbd5bba491cb6c9a6fe74f2a95
1 /* String search routines for GNU Emacs.
2 Copyright (C) 1985-1987, 1993-1994, 1997-1999, 2001-2011
3 Free Software Foundation, Inc.
5 This file is part of GNU Emacs.
7 GNU Emacs is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
12 GNU Emacs is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
21 #include <config.h>
22 #include <setjmp.h>
23 #include "lisp.h"
24 #include "syntax.h"
25 #include "category.h"
26 #include "buffer.h"
27 #include "character.h"
28 #include "charset.h"
29 #include "region-cache.h"
30 #include "commands.h"
31 #include "blockinput.h"
32 #include "intervals.h"
34 #include <sys/types.h>
35 #include "regex.h"
37 #define REGEXP_CACHE_SIZE 20
39 /* If the regexp is non-nil, then the buffer contains the compiled form
40 of that regexp, suitable for searching. */
41 struct regexp_cache
43 struct regexp_cache *next;
44 Lisp_Object regexp, whitespace_regexp;
45 /* Syntax table for which the regexp applies. We need this because
46 of character classes. If this is t, then the compiled pattern is valid
47 for any syntax-table. */
48 Lisp_Object syntax_table;
49 struct re_pattern_buffer buf;
50 char fastmap[0400];
51 /* Nonzero means regexp was compiled to do full POSIX backtracking. */
52 char posix;
55 /* The instances of that struct. */
56 struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
58 /* The head of the linked list; points to the most recently used buffer. */
59 struct regexp_cache *searchbuf_head;
62 /* Every call to re_match, etc., must pass &search_regs as the regs
63 argument unless you can show it is unnecessary (i.e., if re_match
64 is certainly going to be called again before region-around-match
65 can be called).
67 Since the registers are now dynamically allocated, we need to make
68 sure not to refer to the Nth register before checking that it has
69 been allocated by checking search_regs.num_regs.
71 The regex code keeps track of whether it has allocated the search
72 buffer using bits in the re_pattern_buffer. This means that whenever
73 you compile a new pattern, it completely forgets whether it has
74 allocated any registers, and will allocate new registers the next
75 time you call a searching or matching function. Therefore, we need
76 to call re_set_registers after compiling a new pattern or after
77 setting the match registers, so that the regex functions will be
78 able to free or re-allocate it properly. */
79 static struct re_registers search_regs;
81 /* The buffer in which the last search was performed, or
82 Qt if the last search was done in a string;
83 Qnil if no searching has been done yet. */
84 static Lisp_Object last_thing_searched;
86 /* error condition signaled when regexp compile_pattern fails */
88 Lisp_Object Qinvalid_regexp;
90 /* Error condition used for failing searches */
91 Lisp_Object Qsearch_failed;
93 static void set_search_regs (EMACS_INT, EMACS_INT);
94 static void save_search_regs (void);
95 static EMACS_INT simple_search (EMACS_INT, unsigned char *, EMACS_INT,
96 EMACS_INT, Lisp_Object, EMACS_INT, EMACS_INT,
97 EMACS_INT, EMACS_INT);
98 static EMACS_INT boyer_moore (EMACS_INT, unsigned char *, EMACS_INT, EMACS_INT,
99 Lisp_Object, Lisp_Object,
100 EMACS_INT, EMACS_INT,
101 EMACS_INT, EMACS_INT, int);
102 static EMACS_INT search_buffer (Lisp_Object, EMACS_INT, EMACS_INT,
103 EMACS_INT, EMACS_INT, EMACS_INT, int,
104 Lisp_Object, Lisp_Object, int);
105 static void matcher_overflow (void) NO_RETURN;
107 static void
108 matcher_overflow (void)
110 error ("Stack overflow in regexp matcher");
113 /* Compile a regexp and signal a Lisp error if anything goes wrong.
114 PATTERN is the pattern to compile.
115 CP is the place to put the result.
116 TRANSLATE is a translation table for ignoring case, or nil for none.
117 POSIX is nonzero if we want full backtracking (POSIX style)
118 for this pattern. 0 means backtrack only enough to get a valid match.
120 The behavior also depends on Vsearch_spaces_regexp. */
122 static void
123 compile_pattern_1 (struct regexp_cache *cp, Lisp_Object pattern, Lisp_Object translate, int posix)
125 char *val;
126 reg_syntax_t old;
128 cp->regexp = Qnil;
129 cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
130 cp->posix = posix;
131 cp->buf.multibyte = STRING_MULTIBYTE (pattern);
132 cp->buf.charset_unibyte = charset_unibyte;
133 if (STRINGP (Vsearch_spaces_regexp))
134 cp->whitespace_regexp = Vsearch_spaces_regexp;
135 else
136 cp->whitespace_regexp = Qnil;
138 /* rms: I think BLOCK_INPUT is not needed here any more,
139 because regex.c defines malloc to call xmalloc.
140 Using BLOCK_INPUT here means the debugger won't run if an error occurs.
141 So let's turn it off. */
142 /* BLOCK_INPUT; */
143 old = re_set_syntax (RE_SYNTAX_EMACS
144 | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
146 if (STRINGP (Vsearch_spaces_regexp))
147 re_set_whitespace_regexp (SSDATA (Vsearch_spaces_regexp));
148 else
149 re_set_whitespace_regexp (NULL);
151 val = (char *) re_compile_pattern (SSDATA (pattern),
152 SBYTES (pattern), &cp->buf);
154 /* If the compiled pattern hard codes some of the contents of the
155 syntax-table, it can only be reused with *this* syntax table. */
156 cp->syntax_table = cp->buf.used_syntax ? BVAR (current_buffer, syntax_table) : Qt;
158 re_set_whitespace_regexp (NULL);
160 re_set_syntax (old);
161 /* UNBLOCK_INPUT; */
162 if (val)
163 xsignal1 (Qinvalid_regexp, build_string (val));
165 cp->regexp = Fcopy_sequence (pattern);
168 /* Shrink each compiled regexp buffer in the cache
169 to the size actually used right now.
170 This is called from garbage collection. */
172 void
173 shrink_regexp_cache (void)
175 struct regexp_cache *cp;
177 for (cp = searchbuf_head; cp != 0; cp = cp->next)
179 cp->buf.allocated = cp->buf.used;
180 cp->buf.buffer
181 = (unsigned char *) xrealloc (cp->buf.buffer, cp->buf.used);
185 /* Clear the regexp cache w.r.t. a particular syntax table,
186 because it was changed.
187 There is no danger of memory leak here because re_compile_pattern
188 automagically manages the memory in each re_pattern_buffer struct,
189 based on its `allocated' and `buffer' values. */
190 void
191 clear_regexp_cache (void)
193 int i;
195 for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
196 /* It's tempting to compare with the syntax-table we've actually changed,
197 but it's not sufficient because char-table inheritance means that
198 modifying one syntax-table can change others at the same time. */
199 if (!EQ (searchbufs[i].syntax_table, Qt))
200 searchbufs[i].regexp = Qnil;
203 /* Compile a regexp if necessary, but first check to see if there's one in
204 the cache.
205 PATTERN is the pattern to compile.
206 TRANSLATE is a translation table for ignoring case, or nil for none.
207 REGP is the structure that says where to store the "register"
208 values that will result from matching this pattern.
209 If it is 0, we should compile the pattern not to record any
210 subexpression bounds.
211 POSIX is nonzero if we want full backtracking (POSIX style)
212 for this pattern. 0 means backtrack only enough to get a valid match. */
214 struct re_pattern_buffer *
215 compile_pattern (Lisp_Object pattern, struct re_registers *regp, Lisp_Object translate, int posix, int multibyte)
217 struct regexp_cache *cp, **cpp;
219 for (cpp = &searchbuf_head; ; cpp = &cp->next)
221 cp = *cpp;
222 /* Entries are initialized to nil, and may be set to nil by
223 compile_pattern_1 if the pattern isn't valid. Don't apply
224 string accessors in those cases. However, compile_pattern_1
225 is only applied to the cache entry we pick here to reuse. So
226 nil should never appear before a non-nil entry. */
227 if (NILP (cp->regexp))
228 goto compile_it;
229 if (SCHARS (cp->regexp) == SCHARS (pattern)
230 && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
231 && !NILP (Fstring_equal (cp->regexp, pattern))
232 && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
233 && cp->posix == posix
234 && (EQ (cp->syntax_table, Qt)
235 || EQ (cp->syntax_table, BVAR (current_buffer, syntax_table)))
236 && !NILP (Fequal (cp->whitespace_regexp, Vsearch_spaces_regexp))
237 && cp->buf.charset_unibyte == charset_unibyte)
238 break;
240 /* If we're at the end of the cache, compile into the nil cell
241 we found, or the last (least recently used) cell with a
242 string value. */
243 if (cp->next == 0)
245 compile_it:
246 compile_pattern_1 (cp, pattern, translate, posix);
247 break;
251 /* When we get here, cp (aka *cpp) contains the compiled pattern,
252 either because we found it in the cache or because we just compiled it.
253 Move it to the front of the queue to mark it as most recently used. */
254 *cpp = cp->next;
255 cp->next = searchbuf_head;
256 searchbuf_head = cp;
258 /* Advise the searching functions about the space we have allocated
259 for register data. */
260 if (regp)
261 re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
263 /* The compiled pattern can be used both for multibyte and unibyte
264 target. But, we have to tell which the pattern is used for. */
265 cp->buf.target_multibyte = multibyte;
267 return &cp->buf;
271 static Lisp_Object
272 looking_at_1 (Lisp_Object string, int posix)
274 Lisp_Object val;
275 unsigned char *p1, *p2;
276 EMACS_INT s1, s2;
277 register EMACS_INT i;
278 struct re_pattern_buffer *bufp;
280 if (running_asynch_code)
281 save_search_regs ();
283 /* This is so set_image_of_range_1 in regex.c can find the EQV table. */
284 XCHAR_TABLE (BVAR (current_buffer, case_canon_table))->extras[2]
285 = BVAR (current_buffer, case_eqv_table);
287 CHECK_STRING (string);
288 bufp = compile_pattern (string,
289 (NILP (Vinhibit_changing_match_data)
290 ? &search_regs : NULL),
291 (!NILP (BVAR (current_buffer, case_fold_search))
292 ? BVAR (current_buffer, case_canon_table) : Qnil),
293 posix,
294 !NILP (BVAR (current_buffer, enable_multibyte_characters)));
296 immediate_quit = 1;
297 QUIT; /* Do a pending quit right away, to avoid paradoxical behavior */
299 /* Get pointers and sizes of the two strings
300 that make up the visible portion of the buffer. */
302 p1 = BEGV_ADDR;
303 s1 = GPT_BYTE - BEGV_BYTE;
304 p2 = GAP_END_ADDR;
305 s2 = ZV_BYTE - GPT_BYTE;
306 if (s1 < 0)
308 p2 = p1;
309 s2 = ZV_BYTE - BEGV_BYTE;
310 s1 = 0;
312 if (s2 < 0)
314 s1 = ZV_BYTE - BEGV_BYTE;
315 s2 = 0;
318 re_match_object = Qnil;
320 i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
321 PT_BYTE - BEGV_BYTE,
322 (NILP (Vinhibit_changing_match_data)
323 ? &search_regs : NULL),
324 ZV_BYTE - BEGV_BYTE);
325 immediate_quit = 0;
327 if (i == -2)
328 matcher_overflow ();
330 val = (0 <= i ? Qt : Qnil);
331 if (NILP (Vinhibit_changing_match_data) && i >= 0)
332 for (i = 0; i < search_regs.num_regs; i++)
333 if (search_regs.start[i] >= 0)
335 search_regs.start[i]
336 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
337 search_regs.end[i]
338 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
341 /* Set last_thing_searched only when match data is changed. */
342 if (NILP (Vinhibit_changing_match_data))
343 XSETBUFFER (last_thing_searched, current_buffer);
345 return val;
348 DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
349 doc: /* Return t if text after point matches regular expression REGEXP.
350 This function modifies the match data that `match-beginning',
351 `match-end' and `match-data' access; save and restore the match
352 data if you want to preserve them. */)
353 (Lisp_Object regexp)
355 return looking_at_1 (regexp, 0);
358 DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
359 doc: /* Return t if text after point matches regular expression REGEXP.
360 Find the longest match, in accord with Posix regular expression rules.
361 This function modifies the match data that `match-beginning',
362 `match-end' and `match-data' access; save and restore the match
363 data if you want to preserve them. */)
364 (Lisp_Object regexp)
366 return looking_at_1 (regexp, 1);
369 static Lisp_Object
370 string_match_1 (Lisp_Object regexp, Lisp_Object string, Lisp_Object start, int posix)
372 int val;
373 struct re_pattern_buffer *bufp;
374 EMACS_INT pos, pos_byte;
375 int i;
377 if (running_asynch_code)
378 save_search_regs ();
380 CHECK_STRING (regexp);
381 CHECK_STRING (string);
383 if (NILP (start))
384 pos = 0, pos_byte = 0;
385 else
387 EMACS_INT len = SCHARS (string);
389 CHECK_NUMBER (start);
390 pos = XINT (start);
391 if (pos < 0 && -pos <= len)
392 pos = len + pos;
393 else if (0 > pos || pos > len)
394 args_out_of_range (string, start);
395 pos_byte = string_char_to_byte (string, pos);
398 /* This is so set_image_of_range_1 in regex.c can find the EQV table. */
399 XCHAR_TABLE (BVAR (current_buffer, case_canon_table))->extras[2]
400 = BVAR (current_buffer, case_eqv_table);
402 bufp = compile_pattern (regexp,
403 (NILP (Vinhibit_changing_match_data)
404 ? &search_regs : NULL),
405 (!NILP (BVAR (current_buffer, case_fold_search))
406 ? BVAR (current_buffer, case_canon_table) : Qnil),
407 posix,
408 STRING_MULTIBYTE (string));
409 immediate_quit = 1;
410 re_match_object = string;
412 val = re_search (bufp, SSDATA (string),
413 SBYTES (string), pos_byte,
414 SBYTES (string) - pos_byte,
415 (NILP (Vinhibit_changing_match_data)
416 ? &search_regs : NULL));
417 immediate_quit = 0;
419 /* Set last_thing_searched only when match data is changed. */
420 if (NILP (Vinhibit_changing_match_data))
421 last_thing_searched = Qt;
423 if (val == -2)
424 matcher_overflow ();
425 if (val < 0) return Qnil;
427 if (NILP (Vinhibit_changing_match_data))
428 for (i = 0; i < search_regs.num_regs; i++)
429 if (search_regs.start[i] >= 0)
431 search_regs.start[i]
432 = string_byte_to_char (string, search_regs.start[i]);
433 search_regs.end[i]
434 = string_byte_to_char (string, search_regs.end[i]);
437 return make_number (string_byte_to_char (string, val));
440 DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
441 doc: /* Return index of start of first match for REGEXP in STRING, or nil.
442 Matching ignores case if `case-fold-search' is non-nil.
443 If third arg START is non-nil, start search at that index in STRING.
444 For index of first char beyond the match, do (match-end 0).
445 `match-end' and `match-beginning' also give indices of substrings
446 matched by parenthesis constructs in the pattern.
448 You can use the function `match-string' to extract the substrings
449 matched by the parenthesis constructions in REGEXP. */)
450 (Lisp_Object regexp, Lisp_Object string, Lisp_Object start)
452 return string_match_1 (regexp, string, start, 0);
455 DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
456 doc: /* Return index of start of first match for REGEXP in STRING, or nil.
457 Find the longest match, in accord with Posix regular expression rules.
458 Case is ignored if `case-fold-search' is non-nil in the current buffer.
459 If third arg START is non-nil, start search at that index in STRING.
460 For index of first char beyond the match, do (match-end 0).
461 `match-end' and `match-beginning' also give indices of substrings
462 matched by parenthesis constructs in the pattern. */)
463 (Lisp_Object regexp, Lisp_Object string, Lisp_Object start)
465 return string_match_1 (regexp, string, start, 1);
468 /* Match REGEXP against STRING, searching all of STRING,
469 and return the index of the match, or negative on failure.
470 This does not clobber the match data. */
473 fast_string_match (Lisp_Object regexp, Lisp_Object string)
475 int val;
476 struct re_pattern_buffer *bufp;
478 bufp = compile_pattern (regexp, 0, Qnil,
479 0, STRING_MULTIBYTE (string));
480 immediate_quit = 1;
481 re_match_object = string;
483 val = re_search (bufp, SSDATA (string),
484 SBYTES (string), 0,
485 SBYTES (string), 0);
486 immediate_quit = 0;
487 return val;
490 /* Match REGEXP against STRING, searching all of STRING ignoring case,
491 and return the index of the match, or negative on failure.
492 This does not clobber the match data.
493 We assume that STRING contains single-byte characters. */
496 fast_c_string_match_ignore_case (Lisp_Object regexp, const char *string)
498 int val;
499 struct re_pattern_buffer *bufp;
500 size_t len = strlen (string);
502 regexp = string_make_unibyte (regexp);
503 re_match_object = Qt;
504 bufp = compile_pattern (regexp, 0,
505 Vascii_canon_table, 0,
507 immediate_quit = 1;
508 val = re_search (bufp, string, len, 0, len, 0);
509 immediate_quit = 0;
510 return val;
513 /* Like fast_string_match but ignore case. */
516 fast_string_match_ignore_case (Lisp_Object regexp, Lisp_Object string)
518 int val;
519 struct re_pattern_buffer *bufp;
521 bufp = compile_pattern (regexp, 0, Vascii_canon_table,
522 0, STRING_MULTIBYTE (string));
523 immediate_quit = 1;
524 re_match_object = string;
526 val = re_search (bufp, SSDATA (string),
527 SBYTES (string), 0,
528 SBYTES (string), 0);
529 immediate_quit = 0;
530 return val;
533 /* Match REGEXP against the characters after POS to LIMIT, and return
534 the number of matched characters. If STRING is non-nil, match
535 against the characters in it. In that case, POS and LIMIT are
536 indices into the string. This function doesn't modify the match
537 data. */
539 EMACS_INT
540 fast_looking_at (Lisp_Object regexp, EMACS_INT pos, EMACS_INT pos_byte, EMACS_INT limit, EMACS_INT limit_byte, Lisp_Object string)
542 int multibyte;
543 struct re_pattern_buffer *buf;
544 unsigned char *p1, *p2;
545 EMACS_INT s1, s2;
546 EMACS_INT len;
548 if (STRINGP (string))
550 if (pos_byte < 0)
551 pos_byte = string_char_to_byte (string, pos);
552 if (limit_byte < 0)
553 limit_byte = string_char_to_byte (string, limit);
554 p1 = NULL;
555 s1 = 0;
556 p2 = SDATA (string);
557 s2 = SBYTES (string);
558 re_match_object = string;
559 multibyte = STRING_MULTIBYTE (string);
561 else
563 if (pos_byte < 0)
564 pos_byte = CHAR_TO_BYTE (pos);
565 if (limit_byte < 0)
566 limit_byte = CHAR_TO_BYTE (limit);
567 pos_byte -= BEGV_BYTE;
568 limit_byte -= BEGV_BYTE;
569 p1 = BEGV_ADDR;
570 s1 = GPT_BYTE - BEGV_BYTE;
571 p2 = GAP_END_ADDR;
572 s2 = ZV_BYTE - GPT_BYTE;
573 if (s1 < 0)
575 p2 = p1;
576 s2 = ZV_BYTE - BEGV_BYTE;
577 s1 = 0;
579 if (s2 < 0)
581 s1 = ZV_BYTE - BEGV_BYTE;
582 s2 = 0;
584 re_match_object = Qnil;
585 multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
588 buf = compile_pattern (regexp, 0, Qnil, 0, multibyte);
589 immediate_quit = 1;
590 len = re_match_2 (buf, (char *) p1, s1, (char *) p2, s2,
591 pos_byte, NULL, limit_byte);
592 immediate_quit = 0;
594 return len;
598 /* The newline cache: remembering which sections of text have no newlines. */
600 /* If the user has requested newline caching, make sure it's on.
601 Otherwise, make sure it's off.
602 This is our cheezy way of associating an action with the change of
603 state of a buffer-local variable. */
604 static void
605 newline_cache_on_off (struct buffer *buf)
607 if (NILP (BVAR (buf, cache_long_line_scans)))
609 /* It should be off. */
610 if (buf->newline_cache)
612 free_region_cache (buf->newline_cache);
613 buf->newline_cache = 0;
616 else
618 /* It should be on. */
619 if (buf->newline_cache == 0)
620 buf->newline_cache = new_region_cache ();
625 /* Search for COUNT instances of the character TARGET between START and END.
627 If COUNT is positive, search forwards; END must be >= START.
628 If COUNT is negative, search backwards for the -COUNTth instance;
629 END must be <= START.
630 If COUNT is zero, do anything you please; run rogue, for all I care.
632 If END is zero, use BEGV or ZV instead, as appropriate for the
633 direction indicated by COUNT.
635 If we find COUNT instances, set *SHORTAGE to zero, and return the
636 position past the COUNTth match. Note that for reverse motion
637 this is not the same as the usual convention for Emacs motion commands.
639 If we don't find COUNT instances before reaching END, set *SHORTAGE
640 to the number of TARGETs left unfound, and return END.
642 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
643 except when inside redisplay. */
645 EMACS_INT
646 scan_buffer (register int target, EMACS_INT start, EMACS_INT end,
647 EMACS_INT count, int *shortage, int allow_quit)
649 struct region_cache *newline_cache;
650 int direction;
652 if (count > 0)
654 direction = 1;
655 if (! end) end = ZV;
657 else
659 direction = -1;
660 if (! end) end = BEGV;
663 newline_cache_on_off (current_buffer);
664 newline_cache = current_buffer->newline_cache;
666 if (shortage != 0)
667 *shortage = 0;
669 immediate_quit = allow_quit;
671 if (count > 0)
672 while (start != end)
674 /* Our innermost scanning loop is very simple; it doesn't know
675 about gaps, buffer ends, or the newline cache. ceiling is
676 the position of the last character before the next such
677 obstacle --- the last character the dumb search loop should
678 examine. */
679 EMACS_INT ceiling_byte = CHAR_TO_BYTE (end) - 1;
680 EMACS_INT start_byte = CHAR_TO_BYTE (start);
681 EMACS_INT tem;
683 /* If we're looking for a newline, consult the newline cache
684 to see where we can avoid some scanning. */
685 if (target == '\n' && newline_cache)
687 EMACS_INT next_change;
688 immediate_quit = 0;
689 while (region_cache_forward
690 (current_buffer, newline_cache, start_byte, &next_change))
691 start_byte = next_change;
692 immediate_quit = allow_quit;
694 /* START should never be after END. */
695 if (start_byte > ceiling_byte)
696 start_byte = ceiling_byte;
698 /* Now the text after start is an unknown region, and
699 next_change is the position of the next known region. */
700 ceiling_byte = min (next_change - 1, ceiling_byte);
703 /* The dumb loop can only scan text stored in contiguous
704 bytes. BUFFER_CEILING_OF returns the last character
705 position that is contiguous, so the ceiling is the
706 position after that. */
707 tem = BUFFER_CEILING_OF (start_byte);
708 ceiling_byte = min (tem, ceiling_byte);
711 /* The termination address of the dumb loop. */
712 register unsigned char *ceiling_addr
713 = BYTE_POS_ADDR (ceiling_byte) + 1;
714 register unsigned char *cursor
715 = BYTE_POS_ADDR (start_byte);
716 unsigned char *base = cursor;
718 while (cursor < ceiling_addr)
720 unsigned char *scan_start = cursor;
722 /* The dumb loop. */
723 while (*cursor != target && ++cursor < ceiling_addr)
726 /* If we're looking for newlines, cache the fact that
727 the region from start to cursor is free of them. */
728 if (target == '\n' && newline_cache)
729 know_region_cache (current_buffer, newline_cache,
730 start_byte + scan_start - base,
731 start_byte + cursor - base);
733 /* Did we find the target character? */
734 if (cursor < ceiling_addr)
736 if (--count == 0)
738 immediate_quit = 0;
739 return BYTE_TO_CHAR (start_byte + cursor - base + 1);
741 cursor++;
745 start = BYTE_TO_CHAR (start_byte + cursor - base);
748 else
749 while (start > end)
751 /* The last character to check before the next obstacle. */
752 EMACS_INT ceiling_byte = CHAR_TO_BYTE (end);
753 EMACS_INT start_byte = CHAR_TO_BYTE (start);
754 EMACS_INT tem;
756 /* Consult the newline cache, if appropriate. */
757 if (target == '\n' && newline_cache)
759 EMACS_INT next_change;
760 immediate_quit = 0;
761 while (region_cache_backward
762 (current_buffer, newline_cache, start_byte, &next_change))
763 start_byte = next_change;
764 immediate_quit = allow_quit;
766 /* Start should never be at or before end. */
767 if (start_byte <= ceiling_byte)
768 start_byte = ceiling_byte + 1;
770 /* Now the text before start is an unknown region, and
771 next_change is the position of the next known region. */
772 ceiling_byte = max (next_change, ceiling_byte);
775 /* Stop scanning before the gap. */
776 tem = BUFFER_FLOOR_OF (start_byte - 1);
777 ceiling_byte = max (tem, ceiling_byte);
780 /* The termination address of the dumb loop. */
781 register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
782 register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
783 unsigned char *base = cursor;
785 while (cursor >= ceiling_addr)
787 unsigned char *scan_start = cursor;
789 while (*cursor != target && --cursor >= ceiling_addr)
792 /* If we're looking for newlines, cache the fact that
793 the region from after the cursor to start is free of them. */
794 if (target == '\n' && newline_cache)
795 know_region_cache (current_buffer, newline_cache,
796 start_byte + cursor - base,
797 start_byte + scan_start - base);
799 /* Did we find the target character? */
800 if (cursor >= ceiling_addr)
802 if (++count >= 0)
804 immediate_quit = 0;
805 return BYTE_TO_CHAR (start_byte + cursor - base);
807 cursor--;
811 start = BYTE_TO_CHAR (start_byte + cursor - base);
815 immediate_quit = 0;
816 if (shortage != 0)
817 *shortage = count * direction;
818 return start;
821 /* Search for COUNT instances of a line boundary, which means either a
822 newline or (if selective display enabled) a carriage return.
823 Start at START. If COUNT is negative, search backwards.
825 We report the resulting position by calling TEMP_SET_PT_BOTH.
827 If we find COUNT instances. we position after (always after,
828 even if scanning backwards) the COUNTth match, and return 0.
830 If we don't find COUNT instances before reaching the end of the
831 buffer (or the beginning, if scanning backwards), we return
832 the number of line boundaries left unfound, and position at
833 the limit we bumped up against.
835 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
836 except in special cases. */
838 EMACS_INT
839 scan_newline (EMACS_INT start, EMACS_INT start_byte,
840 EMACS_INT limit, EMACS_INT limit_byte,
841 register EMACS_INT count, int allow_quit)
843 int direction = ((count > 0) ? 1 : -1);
845 register unsigned char *cursor;
846 unsigned char *base;
848 EMACS_INT ceiling;
849 register unsigned char *ceiling_addr;
851 int old_immediate_quit = immediate_quit;
853 /* The code that follows is like scan_buffer
854 but checks for either newline or carriage return. */
856 if (allow_quit)
857 immediate_quit++;
859 start_byte = CHAR_TO_BYTE (start);
861 if (count > 0)
863 while (start_byte < limit_byte)
865 ceiling = BUFFER_CEILING_OF (start_byte);
866 ceiling = min (limit_byte - 1, ceiling);
867 ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
868 base = (cursor = BYTE_POS_ADDR (start_byte));
869 while (1)
871 while (*cursor != '\n' && ++cursor != ceiling_addr)
874 if (cursor != ceiling_addr)
876 if (--count == 0)
878 immediate_quit = old_immediate_quit;
879 start_byte = start_byte + cursor - base + 1;
880 start = BYTE_TO_CHAR (start_byte);
881 TEMP_SET_PT_BOTH (start, start_byte);
882 return 0;
884 else
885 if (++cursor == ceiling_addr)
886 break;
888 else
889 break;
891 start_byte += cursor - base;
894 else
896 while (start_byte > limit_byte)
898 ceiling = BUFFER_FLOOR_OF (start_byte - 1);
899 ceiling = max (limit_byte, ceiling);
900 ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
901 base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
902 while (1)
904 while (--cursor != ceiling_addr && *cursor != '\n')
907 if (cursor != ceiling_addr)
909 if (++count == 0)
911 immediate_quit = old_immediate_quit;
912 /* Return the position AFTER the match we found. */
913 start_byte = start_byte + cursor - base + 1;
914 start = BYTE_TO_CHAR (start_byte);
915 TEMP_SET_PT_BOTH (start, start_byte);
916 return 0;
919 else
920 break;
922 /* Here we add 1 to compensate for the last decrement
923 of CURSOR, which took it past the valid range. */
924 start_byte += cursor - base + 1;
928 TEMP_SET_PT_BOTH (limit, limit_byte);
929 immediate_quit = old_immediate_quit;
931 return count * direction;
934 EMACS_INT
935 find_next_newline_no_quit (EMACS_INT from, EMACS_INT cnt)
937 return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0);
940 /* Like find_next_newline, but returns position before the newline,
941 not after, and only search up to TO. This isn't just
942 find_next_newline (...)-1, because you might hit TO. */
944 EMACS_INT
945 find_before_next_newline (EMACS_INT from, EMACS_INT to, EMACS_INT cnt)
947 int shortage;
948 EMACS_INT pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);
950 if (shortage == 0)
951 pos--;
953 return pos;
956 /* Subroutines of Lisp buffer search functions. */
958 static Lisp_Object
959 search_command (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror,
960 Lisp_Object count, int direction, int RE, int posix)
962 register int np;
963 EMACS_INT lim, lim_byte;
964 int n = direction;
966 if (!NILP (count))
968 CHECK_NUMBER (count);
969 n *= XINT (count);
972 CHECK_STRING (string);
973 if (NILP (bound))
975 if (n > 0)
976 lim = ZV, lim_byte = ZV_BYTE;
977 else
978 lim = BEGV, lim_byte = BEGV_BYTE;
980 else
982 CHECK_NUMBER_COERCE_MARKER (bound);
983 lim = XINT (bound);
984 if (n > 0 ? lim < PT : lim > PT)
985 error ("Invalid search bound (wrong side of point)");
986 if (lim > ZV)
987 lim = ZV, lim_byte = ZV_BYTE;
988 else if (lim < BEGV)
989 lim = BEGV, lim_byte = BEGV_BYTE;
990 else
991 lim_byte = CHAR_TO_BYTE (lim);
994 /* This is so set_image_of_range_1 in regex.c can find the EQV table. */
995 XCHAR_TABLE (BVAR (current_buffer, case_canon_table))->extras[2]
996 = BVAR (current_buffer, case_eqv_table);
998 np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
999 (!NILP (BVAR (current_buffer, case_fold_search))
1000 ? BVAR (current_buffer, case_canon_table)
1001 : Qnil),
1002 (!NILP (BVAR (current_buffer, case_fold_search))
1003 ? BVAR (current_buffer, case_eqv_table)
1004 : Qnil),
1005 posix);
1006 if (np <= 0)
1008 if (NILP (noerror))
1009 xsignal1 (Qsearch_failed, string);
1011 if (!EQ (noerror, Qt))
1013 if (lim < BEGV || lim > ZV)
1014 abort ();
1015 SET_PT_BOTH (lim, lim_byte);
1016 return Qnil;
1017 #if 0 /* This would be clean, but maybe programs depend on
1018 a value of nil here. */
1019 np = lim;
1020 #endif
1022 else
1023 return Qnil;
1026 if (np < BEGV || np > ZV)
1027 abort ();
1029 SET_PT (np);
1031 return make_number (np);
1034 /* Return 1 if REGEXP it matches just one constant string. */
1036 static int
1037 trivial_regexp_p (Lisp_Object regexp)
1039 EMACS_INT len = SBYTES (regexp);
1040 unsigned char *s = SDATA (regexp);
1041 while (--len >= 0)
1043 switch (*s++)
1045 case '.': case '*': case '+': case '?': case '[': case '^': case '$':
1046 return 0;
1047 case '\\':
1048 if (--len < 0)
1049 return 0;
1050 switch (*s++)
1052 case '|': case '(': case ')': case '`': case '\'': case 'b':
1053 case 'B': case '<': case '>': case 'w': case 'W': case 's':
1054 case 'S': case '=': case '{': case '}': case '_':
1055 case 'c': case 'C': /* for categoryspec and notcategoryspec */
1056 case '1': case '2': case '3': case '4': case '5':
1057 case '6': case '7': case '8': case '9':
1058 return 0;
1062 return 1;
1065 /* Search for the n'th occurrence of STRING in the current buffer,
1066 starting at position POS and stopping at position LIM,
1067 treating STRING as a literal string if RE is false or as
1068 a regular expression if RE is true.
1070 If N is positive, searching is forward and LIM must be greater than POS.
1071 If N is negative, searching is backward and LIM must be less than POS.
1073 Returns -x if x occurrences remain to be found (x > 0),
1074 or else the position at the beginning of the Nth occurrence
1075 (if searching backward) or the end (if searching forward).
1077 POSIX is nonzero if we want full backtracking (POSIX style)
1078 for this pattern. 0 means backtrack only enough to get a valid match. */
1080 #define TRANSLATE(out, trt, d) \
1081 do \
1083 if (! NILP (trt)) \
1085 Lisp_Object temp; \
1086 temp = Faref (trt, make_number (d)); \
1087 if (INTEGERP (temp)) \
1088 out = XINT (temp); \
1089 else \
1090 out = d; \
1092 else \
1093 out = d; \
1095 while (0)
1097 /* Only used in search_buffer, to record the end position of the match
1098 when searching regexps and SEARCH_REGS should not be changed
1099 (i.e. Vinhibit_changing_match_data is non-nil). */
1100 static struct re_registers search_regs_1;
1102 static EMACS_INT
1103 search_buffer (Lisp_Object string, EMACS_INT pos, EMACS_INT pos_byte,
1104 EMACS_INT lim, EMACS_INT lim_byte, EMACS_INT n,
1105 int RE, Lisp_Object trt, Lisp_Object inverse_trt, int posix)
1107 EMACS_INT len = SCHARS (string);
1108 EMACS_INT len_byte = SBYTES (string);
1109 register int i;
1111 if (running_asynch_code)
1112 save_search_regs ();
1114 /* Searching 0 times means don't move. */
1115 /* Null string is found at starting position. */
1116 if (len == 0 || n == 0)
1118 set_search_regs (pos_byte, 0);
1119 return pos;
1122 if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp)))
1124 unsigned char *p1, *p2;
1125 EMACS_INT s1, s2;
1126 struct re_pattern_buffer *bufp;
1128 bufp = compile_pattern (string,
1129 (NILP (Vinhibit_changing_match_data)
1130 ? &search_regs : &search_regs_1),
1131 trt, posix,
1132 !NILP (BVAR (current_buffer, enable_multibyte_characters)));
1134 immediate_quit = 1; /* Quit immediately if user types ^G,
1135 because letting this function finish
1136 can take too long. */
1137 QUIT; /* Do a pending quit right away,
1138 to avoid paradoxical behavior */
1139 /* Get pointers and sizes of the two strings
1140 that make up the visible portion of the buffer. */
1142 p1 = BEGV_ADDR;
1143 s1 = GPT_BYTE - BEGV_BYTE;
1144 p2 = GAP_END_ADDR;
1145 s2 = ZV_BYTE - GPT_BYTE;
1146 if (s1 < 0)
1148 p2 = p1;
1149 s2 = ZV_BYTE - BEGV_BYTE;
1150 s1 = 0;
1152 if (s2 < 0)
1154 s1 = ZV_BYTE - BEGV_BYTE;
1155 s2 = 0;
1157 re_match_object = Qnil;
1159 while (n < 0)
1161 EMACS_INT val;
1162 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1163 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1164 (NILP (Vinhibit_changing_match_data)
1165 ? &search_regs : &search_regs_1),
1166 /* Don't allow match past current point */
1167 pos_byte - BEGV_BYTE);
1168 if (val == -2)
1170 matcher_overflow ();
1172 if (val >= 0)
1174 if (NILP (Vinhibit_changing_match_data))
1176 pos_byte = search_regs.start[0] + BEGV_BYTE;
1177 for (i = 0; i < search_regs.num_regs; i++)
1178 if (search_regs.start[i] >= 0)
1180 search_regs.start[i]
1181 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1182 search_regs.end[i]
1183 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1185 XSETBUFFER (last_thing_searched, current_buffer);
1186 /* Set pos to the new position. */
1187 pos = search_regs.start[0];
1189 else
1191 pos_byte = search_regs_1.start[0] + BEGV_BYTE;
1192 /* Set pos to the new position. */
1193 pos = BYTE_TO_CHAR (search_regs_1.start[0] + BEGV_BYTE);
1196 else
1198 immediate_quit = 0;
1199 return (n);
1201 n++;
1203 while (n > 0)
1205 EMACS_INT val;
1206 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1207 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1208 (NILP (Vinhibit_changing_match_data)
1209 ? &search_regs : &search_regs_1),
1210 lim_byte - BEGV_BYTE);
1211 if (val == -2)
1213 matcher_overflow ();
1215 if (val >= 0)
1217 if (NILP (Vinhibit_changing_match_data))
1219 pos_byte = search_regs.end[0] + BEGV_BYTE;
1220 for (i = 0; i < search_regs.num_regs; i++)
1221 if (search_regs.start[i] >= 0)
1223 search_regs.start[i]
1224 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1225 search_regs.end[i]
1226 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1228 XSETBUFFER (last_thing_searched, current_buffer);
1229 pos = search_regs.end[0];
1231 else
1233 pos_byte = search_regs_1.end[0] + BEGV_BYTE;
1234 pos = BYTE_TO_CHAR (search_regs_1.end[0] + BEGV_BYTE);
1237 else
1239 immediate_quit = 0;
1240 return (0 - n);
1242 n--;
1244 immediate_quit = 0;
1245 return (pos);
1247 else /* non-RE case */
1249 unsigned char *raw_pattern, *pat;
1250 EMACS_INT raw_pattern_size;
1251 EMACS_INT raw_pattern_size_byte;
1252 unsigned char *patbuf;
1253 int multibyte = !NILP (BVAR (current_buffer, enable_multibyte_characters));
1254 unsigned char *base_pat;
1255 /* Set to positive if we find a non-ASCII char that need
1256 translation. Otherwise set to zero later. */
1257 int char_base = -1;
1258 int boyer_moore_ok = 1;
1260 /* MULTIBYTE says whether the text to be searched is multibyte.
1261 We must convert PATTERN to match that, or we will not really
1262 find things right. */
1264 if (multibyte == STRING_MULTIBYTE (string))
1266 raw_pattern = SDATA (string);
1267 raw_pattern_size = SCHARS (string);
1268 raw_pattern_size_byte = SBYTES (string);
1270 else if (multibyte)
1272 raw_pattern_size = SCHARS (string);
1273 raw_pattern_size_byte
1274 = count_size_as_multibyte (SDATA (string),
1275 raw_pattern_size);
1276 raw_pattern = (unsigned char *) alloca (raw_pattern_size_byte + 1);
1277 copy_text (SDATA (string), raw_pattern,
1278 SCHARS (string), 0, 1);
1280 else
1282 /* Converting multibyte to single-byte.
1284 ??? Perhaps this conversion should be done in a special way
1285 by subtracting nonascii-insert-offset from each non-ASCII char,
1286 so that only the multibyte chars which really correspond to
1287 the chosen single-byte character set can possibly match. */
1288 raw_pattern_size = SCHARS (string);
1289 raw_pattern_size_byte = SCHARS (string);
1290 raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
1291 copy_text (SDATA (string), raw_pattern,
1292 SBYTES (string), 1, 0);
1295 /* Copy and optionally translate the pattern. */
1296 len = raw_pattern_size;
1297 len_byte = raw_pattern_size_byte;
1298 patbuf = (unsigned char *) alloca (len * MAX_MULTIBYTE_LENGTH);
1299 pat = patbuf;
1300 base_pat = raw_pattern;
1301 if (multibyte)
1303 /* Fill patbuf by translated characters in STRING while
1304 checking if we can use boyer-moore search. If TRT is
1305 non-nil, we can use boyer-moore search only if TRT can be
1306 represented by the byte array of 256 elements. For that,
1307 all non-ASCII case-equivalents of all case-senstive
1308 characters in STRING must belong to the same charset and
1309 row. */
1311 while (--len >= 0)
1313 unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str;
1314 int c, translated, inverse;
1315 int in_charlen, charlen;
1317 /* If we got here and the RE flag is set, it's because we're
1318 dealing with a regexp known to be trivial, so the backslash
1319 just quotes the next character. */
1320 if (RE && *base_pat == '\\')
1322 len--;
1323 raw_pattern_size--;
1324 len_byte--;
1325 base_pat++;
1328 c = STRING_CHAR_AND_LENGTH (base_pat, in_charlen);
1330 if (NILP (trt))
1332 str = base_pat;
1333 charlen = in_charlen;
1335 else
1337 /* Translate the character. */
1338 TRANSLATE (translated, trt, c);
1339 charlen = CHAR_STRING (translated, str_base);
1340 str = str_base;
1342 /* Check if C has any other case-equivalents. */
1343 TRANSLATE (inverse, inverse_trt, c);
1344 /* If so, check if we can use boyer-moore. */
1345 if (c != inverse && boyer_moore_ok)
1347 /* Check if all equivalents belong to the same
1348 group of characters. Note that the check of C
1349 itself is done by the last iteration. */
1350 int this_char_base = -1;
1352 while (boyer_moore_ok)
1354 if (ASCII_BYTE_P (inverse))
1356 if (this_char_base > 0)
1357 boyer_moore_ok = 0;
1358 else
1359 this_char_base = 0;
1361 else if (CHAR_BYTE8_P (inverse))
1362 /* Boyer-moore search can't handle a
1363 translation of an eight-bit
1364 character. */
1365 boyer_moore_ok = 0;
1366 else if (this_char_base < 0)
1368 this_char_base = inverse & ~0x3F;
1369 if (char_base < 0)
1370 char_base = this_char_base;
1371 else if (this_char_base != char_base)
1372 boyer_moore_ok = 0;
1374 else if ((inverse & ~0x3F) != this_char_base)
1375 boyer_moore_ok = 0;
1376 if (c == inverse)
1377 break;
1378 TRANSLATE (inverse, inverse_trt, inverse);
1383 /* Store this character into the translated pattern. */
1384 memcpy (pat, str, charlen);
1385 pat += charlen;
1386 base_pat += in_charlen;
1387 len_byte -= in_charlen;
1390 /* If char_base is still negative we didn't find any translated
1391 non-ASCII characters. */
1392 if (char_base < 0)
1393 char_base = 0;
1395 else
1397 /* Unibyte buffer. */
1398 char_base = 0;
1399 while (--len >= 0)
1401 int c, translated;
1403 /* If we got here and the RE flag is set, it's because we're
1404 dealing with a regexp known to be trivial, so the backslash
1405 just quotes the next character. */
1406 if (RE && *base_pat == '\\')
1408 len--;
1409 raw_pattern_size--;
1410 base_pat++;
1412 c = *base_pat++;
1413 TRANSLATE (translated, trt, c);
1414 *pat++ = translated;
1418 len_byte = pat - patbuf;
1419 len = raw_pattern_size;
1420 pat = base_pat = patbuf;
1422 if (boyer_moore_ok)
1423 return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
1424 pos, pos_byte, lim, lim_byte,
1425 char_base);
1426 else
1427 return simple_search (n, pat, len, len_byte, trt,
1428 pos, pos_byte, lim, lim_byte);
1432 /* Do a simple string search N times for the string PAT,
1433 whose length is LEN/LEN_BYTE,
1434 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1435 TRT is the translation table.
1437 Return the character position where the match is found.
1438 Otherwise, if M matches remained to be found, return -M.
1440 This kind of search works regardless of what is in PAT and
1441 regardless of what is in TRT. It is used in cases where
1442 boyer_moore cannot work. */
1444 static EMACS_INT
1445 simple_search (EMACS_INT n, unsigned char *pat,
1446 EMACS_INT len, EMACS_INT len_byte, Lisp_Object trt,
1447 EMACS_INT pos, EMACS_INT pos_byte,
1448 EMACS_INT lim, EMACS_INT lim_byte)
1450 int multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
1451 int forward = n > 0;
1452 /* Number of buffer bytes matched. Note that this may be different
1453 from len_byte in a multibyte buffer. */
1454 EMACS_INT match_byte;
1456 if (lim > pos && multibyte)
1457 while (n > 0)
1459 while (1)
1461 /* Try matching at position POS. */
1462 EMACS_INT this_pos = pos;
1463 EMACS_INT this_pos_byte = pos_byte;
1464 EMACS_INT this_len = len;
1465 unsigned char *p = pat;
1466 if (pos + len > lim || pos_byte + len_byte > lim_byte)
1467 goto stop;
1469 while (this_len > 0)
1471 int charlen, buf_charlen;
1472 int pat_ch, buf_ch;
1474 pat_ch = STRING_CHAR_AND_LENGTH (p, charlen);
1475 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1476 buf_charlen);
1477 TRANSLATE (buf_ch, trt, buf_ch);
1479 if (buf_ch != pat_ch)
1480 break;
1482 this_len--;
1483 p += charlen;
1485 this_pos_byte += buf_charlen;
1486 this_pos++;
1489 if (this_len == 0)
1491 match_byte = this_pos_byte - pos_byte;
1492 pos += len;
1493 pos_byte += match_byte;
1494 break;
1497 INC_BOTH (pos, pos_byte);
1500 n--;
1502 else if (lim > pos)
1503 while (n > 0)
1505 while (1)
1507 /* Try matching at position POS. */
1508 EMACS_INT this_pos = pos;
1509 EMACS_INT this_len = len;
1510 unsigned char *p = pat;
1512 if (pos + len > lim)
1513 goto stop;
1515 while (this_len > 0)
1517 int pat_ch = *p++;
1518 int buf_ch = FETCH_BYTE (this_pos);
1519 TRANSLATE (buf_ch, trt, buf_ch);
1521 if (buf_ch != pat_ch)
1522 break;
1524 this_len--;
1525 this_pos++;
1528 if (this_len == 0)
1530 match_byte = len;
1531 pos += len;
1532 break;
1535 pos++;
1538 n--;
1540 /* Backwards search. */
1541 else if (lim < pos && multibyte)
1542 while (n < 0)
1544 while (1)
1546 /* Try matching at position POS. */
1547 EMACS_INT this_pos = pos;
1548 EMACS_INT this_pos_byte = pos_byte;
1549 EMACS_INT this_len = len;
1550 const unsigned char *p = pat + len_byte;
1552 if (this_pos - len < lim || (pos_byte - len_byte) < lim_byte)
1553 goto stop;
1555 while (this_len > 0)
1557 int charlen;
1558 int pat_ch, buf_ch;
1560 DEC_BOTH (this_pos, this_pos_byte);
1561 PREV_CHAR_BOUNDARY (p, pat);
1562 pat_ch = STRING_CHAR (p);
1563 buf_ch = STRING_CHAR (BYTE_POS_ADDR (this_pos_byte));
1564 TRANSLATE (buf_ch, trt, buf_ch);
1566 if (buf_ch != pat_ch)
1567 break;
1569 this_len--;
1572 if (this_len == 0)
1574 match_byte = pos_byte - this_pos_byte;
1575 pos = this_pos;
1576 pos_byte = this_pos_byte;
1577 break;
1580 DEC_BOTH (pos, pos_byte);
1583 n++;
1585 else if (lim < pos)
1586 while (n < 0)
1588 while (1)
1590 /* Try matching at position POS. */
1591 EMACS_INT this_pos = pos - len;
1592 EMACS_INT this_len = len;
1593 unsigned char *p = pat;
1595 if (this_pos < lim)
1596 goto stop;
1598 while (this_len > 0)
1600 int pat_ch = *p++;
1601 int buf_ch = FETCH_BYTE (this_pos);
1602 TRANSLATE (buf_ch, trt, buf_ch);
1604 if (buf_ch != pat_ch)
1605 break;
1606 this_len--;
1607 this_pos++;
1610 if (this_len == 0)
1612 match_byte = len;
1613 pos -= len;
1614 break;
1617 pos--;
1620 n++;
1623 stop:
1624 if (n == 0)
1626 if (forward)
1627 set_search_regs ((multibyte ? pos_byte : pos) - match_byte, match_byte);
1628 else
1629 set_search_regs (multibyte ? pos_byte : pos, match_byte);
1631 return pos;
1633 else if (n > 0)
1634 return -n;
1635 else
1636 return n;
1639 /* Do Boyer-Moore search N times for the string BASE_PAT,
1640 whose length is LEN/LEN_BYTE,
1641 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1642 DIRECTION says which direction we search in.
1643 TRT and INVERSE_TRT are translation tables.
1644 Characters in PAT are already translated by TRT.
1646 This kind of search works if all the characters in BASE_PAT that
1647 have nontrivial translation are the same aside from the last byte.
1648 This makes it possible to translate just the last byte of a
1649 character, and do so after just a simple test of the context.
1650 CHAR_BASE is nonzero if there is such a non-ASCII character.
1652 If that criterion is not satisfied, do not call this function. */
1654 static EMACS_INT
1655 boyer_moore (EMACS_INT n, unsigned char *base_pat,
1656 EMACS_INT len, EMACS_INT len_byte,
1657 Lisp_Object trt, Lisp_Object inverse_trt,
1658 EMACS_INT pos, EMACS_INT pos_byte,
1659 EMACS_INT lim, EMACS_INT lim_byte, int char_base)
1661 int direction = ((n > 0) ? 1 : -1);
1662 register EMACS_INT dirlen;
1663 EMACS_INT limit;
1664 int stride_for_teases = 0;
1665 int BM_tab[0400];
1666 register unsigned char *cursor, *p_limit;
1667 register EMACS_INT i;
1668 register int j;
1669 unsigned char *pat, *pat_end;
1670 int multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
1672 unsigned char simple_translate[0400];
1673 /* These are set to the preceding bytes of a byte to be translated
1674 if char_base is nonzero. As the maximum byte length of a
1675 multibyte character is 5, we have to check at most four previous
1676 bytes. */
1677 int translate_prev_byte1 = 0;
1678 int translate_prev_byte2 = 0;
1679 int translate_prev_byte3 = 0;
1680 int translate_prev_byte4 = 0;
1682 /* The general approach is that we are going to maintain that we know
1683 the first (closest to the present position, in whatever direction
1684 we're searching) character that could possibly be the last
1685 (furthest from present position) character of a valid match. We
1686 advance the state of our knowledge by looking at that character
1687 and seeing whether it indeed matches the last character of the
1688 pattern. If it does, we take a closer look. If it does not, we
1689 move our pointer (to putative last characters) as far as is
1690 logically possible. This amount of movement, which I call a
1691 stride, will be the length of the pattern if the actual character
1692 appears nowhere in the pattern, otherwise it will be the distance
1693 from the last occurrence of that character to the end of the
1694 pattern. If the amount is zero we have a possible match. */
1696 /* Here we make a "mickey mouse" BM table. The stride of the search
1697 is determined only by the last character of the putative match.
1698 If that character does not match, we will stride the proper
1699 distance to propose a match that superimposes it on the last
1700 instance of a character that matches it (per trt), or misses
1701 it entirely if there is none. */
1703 dirlen = len_byte * direction;
1705 /* Record position after the end of the pattern. */
1706 pat_end = base_pat + len_byte;
1707 /* BASE_PAT points to a character that we start scanning from.
1708 It is the first character in a forward search,
1709 the last character in a backward search. */
1710 if (direction < 0)
1711 base_pat = pat_end - 1;
1713 /* A character that does not appear in the pattern induces a
1714 stride equal to the pattern length. */
1715 for (i = 0; i < 0400; i++)
1716 BM_tab[i] = dirlen;
1718 /* We use this for translation, instead of TRT itself.
1719 We fill this in to handle the characters that actually
1720 occur in the pattern. Others don't matter anyway! */
1721 for (i = 0; i < 0400; i++)
1722 simple_translate[i] = i;
1724 if (char_base)
1726 /* Setup translate_prev_byte1/2/3/4 from CHAR_BASE. Only a
1727 byte following them are the target of translation. */
1728 unsigned char str[MAX_MULTIBYTE_LENGTH];
1729 int len = CHAR_STRING (char_base, str);
1731 translate_prev_byte1 = str[len - 2];
1732 if (len > 2)
1734 translate_prev_byte2 = str[len - 3];
1735 if (len > 3)
1737 translate_prev_byte3 = str[len - 4];
1738 if (len > 4)
1739 translate_prev_byte4 = str[len - 5];
1744 i = 0;
1745 while (i != dirlen)
1747 unsigned char *ptr = base_pat + i;
1748 i += direction;
1749 if (! NILP (trt))
1751 /* If the byte currently looking at is the last of a
1752 character to check case-equivalents, set CH to that
1753 character. An ASCII character and a non-ASCII character
1754 matching with CHAR_BASE are to be checked. */
1755 int ch = -1;
1757 if (ASCII_BYTE_P (*ptr) || ! multibyte)
1758 ch = *ptr;
1759 else if (char_base
1760 && ((pat_end - ptr) == 1 || CHAR_HEAD_P (ptr[1])))
1762 unsigned char *charstart = ptr - 1;
1764 while (! (CHAR_HEAD_P (*charstart)))
1765 charstart--;
1766 ch = STRING_CHAR (charstart);
1767 if (char_base != (ch & ~0x3F))
1768 ch = -1;
1771 if (ch >= 0200)
1772 j = (ch & 0x3F) | 0200;
1773 else
1774 j = *ptr;
1776 if (i == dirlen)
1777 stride_for_teases = BM_tab[j];
1779 BM_tab[j] = dirlen - i;
1780 /* A translation table is accompanied by its inverse -- see */
1781 /* comment following downcase_table for details */
1782 if (ch >= 0)
1784 int starting_ch = ch;
1785 int starting_j = j;
1787 while (1)
1789 TRANSLATE (ch, inverse_trt, ch);
1790 if (ch >= 0200)
1791 j = (ch & 0x3F) | 0200;
1792 else
1793 j = ch;
1795 /* For all the characters that map into CH,
1796 set up simple_translate to map the last byte
1797 into STARTING_J. */
1798 simple_translate[j] = starting_j;
1799 if (ch == starting_ch)
1800 break;
1801 BM_tab[j] = dirlen - i;
1805 else
1807 j = *ptr;
1809 if (i == dirlen)
1810 stride_for_teases = BM_tab[j];
1811 BM_tab[j] = dirlen - i;
1813 /* stride_for_teases tells how much to stride if we get a
1814 match on the far character but are subsequently
1815 disappointed, by recording what the stride would have been
1816 for that character if the last character had been
1817 different. */
1819 pos_byte += dirlen - ((direction > 0) ? direction : 0);
1820 /* loop invariant - POS_BYTE points at where last char (first
1821 char if reverse) of pattern would align in a possible match. */
1822 while (n != 0)
1824 EMACS_INT tail_end;
1825 unsigned char *tail_end_ptr;
1827 /* It's been reported that some (broken) compiler thinks that
1828 Boolean expressions in an arithmetic context are unsigned.
1829 Using an explicit ?1:0 prevents this. */
1830 if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1831 < 0)
1832 return (n * (0 - direction));
1833 /* First we do the part we can by pointers (maybe nothing) */
1834 QUIT;
1835 pat = base_pat;
1836 limit = pos_byte - dirlen + direction;
1837 if (direction > 0)
1839 limit = BUFFER_CEILING_OF (limit);
1840 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1841 can take on without hitting edge of buffer or the gap. */
1842 limit = min (limit, pos_byte + 20000);
1843 limit = min (limit, lim_byte - 1);
1845 else
1847 limit = BUFFER_FLOOR_OF (limit);
1848 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1849 can take on without hitting edge of buffer or the gap. */
1850 limit = max (limit, pos_byte - 20000);
1851 limit = max (limit, lim_byte);
1853 tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1854 tail_end_ptr = BYTE_POS_ADDR (tail_end);
1856 if ((limit - pos_byte) * direction > 20)
1858 unsigned char *p2;
1860 p_limit = BYTE_POS_ADDR (limit);
1861 p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1862 /* In this loop, pos + cursor - p2 is the surrogate for pos. */
1863 while (1) /* use one cursor setting as long as i can */
1865 if (direction > 0) /* worth duplicating */
1867 while (cursor <= p_limit)
1869 if (BM_tab[*cursor] == 0)
1870 goto hit;
1871 cursor += BM_tab[*cursor];
1874 else
1876 while (cursor >= p_limit)
1878 if (BM_tab[*cursor] == 0)
1879 goto hit;
1880 cursor += BM_tab[*cursor];
1883 /* If you are here, cursor is beyond the end of the
1884 searched region. You fail to match within the
1885 permitted region and would otherwise try a character
1886 beyond that region. */
1887 break;
1889 hit:
1890 i = dirlen - direction;
1891 if (! NILP (trt))
1893 while ((i -= direction) + direction != 0)
1895 int ch;
1896 cursor -= direction;
1897 /* Translate only the last byte of a character. */
1898 if (! multibyte
1899 || ((cursor == tail_end_ptr
1900 || CHAR_HEAD_P (cursor[1]))
1901 && (CHAR_HEAD_P (cursor[0])
1902 /* Check if this is the last byte of
1903 a translable character. */
1904 || (translate_prev_byte1 == cursor[-1]
1905 && (CHAR_HEAD_P (translate_prev_byte1)
1906 || (translate_prev_byte2 == cursor[-2]
1907 && (CHAR_HEAD_P (translate_prev_byte2)
1908 || (translate_prev_byte3 == cursor[-3]))))))))
1909 ch = simple_translate[*cursor];
1910 else
1911 ch = *cursor;
1912 if (pat[i] != ch)
1913 break;
1916 else
1918 while ((i -= direction) + direction != 0)
1920 cursor -= direction;
1921 if (pat[i] != *cursor)
1922 break;
1925 cursor += dirlen - i - direction; /* fix cursor */
1926 if (i + direction == 0)
1928 EMACS_INT position, start, end;
1930 cursor -= direction;
1932 position = pos_byte + cursor - p2 + ((direction > 0)
1933 ? 1 - len_byte : 0);
1934 set_search_regs (position, len_byte);
1936 if (NILP (Vinhibit_changing_match_data))
1938 start = search_regs.start[0];
1939 end = search_regs.end[0];
1941 else
1942 /* If Vinhibit_changing_match_data is non-nil,
1943 search_regs will not be changed. So let's
1944 compute start and end here. */
1946 start = BYTE_TO_CHAR (position);
1947 end = BYTE_TO_CHAR (position + len_byte);
1950 if ((n -= direction) != 0)
1951 cursor += dirlen; /* to resume search */
1952 else
1953 return direction > 0 ? end : start;
1955 else
1956 cursor += stride_for_teases; /* <sigh> we lose - */
1958 pos_byte += cursor - p2;
1960 else
1961 /* Now we'll pick up a clump that has to be done the hard
1962 way because it covers a discontinuity. */
1964 limit = ((direction > 0)
1965 ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
1966 : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
1967 limit = ((direction > 0)
1968 ? min (limit + len_byte, lim_byte - 1)
1969 : max (limit - len_byte, lim_byte));
1970 /* LIMIT is now the last value POS_BYTE can have
1971 and still be valid for a possible match. */
1972 while (1)
1974 /* This loop can be coded for space rather than
1975 speed because it will usually run only once.
1976 (the reach is at most len + 21, and typically
1977 does not exceed len). */
1978 while ((limit - pos_byte) * direction >= 0)
1980 int ch = FETCH_BYTE (pos_byte);
1981 if (BM_tab[ch] == 0)
1982 goto hit2;
1983 pos_byte += BM_tab[ch];
1985 break; /* ran off the end */
1987 hit2:
1988 /* Found what might be a match. */
1989 i = dirlen - direction;
1990 while ((i -= direction) + direction != 0)
1992 int ch;
1993 unsigned char *ptr;
1994 pos_byte -= direction;
1995 ptr = BYTE_POS_ADDR (pos_byte);
1996 /* Translate only the last byte of a character. */
1997 if (! multibyte
1998 || ((ptr == tail_end_ptr
1999 || CHAR_HEAD_P (ptr[1]))
2000 && (CHAR_HEAD_P (ptr[0])
2001 /* Check if this is the last byte of a
2002 translable character. */
2003 || (translate_prev_byte1 == ptr[-1]
2004 && (CHAR_HEAD_P (translate_prev_byte1)
2005 || (translate_prev_byte2 == ptr[-2]
2006 && (CHAR_HEAD_P (translate_prev_byte2)
2007 || translate_prev_byte3 == ptr[-3])))))))
2008 ch = simple_translate[*ptr];
2009 else
2010 ch = *ptr;
2011 if (pat[i] != ch)
2012 break;
2014 /* Above loop has moved POS_BYTE part or all the way
2015 back to the first pos (last pos if reverse).
2016 Set it once again at the last (first if reverse) char. */
2017 pos_byte += dirlen - i - direction;
2018 if (i + direction == 0)
2020 EMACS_INT position, start, end;
2021 pos_byte -= direction;
2023 position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
2024 set_search_regs (position, len_byte);
2026 if (NILP (Vinhibit_changing_match_data))
2028 start = search_regs.start[0];
2029 end = search_regs.end[0];
2031 else
2032 /* If Vinhibit_changing_match_data is non-nil,
2033 search_regs will not be changed. So let's
2034 compute start and end here. */
2036 start = BYTE_TO_CHAR (position);
2037 end = BYTE_TO_CHAR (position + len_byte);
2040 if ((n -= direction) != 0)
2041 pos_byte += dirlen; /* to resume search */
2042 else
2043 return direction > 0 ? end : start;
2045 else
2046 pos_byte += stride_for_teases;
2049 /* We have done one clump. Can we continue? */
2050 if ((lim_byte - pos_byte) * direction < 0)
2051 return ((0 - n) * direction);
2053 return BYTE_TO_CHAR (pos_byte);
2056 /* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
2057 for the overall match just found in the current buffer.
2058 Also clear out the match data for registers 1 and up. */
2060 static void
2061 set_search_regs (EMACS_INT beg_byte, EMACS_INT nbytes)
2063 int i;
2065 if (!NILP (Vinhibit_changing_match_data))
2066 return;
2068 /* Make sure we have registers in which to store
2069 the match position. */
2070 if (search_regs.num_regs == 0)
2072 search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
2073 search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
2074 search_regs.num_regs = 2;
2077 /* Clear out the other registers. */
2078 for (i = 1; i < search_regs.num_regs; i++)
2080 search_regs.start[i] = -1;
2081 search_regs.end[i] = -1;
2084 search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
2085 search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
2086 XSETBUFFER (last_thing_searched, current_buffer);
2089 /* Given STRING, a string of words separated by word delimiters,
2090 compute a regexp that matches those exact words separated by
2091 arbitrary punctuation. If LAX is nonzero, the end of the string
2092 need not match a word boundary unless it ends in whitespace. */
2094 static Lisp_Object
2095 wordify (Lisp_Object string, int lax)
2097 register unsigned char *p, *o;
2098 register EMACS_INT i, i_byte, len, punct_count = 0, word_count = 0;
2099 Lisp_Object val;
2100 int prev_c = 0;
2101 EMACS_INT adjust;
2102 int whitespace_at_end;
2104 CHECK_STRING (string);
2105 p = SDATA (string);
2106 len = SCHARS (string);
2108 for (i = 0, i_byte = 0; i < len; )
2110 int c;
2112 FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, i, i_byte);
2114 if (SYNTAX (c) != Sword)
2116 punct_count++;
2117 if (i > 0 && SYNTAX (prev_c) == Sword)
2118 word_count++;
2121 prev_c = c;
2124 if (SYNTAX (prev_c) == Sword)
2126 word_count++;
2127 whitespace_at_end = 0;
2129 else
2130 whitespace_at_end = 1;
2132 if (!word_count)
2133 return empty_unibyte_string;
2135 adjust = - punct_count + 5 * (word_count - 1)
2136 + ((lax && !whitespace_at_end) ? 2 : 4);
2137 if (STRING_MULTIBYTE (string))
2138 val = make_uninit_multibyte_string (len + adjust,
2139 SBYTES (string)
2140 + adjust);
2141 else
2142 val = make_uninit_string (len + adjust);
2144 o = SDATA (val);
2145 *o++ = '\\';
2146 *o++ = 'b';
2147 prev_c = 0;
2149 for (i = 0, i_byte = 0; i < len; )
2151 int c;
2152 EMACS_INT i_byte_orig = i_byte;
2154 FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, i, i_byte);
2156 if (SYNTAX (c) == Sword)
2158 memcpy (o, SDATA (string) + i_byte_orig, i_byte - i_byte_orig);
2159 o += i_byte - i_byte_orig;
2161 else if (i > 0 && SYNTAX (prev_c) == Sword && --word_count)
2163 *o++ = '\\';
2164 *o++ = 'W';
2165 *o++ = '\\';
2166 *o++ = 'W';
2167 *o++ = '*';
2170 prev_c = c;
2173 if (!lax || whitespace_at_end)
2175 *o++ = '\\';
2176 *o++ = 'b';
2179 return val;
2182 DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
2183 "MSearch backward: ",
2184 doc: /* Search backward from point for STRING.
2185 Set point to the beginning of the occurrence found, and return point.
2186 An optional second argument bounds the search; it is a buffer position.
2187 The match found must not extend before that position.
2188 Optional third argument, if t, means if fail just return nil (no error).
2189 If not nil and not t, position at limit of search and return nil.
2190 Optional fourth argument is repeat count--search for successive occurrences.
2192 Search case-sensitivity is determined by the value of the variable
2193 `case-fold-search', which see.
2195 See also the functions `match-beginning', `match-end' and `replace-match'. */)
2196 (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2198 return search_command (string, bound, noerror, count, -1, 0, 0);
2201 DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
2202 doc: /* Search forward from point for STRING.
2203 Set point to the end of the occurrence found, and return point.
2204 An optional second argument bounds the search; it is a buffer position.
2205 The match found must not extend after that position. A value of nil is
2206 equivalent to (point-max).
2207 Optional third argument, if t, means if fail just return nil (no error).
2208 If not nil and not t, move to limit of search and return nil.
2209 Optional fourth argument is repeat count--search for successive occurrences.
2211 Search case-sensitivity is determined by the value of the variable
2212 `case-fold-search', which see.
2214 See also the functions `match-beginning', `match-end' and `replace-match'. */)
2215 (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2217 return search_command (string, bound, noerror, count, 1, 0, 0);
2220 DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
2221 "sWord search backward: ",
2222 doc: /* Search backward from point for STRING, ignoring differences in punctuation.
2223 Set point to the beginning of the occurrence found, and return point.
2224 An optional second argument bounds the search; it is a buffer position.
2225 The match found must not extend before that position.
2226 Optional third argument, if t, means if fail just return nil (no error).
2227 If not nil and not t, move to limit of search and return nil.
2228 Optional fourth argument is repeat count--search for successive occurrences. */)
2229 (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2231 return search_command (wordify (string, 0), bound, noerror, count, -1, 1, 0);
2234 DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
2235 "sWord search: ",
2236 doc: /* Search forward from point for STRING, ignoring differences in punctuation.
2237 Set point to the end of the occurrence found, and return point.
2238 An optional second argument bounds the search; it is a buffer position.
2239 The match found must not extend after that position.
2240 Optional third argument, if t, means if fail just return nil (no error).
2241 If not nil and not t, move to limit of search and return nil.
2242 Optional fourth argument is repeat count--search for successive occurrences. */)
2243 (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2245 return search_command (wordify (string, 0), bound, noerror, count, 1, 1, 0);
2248 DEFUN ("word-search-backward-lax", Fword_search_backward_lax, Sword_search_backward_lax, 1, 4,
2249 "sWord search backward: ",
2250 doc: /* Search backward from point for STRING, ignoring differences in punctuation.
2251 Set point to the beginning of the occurrence found, and return point.
2253 Unlike `word-search-backward', the end of STRING need not match a word
2254 boundary unless it ends in whitespace.
2256 An optional second argument bounds the search; it is a buffer position.
2257 The match found must not extend before that position.
2258 Optional third argument, if t, means if fail just return nil (no error).
2259 If not nil and not t, move to limit of search and return nil.
2260 Optional fourth argument is repeat count--search for successive occurrences. */)
2261 (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2263 return search_command (wordify (string, 1), bound, noerror, count, -1, 1, 0);
2266 DEFUN ("word-search-forward-lax", Fword_search_forward_lax, Sword_search_forward_lax, 1, 4,
2267 "sWord search: ",
2268 doc: /* Search forward from point for STRING, ignoring differences in punctuation.
2269 Set point to the end of the occurrence found, and return point.
2271 Unlike `word-search-forward', the end of STRING need not match a word
2272 boundary unless it ends in whitespace.
2274 An optional second argument bounds the search; it is a buffer position.
2275 The match found must not extend after that position.
2276 Optional third argument, if t, means if fail just return nil (no error).
2277 If not nil and not t, move to limit of search and return nil.
2278 Optional fourth argument is repeat count--search for successive occurrences. */)
2279 (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2281 return search_command (wordify (string, 1), bound, noerror, count, 1, 1, 0);
2284 DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
2285 "sRE search backward: ",
2286 doc: /* Search backward from point for match for regular expression REGEXP.
2287 Set point to the beginning of the match, and return point.
2288 The match found is the one starting last in the buffer
2289 and yet ending before the origin of the search.
2290 An optional second argument bounds the search; it is a buffer position.
2291 The match found must start at or after that position.
2292 Optional third argument, if t, means if fail just return nil (no error).
2293 If not nil and not t, move to limit of search and return nil.
2294 Optional fourth argument is repeat count--search for successive occurrences.
2295 See also the functions `match-beginning', `match-end', `match-string',
2296 and `replace-match'. */)
2297 (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2299 return search_command (regexp, bound, noerror, count, -1, 1, 0);
2302 DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
2303 "sRE search: ",
2304 doc: /* Search forward from point for regular expression REGEXP.
2305 Set point to the end of the occurrence found, and return point.
2306 An optional second argument bounds the search; it is a buffer position.
2307 The match found must not extend after that position.
2308 Optional third argument, if t, means if fail just return nil (no error).
2309 If not nil and not t, move to limit of search and return nil.
2310 Optional fourth argument is repeat count--search for successive occurrences.
2311 See also the functions `match-beginning', `match-end', `match-string',
2312 and `replace-match'. */)
2313 (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2315 return search_command (regexp, bound, noerror, count, 1, 1, 0);
2318 DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
2319 "sPosix search backward: ",
2320 doc: /* Search backward from point for match for regular expression REGEXP.
2321 Find the longest match in accord with Posix regular expression rules.
2322 Set point to the beginning of the match, and return point.
2323 The match found is the one starting last in the buffer
2324 and yet ending before the origin of the search.
2325 An optional second argument bounds the search; it is a buffer position.
2326 The match found must start at or after that position.
2327 Optional third argument, if t, means if fail just return nil (no error).
2328 If not nil and not t, move to limit of search and return nil.
2329 Optional fourth argument is repeat count--search for successive occurrences.
2330 See also the functions `match-beginning', `match-end', `match-string',
2331 and `replace-match'. */)
2332 (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2334 return search_command (regexp, bound, noerror, count, -1, 1, 1);
2337 DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
2338 "sPosix search: ",
2339 doc: /* Search forward from point for regular expression REGEXP.
2340 Find the longest match in accord with Posix regular expression rules.
2341 Set point to the end of the occurrence found, and return point.
2342 An optional second argument bounds the search; it is a buffer position.
2343 The match found must not extend after that position.
2344 Optional third argument, if t, means if fail just return nil (no error).
2345 If not nil and not t, move to limit of search and return nil.
2346 Optional fourth argument is repeat count--search for successive occurrences.
2347 See also the functions `match-beginning', `match-end', `match-string',
2348 and `replace-match'. */)
2349 (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2351 return search_command (regexp, bound, noerror, count, 1, 1, 1);
2354 DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
2355 doc: /* Replace text matched by last search with NEWTEXT.
2356 Leave point at the end of the replacement text.
2358 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
2359 Otherwise maybe capitalize the whole text, or maybe just word initials,
2360 based on the replaced text.
2361 If the replaced text has only capital letters
2362 and has at least one multiletter word, convert NEWTEXT to all caps.
2363 Otherwise if all words are capitalized in the replaced text,
2364 capitalize each word in NEWTEXT.
2366 If third arg LITERAL is non-nil, insert NEWTEXT literally.
2367 Otherwise treat `\\' as special:
2368 `\\&' in NEWTEXT means substitute original matched text.
2369 `\\N' means substitute what matched the Nth `\\(...\\)'.
2370 If Nth parens didn't match, substitute nothing.
2371 `\\\\' means insert one `\\'.
2372 Case conversion does not apply to these substitutions.
2374 FIXEDCASE and LITERAL are optional arguments.
2376 The optional fourth argument STRING can be a string to modify.
2377 This is meaningful when the previous match was done against STRING,
2378 using `string-match'. When used this way, `replace-match'
2379 creates and returns a new string made by copying STRING and replacing
2380 the part of STRING that was matched.
2382 The optional fifth argument SUBEXP specifies a subexpression;
2383 it says to replace just that subexpression with NEWTEXT,
2384 rather than replacing the entire matched text.
2385 This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
2386 `\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
2387 NEWTEXT in place of subexp N.
2388 This is useful only after a regular expression search or match,
2389 since only regular expressions have distinguished subexpressions. */)
2390 (Lisp_Object newtext, Lisp_Object fixedcase, Lisp_Object literal, Lisp_Object string, Lisp_Object subexp)
2392 enum { nochange, all_caps, cap_initial } case_action;
2393 register EMACS_INT pos, pos_byte;
2394 int some_multiletter_word;
2395 int some_lowercase;
2396 int some_uppercase;
2397 int some_nonuppercase_initial;
2398 register int c, prevc;
2399 int sub;
2400 EMACS_INT opoint, newpoint;
2402 CHECK_STRING (newtext);
2404 if (! NILP (string))
2405 CHECK_STRING (string);
2407 case_action = nochange; /* We tried an initialization */
2408 /* but some C compilers blew it */
2410 if (search_regs.num_regs <= 0)
2411 error ("`replace-match' called before any match found");
2413 if (NILP (subexp))
2414 sub = 0;
2415 else
2417 CHECK_NUMBER (subexp);
2418 sub = XINT (subexp);
2419 if (sub < 0 || sub >= search_regs.num_regs)
2420 args_out_of_range (subexp, make_number (search_regs.num_regs));
2423 if (NILP (string))
2425 if (search_regs.start[sub] < BEGV
2426 || search_regs.start[sub] > search_regs.end[sub]
2427 || search_regs.end[sub] > ZV)
2428 args_out_of_range (make_number (search_regs.start[sub]),
2429 make_number (search_regs.end[sub]));
2431 else
2433 if (search_regs.start[sub] < 0
2434 || search_regs.start[sub] > search_regs.end[sub]
2435 || search_regs.end[sub] > SCHARS (string))
2436 args_out_of_range (make_number (search_regs.start[sub]),
2437 make_number (search_regs.end[sub]));
2440 if (NILP (fixedcase))
2442 /* Decide how to casify by examining the matched text. */
2443 EMACS_INT last;
2445 pos = search_regs.start[sub];
2446 last = search_regs.end[sub];
2448 if (NILP (string))
2449 pos_byte = CHAR_TO_BYTE (pos);
2450 else
2451 pos_byte = string_char_to_byte (string, pos);
2453 prevc = '\n';
2454 case_action = all_caps;
2456 /* some_multiletter_word is set nonzero if any original word
2457 is more than one letter long. */
2458 some_multiletter_word = 0;
2459 some_lowercase = 0;
2460 some_nonuppercase_initial = 0;
2461 some_uppercase = 0;
2463 while (pos < last)
2465 if (NILP (string))
2467 c = FETCH_CHAR_AS_MULTIBYTE (pos_byte);
2468 INC_BOTH (pos, pos_byte);
2470 else
2471 FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, pos, pos_byte);
2473 if (LOWERCASEP (c))
2475 /* Cannot be all caps if any original char is lower case */
2477 some_lowercase = 1;
2478 if (SYNTAX (prevc) != Sword)
2479 some_nonuppercase_initial = 1;
2480 else
2481 some_multiletter_word = 1;
2483 else if (UPPERCASEP (c))
2485 some_uppercase = 1;
2486 if (SYNTAX (prevc) != Sword)
2488 else
2489 some_multiletter_word = 1;
2491 else
2493 /* If the initial is a caseless word constituent,
2494 treat that like a lowercase initial. */
2495 if (SYNTAX (prevc) != Sword)
2496 some_nonuppercase_initial = 1;
2499 prevc = c;
2502 /* Convert to all caps if the old text is all caps
2503 and has at least one multiletter word. */
2504 if (! some_lowercase && some_multiletter_word)
2505 case_action = all_caps;
2506 /* Capitalize each word, if the old text has all capitalized words. */
2507 else if (!some_nonuppercase_initial && some_multiletter_word)
2508 case_action = cap_initial;
2509 else if (!some_nonuppercase_initial && some_uppercase)
2510 /* Should x -> yz, operating on X, give Yz or YZ?
2511 We'll assume the latter. */
2512 case_action = all_caps;
2513 else
2514 case_action = nochange;
2517 /* Do replacement in a string. */
2518 if (!NILP (string))
2520 Lisp_Object before, after;
2522 before = Fsubstring (string, make_number (0),
2523 make_number (search_regs.start[sub]));
2524 after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
2526 /* Substitute parts of the match into NEWTEXT
2527 if desired. */
2528 if (NILP (literal))
2530 EMACS_INT lastpos = 0;
2531 EMACS_INT lastpos_byte = 0;
2532 /* We build up the substituted string in ACCUM. */
2533 Lisp_Object accum;
2534 Lisp_Object middle;
2535 int length = SBYTES (newtext);
2537 accum = Qnil;
2539 for (pos_byte = 0, pos = 0; pos_byte < length;)
2541 EMACS_INT substart = -1;
2542 EMACS_INT subend = 0;
2543 int delbackslash = 0;
2545 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2547 if (c == '\\')
2549 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2551 if (c == '&')
2553 substart = search_regs.start[sub];
2554 subend = search_regs.end[sub];
2556 else if (c >= '1' && c <= '9')
2558 if (search_regs.start[c - '0'] >= 0
2559 && c <= search_regs.num_regs + '0')
2561 substart = search_regs.start[c - '0'];
2562 subend = search_regs.end[c - '0'];
2564 else
2566 /* If that subexp did not match,
2567 replace \\N with nothing. */
2568 substart = 0;
2569 subend = 0;
2572 else if (c == '\\')
2573 delbackslash = 1;
2574 else
2575 error ("Invalid use of `\\' in replacement text");
2577 if (substart >= 0)
2579 if (pos - 2 != lastpos)
2580 middle = substring_both (newtext, lastpos,
2581 lastpos_byte,
2582 pos - 2, pos_byte - 2);
2583 else
2584 middle = Qnil;
2585 accum = concat3 (accum, middle,
2586 Fsubstring (string,
2587 make_number (substart),
2588 make_number (subend)));
2589 lastpos = pos;
2590 lastpos_byte = pos_byte;
2592 else if (delbackslash)
2594 middle = substring_both (newtext, lastpos,
2595 lastpos_byte,
2596 pos - 1, pos_byte - 1);
2598 accum = concat2 (accum, middle);
2599 lastpos = pos;
2600 lastpos_byte = pos_byte;
2604 if (pos != lastpos)
2605 middle = substring_both (newtext, lastpos,
2606 lastpos_byte,
2607 pos, pos_byte);
2608 else
2609 middle = Qnil;
2611 newtext = concat2 (accum, middle);
2614 /* Do case substitution in NEWTEXT if desired. */
2615 if (case_action == all_caps)
2616 newtext = Fupcase (newtext);
2617 else if (case_action == cap_initial)
2618 newtext = Fupcase_initials (newtext);
2620 return concat3 (before, newtext, after);
2623 /* Record point, then move (quietly) to the start of the match. */
2624 if (PT >= search_regs.end[sub])
2625 opoint = PT - ZV;
2626 else if (PT > search_regs.start[sub])
2627 opoint = search_regs.end[sub] - ZV;
2628 else
2629 opoint = PT;
2631 /* If we want non-literal replacement,
2632 perform substitution on the replacement string. */
2633 if (NILP (literal))
2635 EMACS_INT length = SBYTES (newtext);
2636 unsigned char *substed;
2637 EMACS_INT substed_alloc_size, substed_len;
2638 int buf_multibyte = !NILP (BVAR (current_buffer, enable_multibyte_characters));
2639 int str_multibyte = STRING_MULTIBYTE (newtext);
2640 Lisp_Object rev_tbl;
2641 int really_changed = 0;
2643 rev_tbl = Qnil;
2645 substed_alloc_size = length * 2 + 100;
2646 substed = (unsigned char *) xmalloc (substed_alloc_size + 1);
2647 substed_len = 0;
2649 /* Go thru NEWTEXT, producing the actual text to insert in
2650 SUBSTED while adjusting multibyteness to that of the current
2651 buffer. */
2653 for (pos_byte = 0, pos = 0; pos_byte < length;)
2655 unsigned char str[MAX_MULTIBYTE_LENGTH];
2656 const unsigned char *add_stuff = NULL;
2657 EMACS_INT add_len = 0;
2658 int idx = -1;
2660 if (str_multibyte)
2662 FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte);
2663 if (!buf_multibyte)
2664 c = multibyte_char_to_unibyte (c, rev_tbl);
2666 else
2668 /* Note that we don't have to increment POS. */
2669 c = SREF (newtext, pos_byte++);
2670 if (buf_multibyte)
2671 MAKE_CHAR_MULTIBYTE (c);
2674 /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2675 or set IDX to a match index, which means put that part
2676 of the buffer text into SUBSTED. */
2678 if (c == '\\')
2680 really_changed = 1;
2682 if (str_multibyte)
2684 FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext,
2685 pos, pos_byte);
2686 if (!buf_multibyte && !ASCII_CHAR_P (c))
2687 c = multibyte_char_to_unibyte (c, rev_tbl);
2689 else
2691 c = SREF (newtext, pos_byte++);
2692 if (buf_multibyte)
2693 MAKE_CHAR_MULTIBYTE (c);
2696 if (c == '&')
2697 idx = sub;
2698 else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
2700 if (search_regs.start[c - '0'] >= 1)
2701 idx = c - '0';
2703 else if (c == '\\')
2704 add_len = 1, add_stuff = (unsigned char *) "\\";
2705 else
2707 xfree (substed);
2708 error ("Invalid use of `\\' in replacement text");
2711 else
2713 add_len = CHAR_STRING (c, str);
2714 add_stuff = str;
2717 /* If we want to copy part of a previous match,
2718 set up ADD_STUFF and ADD_LEN to point to it. */
2719 if (idx >= 0)
2721 EMACS_INT begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
2722 add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
2723 if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
2724 move_gap (search_regs.start[idx]);
2725 add_stuff = BYTE_POS_ADDR (begbyte);
2728 /* Now the stuff we want to add to SUBSTED
2729 is invariably ADD_LEN bytes starting at ADD_STUFF. */
2731 /* Make sure SUBSTED is big enough. */
2732 if (substed_len + add_len >= substed_alloc_size)
2734 substed_alloc_size = substed_len + add_len + 500;
2735 substed = (unsigned char *) xrealloc (substed,
2736 substed_alloc_size + 1);
2739 /* Now add to the end of SUBSTED. */
2740 if (add_stuff)
2742 memcpy (substed + substed_len, add_stuff, add_len);
2743 substed_len += add_len;
2747 if (really_changed)
2749 if (buf_multibyte)
2751 EMACS_INT nchars =
2752 multibyte_chars_in_text (substed, substed_len);
2754 newtext = make_multibyte_string ((char *) substed, nchars,
2755 substed_len);
2757 else
2758 newtext = make_unibyte_string ((char *) substed, substed_len);
2760 xfree (substed);
2763 /* Replace the old text with the new in the cleanest possible way. */
2764 replace_range (search_regs.start[sub], search_regs.end[sub],
2765 newtext, 1, 0, 1);
2766 newpoint = search_regs.start[sub] + SCHARS (newtext);
2768 if (case_action == all_caps)
2769 Fupcase_region (make_number (search_regs.start[sub]),
2770 make_number (newpoint));
2771 else if (case_action == cap_initial)
2772 Fupcase_initials_region (make_number (search_regs.start[sub]),
2773 make_number (newpoint));
2775 /* Adjust search data for this change. */
2777 EMACS_INT oldend = search_regs.end[sub];
2778 EMACS_INT oldstart = search_regs.start[sub];
2779 EMACS_INT change = newpoint - search_regs.end[sub];
2780 int i;
2782 for (i = 0; i < search_regs.num_regs; i++)
2784 if (search_regs.start[i] >= oldend)
2785 search_regs.start[i] += change;
2786 else if (search_regs.start[i] > oldstart)
2787 search_regs.start[i] = oldstart;
2788 if (search_regs.end[i] >= oldend)
2789 search_regs.end[i] += change;
2790 else if (search_regs.end[i] > oldstart)
2791 search_regs.end[i] = oldstart;
2795 /* Put point back where it was in the text. */
2796 if (opoint <= 0)
2797 TEMP_SET_PT (opoint + ZV);
2798 else
2799 TEMP_SET_PT (opoint);
2801 /* Now move point "officially" to the start of the inserted replacement. */
2802 move_if_not_intangible (newpoint);
2804 return Qnil;
2807 static Lisp_Object
2808 match_limit (Lisp_Object num, int beginningp)
2810 register int n;
2812 CHECK_NUMBER (num);
2813 n = XINT (num);
2814 if (n < 0)
2815 args_out_of_range (num, make_number (0));
2816 if (search_regs.num_regs <= 0)
2817 error ("No match data, because no search succeeded");
2818 if (n >= search_regs.num_regs
2819 || search_regs.start[n] < 0)
2820 return Qnil;
2821 return (make_number ((beginningp) ? search_regs.start[n]
2822 : search_regs.end[n]));
2825 DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
2826 doc: /* Return position of start of text matched by last search.
2827 SUBEXP, a number, specifies which parenthesized expression in the last
2828 regexp.
2829 Value is nil if SUBEXPth pair didn't match, or there were less than
2830 SUBEXP pairs.
2831 Zero means the entire text matched by the whole regexp or whole string. */)
2832 (Lisp_Object subexp)
2834 return match_limit (subexp, 1);
2837 DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
2838 doc: /* Return position of end of text matched by last search.
2839 SUBEXP, a number, specifies which parenthesized expression in the last
2840 regexp.
2841 Value is nil if SUBEXPth pair didn't match, or there were less than
2842 SUBEXP pairs.
2843 Zero means the entire text matched by the whole regexp or whole string. */)
2844 (Lisp_Object subexp)
2846 return match_limit (subexp, 0);
2849 DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 3, 0,
2850 doc: /* Return a list containing all info on what the last search matched.
2851 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2852 All the elements are markers or nil (nil if the Nth pair didn't match)
2853 if the last match was on a buffer; integers or nil if a string was matched.
2854 Use `set-match-data' to reinstate the data in this list.
2856 If INTEGERS (the optional first argument) is non-nil, always use
2857 integers \(rather than markers) to represent buffer positions. In
2858 this case, and if the last match was in a buffer, the buffer will get
2859 stored as one additional element at the end of the list.
2861 If REUSE is a list, reuse it as part of the value. If REUSE is long
2862 enough to hold all the values, and if INTEGERS is non-nil, no consing
2863 is done.
2865 If optional third arg RESEAT is non-nil, any previous markers on the
2866 REUSE list will be modified to point to nowhere.
2868 Return value is undefined if the last search failed. */)
2869 (Lisp_Object integers, Lisp_Object reuse, Lisp_Object reseat)
2871 Lisp_Object tail, prev;
2872 Lisp_Object *data;
2873 int i, len;
2875 if (!NILP (reseat))
2876 for (tail = reuse; CONSP (tail); tail = XCDR (tail))
2877 if (MARKERP (XCAR (tail)))
2879 unchain_marker (XMARKER (XCAR (tail)));
2880 XSETCAR (tail, Qnil);
2883 if (NILP (last_thing_searched))
2884 return Qnil;
2886 prev = Qnil;
2888 data = (Lisp_Object *) alloca ((2 * search_regs.num_regs + 1)
2889 * sizeof (Lisp_Object));
2891 len = 0;
2892 for (i = 0; i < search_regs.num_regs; i++)
2894 int start = search_regs.start[i];
2895 if (start >= 0)
2897 if (EQ (last_thing_searched, Qt)
2898 || ! NILP (integers))
2900 XSETFASTINT (data[2 * i], start);
2901 XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
2903 else if (BUFFERP (last_thing_searched))
2905 data[2 * i] = Fmake_marker ();
2906 Fset_marker (data[2 * i],
2907 make_number (start),
2908 last_thing_searched);
2909 data[2 * i + 1] = Fmake_marker ();
2910 Fset_marker (data[2 * i + 1],
2911 make_number (search_regs.end[i]),
2912 last_thing_searched);
2914 else
2915 /* last_thing_searched must always be Qt, a buffer, or Qnil. */
2916 abort ();
2918 len = 2 * i + 2;
2920 else
2921 data[2 * i] = data[2 * i + 1] = Qnil;
2924 if (BUFFERP (last_thing_searched) && !NILP (integers))
2926 data[len] = last_thing_searched;
2927 len++;
2930 /* If REUSE is not usable, cons up the values and return them. */
2931 if (! CONSP (reuse))
2932 return Flist (len, data);
2934 /* If REUSE is a list, store as many value elements as will fit
2935 into the elements of REUSE. */
2936 for (i = 0, tail = reuse; CONSP (tail);
2937 i++, tail = XCDR (tail))
2939 if (i < len)
2940 XSETCAR (tail, data[i]);
2941 else
2942 XSETCAR (tail, Qnil);
2943 prev = tail;
2946 /* If we couldn't fit all value elements into REUSE,
2947 cons up the rest of them and add them to the end of REUSE. */
2948 if (i < len)
2949 XSETCDR (prev, Flist (len - i, data + i));
2951 return reuse;
2954 /* We used to have an internal use variant of `reseat' described as:
2956 If RESEAT is `evaporate', put the markers back on the free list
2957 immediately. No other references to the markers must exist in this
2958 case, so it is used only internally on the unwind stack and
2959 save-match-data from Lisp.
2961 But it was ill-conceived: those supposedly-internal markers get exposed via
2962 the undo-list, so freeing them here is unsafe. */
2964 DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 2, 0,
2965 doc: /* Set internal data on last search match from elements of LIST.
2966 LIST should have been created by calling `match-data' previously.
2968 If optional arg RESEAT is non-nil, make markers on LIST point nowhere. */)
2969 (register Lisp_Object list, Lisp_Object reseat)
2971 register int i;
2972 register Lisp_Object marker;
2974 if (running_asynch_code)
2975 save_search_regs ();
2977 CHECK_LIST (list);
2979 /* Unless we find a marker with a buffer or an explicit buffer
2980 in LIST, assume that this match data came from a string. */
2981 last_thing_searched = Qt;
2983 /* Allocate registers if they don't already exist. */
2985 int length = XFASTINT (Flength (list)) / 2;
2987 if (length > search_regs.num_regs)
2989 if (search_regs.num_regs == 0)
2991 search_regs.start
2992 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2993 search_regs.end
2994 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2996 else
2998 search_regs.start
2999 = (regoff_t *) xrealloc (search_regs.start,
3000 length * sizeof (regoff_t));
3001 search_regs.end
3002 = (regoff_t *) xrealloc (search_regs.end,
3003 length * sizeof (regoff_t));
3006 for (i = search_regs.num_regs; i < length; i++)
3007 search_regs.start[i] = -1;
3009 search_regs.num_regs = length;
3012 for (i = 0; CONSP (list); i++)
3014 marker = XCAR (list);
3015 if (BUFFERP (marker))
3017 last_thing_searched = marker;
3018 break;
3020 if (i >= length)
3021 break;
3022 if (NILP (marker))
3024 search_regs.start[i] = -1;
3025 list = XCDR (list);
3027 else
3029 EMACS_INT from;
3030 Lisp_Object m;
3032 m = marker;
3033 if (MARKERP (marker))
3035 if (XMARKER (marker)->buffer == 0)
3036 XSETFASTINT (marker, 0);
3037 else
3038 XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
3041 CHECK_NUMBER_COERCE_MARKER (marker);
3042 from = XINT (marker);
3044 if (!NILP (reseat) && MARKERP (m))
3046 unchain_marker (XMARKER (m));
3047 XSETCAR (list, Qnil);
3050 if ((list = XCDR (list), !CONSP (list)))
3051 break;
3053 m = marker = XCAR (list);
3055 if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
3056 XSETFASTINT (marker, 0);
3058 CHECK_NUMBER_COERCE_MARKER (marker);
3059 search_regs.start[i] = from;
3060 search_regs.end[i] = XINT (marker);
3062 if (!NILP (reseat) && MARKERP (m))
3064 unchain_marker (XMARKER (m));
3065 XSETCAR (list, Qnil);
3068 list = XCDR (list);
3071 for (; i < search_regs.num_regs; i++)
3072 search_regs.start[i] = -1;
3075 return Qnil;
3078 /* If non-zero the match data have been saved in saved_search_regs
3079 during the execution of a sentinel or filter. */
3080 static int search_regs_saved;
3081 static struct re_registers saved_search_regs;
3082 static Lisp_Object saved_last_thing_searched;
3084 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
3085 if asynchronous code (filter or sentinel) is running. */
3086 static void
3087 save_search_regs (void)
3089 if (!search_regs_saved)
3091 saved_search_regs.num_regs = search_regs.num_regs;
3092 saved_search_regs.start = search_regs.start;
3093 saved_search_regs.end = search_regs.end;
3094 saved_last_thing_searched = last_thing_searched;
3095 last_thing_searched = Qnil;
3096 search_regs.num_regs = 0;
3097 search_regs.start = 0;
3098 search_regs.end = 0;
3100 search_regs_saved = 1;
3104 /* Called upon exit from filters and sentinels. */
3105 void
3106 restore_search_regs (void)
3108 if (search_regs_saved)
3110 if (search_regs.num_regs > 0)
3112 xfree (search_regs.start);
3113 xfree (search_regs.end);
3115 search_regs.num_regs = saved_search_regs.num_regs;
3116 search_regs.start = saved_search_regs.start;
3117 search_regs.end = saved_search_regs.end;
3118 last_thing_searched = saved_last_thing_searched;
3119 saved_last_thing_searched = Qnil;
3120 search_regs_saved = 0;
3124 static Lisp_Object
3125 unwind_set_match_data (Lisp_Object list)
3127 /* It is NOT ALWAYS safe to free (evaporate) the markers immediately. */
3128 return Fset_match_data (list, Qt);
3131 /* Called to unwind protect the match data. */
3132 void
3133 record_unwind_save_match_data (void)
3135 record_unwind_protect (unwind_set_match_data,
3136 Fmatch_data (Qnil, Qnil, Qnil));
3139 /* Quote a string to inactivate reg-expr chars */
3141 DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
3142 doc: /* Return a regexp string which matches exactly STRING and nothing else. */)
3143 (Lisp_Object string)
3145 register char *in, *out, *end;
3146 register char *temp;
3147 int backslashes_added = 0;
3149 CHECK_STRING (string);
3151 temp = (char *) alloca (SBYTES (string) * 2);
3153 /* Now copy the data into the new string, inserting escapes. */
3155 in = SSDATA (string);
3156 end = in + SBYTES (string);
3157 out = temp;
3159 for (; in != end; in++)
3161 if (*in == '['
3162 || *in == '*' || *in == '.' || *in == '\\'
3163 || *in == '?' || *in == '+'
3164 || *in == '^' || *in == '$')
3165 *out++ = '\\', backslashes_added++;
3166 *out++ = *in;
3169 return make_specified_string (temp,
3170 SCHARS (string) + backslashes_added,
3171 out - temp,
3172 STRING_MULTIBYTE (string));
3175 void
3176 syms_of_search (void)
3178 register int i;
3180 for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
3182 searchbufs[i].buf.allocated = 100;
3183 searchbufs[i].buf.buffer = (unsigned char *) xmalloc (100);
3184 searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
3185 searchbufs[i].regexp = Qnil;
3186 searchbufs[i].whitespace_regexp = Qnil;
3187 searchbufs[i].syntax_table = Qnil;
3188 staticpro (&searchbufs[i].regexp);
3189 staticpro (&searchbufs[i].whitespace_regexp);
3190 staticpro (&searchbufs[i].syntax_table);
3191 searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
3193 searchbuf_head = &searchbufs[0];
3195 Qsearch_failed = intern_c_string ("search-failed");
3196 staticpro (&Qsearch_failed);
3197 Qinvalid_regexp = intern_c_string ("invalid-regexp");
3198 staticpro (&Qinvalid_regexp);
3200 Fput (Qsearch_failed, Qerror_conditions,
3201 pure_cons (Qsearch_failed, pure_cons (Qerror, Qnil)));
3202 Fput (Qsearch_failed, Qerror_message,
3203 make_pure_c_string ("Search failed"));
3205 Fput (Qinvalid_regexp, Qerror_conditions,
3206 pure_cons (Qinvalid_regexp, pure_cons (Qerror, Qnil)));
3207 Fput (Qinvalid_regexp, Qerror_message,
3208 make_pure_c_string ("Invalid regexp"));
3210 last_thing_searched = Qnil;
3211 staticpro (&last_thing_searched);
3213 saved_last_thing_searched = Qnil;
3214 staticpro (&saved_last_thing_searched);
3216 DEFVAR_LISP ("search-spaces-regexp", Vsearch_spaces_regexp,
3217 doc: /* Regexp to substitute for bunches of spaces in regexp search.
3218 Some commands use this for user-specified regexps.
3219 Spaces that occur inside character classes or repetition operators
3220 or other such regexp constructs are not replaced with this.
3221 A value of nil (which is the normal value) means treat spaces literally. */);
3222 Vsearch_spaces_regexp = Qnil;
3224 DEFVAR_LISP ("inhibit-changing-match-data", Vinhibit_changing_match_data,
3225 doc: /* Internal use only.
3226 If non-nil, the primitive searching and matching functions
3227 such as `looking-at', `string-match', `re-search-forward', etc.,
3228 do not set the match data. The proper way to use this variable
3229 is to bind it with `let' around a small expression. */);
3230 Vinhibit_changing_match_data = Qnil;
3232 defsubr (&Slooking_at);
3233 defsubr (&Sposix_looking_at);
3234 defsubr (&Sstring_match);
3235 defsubr (&Sposix_string_match);
3236 defsubr (&Ssearch_forward);
3237 defsubr (&Ssearch_backward);
3238 defsubr (&Sword_search_forward);
3239 defsubr (&Sword_search_backward);
3240 defsubr (&Sword_search_forward_lax);
3241 defsubr (&Sword_search_backward_lax);
3242 defsubr (&Sre_search_forward);
3243 defsubr (&Sre_search_backward);
3244 defsubr (&Sposix_search_forward);
3245 defsubr (&Sposix_search_backward);
3246 defsubr (&Sreplace_match);
3247 defsubr (&Smatch_beginning);
3248 defsubr (&Smatch_end);
3249 defsubr (&Smatch_data);
3250 defsubr (&Sset_match_data);
3251 defsubr (&Sregexp_quote);