(search_buffer): Fix casts when assigning raw_pattern.
[emacs.git] / src / search.c
blobc3b873353dac4a9fa0f5993e33664718894f9db6
1 /* String search routines for GNU Emacs.
2 Copyright (C) 1985, 86, 87, 93, 94, 97, 1998 Free Software Foundation, Inc.
4 This file is part of GNU Emacs.
6 GNU Emacs is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
11 GNU Emacs is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GNU Emacs; see the file COPYING. If not, write to
18 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
22 #include <config.h>
23 #ifdef STDC_HEADERS
24 #include <stdlib.h>
25 #endif
26 #include "lisp.h"
27 #include "syntax.h"
28 #include "category.h"
29 #include "buffer.h"
30 #include "charset.h"
31 #include "region-cache.h"
32 #include "commands.h"
33 #include "blockinput.h"
34 #include "intervals.h"
36 #include <sys/types.h>
37 #include "regex.h"
39 #define min(a, b) ((a) < (b) ? (a) : (b))
40 #define max(a, b) ((a) > (b) ? (a) : (b))
42 #define REGEXP_CACHE_SIZE 20
44 /* If the regexp is non-nil, then the buffer contains the compiled form
45 of that regexp, suitable for searching. */
46 struct regexp_cache
48 struct regexp_cache *next;
49 Lisp_Object regexp;
50 struct re_pattern_buffer buf;
51 char fastmap[0400];
52 /* Nonzero means regexp was compiled to do full POSIX backtracking. */
53 char posix;
56 /* The instances of that struct. */
57 struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
59 /* The head of the linked list; points to the most recently used buffer. */
60 struct regexp_cache *searchbuf_head;
63 /* Every call to re_match, etc., must pass &search_regs as the regs
64 argument unless you can show it is unnecessary (i.e., if re_match
65 is certainly going to be called again before region-around-match
66 can be called).
68 Since the registers are now dynamically allocated, we need to make
69 sure not to refer to the Nth register before checking that it has
70 been allocated by checking search_regs.num_regs.
72 The regex code keeps track of whether it has allocated the search
73 buffer using bits in the re_pattern_buffer. This means that whenever
74 you compile a new pattern, it completely forgets whether it has
75 allocated any registers, and will allocate new registers the next
76 time you call a searching or matching function. Therefore, we need
77 to call re_set_registers after compiling a new pattern or after
78 setting the match registers, so that the regex functions will be
79 able to free or re-allocate it properly. */
80 static struct re_registers search_regs;
82 /* The buffer in which the last search was performed, or
83 Qt if the last search was done in a string;
84 Qnil if no searching has been done yet. */
85 static Lisp_Object last_thing_searched;
87 /* error condition signaled when regexp compile_pattern fails */
89 Lisp_Object Qinvalid_regexp;
91 static void set_search_regs ();
92 static void save_search_regs ();
93 static int simple_search ();
94 static int boyer_moore ();
95 static int search_buffer ();
97 static void
98 matcher_overflow ()
100 error ("Stack overflow in regexp matcher");
103 #ifdef __STDC__
104 #define CONST const
105 #else
106 #define CONST
107 #endif
109 /* Compile a regexp and signal a Lisp error if anything goes wrong.
110 PATTERN is the pattern to compile.
111 CP is the place to put the result.
112 TRANSLATE is a translation table for ignoring case, or nil for none.
113 REGP is the structure that says where to store the "register"
114 values that will result from matching this pattern.
115 If it is 0, we should compile the pattern not to record any
116 subexpression bounds.
117 POSIX is nonzero if we want full backtracking (POSIX style)
118 for this pattern. 0 means backtrack only enough to get a valid match.
119 MULTIBYTE is nonzero if we want to handle multibyte characters in
120 PATTERN. 0 means all multibyte characters are recognized just as
121 sequences of binary data. */
123 static void
124 compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte)
125 struct regexp_cache *cp;
126 Lisp_Object pattern;
127 Lisp_Object translate;
128 struct re_registers *regp;
129 int posix;
130 int multibyte;
132 unsigned char *raw_pattern;
133 int raw_pattern_size;
134 char *val;
135 reg_syntax_t old;
137 /* MULTIBYTE says whether the text to be searched is multibyte.
138 We must convert PATTERN to match that, or we will not really
139 find things right. */
141 if (multibyte == STRING_MULTIBYTE (pattern))
143 raw_pattern = (unsigned char *) XSTRING (pattern)->data;
144 raw_pattern_size = STRING_BYTES (XSTRING (pattern));
146 else if (multibyte)
148 raw_pattern_size = count_size_as_multibyte (XSTRING (pattern)->data,
149 XSTRING (pattern)->size);
150 raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
151 copy_text (XSTRING (pattern)->data, raw_pattern,
152 XSTRING (pattern)->size, 0, 1);
154 else
156 /* Converting multibyte to single-byte.
158 ??? Perhaps this conversion should be done in a special way
159 by subtracting nonascii-insert-offset from each non-ASCII char,
160 so that only the multibyte chars which really correspond to
161 the chosen single-byte character set can possibly match. */
162 raw_pattern_size = XSTRING (pattern)->size;
163 raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
164 copy_text (XSTRING (pattern)->data, raw_pattern,
165 STRING_BYTES (XSTRING (pattern)), 1, 0);
168 cp->regexp = Qnil;
169 cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
170 cp->posix = posix;
171 cp->buf.multibyte = multibyte;
172 BLOCK_INPUT;
173 old = re_set_syntax (RE_SYNTAX_EMACS
174 | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
175 val = (char *) re_compile_pattern ((char *)raw_pattern,
176 raw_pattern_size, &cp->buf);
177 re_set_syntax (old);
178 UNBLOCK_INPUT;
179 if (val)
180 Fsignal (Qinvalid_regexp, Fcons (build_string (val), Qnil));
182 cp->regexp = Fcopy_sequence (pattern);
185 /* Compile a regexp if necessary, but first check to see if there's one in
186 the cache.
187 PATTERN is the pattern to compile.
188 TRANSLATE is a translation table for ignoring case, or nil for none.
189 REGP is the structure that says where to store the "register"
190 values that will result from matching this pattern.
191 If it is 0, we should compile the pattern not to record any
192 subexpression bounds.
193 POSIX is nonzero if we want full backtracking (POSIX style)
194 for this pattern. 0 means backtrack only enough to get a valid match. */
196 struct re_pattern_buffer *
197 compile_pattern (pattern, regp, translate, posix, multibyte)
198 Lisp_Object pattern;
199 struct re_registers *regp;
200 Lisp_Object translate;
201 int posix, multibyte;
203 struct regexp_cache *cp, **cpp;
205 for (cpp = &searchbuf_head; ; cpp = &cp->next)
207 cp = *cpp;
208 if (XSTRING (cp->regexp)->size == XSTRING (pattern)->size
209 && !NILP (Fstring_equal (cp->regexp, pattern))
210 && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
211 && cp->posix == posix
212 && cp->buf.multibyte == multibyte)
213 break;
215 /* If we're at the end of the cache, compile into the last cell. */
216 if (cp->next == 0)
218 compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte);
219 break;
223 /* When we get here, cp (aka *cpp) contains the compiled pattern,
224 either because we found it in the cache or because we just compiled it.
225 Move it to the front of the queue to mark it as most recently used. */
226 *cpp = cp->next;
227 cp->next = searchbuf_head;
228 searchbuf_head = cp;
230 /* Advise the searching functions about the space we have allocated
231 for register data. */
232 if (regp)
233 re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
235 return &cp->buf;
238 /* Error condition used for failing searches */
239 Lisp_Object Qsearch_failed;
241 Lisp_Object
242 signal_failure (arg)
243 Lisp_Object arg;
245 Fsignal (Qsearch_failed, Fcons (arg, Qnil));
246 return Qnil;
249 static Lisp_Object
250 looking_at_1 (string, posix)
251 Lisp_Object string;
252 int posix;
254 Lisp_Object val;
255 unsigned char *p1, *p2;
256 int s1, s2;
257 register int i;
258 struct re_pattern_buffer *bufp;
260 if (running_asynch_code)
261 save_search_regs ();
263 CHECK_STRING (string, 0);
264 bufp = compile_pattern (string, &search_regs,
265 (!NILP (current_buffer->case_fold_search)
266 ? DOWNCASE_TABLE : Qnil),
267 posix,
268 !NILP (current_buffer->enable_multibyte_characters));
270 immediate_quit = 1;
271 QUIT; /* Do a pending quit right away, to avoid paradoxical behavior */
273 /* Get pointers and sizes of the two strings
274 that make up the visible portion of the buffer. */
276 p1 = BEGV_ADDR;
277 s1 = GPT_BYTE - BEGV_BYTE;
278 p2 = GAP_END_ADDR;
279 s2 = ZV_BYTE - GPT_BYTE;
280 if (s1 < 0)
282 p2 = p1;
283 s2 = ZV_BYTE - BEGV_BYTE;
284 s1 = 0;
286 if (s2 < 0)
288 s1 = ZV_BYTE - BEGV_BYTE;
289 s2 = 0;
292 re_match_object = Qnil;
294 i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
295 PT_BYTE - BEGV_BYTE, &search_regs,
296 ZV_BYTE - BEGV_BYTE);
297 if (i == -2)
298 matcher_overflow ();
300 val = (0 <= i ? Qt : Qnil);
301 if (i >= 0)
302 for (i = 0; i < search_regs.num_regs; i++)
303 if (search_regs.start[i] >= 0)
305 search_regs.start[i]
306 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
307 search_regs.end[i]
308 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
310 XSETBUFFER (last_thing_searched, current_buffer);
311 immediate_quit = 0;
312 return val;
315 DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
316 "Return t if text after point matches regular expression REGEXP.\n\
317 This function modifies the match data that `match-beginning',\n\
318 `match-end' and `match-data' access; save and restore the match\n\
319 data if you want to preserve them.")
320 (regexp)
321 Lisp_Object regexp;
323 return looking_at_1 (regexp, 0);
326 DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
327 "Return t if text after point matches regular expression REGEXP.\n\
328 Find the longest match, in accord with Posix regular expression rules.\n\
329 This function modifies the match data that `match-beginning',\n\
330 `match-end' and `match-data' access; save and restore the match\n\
331 data if you want to preserve them.")
332 (regexp)
333 Lisp_Object regexp;
335 return looking_at_1 (regexp, 1);
338 static Lisp_Object
339 string_match_1 (regexp, string, start, posix)
340 Lisp_Object regexp, string, start;
341 int posix;
343 int val;
344 struct re_pattern_buffer *bufp;
345 int pos, pos_byte;
346 int i;
348 if (running_asynch_code)
349 save_search_regs ();
351 CHECK_STRING (regexp, 0);
352 CHECK_STRING (string, 1);
354 if (NILP (start))
355 pos = 0, pos_byte = 0;
356 else
358 int len = XSTRING (string)->size;
360 CHECK_NUMBER (start, 2);
361 pos = XINT (start);
362 if (pos < 0 && -pos <= len)
363 pos = len + pos;
364 else if (0 > pos || pos > len)
365 args_out_of_range (string, start);
366 pos_byte = string_char_to_byte (string, pos);
369 bufp = compile_pattern (regexp, &search_regs,
370 (!NILP (current_buffer->case_fold_search)
371 ? DOWNCASE_TABLE : Qnil),
372 posix,
373 STRING_MULTIBYTE (string));
374 immediate_quit = 1;
375 re_match_object = string;
377 val = re_search (bufp, (char *) XSTRING (string)->data,
378 STRING_BYTES (XSTRING (string)), pos_byte,
379 STRING_BYTES (XSTRING (string)) - pos_byte,
380 &search_regs);
381 immediate_quit = 0;
382 last_thing_searched = Qt;
383 if (val == -2)
384 matcher_overflow ();
385 if (val < 0) return Qnil;
387 for (i = 0; i < search_regs.num_regs; i++)
388 if (search_regs.start[i] >= 0)
390 search_regs.start[i]
391 = string_byte_to_char (string, search_regs.start[i]);
392 search_regs.end[i]
393 = string_byte_to_char (string, search_regs.end[i]);
396 return make_number (string_byte_to_char (string, val));
399 DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
400 "Return index of start of first match for REGEXP in STRING, or nil.\n\
401 If third arg START is non-nil, start search at that index in STRING.\n\
402 For index of first char beyond the match, do (match-end 0).\n\
403 `match-end' and `match-beginning' also give indices of substrings\n\
404 matched by parenthesis constructs in the pattern.")
405 (regexp, string, start)
406 Lisp_Object regexp, string, start;
408 return string_match_1 (regexp, string, start, 0);
411 DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
412 "Return index of start of first match for REGEXP in STRING, or nil.\n\
413 Find the longest match, in accord with Posix regular expression rules.\n\
414 If third arg START is non-nil, start search at that index in STRING.\n\
415 For index of first char beyond the match, do (match-end 0).\n\
416 `match-end' and `match-beginning' also give indices of substrings\n\
417 matched by parenthesis constructs in the pattern.")
418 (regexp, string, start)
419 Lisp_Object regexp, string, start;
421 return string_match_1 (regexp, string, start, 1);
424 /* Match REGEXP against STRING, searching all of STRING,
425 and return the index of the match, or negative on failure.
426 This does not clobber the match data. */
429 fast_string_match (regexp, string)
430 Lisp_Object regexp, string;
432 int val;
433 struct re_pattern_buffer *bufp;
435 bufp = compile_pattern (regexp, 0, Qnil,
436 0, STRING_MULTIBYTE (string));
437 immediate_quit = 1;
438 re_match_object = string;
440 val = re_search (bufp, (char *) XSTRING (string)->data,
441 STRING_BYTES (XSTRING (string)), 0,
442 STRING_BYTES (XSTRING (string)), 0);
443 immediate_quit = 0;
444 return val;
447 /* Match REGEXP against STRING, searching all of STRING ignoring case,
448 and return the index of the match, or negative on failure.
449 This does not clobber the match data.
450 We assume that STRING contains single-byte characters. */
452 extern Lisp_Object Vascii_downcase_table;
455 fast_c_string_match_ignore_case (regexp, string)
456 Lisp_Object regexp;
457 char *string;
459 int val;
460 struct re_pattern_buffer *bufp;
461 int len = strlen (string);
463 regexp = string_make_unibyte (regexp);
464 re_match_object = Qt;
465 bufp = compile_pattern (regexp, 0,
466 Vascii_downcase_table, 0,
468 immediate_quit = 1;
469 val = re_search (bufp, string, len, 0, len, 0);
470 immediate_quit = 0;
471 return val;
474 /* The newline cache: remembering which sections of text have no newlines. */
476 /* If the user has requested newline caching, make sure it's on.
477 Otherwise, make sure it's off.
478 This is our cheezy way of associating an action with the change of
479 state of a buffer-local variable. */
480 static void
481 newline_cache_on_off (buf)
482 struct buffer *buf;
484 if (NILP (buf->cache_long_line_scans))
486 /* It should be off. */
487 if (buf->newline_cache)
489 free_region_cache (buf->newline_cache);
490 buf->newline_cache = 0;
493 else
495 /* It should be on. */
496 if (buf->newline_cache == 0)
497 buf->newline_cache = new_region_cache ();
502 /* Search for COUNT instances of the character TARGET between START and END.
504 If COUNT is positive, search forwards; END must be >= START.
505 If COUNT is negative, search backwards for the -COUNTth instance;
506 END must be <= START.
507 If COUNT is zero, do anything you please; run rogue, for all I care.
509 If END is zero, use BEGV or ZV instead, as appropriate for the
510 direction indicated by COUNT.
512 If we find COUNT instances, set *SHORTAGE to zero, and return the
513 position after the COUNTth match. Note that for reverse motion
514 this is not the same as the usual convention for Emacs motion commands.
516 If we don't find COUNT instances before reaching END, set *SHORTAGE
517 to the number of TARGETs left unfound, and return END.
519 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
520 except when inside redisplay. */
523 scan_buffer (target, start, end, count, shortage, allow_quit)
524 register int target;
525 int start, end;
526 int count;
527 int *shortage;
528 int allow_quit;
530 struct region_cache *newline_cache;
531 int direction;
533 if (count > 0)
535 direction = 1;
536 if (! end) end = ZV;
538 else
540 direction = -1;
541 if (! end) end = BEGV;
544 newline_cache_on_off (current_buffer);
545 newline_cache = current_buffer->newline_cache;
547 if (shortage != 0)
548 *shortage = 0;
550 immediate_quit = allow_quit;
552 if (count > 0)
553 while (start != end)
555 /* Our innermost scanning loop is very simple; it doesn't know
556 about gaps, buffer ends, or the newline cache. ceiling is
557 the position of the last character before the next such
558 obstacle --- the last character the dumb search loop should
559 examine. */
560 int ceiling_byte = CHAR_TO_BYTE (end) - 1;
561 int start_byte = CHAR_TO_BYTE (start);
562 int tem;
564 /* If we're looking for a newline, consult the newline cache
565 to see where we can avoid some scanning. */
566 if (target == '\n' && newline_cache)
568 int next_change;
569 immediate_quit = 0;
570 while (region_cache_forward
571 (current_buffer, newline_cache, start_byte, &next_change))
572 start_byte = next_change;
573 immediate_quit = allow_quit;
575 /* START should never be after END. */
576 if (start_byte > ceiling_byte)
577 start_byte = ceiling_byte;
579 /* Now the text after start is an unknown region, and
580 next_change is the position of the next known region. */
581 ceiling_byte = min (next_change - 1, ceiling_byte);
584 /* The dumb loop can only scan text stored in contiguous
585 bytes. BUFFER_CEILING_OF returns the last character
586 position that is contiguous, so the ceiling is the
587 position after that. */
588 tem = BUFFER_CEILING_OF (start_byte);
589 ceiling_byte = min (tem, ceiling_byte);
592 /* The termination address of the dumb loop. */
593 register unsigned char *ceiling_addr
594 = BYTE_POS_ADDR (ceiling_byte) + 1;
595 register unsigned char *cursor
596 = BYTE_POS_ADDR (start_byte);
597 unsigned char *base = cursor;
599 while (cursor < ceiling_addr)
601 unsigned char *scan_start = cursor;
603 /* The dumb loop. */
604 while (*cursor != target && ++cursor < ceiling_addr)
607 /* If we're looking for newlines, cache the fact that
608 the region from start to cursor is free of them. */
609 if (target == '\n' && newline_cache)
610 know_region_cache (current_buffer, newline_cache,
611 start_byte + scan_start - base,
612 start_byte + cursor - base);
614 /* Did we find the target character? */
615 if (cursor < ceiling_addr)
617 if (--count == 0)
619 immediate_quit = 0;
620 return BYTE_TO_CHAR (start_byte + cursor - base + 1);
622 cursor++;
626 start = BYTE_TO_CHAR (start_byte + cursor - base);
629 else
630 while (start > end)
632 /* The last character to check before the next obstacle. */
633 int ceiling_byte = CHAR_TO_BYTE (end);
634 int start_byte = CHAR_TO_BYTE (start);
635 int tem;
637 /* Consult the newline cache, if appropriate. */
638 if (target == '\n' && newline_cache)
640 int next_change;
641 immediate_quit = 0;
642 while (region_cache_backward
643 (current_buffer, newline_cache, start_byte, &next_change))
644 start_byte = next_change;
645 immediate_quit = allow_quit;
647 /* Start should never be at or before end. */
648 if (start_byte <= ceiling_byte)
649 start_byte = ceiling_byte + 1;
651 /* Now the text before start is an unknown region, and
652 next_change is the position of the next known region. */
653 ceiling_byte = max (next_change, ceiling_byte);
656 /* Stop scanning before the gap. */
657 tem = BUFFER_FLOOR_OF (start_byte - 1);
658 ceiling_byte = max (tem, ceiling_byte);
661 /* The termination address of the dumb loop. */
662 register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
663 register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
664 unsigned char *base = cursor;
666 while (cursor >= ceiling_addr)
668 unsigned char *scan_start = cursor;
670 while (*cursor != target && --cursor >= ceiling_addr)
673 /* If we're looking for newlines, cache the fact that
674 the region from after the cursor to start is free of them. */
675 if (target == '\n' && newline_cache)
676 know_region_cache (current_buffer, newline_cache,
677 start_byte + cursor - base,
678 start_byte + scan_start - base);
680 /* Did we find the target character? */
681 if (cursor >= ceiling_addr)
683 if (++count >= 0)
685 immediate_quit = 0;
686 return BYTE_TO_CHAR (start_byte + cursor - base);
688 cursor--;
692 start = BYTE_TO_CHAR (start_byte + cursor - base);
696 immediate_quit = 0;
697 if (shortage != 0)
698 *shortage = count * direction;
699 return start;
702 /* Search for COUNT instances of a line boundary, which means either a
703 newline or (if selective display enabled) a carriage return.
704 Start at START. If COUNT is negative, search backwards.
706 We report the resulting position by calling TEMP_SET_PT_BOTH.
708 If we find COUNT instances. we position after (always after,
709 even if scanning backwards) the COUNTth match, and return 0.
711 If we don't find COUNT instances before reaching the end of the
712 buffer (or the beginning, if scanning backwards), we return
713 the number of line boundaries left unfound, and position at
714 the limit we bumped up against.
716 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
717 except in special cases. */
720 scan_newline (start, start_byte, limit, limit_byte, count, allow_quit)
721 int start, start_byte;
722 int limit, limit_byte;
723 register int count;
724 int allow_quit;
726 int direction = ((count > 0) ? 1 : -1);
728 register unsigned char *cursor;
729 unsigned char *base;
731 register int ceiling;
732 register unsigned char *ceiling_addr;
734 int old_immediate_quit = immediate_quit;
736 /* If we are not in selective display mode,
737 check only for newlines. */
738 int selective_display = (!NILP (current_buffer->selective_display)
739 && !INTEGERP (current_buffer->selective_display));
741 /* The code that follows is like scan_buffer
742 but checks for either newline or carriage return. */
744 if (allow_quit)
745 immediate_quit++;
747 start_byte = CHAR_TO_BYTE (start);
749 if (count > 0)
751 while (start_byte < limit_byte)
753 ceiling = BUFFER_CEILING_OF (start_byte);
754 ceiling = min (limit_byte - 1, ceiling);
755 ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
756 base = (cursor = BYTE_POS_ADDR (start_byte));
757 while (1)
759 while (*cursor != '\n' && ++cursor != ceiling_addr)
762 if (cursor != ceiling_addr)
764 if (--count == 0)
766 immediate_quit = old_immediate_quit;
767 start_byte = start_byte + cursor - base + 1;
768 start = BYTE_TO_CHAR (start_byte);
769 TEMP_SET_PT_BOTH (start, start_byte);
770 return 0;
772 else
773 if (++cursor == ceiling_addr)
774 break;
776 else
777 break;
779 start_byte += cursor - base;
782 else
784 while (start_byte > limit_byte)
786 ceiling = BUFFER_FLOOR_OF (start_byte - 1);
787 ceiling = max (limit_byte, ceiling);
788 ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
789 base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
790 while (1)
792 while (--cursor != ceiling_addr && *cursor != '\n')
795 if (cursor != ceiling_addr)
797 if (++count == 0)
799 immediate_quit = old_immediate_quit;
800 /* Return the position AFTER the match we found. */
801 start_byte = start_byte + cursor - base + 1;
802 start = BYTE_TO_CHAR (start_byte);
803 TEMP_SET_PT_BOTH (start, start_byte);
804 return 0;
807 else
808 break;
810 /* Here we add 1 to compensate for the last decrement
811 of CURSOR, which took it past the valid range. */
812 start_byte += cursor - base + 1;
816 TEMP_SET_PT_BOTH (limit, limit_byte);
817 immediate_quit = old_immediate_quit;
819 return count * direction;
823 find_next_newline_no_quit (from, cnt)
824 register int from, cnt;
826 return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0);
829 /* Like find_next_newline, but returns position before the newline,
830 not after, and only search up to TO. This isn't just
831 find_next_newline (...)-1, because you might hit TO. */
834 find_before_next_newline (from, to, cnt)
835 int from, to, cnt;
837 int shortage;
838 int pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);
840 if (shortage == 0)
841 pos--;
843 return pos;
846 /* Subroutines of Lisp buffer search functions. */
848 static Lisp_Object
849 search_command (string, bound, noerror, count, direction, RE, posix)
850 Lisp_Object string, bound, noerror, count;
851 int direction;
852 int RE;
853 int posix;
855 register int np;
856 int lim, lim_byte;
857 int n = direction;
859 if (!NILP (count))
861 CHECK_NUMBER (count, 3);
862 n *= XINT (count);
865 CHECK_STRING (string, 0);
866 if (NILP (bound))
868 if (n > 0)
869 lim = ZV, lim_byte = ZV_BYTE;
870 else
871 lim = BEGV, lim_byte = BEGV_BYTE;
873 else
875 CHECK_NUMBER_COERCE_MARKER (bound, 1);
876 lim = XINT (bound);
877 if (n > 0 ? lim < PT : lim > PT)
878 error ("Invalid search bound (wrong side of point)");
879 if (lim > ZV)
880 lim = ZV, lim_byte = ZV_BYTE;
881 else if (lim < BEGV)
882 lim = BEGV, lim_byte = BEGV_BYTE;
883 else
884 lim_byte = CHAR_TO_BYTE (lim);
887 np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
888 (!NILP (current_buffer->case_fold_search)
889 ? current_buffer->case_canon_table
890 : Qnil),
891 (!NILP (current_buffer->case_fold_search)
892 ? current_buffer->case_eqv_table
893 : Qnil),
894 posix);
895 if (np <= 0)
897 if (NILP (noerror))
898 return signal_failure (string);
899 if (!EQ (noerror, Qt))
901 if (lim < BEGV || lim > ZV)
902 abort ();
903 SET_PT_BOTH (lim, lim_byte);
904 return Qnil;
905 #if 0 /* This would be clean, but maybe programs depend on
906 a value of nil here. */
907 np = lim;
908 #endif
910 else
911 return Qnil;
914 if (np < BEGV || np > ZV)
915 abort ();
917 SET_PT (np);
919 return make_number (np);
922 /* Return 1 if REGEXP it matches just one constant string. */
924 static int
925 trivial_regexp_p (regexp)
926 Lisp_Object regexp;
928 int len = STRING_BYTES (XSTRING (regexp));
929 unsigned char *s = XSTRING (regexp)->data;
930 unsigned char c;
931 while (--len >= 0)
933 switch (*s++)
935 case '.': case '*': case '+': case '?': case '[': case '^': case '$':
936 return 0;
937 case '\\':
938 if (--len < 0)
939 return 0;
940 switch (*s++)
942 case '|': case '(': case ')': case '`': case '\'': case 'b':
943 case 'B': case '<': case '>': case 'w': case 'W': case 's':
944 case 'S': case '=':
945 case 'c': case 'C': /* for categoryspec and notcategoryspec */
946 case '1': case '2': case '3': case '4': case '5':
947 case '6': case '7': case '8': case '9':
948 return 0;
952 return 1;
955 /* Search for the n'th occurrence of STRING in the current buffer,
956 starting at position POS and stopping at position LIM,
957 treating STRING as a literal string if RE is false or as
958 a regular expression if RE is true.
960 If N is positive, searching is forward and LIM must be greater than POS.
961 If N is negative, searching is backward and LIM must be less than POS.
963 Returns -x if x occurrences remain to be found (x > 0),
964 or else the position at the beginning of the Nth occurrence
965 (if searching backward) or the end (if searching forward).
967 POSIX is nonzero if we want full backtracking (POSIX style)
968 for this pattern. 0 means backtrack only enough to get a valid match. */
970 #define TRANSLATE(out, trt, d) \
971 do \
973 if (! NILP (trt)) \
975 Lisp_Object temp; \
976 temp = Faref (trt, make_number (d)); \
977 if (INTEGERP (temp)) \
978 out = XINT (temp); \
979 else \
980 out = d; \
982 else \
983 out = d; \
985 while (0)
987 static int
988 search_buffer (string, pos, pos_byte, lim, lim_byte, n,
989 RE, trt, inverse_trt, posix)
990 Lisp_Object string;
991 int pos;
992 int pos_byte;
993 int lim;
994 int lim_byte;
995 int n;
996 int RE;
997 Lisp_Object trt;
998 Lisp_Object inverse_trt;
999 int posix;
1001 int len = XSTRING (string)->size;
1002 int len_byte = STRING_BYTES (XSTRING (string));
1003 register int i;
1005 if (running_asynch_code)
1006 save_search_regs ();
1008 /* Null string is found at starting position. */
1009 if (len == 0)
1011 set_search_regs (pos, 0);
1012 return pos;
1015 /* Searching 0 times means don't move. */
1016 if (n == 0)
1017 return pos;
1019 if (RE && !trivial_regexp_p (string))
1021 unsigned char *p1, *p2;
1022 int s1, s2;
1023 struct re_pattern_buffer *bufp;
1025 bufp = compile_pattern (string, &search_regs, trt, posix,
1026 !NILP (current_buffer->enable_multibyte_characters));
1028 immediate_quit = 1; /* Quit immediately if user types ^G,
1029 because letting this function finish
1030 can take too long. */
1031 QUIT; /* Do a pending quit right away,
1032 to avoid paradoxical behavior */
1033 /* Get pointers and sizes of the two strings
1034 that make up the visible portion of the buffer. */
1036 p1 = BEGV_ADDR;
1037 s1 = GPT_BYTE - BEGV_BYTE;
1038 p2 = GAP_END_ADDR;
1039 s2 = ZV_BYTE - GPT_BYTE;
1040 if (s1 < 0)
1042 p2 = p1;
1043 s2 = ZV_BYTE - BEGV_BYTE;
1044 s1 = 0;
1046 if (s2 < 0)
1048 s1 = ZV_BYTE - BEGV_BYTE;
1049 s2 = 0;
1051 re_match_object = Qnil;
1053 while (n < 0)
1055 int val;
1056 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1057 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1058 &search_regs,
1059 /* Don't allow match past current point */
1060 pos_byte - BEGV_BYTE);
1061 if (val == -2)
1063 matcher_overflow ();
1065 if (val >= 0)
1067 pos_byte = search_regs.start[0] + BEGV_BYTE;
1068 for (i = 0; i < search_regs.num_regs; i++)
1069 if (search_regs.start[i] >= 0)
1071 search_regs.start[i]
1072 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1073 search_regs.end[i]
1074 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1076 XSETBUFFER (last_thing_searched, current_buffer);
1077 /* Set pos to the new position. */
1078 pos = search_regs.start[0];
1080 else
1082 immediate_quit = 0;
1083 return (n);
1085 n++;
1087 while (n > 0)
1089 int val;
1090 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1091 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1092 &search_regs,
1093 lim_byte - BEGV_BYTE);
1094 if (val == -2)
1096 matcher_overflow ();
1098 if (val >= 0)
1100 pos_byte = search_regs.end[0] + BEGV_BYTE;
1101 for (i = 0; i < search_regs.num_regs; i++)
1102 if (search_regs.start[i] >= 0)
1104 search_regs.start[i]
1105 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1106 search_regs.end[i]
1107 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1109 XSETBUFFER (last_thing_searched, current_buffer);
1110 pos = search_regs.end[0];
1112 else
1114 immediate_quit = 0;
1115 return (0 - n);
1117 n--;
1119 immediate_quit = 0;
1120 return (pos);
1122 else /* non-RE case */
1124 unsigned char *raw_pattern, *pat;
1125 int raw_pattern_size;
1126 int raw_pattern_size_byte;
1127 unsigned char *patbuf;
1128 int multibyte = !NILP (current_buffer->enable_multibyte_characters);
1129 unsigned char *base_pat = XSTRING (string)->data;
1130 int charset_base = -1;
1131 int simple = 1;
1133 /* MULTIBYTE says whether the text to be searched is multibyte.
1134 We must convert PATTERN to match that, or we will not really
1135 find things right. */
1137 if (multibyte == STRING_MULTIBYTE (string))
1139 raw_pattern = (unsigned char *) XSTRING (string)->data;
1140 raw_pattern_size = XSTRING (string)->size;
1141 raw_pattern_size_byte = STRING_BYTES (XSTRING (string));
1143 else if (multibyte)
1145 raw_pattern_size = XSTRING (string)->size;
1146 raw_pattern_size_byte
1147 = count_size_as_multibyte (XSTRING (string)->data,
1148 raw_pattern_size);
1149 raw_pattern = (unsigned char *) alloca (raw_pattern_size_byte + 1);
1150 copy_text (XSTRING (string)->data, raw_pattern,
1151 XSTRING (string)->size, 0, 1);
1153 else
1155 /* Converting multibyte to single-byte.
1157 ??? Perhaps this conversion should be done in a special way
1158 by subtracting nonascii-insert-offset from each non-ASCII char,
1159 so that only the multibyte chars which really correspond to
1160 the chosen single-byte character set can possibly match. */
1161 raw_pattern_size = XSTRING (string)->size;
1162 raw_pattern_size_byte = XSTRING (string)->size;
1163 raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
1164 copy_text (XSTRING (string)->data, raw_pattern,
1165 STRING_BYTES (XSTRING (string)), 1, 0);
1168 /* Copy and optionally translate the pattern. */
1169 len = raw_pattern_size;
1170 len_byte = raw_pattern_size_byte;
1171 patbuf = (unsigned char *) alloca (len_byte);
1172 pat = patbuf;
1173 base_pat = raw_pattern;
1174 if (multibyte)
1176 while (--len >= 0)
1178 unsigned char workbuf[4], *str;
1179 int c, translated, inverse;
1180 int in_charlen, charlen;
1182 /* If we got here and the RE flag is set, it's because we're
1183 dealing with a regexp known to be trivial, so the backslash
1184 just quotes the next character. */
1185 if (RE && *base_pat == '\\')
1187 len--;
1188 len_byte--;
1189 base_pat++;
1192 c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen);
1193 /* Translate the character, if requested. */
1194 TRANSLATE (translated, trt, c);
1195 /* If translation changed the byte-length, go back
1196 to the original character. */
1197 charlen = CHAR_STRING (translated, workbuf, str);
1198 if (in_charlen != charlen)
1200 translated = c;
1201 charlen = CHAR_STRING (c, workbuf, str);
1204 TRANSLATE (inverse, inverse_trt, c);
1206 /* Did this char actually get translated?
1207 Would any other char get translated into it? */
1208 if (translated != c || inverse != c)
1210 /* Keep track of which character set row
1211 contains the characters that need translation. */
1212 int charset_base_code = c & ~0xff;
1213 if (charset_base == -1)
1214 charset_base = charset_base_code;
1215 else if (charset_base != charset_base_code)
1216 /* If two different rows appear, needing translation,
1217 then we cannot use boyer_moore search. */
1218 simple = 0;
1219 /* ??? Handa: this must do simple = 0
1220 if c is a composite character. */
1223 /* Store this character into the translated pattern. */
1224 bcopy (str, pat, charlen);
1225 pat += charlen;
1226 base_pat += in_charlen;
1227 len_byte -= in_charlen;
1230 else
1232 while (--len >= 0)
1234 int c, translated, inverse;
1236 /* If we got here and the RE flag is set, it's because we're
1237 dealing with a regexp known to be trivial, so the backslash
1238 just quotes the next character. */
1239 if (RE && *base_pat == '\\')
1241 len--;
1242 base_pat++;
1244 c = *base_pat++;
1245 TRANSLATE (translated, trt, c);
1246 TRANSLATE (inverse, inverse_trt, c);
1248 /* Did this char actually get translated?
1249 Would any other char get translated into it? */
1250 if (translated != c || inverse != c)
1252 /* Keep track of which character set row
1253 contains the characters that need translation. */
1254 int charset_base_code = c & ~0xff;
1255 if (charset_base == -1)
1256 charset_base = charset_base_code;
1257 else if (charset_base != charset_base_code)
1258 /* If two different rows appear, needing translation,
1259 then we cannot use boyer_moore search. */
1260 simple = 0;
1262 *pat++ = translated;
1266 len_byte = pat - patbuf;
1267 len = raw_pattern_size;
1268 pat = base_pat = patbuf;
1270 if (simple)
1271 return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
1272 pos, pos_byte, lim, lim_byte,
1273 charset_base);
1274 else
1275 return simple_search (n, pat, len, len_byte, trt,
1276 pos, pos_byte, lim, lim_byte);
1280 /* Do a simple string search N times for the string PAT,
1281 whose length is LEN/LEN_BYTE,
1282 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1283 TRT is the translation table.
1285 Return the character position where the match is found.
1286 Otherwise, if M matches remained to be found, return -M.
1288 This kind of search works regardless of what is in PAT and
1289 regardless of what is in TRT. It is used in cases where
1290 boyer_moore cannot work. */
1292 static int
1293 simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
1294 int n;
1295 unsigned char *pat;
1296 int len, len_byte;
1297 Lisp_Object trt;
1298 int pos, pos_byte;
1299 int lim, lim_byte;
1301 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1302 int forward = n > 0;
1304 if (lim > pos && multibyte)
1305 while (n > 0)
1307 while (1)
1309 /* Try matching at position POS. */
1310 int this_pos = pos;
1311 int this_pos_byte = pos_byte;
1312 int this_len = len;
1313 int this_len_byte = len_byte;
1314 unsigned char *p = pat;
1315 if (pos + len > lim)
1316 goto stop;
1318 while (this_len > 0)
1320 int charlen, buf_charlen;
1321 int pat_ch, buf_ch;
1323 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1324 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1325 ZV_BYTE - this_pos_byte,
1326 buf_charlen);
1327 TRANSLATE (buf_ch, trt, buf_ch);
1329 if (buf_ch != pat_ch)
1330 break;
1332 this_len_byte -= charlen;
1333 this_len--;
1334 p += charlen;
1336 this_pos_byte += buf_charlen;
1337 this_pos++;
1340 if (this_len == 0)
1342 pos += len;
1343 pos_byte += len_byte;
1344 break;
1347 INC_BOTH (pos, pos_byte);
1350 n--;
1352 else if (lim > pos)
1353 while (n > 0)
1355 while (1)
1357 /* Try matching at position POS. */
1358 int this_pos = pos;
1359 int this_len = len;
1360 unsigned char *p = pat;
1362 if (pos + len > lim)
1363 goto stop;
1365 while (this_len > 0)
1367 int pat_ch = *p++;
1368 int buf_ch = FETCH_BYTE (this_pos);
1369 TRANSLATE (buf_ch, trt, buf_ch);
1371 if (buf_ch != pat_ch)
1372 break;
1374 this_len--;
1375 this_pos++;
1378 if (this_len == 0)
1380 pos += len;
1381 break;
1384 pos++;
1387 n--;
1389 /* Backwards search. */
1390 else if (lim < pos && multibyte)
1391 while (n < 0)
1393 while (1)
1395 /* Try matching at position POS. */
1396 int this_pos = pos - len;
1397 int this_pos_byte = pos_byte - len_byte;
1398 int this_len = len;
1399 int this_len_byte = len_byte;
1400 unsigned char *p = pat;
1402 if (pos - len < lim)
1403 goto stop;
1405 while (this_len > 0)
1407 int charlen, buf_charlen;
1408 int pat_ch, buf_ch;
1410 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1411 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1412 ZV_BYTE - this_pos_byte,
1413 buf_charlen);
1414 TRANSLATE (buf_ch, trt, buf_ch);
1416 if (buf_ch != pat_ch)
1417 break;
1419 this_len_byte -= charlen;
1420 this_len--;
1421 p += charlen;
1422 this_pos_byte += buf_charlen;
1423 this_pos++;
1426 if (this_len == 0)
1428 pos -= len;
1429 pos_byte -= len_byte;
1430 break;
1433 DEC_BOTH (pos, pos_byte);
1436 n++;
1438 else if (lim < pos)
1439 while (n < 0)
1441 while (1)
1443 /* Try matching at position POS. */
1444 int this_pos = pos - len;
1445 int this_len = len;
1446 unsigned char *p = pat;
1448 if (pos - len < lim)
1449 goto stop;
1451 while (this_len > 0)
1453 int pat_ch = *p++;
1454 int buf_ch = FETCH_BYTE (this_pos);
1455 TRANSLATE (buf_ch, trt, buf_ch);
1457 if (buf_ch != pat_ch)
1458 break;
1459 this_len--;
1460 this_pos++;
1463 if (this_len == 0)
1465 pos -= len;
1466 break;
1469 pos--;
1472 n++;
1475 stop:
1476 if (n == 0)
1478 if (forward)
1479 set_search_regs ((multibyte ? pos_byte : pos) - len_byte, len_byte);
1480 else
1481 set_search_regs (multibyte ? pos_byte : pos, len_byte);
1483 return pos;
1485 else if (n > 0)
1486 return -n;
1487 else
1488 return n;
1491 /* Do Boyer-Moore search N times for the string PAT,
1492 whose length is LEN/LEN_BYTE,
1493 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1494 DIRECTION says which direction we search in.
1495 TRT and INVERSE_TRT are translation tables.
1497 This kind of search works if all the characters in PAT that have
1498 nontrivial translation are the same aside from the last byte. This
1499 makes it possible to translate just the last byte of a character,
1500 and do so after just a simple test of the context.
1502 If that criterion is not satisfied, do not call this function. */
1504 static int
1505 boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
1506 pos, pos_byte, lim, lim_byte, charset_base)
1507 int n;
1508 unsigned char *base_pat;
1509 int len, len_byte;
1510 Lisp_Object trt;
1511 Lisp_Object inverse_trt;
1512 int pos, pos_byte;
1513 int lim, lim_byte;
1514 int charset_base;
1516 int direction = ((n > 0) ? 1 : -1);
1517 register int dirlen;
1518 int infinity, limit, k, stride_for_teases;
1519 register int *BM_tab;
1520 int *BM_tab_base;
1521 register unsigned char *cursor, *p_limit;
1522 register int i, j;
1523 unsigned char *pat;
1524 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1526 unsigned char simple_translate[0400];
1527 int translate_prev_byte;
1528 int translate_anteprev_byte;
1530 #ifdef C_ALLOCA
1531 int BM_tab_space[0400];
1532 BM_tab = &BM_tab_space[0];
1533 #else
1534 BM_tab = (int *) alloca (0400 * sizeof (int));
1535 #endif
1536 /* The general approach is that we are going to maintain that we know */
1537 /* the first (closest to the present position, in whatever direction */
1538 /* we're searching) character that could possibly be the last */
1539 /* (furthest from present position) character of a valid match. We */
1540 /* advance the state of our knowledge by looking at that character */
1541 /* and seeing whether it indeed matches the last character of the */
1542 /* pattern. If it does, we take a closer look. If it does not, we */
1543 /* move our pointer (to putative last characters) as far as is */
1544 /* logically possible. This amount of movement, which I call a */
1545 /* stride, will be the length of the pattern if the actual character */
1546 /* appears nowhere in the pattern, otherwise it will be the distance */
1547 /* from the last occurrence of that character to the end of the */
1548 /* pattern. */
1549 /* As a coding trick, an enormous stride is coded into the table for */
1550 /* characters that match the last character. This allows use of only */
1551 /* a single test, a test for having gone past the end of the */
1552 /* permissible match region, to test for both possible matches (when */
1553 /* the stride goes past the end immediately) and failure to */
1554 /* match (where you get nudged past the end one stride at a time). */
1556 /* Here we make a "mickey mouse" BM table. The stride of the search */
1557 /* is determined only by the last character of the putative match. */
1558 /* If that character does not match, we will stride the proper */
1559 /* distance to propose a match that superimposes it on the last */
1560 /* instance of a character that matches it (per trt), or misses */
1561 /* it entirely if there is none. */
1563 dirlen = len_byte * direction;
1564 infinity = dirlen - (lim_byte + pos_byte + len_byte + len_byte) * direction;
1565 if (direction < 0)
1566 pat = (base_pat += len_byte - 1);
1567 else
1568 pat = base_pat;
1569 BM_tab_base = BM_tab;
1570 BM_tab += 0400;
1571 j = dirlen; /* to get it in a register */
1572 /* A character that does not appear in the pattern induces a */
1573 /* stride equal to the pattern length. */
1574 while (BM_tab_base != BM_tab)
1576 *--BM_tab = j;
1577 *--BM_tab = j;
1578 *--BM_tab = j;
1579 *--BM_tab = j;
1582 /* We use this for translation, instead of TRT itself.
1583 We fill this in to handle the characters that actually
1584 occur in the pattern. Others don't matter anyway! */
1585 bzero (simple_translate, sizeof simple_translate);
1586 for (i = 0; i < 0400; i++)
1587 simple_translate[i] = i;
1589 i = 0;
1590 while (i != infinity)
1592 unsigned char *ptr = pat + i;
1593 i += direction;
1594 if (i == dirlen)
1595 i = infinity;
1596 if (! NILP (trt))
1598 int ch;
1599 int untranslated;
1600 int this_translated = 1;
1602 if (multibyte
1603 && (ptr + 1 == pat + len_byte || CHAR_HEAD_P (ptr[1])))
1605 unsigned char *charstart = ptr;
1606 while (! CHAR_HEAD_P (*charstart))
1607 charstart--;
1608 untranslated = STRING_CHAR (charstart, ptr - charstart + 1);
1609 if (charset_base == (untranslated & ~0xff))
1611 TRANSLATE (ch, trt, untranslated);
1612 if (! CHAR_HEAD_P (*ptr))
1614 translate_prev_byte = ptr[-1];
1615 if (! CHAR_HEAD_P (translate_prev_byte))
1616 translate_anteprev_byte = ptr[-2];
1619 else
1621 this_translated = 0;
1622 ch = *ptr;
1625 else if (!multibyte)
1626 TRANSLATE (ch, trt, *ptr);
1627 else
1629 ch = *ptr;
1630 this_translated = 0;
1633 if (ch > 0400)
1634 j = ((unsigned char) ch) | 0200;
1635 else
1636 j = (unsigned char) ch;
1638 if (i == infinity)
1639 stride_for_teases = BM_tab[j];
1641 BM_tab[j] = dirlen - i;
1642 /* A translation table is accompanied by its inverse -- see */
1643 /* comment following downcase_table for details */
1644 if (this_translated)
1646 int starting_ch = ch;
1647 int starting_j = j;
1648 while (1)
1650 TRANSLATE (ch, inverse_trt, ch);
1651 if (ch > 0400)
1652 j = ((unsigned char) ch) | 0200;
1653 else
1654 j = (unsigned char) ch;
1656 /* For all the characters that map into CH,
1657 set up simple_translate to map the last byte
1658 into STARTING_J. */
1659 simple_translate[j] = starting_j;
1660 if (ch == starting_ch)
1661 break;
1662 BM_tab[j] = dirlen - i;
1666 else
1668 j = *ptr;
1670 if (i == infinity)
1671 stride_for_teases = BM_tab[j];
1672 BM_tab[j] = dirlen - i;
1674 /* stride_for_teases tells how much to stride if we get a */
1675 /* match on the far character but are subsequently */
1676 /* disappointed, by recording what the stride would have been */
1677 /* for that character if the last character had been */
1678 /* different. */
1680 infinity = dirlen - infinity;
1681 pos_byte += dirlen - ((direction > 0) ? direction : 0);
1682 /* loop invariant - POS_BYTE points at where last char (first
1683 char if reverse) of pattern would align in a possible match. */
1684 while (n != 0)
1686 int tail_end;
1687 unsigned char *tail_end_ptr;
1689 /* It's been reported that some (broken) compiler thinks that
1690 Boolean expressions in an arithmetic context are unsigned.
1691 Using an explicit ?1:0 prevents this. */
1692 if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1693 < 0)
1694 return (n * (0 - direction));
1695 /* First we do the part we can by pointers (maybe nothing) */
1696 QUIT;
1697 pat = base_pat;
1698 limit = pos_byte - dirlen + direction;
1699 if (direction > 0)
1701 limit = BUFFER_CEILING_OF (limit);
1702 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1703 can take on without hitting edge of buffer or the gap. */
1704 limit = min (limit, pos_byte + 20000);
1705 limit = min (limit, lim_byte - 1);
1707 else
1709 limit = BUFFER_FLOOR_OF (limit);
1710 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1711 can take on without hitting edge of buffer or the gap. */
1712 limit = max (limit, pos_byte - 20000);
1713 limit = max (limit, lim_byte);
1715 tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1716 tail_end_ptr = BYTE_POS_ADDR (tail_end);
1718 if ((limit - pos_byte) * direction > 20)
1720 unsigned char *p2;
1722 p_limit = BYTE_POS_ADDR (limit);
1723 p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1724 /* In this loop, pos + cursor - p2 is the surrogate for pos */
1725 while (1) /* use one cursor setting as long as i can */
1727 if (direction > 0) /* worth duplicating */
1729 /* Use signed comparison if appropriate
1730 to make cursor+infinity sure to be > p_limit.
1731 Assuming that the buffer lies in a range of addresses
1732 that are all "positive" (as ints) or all "negative",
1733 either kind of comparison will work as long
1734 as we don't step by infinity. So pick the kind
1735 that works when we do step by infinity. */
1736 if ((EMACS_INT) (p_limit + infinity) > (EMACS_INT) p_limit)
1737 while ((EMACS_INT) cursor <= (EMACS_INT) p_limit)
1738 cursor += BM_tab[*cursor];
1739 else
1740 while ((EMACS_UINT) cursor <= (EMACS_UINT) p_limit)
1741 cursor += BM_tab[*cursor];
1743 else
1745 if ((EMACS_INT) (p_limit + infinity) < (EMACS_INT) p_limit)
1746 while ((EMACS_INT) cursor >= (EMACS_INT) p_limit)
1747 cursor += BM_tab[*cursor];
1748 else
1749 while ((EMACS_UINT) cursor >= (EMACS_UINT) p_limit)
1750 cursor += BM_tab[*cursor];
1752 /* If you are here, cursor is beyond the end of the searched region. */
1753 /* This can happen if you match on the far character of the pattern, */
1754 /* because the "stride" of that character is infinity, a number able */
1755 /* to throw you well beyond the end of the search. It can also */
1756 /* happen if you fail to match within the permitted region and would */
1757 /* otherwise try a character beyond that region */
1758 if ((cursor - p_limit) * direction <= len_byte)
1759 break; /* a small overrun is genuine */
1760 cursor -= infinity; /* large overrun = hit */
1761 i = dirlen - direction;
1762 if (! NILP (trt))
1764 while ((i -= direction) + direction != 0)
1766 int ch;
1767 cursor -= direction;
1768 /* Translate only the last byte of a character. */
1769 if (! multibyte
1770 || ((cursor == tail_end_ptr
1771 || CHAR_HEAD_P (cursor[1]))
1772 && (CHAR_HEAD_P (cursor[0])
1773 || (translate_prev_byte == cursor[-1]
1774 && (CHAR_HEAD_P (translate_prev_byte)
1775 || translate_anteprev_byte == cursor[-2])))))
1776 ch = simple_translate[*cursor];
1777 else
1778 ch = *cursor;
1779 if (pat[i] != ch)
1780 break;
1783 else
1785 while ((i -= direction) + direction != 0)
1787 cursor -= direction;
1788 if (pat[i] != *cursor)
1789 break;
1792 cursor += dirlen - i - direction; /* fix cursor */
1793 if (i + direction == 0)
1795 int position;
1797 cursor -= direction;
1799 position = pos_byte + cursor - p2 + ((direction > 0)
1800 ? 1 - len_byte : 0);
1801 set_search_regs (position, len_byte);
1803 if ((n -= direction) != 0)
1804 cursor += dirlen; /* to resume search */
1805 else
1806 return ((direction > 0)
1807 ? search_regs.end[0] : search_regs.start[0]);
1809 else
1810 cursor += stride_for_teases; /* <sigh> we lose - */
1812 pos_byte += cursor - p2;
1814 else
1815 /* Now we'll pick up a clump that has to be done the hard */
1816 /* way because it covers a discontinuity */
1818 limit = ((direction > 0)
1819 ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
1820 : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
1821 limit = ((direction > 0)
1822 ? min (limit + len_byte, lim_byte - 1)
1823 : max (limit - len_byte, lim_byte));
1824 /* LIMIT is now the last value POS_BYTE can have
1825 and still be valid for a possible match. */
1826 while (1)
1828 /* This loop can be coded for space rather than */
1829 /* speed because it will usually run only once. */
1830 /* (the reach is at most len + 21, and typically */
1831 /* does not exceed len) */
1832 while ((limit - pos_byte) * direction >= 0)
1833 pos_byte += BM_tab[FETCH_BYTE (pos_byte)];
1834 /* now run the same tests to distinguish going off the */
1835 /* end, a match or a phony match. */
1836 if ((pos_byte - limit) * direction <= len_byte)
1837 break; /* ran off the end */
1838 /* Found what might be a match.
1839 Set POS_BYTE back to last (first if reverse) pos. */
1840 pos_byte -= infinity;
1841 i = dirlen - direction;
1842 while ((i -= direction) + direction != 0)
1844 int ch;
1845 unsigned char *ptr;
1846 pos_byte -= direction;
1847 ptr = BYTE_POS_ADDR (pos_byte);
1848 /* Translate only the last byte of a character. */
1849 if (! multibyte
1850 || ((ptr == tail_end_ptr
1851 || CHAR_HEAD_P (ptr[1]))
1852 && (CHAR_HEAD_P (ptr[0])
1853 || (translate_prev_byte == ptr[-1]
1854 && (CHAR_HEAD_P (translate_prev_byte)
1855 || translate_anteprev_byte == ptr[-2])))))
1856 ch = simple_translate[*ptr];
1857 else
1858 ch = *ptr;
1859 if (pat[i] != ch)
1860 break;
1862 /* Above loop has moved POS_BYTE part or all the way
1863 back to the first pos (last pos if reverse).
1864 Set it once again at the last (first if reverse) char. */
1865 pos_byte += dirlen - i- direction;
1866 if (i + direction == 0)
1868 int position;
1869 pos_byte -= direction;
1871 position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
1873 set_search_regs (position, len_byte);
1875 if ((n -= direction) != 0)
1876 pos_byte += dirlen; /* to resume search */
1877 else
1878 return ((direction > 0)
1879 ? search_regs.end[0] : search_regs.start[0]);
1881 else
1882 pos_byte += stride_for_teases;
1885 /* We have done one clump. Can we continue? */
1886 if ((lim_byte - pos_byte) * direction < 0)
1887 return ((0 - n) * direction);
1889 return BYTE_TO_CHAR (pos_byte);
1892 /* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
1893 for a match just found in the current buffer. */
1895 static void
1896 set_search_regs (beg_byte, nbytes)
1897 int beg_byte, nbytes;
1899 /* Make sure we have registers in which to store
1900 the match position. */
1901 if (search_regs.num_regs == 0)
1903 search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
1904 search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
1905 search_regs.num_regs = 2;
1908 search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
1909 search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
1910 XSETBUFFER (last_thing_searched, current_buffer);
1913 /* Given a string of words separated by word delimiters,
1914 compute a regexp that matches those exact words
1915 separated by arbitrary punctuation. */
1917 static Lisp_Object
1918 wordify (string)
1919 Lisp_Object string;
1921 register unsigned char *p, *o;
1922 register int i, i_byte, len, punct_count = 0, word_count = 0;
1923 Lisp_Object val;
1924 int prev_c = 0;
1925 int adjust;
1927 CHECK_STRING (string, 0);
1928 p = XSTRING (string)->data;
1929 len = XSTRING (string)->size;
1931 for (i = 0, i_byte = 0; i < len; )
1933 int c;
1935 if (STRING_MULTIBYTE (string))
1936 FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
1937 else
1938 c = XSTRING (string)->data[i++];
1940 if (SYNTAX (c) != Sword)
1942 punct_count++;
1943 if (i > 0 && SYNTAX (prev_c) == Sword)
1944 word_count++;
1947 prev_c = c;
1950 if (SYNTAX (prev_c) == Sword)
1951 word_count++;
1952 if (!word_count)
1953 return build_string ("");
1955 adjust = - punct_count + 5 * (word_count - 1) + 4;
1956 val = make_uninit_multibyte_string (len + adjust,
1957 STRING_BYTES (XSTRING (string)) + adjust);
1959 o = XSTRING (val)->data;
1960 *o++ = '\\';
1961 *o++ = 'b';
1962 prev_c = 0;
1964 for (i = 0, i_byte = 0; i < len; )
1966 int c;
1967 int i_byte_orig = i_byte;
1969 if (STRING_MULTIBYTE (string))
1970 FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
1971 else
1972 c = XSTRING (string)->data[i++];
1974 if (SYNTAX (c) == Sword)
1976 bcopy (&XSTRING (string)->data[i_byte_orig], o,
1977 i_byte - i_byte_orig);
1978 o += i_byte - i_byte_orig;
1980 else if (i > 0 && SYNTAX (prev_c) == Sword && --word_count)
1982 *o++ = '\\';
1983 *o++ = 'W';
1984 *o++ = '\\';
1985 *o++ = 'W';
1986 *o++ = '*';
1989 prev_c = c;
1992 *o++ = '\\';
1993 *o++ = 'b';
1995 return val;
1998 DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
1999 "MSearch backward: ",
2000 "Search backward from point for STRING.\n\
2001 Set point to the beginning of the occurrence found, and return point.\n\
2002 An optional second argument bounds the search; it is a buffer position.\n\
2003 The match found must not extend before that position.\n\
2004 Optional third argument, if t, means if fail just return nil (no error).\n\
2005 If not nil and not t, position at limit of search and return nil.\n\
2006 Optional fourth argument is repeat count--search for successive occurrences.\n\
2007 See also the functions `match-beginning', `match-end' and `replace-match'.")
2008 (string, bound, noerror, count)
2009 Lisp_Object string, bound, noerror, count;
2011 return search_command (string, bound, noerror, count, -1, 0, 0);
2014 DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
2015 "Search forward from point for STRING.\n\
2016 Set point to the end of the occurrence found, and return point.\n\
2017 An optional second argument bounds the search; it is a buffer position.\n\
2018 The match found must not extend after that position. nil is equivalent\n\
2019 to (point-max).\n\
2020 Optional third argument, if t, means if fail just return nil (no error).\n\
2021 If not nil and not t, move to limit of search and return nil.\n\
2022 Optional fourth argument is repeat count--search for successive occurrences.\n\
2023 See also the functions `match-beginning', `match-end' and `replace-match'.")
2024 (string, bound, noerror, count)
2025 Lisp_Object string, bound, noerror, count;
2027 return search_command (string, bound, noerror, count, 1, 0, 0);
2030 DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
2031 "sWord search backward: ",
2032 "Search backward from point for STRING, ignoring differences in punctuation.\n\
2033 Set point to the beginning of the occurrence found, and return point.\n\
2034 An optional second argument bounds the search; it is a buffer position.\n\
2035 The match found must not extend before that position.\n\
2036 Optional third argument, if t, means if fail just return nil (no error).\n\
2037 If not nil and not t, move to limit of search and return nil.\n\
2038 Optional fourth argument is repeat count--search for successive occurrences.")
2039 (string, bound, noerror, count)
2040 Lisp_Object string, bound, noerror, count;
2042 return search_command (wordify (string), bound, noerror, count, -1, 1, 0);
2045 DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
2046 "sWord search: ",
2047 "Search forward from point for STRING, ignoring differences in punctuation.\n\
2048 Set point to the end of the occurrence found, and return point.\n\
2049 An optional second argument bounds the search; it is a buffer position.\n\
2050 The match found must not extend after that position.\n\
2051 Optional third argument, if t, means if fail just return nil (no error).\n\
2052 If not nil and not t, move to limit of search and return nil.\n\
2053 Optional fourth argument is repeat count--search for successive occurrences.")
2054 (string, bound, noerror, count)
2055 Lisp_Object string, bound, noerror, count;
2057 return search_command (wordify (string), bound, noerror, count, 1, 1, 0);
2060 DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
2061 "sRE search backward: ",
2062 "Search backward from point for match for regular expression REGEXP.\n\
2063 Set point to the beginning of the match, and return point.\n\
2064 The match found is the one starting last in the buffer\n\
2065 and yet ending before the origin of the search.\n\
2066 An optional second argument bounds the search; it is a buffer position.\n\
2067 The match found must start at or after that position.\n\
2068 Optional third argument, if t, means if fail just return nil (no error).\n\
2069 If not nil and not t, move to limit of search and return nil.\n\
2070 Optional fourth argument is repeat count--search for successive occurrences.\n\
2071 See also the functions `match-beginning', `match-end' and `replace-match'.")
2072 (regexp, bound, noerror, count)
2073 Lisp_Object regexp, bound, noerror, count;
2075 return search_command (regexp, bound, noerror, count, -1, 1, 0);
2078 DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
2079 "sRE search: ",
2080 "Search forward from point for regular expression REGEXP.\n\
2081 Set point to the end of the occurrence found, and return point.\n\
2082 An optional second argument bounds the search; it is a buffer position.\n\
2083 The match found must not extend after that position.\n\
2084 Optional third argument, if t, means if fail just return nil (no error).\n\
2085 If not nil and not t, move to limit of search and return nil.\n\
2086 Optional fourth argument is repeat count--search for successive occurrences.\n\
2087 See also the functions `match-beginning', `match-end' and `replace-match'.")
2088 (regexp, bound, noerror, count)
2089 Lisp_Object regexp, bound, noerror, count;
2091 return search_command (regexp, bound, noerror, count, 1, 1, 0);
2094 DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
2095 "sPosix search backward: ",
2096 "Search backward from point for match for regular expression REGEXP.\n\
2097 Find the longest match in accord with Posix regular expression rules.\n\
2098 Set point to the beginning of the match, and return point.\n\
2099 The match found is the one starting last in the buffer\n\
2100 and yet ending before the origin of the search.\n\
2101 An optional second argument bounds the search; it is a buffer position.\n\
2102 The match found must start at or after that position.\n\
2103 Optional third argument, if t, means if fail just return nil (no error).\n\
2104 If not nil and not t, move to limit of search and return nil.\n\
2105 Optional fourth argument is repeat count--search for successive occurrences.\n\
2106 See also the functions `match-beginning', `match-end' and `replace-match'.")
2107 (regexp, bound, noerror, count)
2108 Lisp_Object regexp, bound, noerror, count;
2110 return search_command (regexp, bound, noerror, count, -1, 1, 1);
2113 DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
2114 "sPosix search: ",
2115 "Search forward from point for regular expression REGEXP.\n\
2116 Find the longest match in accord with Posix regular expression rules.\n\
2117 Set point to the end of the occurrence found, and return point.\n\
2118 An optional second argument bounds the search; it is a buffer position.\n\
2119 The match found must not extend after that position.\n\
2120 Optional third argument, if t, means if fail just return nil (no error).\n\
2121 If not nil and not t, move to limit of search and return nil.\n\
2122 Optional fourth argument is repeat count--search for successive occurrences.\n\
2123 See also the functions `match-beginning', `match-end' and `replace-match'.")
2124 (regexp, bound, noerror, count)
2125 Lisp_Object regexp, bound, noerror, count;
2127 return search_command (regexp, bound, noerror, count, 1, 1, 1);
2130 DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
2131 "Replace text matched by last search with NEWTEXT.\n\
2132 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.\n\
2133 Otherwise maybe capitalize the whole text, or maybe just word initials,\n\
2134 based on the replaced text.\n\
2135 If the replaced text has only capital letters\n\
2136 and has at least one multiletter word, convert NEWTEXT to all caps.\n\
2137 If the replaced text has at least one word starting with a capital letter,\n\
2138 then capitalize each word in NEWTEXT.\n\n\
2139 If third arg LITERAL is non-nil, insert NEWTEXT literally.\n\
2140 Otherwise treat `\\' as special:\n\
2141 `\\&' in NEWTEXT means substitute original matched text.\n\
2142 `\\N' means substitute what matched the Nth `\\(...\\)'.\n\
2143 If Nth parens didn't match, substitute nothing.\n\
2144 `\\\\' means insert one `\\'.\n\
2145 FIXEDCASE and LITERAL are optional arguments.\n\
2146 Leaves point at end of replacement text.\n\
2148 The optional fourth argument STRING can be a string to modify.\n\
2149 In that case, this function creates and returns a new string\n\
2150 which is made by replacing the part of STRING that was matched.\n\
2152 The optional fifth argument SUBEXP specifies a subexpression of the match.\n\
2153 It says to replace just that subexpression instead of the whole match.\n\
2154 This is useful only after a regular expression search or match\n\
2155 since only regular expressions have distinguished subexpressions.")
2156 (newtext, fixedcase, literal, string, subexp)
2157 Lisp_Object newtext, fixedcase, literal, string, subexp;
2159 enum { nochange, all_caps, cap_initial } case_action;
2160 register int pos, last;
2161 int some_multiletter_word;
2162 int some_lowercase;
2163 int some_uppercase;
2164 int some_nonuppercase_initial;
2165 register int c, prevc;
2166 int inslen;
2167 int sub;
2168 int opoint, newpoint;
2170 CHECK_STRING (newtext, 0);
2172 if (! NILP (string))
2173 CHECK_STRING (string, 4);
2175 case_action = nochange; /* We tried an initialization */
2176 /* but some C compilers blew it */
2178 if (search_regs.num_regs <= 0)
2179 error ("replace-match called before any match found");
2181 if (NILP (subexp))
2182 sub = 0;
2183 else
2185 CHECK_NUMBER (subexp, 3);
2186 sub = XINT (subexp);
2187 if (sub < 0 || sub >= search_regs.num_regs)
2188 args_out_of_range (subexp, make_number (search_regs.num_regs));
2191 if (NILP (string))
2193 if (search_regs.start[sub] < BEGV
2194 || search_regs.start[sub] > search_regs.end[sub]
2195 || search_regs.end[sub] > ZV)
2196 args_out_of_range (make_number (search_regs.start[sub]),
2197 make_number (search_regs.end[sub]));
2199 else
2201 if (search_regs.start[sub] < 0
2202 || search_regs.start[sub] > search_regs.end[sub]
2203 || search_regs.end[sub] > XSTRING (string)->size)
2204 args_out_of_range (make_number (search_regs.start[sub]),
2205 make_number (search_regs.end[sub]));
2208 if (NILP (fixedcase))
2210 int beg;
2211 /* Decide how to casify by examining the matched text. */
2213 if (NILP (string))
2214 last = CHAR_TO_BYTE (search_regs.end[sub]);
2215 else
2216 last = search_regs.end[sub];
2218 if (NILP (string))
2219 beg = CHAR_TO_BYTE (search_regs.start[sub]);
2220 else
2221 beg = search_regs.start[sub];
2223 prevc = '\n';
2224 case_action = all_caps;
2226 /* some_multiletter_word is set nonzero if any original word
2227 is more than one letter long. */
2228 some_multiletter_word = 0;
2229 some_lowercase = 0;
2230 some_nonuppercase_initial = 0;
2231 some_uppercase = 0;
2233 for (pos = beg; pos < last; pos++)
2235 if (NILP (string))
2236 c = FETCH_BYTE (pos);
2237 else
2238 c = XSTRING (string)->data[pos];
2240 if (LOWERCASEP (c))
2242 /* Cannot be all caps if any original char is lower case */
2244 some_lowercase = 1;
2245 if (SYNTAX (prevc) != Sword)
2246 some_nonuppercase_initial = 1;
2247 else
2248 some_multiletter_word = 1;
2250 else if (!NOCASEP (c))
2252 some_uppercase = 1;
2253 if (SYNTAX (prevc) != Sword)
2255 else
2256 some_multiletter_word = 1;
2258 else
2260 /* If the initial is a caseless word constituent,
2261 treat that like a lowercase initial. */
2262 if (SYNTAX (prevc) != Sword)
2263 some_nonuppercase_initial = 1;
2266 prevc = c;
2269 /* Convert to all caps if the old text is all caps
2270 and has at least one multiletter word. */
2271 if (! some_lowercase && some_multiletter_word)
2272 case_action = all_caps;
2273 /* Capitalize each word, if the old text has all capitalized words. */
2274 else if (!some_nonuppercase_initial && some_multiletter_word)
2275 case_action = cap_initial;
2276 else if (!some_nonuppercase_initial && some_uppercase)
2277 /* Should x -> yz, operating on X, give Yz or YZ?
2278 We'll assume the latter. */
2279 case_action = all_caps;
2280 else
2281 case_action = nochange;
2284 /* Do replacement in a string. */
2285 if (!NILP (string))
2287 Lisp_Object before, after;
2289 before = Fsubstring (string, make_number (0),
2290 make_number (search_regs.start[sub]));
2291 after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
2293 /* Substitute parts of the match into NEWTEXT
2294 if desired. */
2295 if (NILP (literal))
2297 int lastpos = -1;
2298 int lastpos_byte = -1;
2299 /* We build up the substituted string in ACCUM. */
2300 Lisp_Object accum;
2301 Lisp_Object middle;
2302 int pos_byte;
2304 accum = Qnil;
2306 for (pos_byte = 0, pos = 0; pos_byte < STRING_BYTES (XSTRING (newtext));)
2308 int substart = -1;
2309 int subend;
2310 int delbackslash = 0;
2312 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2314 if (c == '\\')
2316 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2317 if (c == '&')
2319 substart = search_regs.start[sub];
2320 subend = search_regs.end[sub];
2322 else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
2324 if (search_regs.start[c - '0'] >= 0)
2326 substart = search_regs.start[c - '0'];
2327 subend = search_regs.end[c - '0'];
2330 else if (c == '\\')
2331 delbackslash = 1;
2332 else
2333 error ("Invalid use of `\\' in replacement text");
2335 if (substart >= 0)
2337 if (pos - 1 != lastpos + 1)
2338 middle = substring_both (newtext, lastpos + 1,
2339 lastpos_byte + 1,
2340 pos - 1, pos_byte - 1);
2341 else
2342 middle = Qnil;
2343 accum = concat3 (accum, middle,
2344 Fsubstring (string,
2345 make_number (substart),
2346 make_number (subend)));
2347 lastpos = pos;
2348 lastpos_byte = pos_byte;
2350 else if (delbackslash)
2352 middle = substring_both (newtext, lastpos + 1,
2353 lastpos_byte + 1,
2354 pos, pos_byte);
2356 accum = concat2 (accum, middle);
2357 lastpos = pos;
2358 lastpos_byte = pos_byte;
2362 if (pos != lastpos + 1)
2363 middle = substring_both (newtext, lastpos + 1,
2364 lastpos_byte + 1,
2365 pos, pos_byte);
2366 else
2367 middle = Qnil;
2369 newtext = concat2 (accum, middle);
2372 /* Do case substitution in NEWTEXT if desired. */
2373 if (case_action == all_caps)
2374 newtext = Fupcase (newtext);
2375 else if (case_action == cap_initial)
2376 newtext = Fupcase_initials (newtext);
2378 return concat3 (before, newtext, after);
2381 /* Record point, the move (quietly) to the start of the match. */
2382 if (PT > search_regs.start[sub])
2383 opoint = PT - ZV;
2384 else
2385 opoint = PT;
2387 TEMP_SET_PT (search_regs.start[sub]);
2389 /* We insert the replacement text before the old text, and then
2390 delete the original text. This means that markers at the
2391 beginning or end of the original will float to the corresponding
2392 position in the replacement. */
2393 if (!NILP (literal))
2394 Finsert_and_inherit (1, &newtext);
2395 else
2397 struct gcpro gcpro1;
2398 GCPRO1 (newtext);
2400 for (pos = 0; pos < XSTRING (newtext)->size; pos++)
2402 int offset = PT - search_regs.start[sub];
2404 c = XSTRING (newtext)->data[pos];
2405 if (c == '\\')
2407 c = XSTRING (newtext)->data[++pos];
2408 if (c == '&')
2409 Finsert_buffer_substring
2410 (Fcurrent_buffer (),
2411 make_number (search_regs.start[sub] + offset),
2412 make_number (search_regs.end[sub] + offset));
2413 else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
2415 if (search_regs.start[c - '0'] >= 1)
2416 Finsert_buffer_substring
2417 (Fcurrent_buffer (),
2418 make_number (search_regs.start[c - '0'] + offset),
2419 make_number (search_regs.end[c - '0'] + offset));
2421 else if (c == '\\')
2422 insert_char (c);
2423 else
2424 error ("Invalid use of `\\' in replacement text");
2426 else
2427 insert_char (c);
2429 UNGCPRO;
2432 inslen = PT - (search_regs.start[sub]);
2433 del_range (search_regs.start[sub] + inslen, search_regs.end[sub] + inslen);
2435 if (case_action == all_caps)
2436 Fupcase_region (make_number (PT - inslen), make_number (PT));
2437 else if (case_action == cap_initial)
2438 Fupcase_initials_region (make_number (PT - inslen), make_number (PT));
2440 newpoint = PT;
2442 /* Put point back where it was in the text. */
2443 if (opoint <= 0)
2444 TEMP_SET_PT (opoint + ZV);
2445 else
2446 TEMP_SET_PT (opoint);
2448 /* Now move point "officially" to the start of the inserted replacement. */
2449 move_if_not_intangible (newpoint);
2451 return Qnil;
2454 static Lisp_Object
2455 match_limit (num, beginningp)
2456 Lisp_Object num;
2457 int beginningp;
2459 register int n;
2461 CHECK_NUMBER (num, 0);
2462 n = XINT (num);
2463 if (n < 0 || n >= search_regs.num_regs)
2464 args_out_of_range (num, make_number (search_regs.num_regs));
2465 if (search_regs.num_regs <= 0
2466 || search_regs.start[n] < 0)
2467 return Qnil;
2468 return (make_number ((beginningp) ? search_regs.start[n]
2469 : search_regs.end[n]));
2472 DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
2473 "Return position of start of text matched by last search.\n\
2474 SUBEXP, a number, specifies which parenthesized expression in the last\n\
2475 regexp.\n\
2476 Value is nil if SUBEXPth pair didn't match, or there were less than\n\
2477 SUBEXP pairs.\n\
2478 Zero means the entire text matched by the whole regexp or whole string.")
2479 (subexp)
2480 Lisp_Object subexp;
2482 return match_limit (subexp, 1);
2485 DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
2486 "Return position of end of text matched by last search.\n\
2487 SUBEXP, a number, specifies which parenthesized expression in the last\n\
2488 regexp.\n\
2489 Value is nil if SUBEXPth pair didn't match, or there were less than\n\
2490 SUBEXP pairs.\n\
2491 Zero means the entire text matched by the whole regexp or whole string.")
2492 (subexp)
2493 Lisp_Object subexp;
2495 return match_limit (subexp, 0);
2498 DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 2, 0,
2499 "Return a list containing all info on what the last search matched.\n\
2500 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.\n\
2501 All the elements are markers or nil (nil if the Nth pair didn't match)\n\
2502 if the last match was on a buffer; integers or nil if a string was matched.\n\
2503 Use `store-match-data' to reinstate the data in this list.\n\
2505 If INTEGERS (the optional first argument) is non-nil, always use integers\n\
2506 \(rather than markers) to represent buffer positions.\n\
2507 If REUSE is a list, reuse it as part of the value. If REUSE is long enough\n\
2508 to hold all the values, and if INTEGERS is non-nil, no consing is done.")
2509 (integers, reuse)
2510 Lisp_Object integers, reuse;
2512 Lisp_Object tail, prev;
2513 Lisp_Object *data;
2514 int i, len;
2516 if (NILP (last_thing_searched))
2517 return Qnil;
2519 data = (Lisp_Object *) alloca ((2 * search_regs.num_regs)
2520 * sizeof (Lisp_Object));
2522 len = -1;
2523 for (i = 0; i < search_regs.num_regs; i++)
2525 int start = search_regs.start[i];
2526 if (start >= 0)
2528 if (EQ (last_thing_searched, Qt)
2529 || ! NILP (integers))
2531 XSETFASTINT (data[2 * i], start);
2532 XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
2534 else if (BUFFERP (last_thing_searched))
2536 data[2 * i] = Fmake_marker ();
2537 Fset_marker (data[2 * i],
2538 make_number (start),
2539 last_thing_searched);
2540 data[2 * i + 1] = Fmake_marker ();
2541 Fset_marker (data[2 * i + 1],
2542 make_number (search_regs.end[i]),
2543 last_thing_searched);
2545 else
2546 /* last_thing_searched must always be Qt, a buffer, or Qnil. */
2547 abort ();
2549 len = i;
2551 else
2552 data[2 * i] = data [2 * i + 1] = Qnil;
2555 /* If REUSE is not usable, cons up the values and return them. */
2556 if (! CONSP (reuse))
2557 return Flist (2 * len + 2, data);
2559 /* If REUSE is a list, store as many value elements as will fit
2560 into the elements of REUSE. */
2561 for (i = 0, tail = reuse; CONSP (tail);
2562 i++, tail = XCONS (tail)->cdr)
2564 if (i < 2 * len + 2)
2565 XCONS (tail)->car = data[i];
2566 else
2567 XCONS (tail)->car = Qnil;
2568 prev = tail;
2571 /* If we couldn't fit all value elements into REUSE,
2572 cons up the rest of them and add them to the end of REUSE. */
2573 if (i < 2 * len + 2)
2574 XCONS (prev)->cdr = Flist (2 * len + 2 - i, data + i);
2576 return reuse;
2580 DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 1, 0,
2581 "Set internal data on last search match from elements of LIST.\n\
2582 LIST should have been created by calling `match-data' previously.")
2583 (list)
2584 register Lisp_Object list;
2586 register int i;
2587 register Lisp_Object marker;
2589 if (running_asynch_code)
2590 save_search_regs ();
2592 if (!CONSP (list) && !NILP (list))
2593 list = wrong_type_argument (Qconsp, list);
2595 /* Unless we find a marker with a buffer in LIST, assume that this
2596 match data came from a string. */
2597 last_thing_searched = Qt;
2599 /* Allocate registers if they don't already exist. */
2601 int length = XFASTINT (Flength (list)) / 2;
2603 if (length > search_regs.num_regs)
2605 if (search_regs.num_regs == 0)
2607 search_regs.start
2608 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2609 search_regs.end
2610 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2612 else
2614 search_regs.start
2615 = (regoff_t *) xrealloc (search_regs.start,
2616 length * sizeof (regoff_t));
2617 search_regs.end
2618 = (regoff_t *) xrealloc (search_regs.end,
2619 length * sizeof (regoff_t));
2622 search_regs.num_regs = length;
2626 for (i = 0; i < search_regs.num_regs; i++)
2628 marker = Fcar (list);
2629 if (NILP (marker))
2631 search_regs.start[i] = -1;
2632 list = Fcdr (list);
2634 else
2636 if (MARKERP (marker))
2638 if (XMARKER (marker)->buffer == 0)
2639 XSETFASTINT (marker, 0);
2640 else
2641 XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
2644 CHECK_NUMBER_COERCE_MARKER (marker, 0);
2645 search_regs.start[i] = XINT (marker);
2646 list = Fcdr (list);
2648 marker = Fcar (list);
2649 if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
2650 XSETFASTINT (marker, 0);
2652 CHECK_NUMBER_COERCE_MARKER (marker, 0);
2653 search_regs.end[i] = XINT (marker);
2655 list = Fcdr (list);
2658 return Qnil;
2661 /* If non-zero the match data have been saved in saved_search_regs
2662 during the execution of a sentinel or filter. */
2663 static int search_regs_saved;
2664 static struct re_registers saved_search_regs;
2666 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
2667 if asynchronous code (filter or sentinel) is running. */
2668 static void
2669 save_search_regs ()
2671 if (!search_regs_saved)
2673 saved_search_regs.num_regs = search_regs.num_regs;
2674 saved_search_regs.start = search_regs.start;
2675 saved_search_regs.end = search_regs.end;
2676 search_regs.num_regs = 0;
2677 search_regs.start = 0;
2678 search_regs.end = 0;
2680 search_regs_saved = 1;
2684 /* Called upon exit from filters and sentinels. */
2685 void
2686 restore_match_data ()
2688 if (search_regs_saved)
2690 if (search_regs.num_regs > 0)
2692 xfree (search_regs.start);
2693 xfree (search_regs.end);
2695 search_regs.num_regs = saved_search_regs.num_regs;
2696 search_regs.start = saved_search_regs.start;
2697 search_regs.end = saved_search_regs.end;
2699 search_regs_saved = 0;
2703 /* Quote a string to inactivate reg-expr chars */
2705 DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
2706 "Return a regexp string which matches exactly STRING and nothing else.")
2707 (string)
2708 Lisp_Object string;
2710 register unsigned char *in, *out, *end;
2711 register unsigned char *temp;
2712 int backslashes_added = 0;
2714 CHECK_STRING (string, 0);
2716 temp = (unsigned char *) alloca (STRING_BYTES (XSTRING (string)) * 2);
2718 /* Now copy the data into the new string, inserting escapes. */
2720 in = XSTRING (string)->data;
2721 end = in + STRING_BYTES (XSTRING (string));
2722 out = temp;
2724 for (; in != end; in++)
2726 if (*in == '[' || *in == ']'
2727 || *in == '*' || *in == '.' || *in == '\\'
2728 || *in == '?' || *in == '+'
2729 || *in == '^' || *in == '$')
2730 *out++ = '\\', backslashes_added++;
2731 *out++ = *in;
2734 return make_specified_string (temp,
2735 XSTRING (string)->size + backslashes_added,
2736 out - temp,
2737 STRING_MULTIBYTE (string));
2740 void
2741 syms_of_search ()
2743 register int i;
2745 for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
2747 searchbufs[i].buf.allocated = 100;
2748 searchbufs[i].buf.buffer = (unsigned char *) malloc (100);
2749 searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
2750 searchbufs[i].regexp = Qnil;
2751 staticpro (&searchbufs[i].regexp);
2752 searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
2754 searchbuf_head = &searchbufs[0];
2756 Qsearch_failed = intern ("search-failed");
2757 staticpro (&Qsearch_failed);
2758 Qinvalid_regexp = intern ("invalid-regexp");
2759 staticpro (&Qinvalid_regexp);
2761 Fput (Qsearch_failed, Qerror_conditions,
2762 Fcons (Qsearch_failed, Fcons (Qerror, Qnil)));
2763 Fput (Qsearch_failed, Qerror_message,
2764 build_string ("Search failed"));
2766 Fput (Qinvalid_regexp, Qerror_conditions,
2767 Fcons (Qinvalid_regexp, Fcons (Qerror, Qnil)));
2768 Fput (Qinvalid_regexp, Qerror_message,
2769 build_string ("Invalid regexp"));
2771 last_thing_searched = Qnil;
2772 staticpro (&last_thing_searched);
2774 defsubr (&Slooking_at);
2775 defsubr (&Sposix_looking_at);
2776 defsubr (&Sstring_match);
2777 defsubr (&Sposix_string_match);
2778 defsubr (&Ssearch_forward);
2779 defsubr (&Ssearch_backward);
2780 defsubr (&Sword_search_forward);
2781 defsubr (&Sword_search_backward);
2782 defsubr (&Sre_search_forward);
2783 defsubr (&Sre_search_backward);
2784 defsubr (&Sposix_search_forward);
2785 defsubr (&Sposix_search_backward);
2786 defsubr (&Sreplace_match);
2787 defsubr (&Smatch_beginning);
2788 defsubr (&Smatch_end);
2789 defsubr (&Smatch_data);
2790 defsubr (&Sset_match_data);
2791 defsubr (&Sregexp_quote);