Merge from emacs--devo--0
[emacs/old-mirror.git] / src / search.c
blobfd7b474e4ab93a9c8a785a45acaafbcb0d426f2f
1 /* String search routines for GNU Emacs.
2 Copyright (C) 1985, 1986, 1987, 1993, 1994, 1997, 1998, 1999, 2001, 2002,
3 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
5 This file is part of GNU Emacs.
7 GNU Emacs is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
12 GNU Emacs is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU Emacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20 Boston, MA 02110-1301, USA. */
23 #include <config.h>
24 #include "lisp.h"
25 #include "syntax.h"
26 #include "category.h"
27 #include "buffer.h"
28 #include "character.h"
29 #include "charset.h"
30 #include "region-cache.h"
31 #include "commands.h"
32 #include "blockinput.h"
33 #include "intervals.h"
35 #include <sys/types.h>
36 #include "regex.h"
38 #define REGEXP_CACHE_SIZE 20
40 /* If the regexp is non-nil, then the buffer contains the compiled form
41 of that regexp, suitable for searching. */
42 struct regexp_cache
44 struct regexp_cache *next;
45 Lisp_Object regexp, whitespace_regexp;
46 /* Syntax table for which the regexp applies. We need this because
47 of character classes. If this is t, then the compiled pattern is valid
48 for any syntax-table. */
49 Lisp_Object syntax_table;
50 struct re_pattern_buffer buf;
51 char fastmap[0400];
52 /* Nonzero means regexp was compiled to do full POSIX backtracking. */
53 char posix;
56 /* The instances of that struct. */
57 struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
59 /* The head of the linked list; points to the most recently used buffer. */
60 struct regexp_cache *searchbuf_head;
63 /* Every call to re_match, etc., must pass &search_regs as the regs
64 argument unless you can show it is unnecessary (i.e., if re_match
65 is certainly going to be called again before region-around-match
66 can be called).
68 Since the registers are now dynamically allocated, we need to make
69 sure not to refer to the Nth register before checking that it has
70 been allocated by checking search_regs.num_regs.
72 The regex code keeps track of whether it has allocated the search
73 buffer using bits in the re_pattern_buffer. This means that whenever
74 you compile a new pattern, it completely forgets whether it has
75 allocated any registers, and will allocate new registers the next
76 time you call a searching or matching function. Therefore, we need
77 to call re_set_registers after compiling a new pattern or after
78 setting the match registers, so that the regex functions will be
79 able to free or re-allocate it properly. */
80 static struct re_registers search_regs;
82 /* The buffer in which the last search was performed, or
83 Qt if the last search was done in a string;
84 Qnil if no searching has been done yet. */
85 static Lisp_Object last_thing_searched;
87 /* error condition signaled when regexp compile_pattern fails */
89 Lisp_Object Qinvalid_regexp;
91 /* Error condition used for failing searches */
92 Lisp_Object Qsearch_failed;
94 Lisp_Object Vsearch_spaces_regexp;
96 static void set_search_regs ();
97 static void save_search_regs ();
98 static int simple_search ();
99 static int boyer_moore ();
100 static int search_buffer ();
101 static void matcher_overflow () NO_RETURN;
103 static void
104 matcher_overflow ()
106 error ("Stack overflow in regexp matcher");
109 /* Compile a regexp and signal a Lisp error if anything goes wrong.
110 PATTERN is the pattern to compile.
111 CP is the place to put the result.
112 TRANSLATE is a translation table for ignoring case, or nil for none.
113 REGP is the structure that says where to store the "register"
114 values that will result from matching this pattern.
115 If it is 0, we should compile the pattern not to record any
116 subexpression bounds.
117 POSIX is nonzero if we want full backtracking (POSIX style)
118 for this pattern. 0 means backtrack only enough to get a valid match.
120 The behavior also depends on Vsearch_spaces_regexp. */
122 static void
123 compile_pattern_1 (cp, pattern, translate, regp, posix)
124 struct regexp_cache *cp;
125 Lisp_Object pattern;
126 Lisp_Object translate;
127 struct re_registers *regp;
128 int posix;
130 char *val;
131 reg_syntax_t old;
133 cp->regexp = Qnil;
134 cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
135 cp->posix = posix;
136 cp->buf.multibyte = STRING_MULTIBYTE (pattern);
137 cp->buf.charset_unibyte = charset_unibyte;
138 cp->whitespace_regexp = Vsearch_spaces_regexp;
139 /* rms: I think BLOCK_INPUT is not needed here any more,
140 because regex.c defines malloc to call xmalloc.
141 Using BLOCK_INPUT here means the debugger won't run if an error occurs.
142 So let's turn it off. */
143 /* BLOCK_INPUT; */
144 old = re_set_syntax (RE_SYNTAX_EMACS
145 | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
146 re_set_whitespace_regexp (NILP (Vsearch_spaces_regexp) ? NULL
147 : SDATA (Vsearch_spaces_regexp));
149 val = (char *) re_compile_pattern ((char *) SDATA (pattern),
150 SBYTES (pattern), &cp->buf);
152 /* If the compiled pattern hard codes some of the contents of the
153 syntax-table, it can only be reused with *this* syntax table. */
154 cp->syntax_table = cp->buf.used_syntax ? current_buffer->syntax_table : Qt;
156 re_set_whitespace_regexp (NULL);
158 re_set_syntax (old);
159 /* UNBLOCK_INPUT; */
160 if (val)
161 xsignal1 (Qinvalid_regexp, build_string (val));
163 cp->regexp = Fcopy_sequence (pattern);
166 /* Shrink each compiled regexp buffer in the cache
167 to the size actually used right now.
168 This is called from garbage collection. */
170 void
171 shrink_regexp_cache ()
173 struct regexp_cache *cp;
175 for (cp = searchbuf_head; cp != 0; cp = cp->next)
177 cp->buf.allocated = cp->buf.used;
178 cp->buf.buffer
179 = (unsigned char *) xrealloc (cp->buf.buffer, cp->buf.used);
183 /* Clear the regexp cache w.r.t. a particular syntax table,
184 because it was changed.
185 There is no danger of memory leak here because re_compile_pattern
186 automagically manages the memory in each re_pattern_buffer struct,
187 based on its `allocated' and `buffer' values. */
188 void
189 clear_regexp_cache ()
191 int i;
193 for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
194 /* It's tempting to compare with the syntax-table we've actually changd,
195 but it's not sufficient because char-table inheritance mewans that
196 modifying one syntax-table can change others at the same time. */
197 if (!EQ (searchbufs[i].syntax_table, Qt))
198 searchbufs[i].regexp = Qnil;
201 /* Compile a regexp if necessary, but first check to see if there's one in
202 the cache.
203 PATTERN is the pattern to compile.
204 TRANSLATE is a translation table for ignoring case, or nil for none.
205 REGP is the structure that says where to store the "register"
206 values that will result from matching this pattern.
207 If it is 0, we should compile the pattern not to record any
208 subexpression bounds.
209 POSIX is nonzero if we want full backtracking (POSIX style)
210 for this pattern. 0 means backtrack only enough to get a valid match. */
212 struct re_pattern_buffer *
213 compile_pattern (pattern, regp, translate, posix, multibyte)
214 Lisp_Object pattern;
215 struct re_registers *regp;
216 Lisp_Object translate;
217 int posix, multibyte;
219 struct regexp_cache *cp, **cpp;
221 for (cpp = &searchbuf_head; ; cpp = &cp->next)
223 cp = *cpp;
224 /* Entries are initialized to nil, and may be set to nil by
225 compile_pattern_1 if the pattern isn't valid. Don't apply
226 string accessors in those cases. However, compile_pattern_1
227 is only applied to the cache entry we pick here to reuse. So
228 nil should never appear before a non-nil entry. */
229 if (NILP (cp->regexp))
230 goto compile_it;
231 if (SCHARS (cp->regexp) == SCHARS (pattern)
232 && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
233 && !NILP (Fstring_equal (cp->regexp, pattern))
234 && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
235 && cp->posix == posix
236 && (EQ (cp->syntax_table, Qt)
237 || EQ (cp->syntax_table, current_buffer->syntax_table))
238 && !NILP (Fequal (cp->whitespace_regexp, Vsearch_spaces_regexp))
239 && cp->buf.charset_unibyte == charset_unibyte)
240 break;
242 /* If we're at the end of the cache, compile into the nil cell
243 we found, or the last (least recently used) cell with a
244 string value. */
245 if (cp->next == 0)
247 compile_it:
248 compile_pattern_1 (cp, pattern, translate, regp, posix);
249 break;
253 /* When we get here, cp (aka *cpp) contains the compiled pattern,
254 either because we found it in the cache or because we just compiled it.
255 Move it to the front of the queue to mark it as most recently used. */
256 *cpp = cp->next;
257 cp->next = searchbuf_head;
258 searchbuf_head = cp;
260 /* Advise the searching functions about the space we have allocated
261 for register data. */
262 if (regp)
263 re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
265 /* The compiled pattern can be used both for mulitbyte and unibyte
266 target. But, we have to tell which the pattern is used for. */
267 cp->buf.target_multibyte = multibyte;
269 return &cp->buf;
273 static Lisp_Object
274 looking_at_1 (string, posix)
275 Lisp_Object string;
276 int posix;
278 Lisp_Object val;
279 unsigned char *p1, *p2;
280 int s1, s2;
281 register int i;
282 struct re_pattern_buffer *bufp;
284 if (running_asynch_code)
285 save_search_regs ();
287 /* This is so set_image_of_range_1 in regex.c can find the EQV table. */
288 XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
289 = current_buffer->case_eqv_table;
291 CHECK_STRING (string);
292 bufp = compile_pattern (string, &search_regs,
293 (!NILP (current_buffer->case_fold_search)
294 ? current_buffer->case_canon_table : Qnil),
295 posix,
296 !NILP (current_buffer->enable_multibyte_characters));
298 immediate_quit = 1;
299 QUIT; /* Do a pending quit right away, to avoid paradoxical behavior */
301 /* Get pointers and sizes of the two strings
302 that make up the visible portion of the buffer. */
304 p1 = BEGV_ADDR;
305 s1 = GPT_BYTE - BEGV_BYTE;
306 p2 = GAP_END_ADDR;
307 s2 = ZV_BYTE - GPT_BYTE;
308 if (s1 < 0)
310 p2 = p1;
311 s2 = ZV_BYTE - BEGV_BYTE;
312 s1 = 0;
314 if (s2 < 0)
316 s1 = ZV_BYTE - BEGV_BYTE;
317 s2 = 0;
320 re_match_object = Qnil;
322 i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
323 PT_BYTE - BEGV_BYTE, &search_regs,
324 ZV_BYTE - BEGV_BYTE);
325 immediate_quit = 0;
327 if (i == -2)
328 matcher_overflow ();
330 val = (0 <= i ? Qt : Qnil);
331 if (i >= 0)
332 for (i = 0; i < search_regs.num_regs; i++)
333 if (search_regs.start[i] >= 0)
335 search_regs.start[i]
336 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
337 search_regs.end[i]
338 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
340 XSETBUFFER (last_thing_searched, current_buffer);
341 return val;
344 DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
345 doc: /* Return t if text after point matches regular expression REGEXP.
346 This function modifies the match data that `match-beginning',
347 `match-end' and `match-data' access; save and restore the match
348 data if you want to preserve them. */)
349 (regexp)
350 Lisp_Object regexp;
352 return looking_at_1 (regexp, 0);
355 DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
356 doc: /* Return t if text after point matches regular expression REGEXP.
357 Find the longest match, in accord with Posix regular expression rules.
358 This function modifies the match data that `match-beginning',
359 `match-end' and `match-data' access; save and restore the match
360 data if you want to preserve them. */)
361 (regexp)
362 Lisp_Object regexp;
364 return looking_at_1 (regexp, 1);
367 static Lisp_Object
368 string_match_1 (regexp, string, start, posix)
369 Lisp_Object regexp, string, start;
370 int posix;
372 int val;
373 struct re_pattern_buffer *bufp;
374 int pos, pos_byte;
375 int i;
377 if (running_asynch_code)
378 save_search_regs ();
380 CHECK_STRING (regexp);
381 CHECK_STRING (string);
383 if (NILP (start))
384 pos = 0, pos_byte = 0;
385 else
387 int len = SCHARS (string);
389 CHECK_NUMBER (start);
390 pos = XINT (start);
391 if (pos < 0 && -pos <= len)
392 pos = len + pos;
393 else if (0 > pos || pos > len)
394 args_out_of_range (string, start);
395 pos_byte = string_char_to_byte (string, pos);
398 /* This is so set_image_of_range_1 in regex.c can find the EQV table. */
399 XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
400 = current_buffer->case_eqv_table;
402 bufp = compile_pattern (regexp, &search_regs,
403 (!NILP (current_buffer->case_fold_search)
404 ? current_buffer->case_canon_table : Qnil),
405 posix,
406 STRING_MULTIBYTE (string));
407 immediate_quit = 1;
408 re_match_object = string;
410 val = re_search (bufp, (char *) SDATA (string),
411 SBYTES (string), pos_byte,
412 SBYTES (string) - pos_byte,
413 &search_regs);
414 immediate_quit = 0;
415 last_thing_searched = Qt;
416 if (val == -2)
417 matcher_overflow ();
418 if (val < 0) return Qnil;
420 for (i = 0; i < search_regs.num_regs; i++)
421 if (search_regs.start[i] >= 0)
423 search_regs.start[i]
424 = string_byte_to_char (string, search_regs.start[i]);
425 search_regs.end[i]
426 = string_byte_to_char (string, search_regs.end[i]);
429 return make_number (string_byte_to_char (string, val));
432 DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
433 doc: /* Return index of start of first match for REGEXP in STRING, or nil.
434 Matching ignores case if `case-fold-search' is non-nil.
435 If third arg START is non-nil, start search at that index in STRING.
436 For index of first char beyond the match, do (match-end 0).
437 `match-end' and `match-beginning' also give indices of substrings
438 matched by parenthesis constructs in the pattern.
440 You can use the function `match-string' to extract the substrings
441 matched by the parenthesis constructions in REGEXP. */)
442 (regexp, string, start)
443 Lisp_Object regexp, string, start;
445 return string_match_1 (regexp, string, start, 0);
448 DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
449 doc: /* Return index of start of first match for REGEXP in STRING, or nil.
450 Find the longest match, in accord with Posix regular expression rules.
451 Case is ignored if `case-fold-search' is non-nil in the current buffer.
452 If third arg START is non-nil, start search at that index in STRING.
453 For index of first char beyond the match, do (match-end 0).
454 `match-end' and `match-beginning' also give indices of substrings
455 matched by parenthesis constructs in the pattern. */)
456 (regexp, string, start)
457 Lisp_Object regexp, string, start;
459 return string_match_1 (regexp, string, start, 1);
462 /* Match REGEXP against STRING, searching all of STRING,
463 and return the index of the match, or negative on failure.
464 This does not clobber the match data. */
467 fast_string_match (regexp, string)
468 Lisp_Object regexp, string;
470 int val;
471 struct re_pattern_buffer *bufp;
473 bufp = compile_pattern (regexp, 0, Qnil,
474 0, STRING_MULTIBYTE (string));
475 immediate_quit = 1;
476 re_match_object = string;
478 val = re_search (bufp, (char *) SDATA (string),
479 SBYTES (string), 0,
480 SBYTES (string), 0);
481 immediate_quit = 0;
482 return val;
485 /* Match REGEXP against STRING, searching all of STRING ignoring case,
486 and return the index of the match, or negative on failure.
487 This does not clobber the match data.
488 We assume that STRING contains single-byte characters. */
490 extern Lisp_Object Vascii_downcase_table;
493 fast_c_string_match_ignore_case (regexp, string)
494 Lisp_Object regexp;
495 const char *string;
497 int val;
498 struct re_pattern_buffer *bufp;
499 int len = strlen (string);
501 regexp = string_make_unibyte (regexp);
502 re_match_object = Qt;
503 bufp = compile_pattern (regexp, 0,
504 Vascii_canon_table, 0,
506 immediate_quit = 1;
507 val = re_search (bufp, string, len, 0, len, 0);
508 immediate_quit = 0;
509 return val;
512 /* Like fast_string_match but ignore case. */
515 fast_string_match_ignore_case (regexp, string)
516 Lisp_Object regexp, string;
518 int val;
519 struct re_pattern_buffer *bufp;
521 bufp = compile_pattern (regexp, 0, Vascii_canon_table,
522 0, STRING_MULTIBYTE (string));
523 immediate_quit = 1;
524 re_match_object = string;
526 val = re_search (bufp, (char *) SDATA (string),
527 SBYTES (string), 0,
528 SBYTES (string), 0);
529 immediate_quit = 0;
530 return val;
533 /* The newline cache: remembering which sections of text have no newlines. */
535 /* If the user has requested newline caching, make sure it's on.
536 Otherwise, make sure it's off.
537 This is our cheezy way of associating an action with the change of
538 state of a buffer-local variable. */
539 static void
540 newline_cache_on_off (buf)
541 struct buffer *buf;
543 if (NILP (buf->cache_long_line_scans))
545 /* It should be off. */
546 if (buf->newline_cache)
548 free_region_cache (buf->newline_cache);
549 buf->newline_cache = 0;
552 else
554 /* It should be on. */
555 if (buf->newline_cache == 0)
556 buf->newline_cache = new_region_cache ();
561 /* Search for COUNT instances of the character TARGET between START and END.
563 If COUNT is positive, search forwards; END must be >= START.
564 If COUNT is negative, search backwards for the -COUNTth instance;
565 END must be <= START.
566 If COUNT is zero, do anything you please; run rogue, for all I care.
568 If END is zero, use BEGV or ZV instead, as appropriate for the
569 direction indicated by COUNT.
571 If we find COUNT instances, set *SHORTAGE to zero, and return the
572 position past the COUNTth match. Note that for reverse motion
573 this is not the same as the usual convention for Emacs motion commands.
575 If we don't find COUNT instances before reaching END, set *SHORTAGE
576 to the number of TARGETs left unfound, and return END.
578 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
579 except when inside redisplay. */
582 scan_buffer (target, start, end, count, shortage, allow_quit)
583 register int target;
584 int start, end;
585 int count;
586 int *shortage;
587 int allow_quit;
589 struct region_cache *newline_cache;
590 int direction;
592 if (count > 0)
594 direction = 1;
595 if (! end) end = ZV;
597 else
599 direction = -1;
600 if (! end) end = BEGV;
603 newline_cache_on_off (current_buffer);
604 newline_cache = current_buffer->newline_cache;
606 if (shortage != 0)
607 *shortage = 0;
609 immediate_quit = allow_quit;
611 if (count > 0)
612 while (start != end)
614 /* Our innermost scanning loop is very simple; it doesn't know
615 about gaps, buffer ends, or the newline cache. ceiling is
616 the position of the last character before the next such
617 obstacle --- the last character the dumb search loop should
618 examine. */
619 int ceiling_byte = CHAR_TO_BYTE (end) - 1;
620 int start_byte = CHAR_TO_BYTE (start);
621 int tem;
623 /* If we're looking for a newline, consult the newline cache
624 to see where we can avoid some scanning. */
625 if (target == '\n' && newline_cache)
627 int next_change;
628 immediate_quit = 0;
629 while (region_cache_forward
630 (current_buffer, newline_cache, start_byte, &next_change))
631 start_byte = next_change;
632 immediate_quit = allow_quit;
634 /* START should never be after END. */
635 if (start_byte > ceiling_byte)
636 start_byte = ceiling_byte;
638 /* Now the text after start is an unknown region, and
639 next_change is the position of the next known region. */
640 ceiling_byte = min (next_change - 1, ceiling_byte);
643 /* The dumb loop can only scan text stored in contiguous
644 bytes. BUFFER_CEILING_OF returns the last character
645 position that is contiguous, so the ceiling is the
646 position after that. */
647 tem = BUFFER_CEILING_OF (start_byte);
648 ceiling_byte = min (tem, ceiling_byte);
651 /* The termination address of the dumb loop. */
652 register unsigned char *ceiling_addr
653 = BYTE_POS_ADDR (ceiling_byte) + 1;
654 register unsigned char *cursor
655 = BYTE_POS_ADDR (start_byte);
656 unsigned char *base = cursor;
658 while (cursor < ceiling_addr)
660 unsigned char *scan_start = cursor;
662 /* The dumb loop. */
663 while (*cursor != target && ++cursor < ceiling_addr)
666 /* If we're looking for newlines, cache the fact that
667 the region from start to cursor is free of them. */
668 if (target == '\n' && newline_cache)
669 know_region_cache (current_buffer, newline_cache,
670 start_byte + scan_start - base,
671 start_byte + cursor - base);
673 /* Did we find the target character? */
674 if (cursor < ceiling_addr)
676 if (--count == 0)
678 immediate_quit = 0;
679 return BYTE_TO_CHAR (start_byte + cursor - base + 1);
681 cursor++;
685 start = BYTE_TO_CHAR (start_byte + cursor - base);
688 else
689 while (start > end)
691 /* The last character to check before the next obstacle. */
692 int ceiling_byte = CHAR_TO_BYTE (end);
693 int start_byte = CHAR_TO_BYTE (start);
694 int tem;
696 /* Consult the newline cache, if appropriate. */
697 if (target == '\n' && newline_cache)
699 int next_change;
700 immediate_quit = 0;
701 while (region_cache_backward
702 (current_buffer, newline_cache, start_byte, &next_change))
703 start_byte = next_change;
704 immediate_quit = allow_quit;
706 /* Start should never be at or before end. */
707 if (start_byte <= ceiling_byte)
708 start_byte = ceiling_byte + 1;
710 /* Now the text before start is an unknown region, and
711 next_change is the position of the next known region. */
712 ceiling_byte = max (next_change, ceiling_byte);
715 /* Stop scanning before the gap. */
716 tem = BUFFER_FLOOR_OF (start_byte - 1);
717 ceiling_byte = max (tem, ceiling_byte);
720 /* The termination address of the dumb loop. */
721 register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
722 register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
723 unsigned char *base = cursor;
725 while (cursor >= ceiling_addr)
727 unsigned char *scan_start = cursor;
729 while (*cursor != target && --cursor >= ceiling_addr)
732 /* If we're looking for newlines, cache the fact that
733 the region from after the cursor to start is free of them. */
734 if (target == '\n' && newline_cache)
735 know_region_cache (current_buffer, newline_cache,
736 start_byte + cursor - base,
737 start_byte + scan_start - base);
739 /* Did we find the target character? */
740 if (cursor >= ceiling_addr)
742 if (++count >= 0)
744 immediate_quit = 0;
745 return BYTE_TO_CHAR (start_byte + cursor - base);
747 cursor--;
751 start = BYTE_TO_CHAR (start_byte + cursor - base);
755 immediate_quit = 0;
756 if (shortage != 0)
757 *shortage = count * direction;
758 return start;
761 /* Search for COUNT instances of a line boundary, which means either a
762 newline or (if selective display enabled) a carriage return.
763 Start at START. If COUNT is negative, search backwards.
765 We report the resulting position by calling TEMP_SET_PT_BOTH.
767 If we find COUNT instances. we position after (always after,
768 even if scanning backwards) the COUNTth match, and return 0.
770 If we don't find COUNT instances before reaching the end of the
771 buffer (or the beginning, if scanning backwards), we return
772 the number of line boundaries left unfound, and position at
773 the limit we bumped up against.
775 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
776 except in special cases. */
779 scan_newline (start, start_byte, limit, limit_byte, count, allow_quit)
780 int start, start_byte;
781 int limit, limit_byte;
782 register int count;
783 int allow_quit;
785 int direction = ((count > 0) ? 1 : -1);
787 register unsigned char *cursor;
788 unsigned char *base;
790 register int ceiling;
791 register unsigned char *ceiling_addr;
793 int old_immediate_quit = immediate_quit;
795 /* The code that follows is like scan_buffer
796 but checks for either newline or carriage return. */
798 if (allow_quit)
799 immediate_quit++;
801 start_byte = CHAR_TO_BYTE (start);
803 if (count > 0)
805 while (start_byte < limit_byte)
807 ceiling = BUFFER_CEILING_OF (start_byte);
808 ceiling = min (limit_byte - 1, ceiling);
809 ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
810 base = (cursor = BYTE_POS_ADDR (start_byte));
811 while (1)
813 while (*cursor != '\n' && ++cursor != ceiling_addr)
816 if (cursor != ceiling_addr)
818 if (--count == 0)
820 immediate_quit = old_immediate_quit;
821 start_byte = start_byte + cursor - base + 1;
822 start = BYTE_TO_CHAR (start_byte);
823 TEMP_SET_PT_BOTH (start, start_byte);
824 return 0;
826 else
827 if (++cursor == ceiling_addr)
828 break;
830 else
831 break;
833 start_byte += cursor - base;
836 else
838 while (start_byte > limit_byte)
840 ceiling = BUFFER_FLOOR_OF (start_byte - 1);
841 ceiling = max (limit_byte, ceiling);
842 ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
843 base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
844 while (1)
846 while (--cursor != ceiling_addr && *cursor != '\n')
849 if (cursor != ceiling_addr)
851 if (++count == 0)
853 immediate_quit = old_immediate_quit;
854 /* Return the position AFTER the match we found. */
855 start_byte = start_byte + cursor - base + 1;
856 start = BYTE_TO_CHAR (start_byte);
857 TEMP_SET_PT_BOTH (start, start_byte);
858 return 0;
861 else
862 break;
864 /* Here we add 1 to compensate for the last decrement
865 of CURSOR, which took it past the valid range. */
866 start_byte += cursor - base + 1;
870 TEMP_SET_PT_BOTH (limit, limit_byte);
871 immediate_quit = old_immediate_quit;
873 return count * direction;
877 find_next_newline_no_quit (from, cnt)
878 register int from, cnt;
880 return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0);
883 /* Like find_next_newline, but returns position before the newline,
884 not after, and only search up to TO. This isn't just
885 find_next_newline (...)-1, because you might hit TO. */
888 find_before_next_newline (from, to, cnt)
889 int from, to, cnt;
891 int shortage;
892 int pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);
894 if (shortage == 0)
895 pos--;
897 return pos;
900 /* Subroutines of Lisp buffer search functions. */
902 static Lisp_Object
903 search_command (string, bound, noerror, count, direction, RE, posix)
904 Lisp_Object string, bound, noerror, count;
905 int direction;
906 int RE;
907 int posix;
909 register int np;
910 int lim, lim_byte;
911 int n = direction;
913 if (!NILP (count))
915 CHECK_NUMBER (count);
916 n *= XINT (count);
919 CHECK_STRING (string);
920 if (NILP (bound))
922 if (n > 0)
923 lim = ZV, lim_byte = ZV_BYTE;
924 else
925 lim = BEGV, lim_byte = BEGV_BYTE;
927 else
929 CHECK_NUMBER_COERCE_MARKER (bound);
930 lim = XINT (bound);
931 if (n > 0 ? lim < PT : lim > PT)
932 error ("Invalid search bound (wrong side of point)");
933 if (lim > ZV)
934 lim = ZV, lim_byte = ZV_BYTE;
935 else if (lim < BEGV)
936 lim = BEGV, lim_byte = BEGV_BYTE;
937 else
938 lim_byte = CHAR_TO_BYTE (lim);
941 /* This is so set_image_of_range_1 in regex.c can find the EQV table. */
942 XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
943 = current_buffer->case_eqv_table;
945 np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
946 (!NILP (current_buffer->case_fold_search)
947 ? current_buffer->case_canon_table
948 : Qnil),
949 (!NILP (current_buffer->case_fold_search)
950 ? current_buffer->case_eqv_table
951 : Qnil),
952 posix);
953 if (np <= 0)
955 if (NILP (noerror))
956 xsignal1 (Qsearch_failed, string);
958 if (!EQ (noerror, Qt))
960 if (lim < BEGV || lim > ZV)
961 abort ();
962 SET_PT_BOTH (lim, lim_byte);
963 return Qnil;
964 #if 0 /* This would be clean, but maybe programs depend on
965 a value of nil here. */
966 np = lim;
967 #endif
969 else
970 return Qnil;
973 if (np < BEGV || np > ZV)
974 abort ();
976 SET_PT (np);
978 return make_number (np);
981 /* Return 1 if REGEXP it matches just one constant string. */
983 static int
984 trivial_regexp_p (regexp)
985 Lisp_Object regexp;
987 int len = SBYTES (regexp);
988 unsigned char *s = SDATA (regexp);
989 while (--len >= 0)
991 switch (*s++)
993 case '.': case '*': case '+': case '?': case '[': case '^': case '$':
994 return 0;
995 case '\\':
996 if (--len < 0)
997 return 0;
998 switch (*s++)
1000 case '|': case '(': case ')': case '`': case '\'': case 'b':
1001 case 'B': case '<': case '>': case 'w': case 'W': case 's':
1002 case 'S': case '=': case '{': case '}': case '_':
1003 case 'c': case 'C': /* for categoryspec and notcategoryspec */
1004 case '1': case '2': case '3': case '4': case '5':
1005 case '6': case '7': case '8': case '9':
1006 return 0;
1010 return 1;
1013 /* Search for the n'th occurrence of STRING in the current buffer,
1014 starting at position POS and stopping at position LIM,
1015 treating STRING as a literal string if RE is false or as
1016 a regular expression if RE is true.
1018 If N is positive, searching is forward and LIM must be greater than POS.
1019 If N is negative, searching is backward and LIM must be less than POS.
1021 Returns -x if x occurrences remain to be found (x > 0),
1022 or else the position at the beginning of the Nth occurrence
1023 (if searching backward) or the end (if searching forward).
1025 POSIX is nonzero if we want full backtracking (POSIX style)
1026 for this pattern. 0 means backtrack only enough to get a valid match. */
1028 #define TRANSLATE(out, trt, d) \
1029 do \
1031 if (! NILP (trt)) \
1033 Lisp_Object temp; \
1034 temp = Faref (trt, make_number (d)); \
1035 if (INTEGERP (temp)) \
1036 out = XINT (temp); \
1037 else \
1038 out = d; \
1040 else \
1041 out = d; \
1043 while (0)
1045 static int
1046 search_buffer (string, pos, pos_byte, lim, lim_byte, n,
1047 RE, trt, inverse_trt, posix)
1048 Lisp_Object string;
1049 int pos;
1050 int pos_byte;
1051 int lim;
1052 int lim_byte;
1053 int n;
1054 int RE;
1055 Lisp_Object trt;
1056 Lisp_Object inverse_trt;
1057 int posix;
1059 int len = SCHARS (string);
1060 int len_byte = SBYTES (string);
1061 register int i;
1063 if (running_asynch_code)
1064 save_search_regs ();
1066 /* Searching 0 times means don't move. */
1067 /* Null string is found at starting position. */
1068 if (len == 0 || n == 0)
1070 set_search_regs (pos_byte, 0);
1071 return pos;
1074 if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp)))
1076 unsigned char *p1, *p2;
1077 int s1, s2;
1078 struct re_pattern_buffer *bufp;
1080 bufp = compile_pattern (string, &search_regs, trt, posix,
1081 !NILP (current_buffer->enable_multibyte_characters));
1083 immediate_quit = 1; /* Quit immediately if user types ^G,
1084 because letting this function finish
1085 can take too long. */
1086 QUIT; /* Do a pending quit right away,
1087 to avoid paradoxical behavior */
1088 /* Get pointers and sizes of the two strings
1089 that make up the visible portion of the buffer. */
1091 p1 = BEGV_ADDR;
1092 s1 = GPT_BYTE - BEGV_BYTE;
1093 p2 = GAP_END_ADDR;
1094 s2 = ZV_BYTE - GPT_BYTE;
1095 if (s1 < 0)
1097 p2 = p1;
1098 s2 = ZV_BYTE - BEGV_BYTE;
1099 s1 = 0;
1101 if (s2 < 0)
1103 s1 = ZV_BYTE - BEGV_BYTE;
1104 s2 = 0;
1106 re_match_object = Qnil;
1108 while (n < 0)
1110 int val;
1111 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1112 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1113 &search_regs,
1114 /* Don't allow match past current point */
1115 pos_byte - BEGV_BYTE);
1116 if (val == -2)
1118 matcher_overflow ();
1120 if (val >= 0)
1122 pos_byte = search_regs.start[0] + BEGV_BYTE;
1123 for (i = 0; i < search_regs.num_regs; i++)
1124 if (search_regs.start[i] >= 0)
1126 search_regs.start[i]
1127 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1128 search_regs.end[i]
1129 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1131 XSETBUFFER (last_thing_searched, current_buffer);
1132 /* Set pos to the new position. */
1133 pos = search_regs.start[0];
1135 else
1137 immediate_quit = 0;
1138 return (n);
1140 n++;
1142 while (n > 0)
1144 int val;
1145 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1146 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1147 &search_regs,
1148 lim_byte - BEGV_BYTE);
1149 if (val == -2)
1151 matcher_overflow ();
1153 if (val >= 0)
1155 pos_byte = search_regs.end[0] + BEGV_BYTE;
1156 for (i = 0; i < search_regs.num_regs; i++)
1157 if (search_regs.start[i] >= 0)
1159 search_regs.start[i]
1160 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1161 search_regs.end[i]
1162 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1164 XSETBUFFER (last_thing_searched, current_buffer);
1165 pos = search_regs.end[0];
1167 else
1169 immediate_quit = 0;
1170 return (0 - n);
1172 n--;
1174 immediate_quit = 0;
1175 return (pos);
1177 else /* non-RE case */
1179 unsigned char *raw_pattern, *pat;
1180 int raw_pattern_size;
1181 int raw_pattern_size_byte;
1182 unsigned char *patbuf;
1183 int multibyte = !NILP (current_buffer->enable_multibyte_characters);
1184 unsigned char *base_pat;
1185 /* Set to positive if we find a non-ASCII char that need
1186 translation. Otherwise set to zero later. */
1187 int char_base = -1;
1188 int boyer_moore_ok = 1;
1190 /* MULTIBYTE says whether the text to be searched is multibyte.
1191 We must convert PATTERN to match that, or we will not really
1192 find things right. */
1194 if (multibyte == STRING_MULTIBYTE (string))
1196 raw_pattern = (unsigned char *) SDATA (string);
1197 raw_pattern_size = SCHARS (string);
1198 raw_pattern_size_byte = SBYTES (string);
1200 else if (multibyte)
1202 raw_pattern_size = SCHARS (string);
1203 raw_pattern_size_byte
1204 = count_size_as_multibyte (SDATA (string),
1205 raw_pattern_size);
1206 raw_pattern = (unsigned char *) alloca (raw_pattern_size_byte + 1);
1207 copy_text (SDATA (string), raw_pattern,
1208 SCHARS (string), 0, 1);
1210 else
1212 /* Converting multibyte to single-byte.
1214 ??? Perhaps this conversion should be done in a special way
1215 by subtracting nonascii-insert-offset from each non-ASCII char,
1216 so that only the multibyte chars which really correspond to
1217 the chosen single-byte character set can possibly match. */
1218 raw_pattern_size = SCHARS (string);
1219 raw_pattern_size_byte = SCHARS (string);
1220 raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
1221 copy_text (SDATA (string), raw_pattern,
1222 SBYTES (string), 1, 0);
1225 /* Copy and optionally translate the pattern. */
1226 len = raw_pattern_size;
1227 len_byte = raw_pattern_size_byte;
1228 patbuf = (unsigned char *) alloca (len * MAX_MULTIBYTE_LENGTH);
1229 pat = patbuf;
1230 base_pat = raw_pattern;
1231 if (multibyte)
1233 /* Fill patbuf by translated characters in STRING while
1234 checking if we can use boyer-moore search. If TRT is
1235 non-nil, we can use boyer-moore search only if TRT can be
1236 represented by the byte array of 256 elements. For that,
1237 all non-ASCII case-equivalents of all case-senstive
1238 characters in STRING must belong to the same charset and
1239 row. */
1241 while (--len >= 0)
1243 unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str;
1244 int c, translated, inverse;
1245 int in_charlen, charlen;
1247 /* If we got here and the RE flag is set, it's because we're
1248 dealing with a regexp known to be trivial, so the backslash
1249 just quotes the next character. */
1250 if (RE && *base_pat == '\\')
1252 len--;
1253 raw_pattern_size--;
1254 len_byte--;
1255 base_pat++;
1258 c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen);
1260 if (NILP (trt))
1262 str = base_pat;
1263 charlen = in_charlen;
1265 else
1267 /* Translate the character. */
1268 TRANSLATE (translated, trt, c);
1269 charlen = CHAR_STRING (translated, str_base);
1270 str = str_base;
1272 /* Check if C has any other case-equivalents. */
1273 TRANSLATE (inverse, inverse_trt, c);
1274 /* If so, check if we can use boyer-moore. */
1275 if (c != inverse && boyer_moore_ok)
1277 /* Check if all equivalents belong to the same
1278 group of characters. Note that the check of C
1279 itself is done by the last iteration. */
1280 int this_char_base = -1;
1282 while (boyer_moore_ok)
1284 if (ASCII_BYTE_P (inverse))
1286 if (this_char_base > 0)
1287 boyer_moore_ok = 0;
1288 else
1290 this_char_base = 0;
1291 if (char_base < 0)
1292 char_base = this_char_base;
1295 else if (CHAR_BYTE8_P (inverse))
1296 /* Boyer-moore search can't handle a
1297 translation of an eight-bit
1298 character. */
1299 boyer_moore_ok = 0;
1300 else if (this_char_base < 0)
1302 this_char_base = inverse & ~0x3F;
1303 if (char_base < 0)
1304 char_base = this_char_base;
1305 else if (char_base > 0
1306 && this_char_base != char_base)
1307 boyer_moore_ok = 0;
1309 else if ((inverse & ~0x3F) != this_char_base)
1310 boyer_moore_ok = 0;
1311 if (c == inverse)
1312 break;
1313 TRANSLATE (inverse, inverse_trt, inverse);
1317 if (char_base < 0)
1318 char_base = 0;
1320 /* Store this character into the translated pattern. */
1321 bcopy (str, pat, charlen);
1322 pat += charlen;
1323 base_pat += in_charlen;
1324 len_byte -= in_charlen;
1327 else
1329 /* Unibyte buffer. */
1330 char_base = 0;
1331 while (--len >= 0)
1333 int c, translated;
1335 /* If we got here and the RE flag is set, it's because we're
1336 dealing with a regexp known to be trivial, so the backslash
1337 just quotes the next character. */
1338 if (RE && *base_pat == '\\')
1340 len--;
1341 raw_pattern_size--;
1342 base_pat++;
1344 c = *base_pat++;
1345 TRANSLATE (translated, trt, c);
1346 *pat++ = translated;
1350 len_byte = pat - patbuf;
1351 len = raw_pattern_size;
1352 pat = base_pat = patbuf;
1354 if (boyer_moore_ok)
1355 return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
1356 pos, pos_byte, lim, lim_byte,
1357 char_base);
1358 else
1359 return simple_search (n, pat, len, len_byte, trt,
1360 pos, pos_byte, lim, lim_byte);
1364 /* Do a simple string search N times for the string PAT,
1365 whose length is LEN/LEN_BYTE,
1366 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1367 TRT is the translation table.
1369 Return the character position where the match is found.
1370 Otherwise, if M matches remained to be found, return -M.
1372 This kind of search works regardless of what is in PAT and
1373 regardless of what is in TRT. It is used in cases where
1374 boyer_moore cannot work. */
1376 static int
1377 simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
1378 int n;
1379 unsigned char *pat;
1380 int len, len_byte;
1381 Lisp_Object trt;
1382 int pos, pos_byte;
1383 int lim, lim_byte;
1385 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1386 int forward = n > 0;
1387 /* Number of buffer bytes matched. Note that this may be different
1388 from len_byte in a multibyte buffer. */
1389 int match_byte;
1391 if (lim > pos && multibyte)
1392 while (n > 0)
1394 while (1)
1396 /* Try matching at position POS. */
1397 int this_pos = pos;
1398 int this_pos_byte = pos_byte;
1399 int this_len = len;
1400 int this_len_byte = len_byte;
1401 unsigned char *p = pat;
1402 if (pos + len > lim || pos_byte + len_byte > lim_byte)
1403 goto stop;
1405 while (this_len > 0)
1407 int charlen, buf_charlen;
1408 int pat_ch, buf_ch;
1410 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1411 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1412 ZV_BYTE - this_pos_byte,
1413 buf_charlen);
1414 TRANSLATE (buf_ch, trt, buf_ch);
1416 if (buf_ch != pat_ch)
1417 break;
1419 this_len_byte -= charlen;
1420 this_len--;
1421 p += charlen;
1423 this_pos_byte += buf_charlen;
1424 this_pos++;
1427 if (this_len == 0)
1429 match_byte = this_pos_byte - pos_byte;
1430 pos += len;
1431 pos_byte += match_byte;
1432 break;
1435 INC_BOTH (pos, pos_byte);
1438 n--;
1440 else if (lim > pos)
1441 while (n > 0)
1443 while (1)
1445 /* Try matching at position POS. */
1446 int this_pos = pos;
1447 int this_len = len;
1448 unsigned char *p = pat;
1450 if (pos + len > lim)
1451 goto stop;
1453 while (this_len > 0)
1455 int pat_ch = *p++;
1456 int buf_ch = FETCH_BYTE (this_pos);
1457 TRANSLATE (buf_ch, trt, buf_ch);
1459 if (buf_ch != pat_ch)
1460 break;
1462 this_len--;
1463 this_pos++;
1466 if (this_len == 0)
1468 match_byte = len;
1469 pos += len;
1470 break;
1473 pos++;
1476 n--;
1478 /* Backwards search. */
1479 else if (lim < pos && multibyte)
1480 while (n < 0)
1482 while (1)
1484 /* Try matching at position POS. */
1485 int this_pos = pos - len;
1486 int this_pos_byte;
1487 int this_len = len;
1488 int this_len_byte = len_byte;
1489 unsigned char *p = pat;
1491 if (this_pos < lim || (pos_byte - len_byte) < lim_byte)
1492 goto stop;
1493 this_pos_byte = CHAR_TO_BYTE (this_pos);
1494 match_byte = pos_byte - this_pos_byte;
1496 while (this_len > 0)
1498 int charlen, buf_charlen;
1499 int pat_ch, buf_ch;
1501 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1502 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1503 ZV_BYTE - this_pos_byte,
1504 buf_charlen);
1505 TRANSLATE (buf_ch, trt, buf_ch);
1507 if (buf_ch != pat_ch)
1508 break;
1510 this_len_byte -= charlen;
1511 this_len--;
1512 p += charlen;
1513 this_pos_byte += buf_charlen;
1514 this_pos++;
1517 if (this_len == 0)
1519 pos -= len;
1520 pos_byte -= match_byte;
1521 break;
1524 DEC_BOTH (pos, pos_byte);
1527 n++;
1529 else if (lim < pos)
1530 while (n < 0)
1532 while (1)
1534 /* Try matching at position POS. */
1535 int this_pos = pos - len;
1536 int this_len = len;
1537 unsigned char *p = pat;
1539 if (this_pos < lim)
1540 goto stop;
1542 while (this_len > 0)
1544 int pat_ch = *p++;
1545 int buf_ch = FETCH_BYTE (this_pos);
1546 TRANSLATE (buf_ch, trt, buf_ch);
1548 if (buf_ch != pat_ch)
1549 break;
1550 this_len--;
1551 this_pos++;
1554 if (this_len == 0)
1556 match_byte = len;
1557 pos -= len;
1558 break;
1561 pos--;
1564 n++;
1567 stop:
1568 if (n == 0)
1570 if (forward)
1571 set_search_regs ((multibyte ? pos_byte : pos) - match_byte, match_byte);
1572 else
1573 set_search_regs (multibyte ? pos_byte : pos, match_byte);
1575 return pos;
1577 else if (n > 0)
1578 return -n;
1579 else
1580 return n;
1583 /* Do Boyer-Moore search N times for the string BASE_PAT,
1584 whose length is LEN/LEN_BYTE,
1585 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1586 DIRECTION says which direction we search in.
1587 TRT and INVERSE_TRT are translation tables.
1588 Characters in PAT are already translated by TRT.
1590 This kind of search works if all the characters in BASE_PAT that
1591 have nontrivial translation are the same aside from the last byte.
1592 This makes it possible to translate just the last byte of a
1593 character, and do so after just a simple test of the context.
1594 CHAR_BASE is nonzero iff there is such a non-ASCII character.
1596 If that criterion is not satisfied, do not call this function. */
1598 static int
1599 boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
1600 pos, pos_byte, lim, lim_byte, char_base)
1601 int n;
1602 unsigned char *base_pat;
1603 int len, len_byte;
1604 Lisp_Object trt;
1605 Lisp_Object inverse_trt;
1606 int pos, pos_byte;
1607 int lim, lim_byte;
1608 int char_base;
1610 int direction = ((n > 0) ? 1 : -1);
1611 register int dirlen;
1612 int infinity, limit, stride_for_teases = 0;
1613 register int *BM_tab;
1614 int *BM_tab_base;
1615 register unsigned char *cursor, *p_limit;
1616 register int i, j;
1617 unsigned char *pat, *pat_end;
1618 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1620 unsigned char simple_translate[0400];
1621 /* These are set to the preceding bytes of a byte to be translated
1622 if char_base is nonzero. As the maximum byte length of a
1623 multibyte character is 5, we have to check at most four previous
1624 bytes. */
1625 int translate_prev_byte1 = 0;
1626 int translate_prev_byte2 = 0;
1627 int translate_prev_byte3 = 0;
1628 int translate_prev_byte4 = 0;
1630 #ifdef C_ALLOCA
1631 int BM_tab_space[0400];
1632 BM_tab = &BM_tab_space[0];
1633 #else
1634 BM_tab = (int *) alloca (0400 * sizeof (int));
1635 #endif
1636 /* The general approach is that we are going to maintain that we know */
1637 /* the first (closest to the present position, in whatever direction */
1638 /* we're searching) character that could possibly be the last */
1639 /* (furthest from present position) character of a valid match. We */
1640 /* advance the state of our knowledge by looking at that character */
1641 /* and seeing whether it indeed matches the last character of the */
1642 /* pattern. If it does, we take a closer look. If it does not, we */
1643 /* move our pointer (to putative last characters) as far as is */
1644 /* logically possible. This amount of movement, which I call a */
1645 /* stride, will be the length of the pattern if the actual character */
1646 /* appears nowhere in the pattern, otherwise it will be the distance */
1647 /* from the last occurrence of that character to the end of the */
1648 /* pattern. */
1649 /* As a coding trick, an enormous stride is coded into the table for */
1650 /* characters that match the last character. This allows use of only */
1651 /* a single test, a test for having gone past the end of the */
1652 /* permissible match region, to test for both possible matches (when */
1653 /* the stride goes past the end immediately) and failure to */
1654 /* match (where you get nudged past the end one stride at a time). */
1656 /* Here we make a "mickey mouse" BM table. The stride of the search */
1657 /* is determined only by the last character of the putative match. */
1658 /* If that character does not match, we will stride the proper */
1659 /* distance to propose a match that superimposes it on the last */
1660 /* instance of a character that matches it (per trt), or misses */
1661 /* it entirely if there is none. */
1663 dirlen = len_byte * direction;
1664 infinity = dirlen - (lim_byte + pos_byte + len_byte + len_byte) * direction;
1666 /* Record position after the end of the pattern. */
1667 pat_end = base_pat + len_byte;
1668 /* BASE_PAT points to a character that we start scanning from.
1669 It is the first character in a forward search,
1670 the last character in a backward search. */
1671 if (direction < 0)
1672 base_pat = pat_end - 1;
1674 BM_tab_base = BM_tab;
1675 BM_tab += 0400;
1676 j = dirlen; /* to get it in a register */
1677 /* A character that does not appear in the pattern induces a */
1678 /* stride equal to the pattern length. */
1679 while (BM_tab_base != BM_tab)
1681 *--BM_tab = j;
1682 *--BM_tab = j;
1683 *--BM_tab = j;
1684 *--BM_tab = j;
1687 /* We use this for translation, instead of TRT itself.
1688 We fill this in to handle the characters that actually
1689 occur in the pattern. Others don't matter anyway! */
1690 bzero (simple_translate, sizeof simple_translate);
1691 for (i = 0; i < 0400; i++)
1692 simple_translate[i] = i;
1694 if (char_base)
1696 /* Setup translate_prev_byte1/2/3/4 from CHAR_BASE. Only a
1697 byte following them are the target of translation. */
1698 unsigned char str[MAX_MULTIBYTE_LENGTH];
1699 int len = CHAR_STRING (char_base, str);
1701 translate_prev_byte1 = str[len - 2];
1702 if (len > 2)
1704 translate_prev_byte2 = str[len - 3];
1705 if (len > 3)
1707 translate_prev_byte3 = str[len - 4];
1708 if (len > 4)
1709 translate_prev_byte4 = str[len - 5];
1714 i = 0;
1715 while (i != infinity)
1717 unsigned char *ptr = base_pat + i;
1718 i += direction;
1719 if (i == dirlen)
1720 i = infinity;
1721 if (! NILP (trt))
1723 /* If the byte currently looking at is the last of a
1724 character to check case-equivalents, set CH to that
1725 character. An ASCII character and a non-ASCII character
1726 matching with CHAR_BASE are to be checked. */
1727 int ch = -1;
1729 if (ASCII_BYTE_P (*ptr) || ! multibyte)
1730 ch = *ptr;
1731 else if (char_base
1732 && ((pat_end - ptr) == 1 || CHAR_HEAD_P (ptr[1])))
1734 unsigned char *charstart = ptr - 1;
1736 while (! (CHAR_HEAD_P (*charstart)))
1737 charstart--;
1738 ch = STRING_CHAR (charstart, ptr - charstart + 1);
1739 if (char_base != (ch & ~0x3F))
1740 ch = -1;
1743 if (ch >= 0400)
1744 j = (ch & 0x3F) | 0200;
1745 else
1746 j = *ptr;
1748 if (i == infinity)
1749 stride_for_teases = BM_tab[j];
1751 BM_tab[j] = dirlen - i;
1752 /* A translation table is accompanied by its inverse -- see */
1753 /* comment following downcase_table for details */
1754 if (ch >= 0)
1756 int starting_ch = ch;
1757 int starting_j = j;
1759 while (1)
1761 TRANSLATE (ch, inverse_trt, ch);
1762 if (ch >= 0400)
1763 j = (ch & 0x3F) | 0200;
1764 else
1765 j = ch;
1767 /* For all the characters that map into CH,
1768 set up simple_translate to map the last byte
1769 into STARTING_J. */
1770 simple_translate[j] = starting_j;
1771 if (ch == starting_ch)
1772 break;
1773 BM_tab[j] = dirlen - i;
1777 else
1779 j = *ptr;
1781 if (i == infinity)
1782 stride_for_teases = BM_tab[j];
1783 BM_tab[j] = dirlen - i;
1785 /* stride_for_teases tells how much to stride if we get a */
1786 /* match on the far character but are subsequently */
1787 /* disappointed, by recording what the stride would have been */
1788 /* for that character if the last character had been */
1789 /* different. */
1791 infinity = dirlen - infinity;
1792 pos_byte += dirlen - ((direction > 0) ? direction : 0);
1793 /* loop invariant - POS_BYTE points at where last char (first
1794 char if reverse) of pattern would align in a possible match. */
1795 while (n != 0)
1797 int tail_end;
1798 unsigned char *tail_end_ptr;
1800 /* It's been reported that some (broken) compiler thinks that
1801 Boolean expressions in an arithmetic context are unsigned.
1802 Using an explicit ?1:0 prevents this. */
1803 if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1804 < 0)
1805 return (n * (0 - direction));
1806 /* First we do the part we can by pointers (maybe nothing) */
1807 QUIT;
1808 pat = base_pat;
1809 limit = pos_byte - dirlen + direction;
1810 if (direction > 0)
1812 limit = BUFFER_CEILING_OF (limit);
1813 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1814 can take on without hitting edge of buffer or the gap. */
1815 limit = min (limit, pos_byte + 20000);
1816 limit = min (limit, lim_byte - 1);
1818 else
1820 limit = BUFFER_FLOOR_OF (limit);
1821 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1822 can take on without hitting edge of buffer or the gap. */
1823 limit = max (limit, pos_byte - 20000);
1824 limit = max (limit, lim_byte);
1826 tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1827 tail_end_ptr = BYTE_POS_ADDR (tail_end);
1829 if ((limit - pos_byte) * direction > 20)
1831 unsigned char *p2;
1833 p_limit = BYTE_POS_ADDR (limit);
1834 p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1835 /* In this loop, pos + cursor - p2 is the surrogate for pos */
1836 while (1) /* use one cursor setting as long as i can */
1838 if (direction > 0) /* worth duplicating */
1840 /* Use signed comparison if appropriate
1841 to make cursor+infinity sure to be > p_limit.
1842 Assuming that the buffer lies in a range of addresses
1843 that are all "positive" (as ints) or all "negative",
1844 either kind of comparison will work as long
1845 as we don't step by infinity. So pick the kind
1846 that works when we do step by infinity. */
1847 if ((EMACS_INT) (p_limit + infinity) > (EMACS_INT) p_limit)
1848 while ((EMACS_INT) cursor <= (EMACS_INT) p_limit)
1849 cursor += BM_tab[*cursor];
1850 else
1851 while ((EMACS_UINT) cursor <= (EMACS_UINT) p_limit)
1852 cursor += BM_tab[*cursor];
1854 else
1856 if ((EMACS_INT) (p_limit + infinity) < (EMACS_INT) p_limit)
1857 while ((EMACS_INT) cursor >= (EMACS_INT) p_limit)
1858 cursor += BM_tab[*cursor];
1859 else
1860 while ((EMACS_UINT) cursor >= (EMACS_UINT) p_limit)
1861 cursor += BM_tab[*cursor];
1863 /* If you are here, cursor is beyond the end of the searched region. */
1864 /* This can happen if you match on the far character of the pattern, */
1865 /* because the "stride" of that character is infinity, a number able */
1866 /* to throw you well beyond the end of the search. It can also */
1867 /* happen if you fail to match within the permitted region and would */
1868 /* otherwise try a character beyond that region */
1869 if ((cursor - p_limit) * direction <= len_byte)
1870 break; /* a small overrun is genuine */
1871 cursor -= infinity; /* large overrun = hit */
1872 i = dirlen - direction;
1873 if (! NILP (trt))
1875 while ((i -= direction) + direction != 0)
1877 int ch;
1878 cursor -= direction;
1879 /* Translate only the last byte of a character. */
1880 if (! multibyte
1881 || ((cursor == tail_end_ptr
1882 || CHAR_HEAD_P (cursor[1]))
1883 && (CHAR_HEAD_P (cursor[0])
1884 /* Check if this is the last byte of
1885 a translable character. */
1886 || (translate_prev_byte1 == cursor[-1]
1887 && (CHAR_HEAD_P (translate_prev_byte1)
1888 || (translate_prev_byte2 == cursor[-2]
1889 && (CHAR_HEAD_P (translate_prev_byte2)
1890 || (translate_prev_byte3 == cursor[-3]))))))))
1891 ch = simple_translate[*cursor];
1892 else
1893 ch = *cursor;
1894 if (pat[i] != ch)
1895 break;
1898 else
1900 while ((i -= direction) + direction != 0)
1902 cursor -= direction;
1903 if (pat[i] != *cursor)
1904 break;
1907 cursor += dirlen - i - direction; /* fix cursor */
1908 if (i + direction == 0)
1910 int position;
1912 cursor -= direction;
1914 position = pos_byte + cursor - p2 + ((direction > 0)
1915 ? 1 - len_byte : 0);
1916 set_search_regs (position, len_byte);
1918 if ((n -= direction) != 0)
1919 cursor += dirlen; /* to resume search */
1920 else
1921 return ((direction > 0)
1922 ? search_regs.end[0] : search_regs.start[0]);
1924 else
1925 cursor += stride_for_teases; /* <sigh> we lose - */
1927 pos_byte += cursor - p2;
1929 else
1930 /* Now we'll pick up a clump that has to be done the hard */
1931 /* way because it covers a discontinuity */
1933 limit = ((direction > 0)
1934 ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
1935 : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
1936 limit = ((direction > 0)
1937 ? min (limit + len_byte, lim_byte - 1)
1938 : max (limit - len_byte, lim_byte));
1939 /* LIMIT is now the last value POS_BYTE can have
1940 and still be valid for a possible match. */
1941 while (1)
1943 /* This loop can be coded for space rather than */
1944 /* speed because it will usually run only once. */
1945 /* (the reach is at most len + 21, and typically */
1946 /* does not exceed len) */
1947 while ((limit - pos_byte) * direction >= 0)
1948 pos_byte += BM_tab[FETCH_BYTE (pos_byte)];
1949 /* now run the same tests to distinguish going off the */
1950 /* end, a match or a phony match. */
1951 if ((pos_byte - limit) * direction <= len_byte)
1952 break; /* ran off the end */
1953 /* Found what might be a match.
1954 Set POS_BYTE back to last (first if reverse) pos. */
1955 pos_byte -= infinity;
1956 i = dirlen - direction;
1957 while ((i -= direction) + direction != 0)
1959 int ch;
1960 unsigned char *ptr;
1961 pos_byte -= direction;
1962 ptr = BYTE_POS_ADDR (pos_byte);
1963 /* Translate only the last byte of a character. */
1964 if (! multibyte
1965 || ((ptr == tail_end_ptr
1966 || CHAR_HEAD_P (ptr[1]))
1967 && (CHAR_HEAD_P (ptr[0])
1968 /* Check if this is the last byte of a
1969 translable character. */
1970 || (translate_prev_byte1 == ptr[-1]
1971 && (CHAR_HEAD_P (translate_prev_byte1)
1972 || (translate_prev_byte2 == ptr[-2]
1973 && (CHAR_HEAD_P (translate_prev_byte2)
1974 || translate_prev_byte3 == ptr[-3])))))))
1975 ch = simple_translate[*ptr];
1976 else
1977 ch = *ptr;
1978 if (pat[i] != ch)
1979 break;
1981 /* Above loop has moved POS_BYTE part or all the way
1982 back to the first pos (last pos if reverse).
1983 Set it once again at the last (first if reverse) char. */
1984 pos_byte += dirlen - i- direction;
1985 if (i + direction == 0)
1987 int position;
1988 pos_byte -= direction;
1990 position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
1992 set_search_regs (position, len_byte);
1994 if ((n -= direction) != 0)
1995 pos_byte += dirlen; /* to resume search */
1996 else
1997 return ((direction > 0)
1998 ? search_regs.end[0] : search_regs.start[0]);
2000 else
2001 pos_byte += stride_for_teases;
2004 /* We have done one clump. Can we continue? */
2005 if ((lim_byte - pos_byte) * direction < 0)
2006 return ((0 - n) * direction);
2008 return BYTE_TO_CHAR (pos_byte);
2011 /* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
2012 for the overall match just found in the current buffer.
2013 Also clear out the match data for registers 1 and up. */
2015 static void
2016 set_search_regs (beg_byte, nbytes)
2017 int beg_byte, nbytes;
2019 int i;
2021 /* Make sure we have registers in which to store
2022 the match position. */
2023 if (search_regs.num_regs == 0)
2025 search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
2026 search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
2027 search_regs.num_regs = 2;
2030 /* Clear out the other registers. */
2031 for (i = 1; i < search_regs.num_regs; i++)
2033 search_regs.start[i] = -1;
2034 search_regs.end[i] = -1;
2037 search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
2038 search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
2039 XSETBUFFER (last_thing_searched, current_buffer);
2042 /* Given a string of words separated by word delimiters,
2043 compute a regexp that matches those exact words
2044 separated by arbitrary punctuation. */
2046 static Lisp_Object
2047 wordify (string)
2048 Lisp_Object string;
2050 register unsigned char *p, *o;
2051 register int i, i_byte, len, punct_count = 0, word_count = 0;
2052 Lisp_Object val;
2053 int prev_c = 0;
2054 int adjust;
2056 CHECK_STRING (string);
2057 p = SDATA (string);
2058 len = SCHARS (string);
2060 for (i = 0, i_byte = 0; i < len; )
2062 int c;
2064 FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, i, i_byte);
2066 if (SYNTAX (c) != Sword)
2068 punct_count++;
2069 if (i > 0 && SYNTAX (prev_c) == Sword)
2070 word_count++;
2073 prev_c = c;
2076 if (SYNTAX (prev_c) == Sword)
2077 word_count++;
2078 if (!word_count)
2079 return empty_unibyte_string;
2081 adjust = - punct_count + 5 * (word_count - 1) + 4;
2082 if (STRING_MULTIBYTE (string))
2083 val = make_uninit_multibyte_string (len + adjust,
2084 SBYTES (string)
2085 + adjust);
2086 else
2087 val = make_uninit_string (len + adjust);
2089 o = SDATA (val);
2090 *o++ = '\\';
2091 *o++ = 'b';
2092 prev_c = 0;
2094 for (i = 0, i_byte = 0; i < len; )
2096 int c;
2097 int i_byte_orig = i_byte;
2099 FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, i, i_byte);
2101 if (SYNTAX (c) == Sword)
2103 bcopy (SDATA (string) + i_byte_orig, o,
2104 i_byte - i_byte_orig);
2105 o += i_byte - i_byte_orig;
2107 else if (i > 0 && SYNTAX (prev_c) == Sword && --word_count)
2109 *o++ = '\\';
2110 *o++ = 'W';
2111 *o++ = '\\';
2112 *o++ = 'W';
2113 *o++ = '*';
2116 prev_c = c;
2119 *o++ = '\\';
2120 *o++ = 'b';
2122 return val;
2125 DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
2126 "MSearch backward: ",
2127 doc: /* Search backward from point for STRING.
2128 Set point to the beginning of the occurrence found, and return point.
2129 An optional second argument bounds the search; it is a buffer position.
2130 The match found must not extend before that position.
2131 Optional third argument, if t, means if fail just return nil (no error).
2132 If not nil and not t, position at limit of search and return nil.
2133 Optional fourth argument is repeat count--search for successive occurrences.
2135 Search case-sensitivity is determined by the value of the variable
2136 `case-fold-search', which see.
2138 See also the functions `match-beginning', `match-end' and `replace-match'. */)
2139 (string, bound, noerror, count)
2140 Lisp_Object string, bound, noerror, count;
2142 return search_command (string, bound, noerror, count, -1, 0, 0);
2145 DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
2146 doc: /* Search forward from point for STRING.
2147 Set point to the end of the occurrence found, and return point.
2148 An optional second argument bounds the search; it is a buffer position.
2149 The match found must not extend after that position. A value of nil is
2150 equivalent to (point-max).
2151 Optional third argument, if t, means if fail just return nil (no error).
2152 If not nil and not t, move to limit of search and return nil.
2153 Optional fourth argument is repeat count--search for successive occurrences.
2155 Search case-sensitivity is determined by the value of the variable
2156 `case-fold-search', which see.
2158 See also the functions `match-beginning', `match-end' and `replace-match'. */)
2159 (string, bound, noerror, count)
2160 Lisp_Object string, bound, noerror, count;
2162 return search_command (string, bound, noerror, count, 1, 0, 0);
2165 DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
2166 "sWord search backward: ",
2167 doc: /* Search backward from point for STRING, ignoring differences in punctuation.
2168 Set point to the beginning of the occurrence found, and return point.
2169 An optional second argument bounds the search; it is a buffer position.
2170 The match found must not extend before that position.
2171 Optional third argument, if t, means if fail just return nil (no error).
2172 If not nil and not t, move to limit of search and return nil.
2173 Optional fourth argument is repeat count--search for successive occurrences. */)
2174 (string, bound, noerror, count)
2175 Lisp_Object string, bound, noerror, count;
2177 return search_command (wordify (string), bound, noerror, count, -1, 1, 0);
2180 DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
2181 "sWord search: ",
2182 doc: /* Search forward from point for STRING, ignoring differences in punctuation.
2183 Set point to the end of the occurrence found, and return point.
2184 An optional second argument bounds the search; it is a buffer position.
2185 The match found must not extend after that position.
2186 Optional third argument, if t, means if fail just return nil (no error).
2187 If not nil and not t, move to limit of search and return nil.
2188 Optional fourth argument is repeat count--search for successive occurrences. */)
2189 (string, bound, noerror, count)
2190 Lisp_Object string, bound, noerror, count;
2192 return search_command (wordify (string), bound, noerror, count, 1, 1, 0);
2195 DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
2196 "sRE search backward: ",
2197 doc: /* Search backward from point for match for regular expression REGEXP.
2198 Set point to the beginning of the match, and return point.
2199 The match found is the one starting last in the buffer
2200 and yet ending before the origin of the search.
2201 An optional second argument bounds the search; it is a buffer position.
2202 The match found must start at or after that position.
2203 Optional third argument, if t, means if fail just return nil (no error).
2204 If not nil and not t, move to limit of search and return nil.
2205 Optional fourth argument is repeat count--search for successive occurrences.
2206 See also the functions `match-beginning', `match-end', `match-string',
2207 and `replace-match'. */)
2208 (regexp, bound, noerror, count)
2209 Lisp_Object regexp, bound, noerror, count;
2211 return search_command (regexp, bound, noerror, count, -1, 1, 0);
2214 DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
2215 "sRE search: ",
2216 doc: /* Search forward from point for regular expression REGEXP.
2217 Set point to the end of the occurrence found, and return point.
2218 An optional second argument bounds the search; it is a buffer position.
2219 The match found must not extend after that position.
2220 Optional third argument, if t, means if fail just return nil (no error).
2221 If not nil and not t, move to limit of search and return nil.
2222 Optional fourth argument is repeat count--search for successive occurrences.
2223 See also the functions `match-beginning', `match-end', `match-string',
2224 and `replace-match'. */)
2225 (regexp, bound, noerror, count)
2226 Lisp_Object regexp, bound, noerror, count;
2228 return search_command (regexp, bound, noerror, count, 1, 1, 0);
2231 DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
2232 "sPosix search backward: ",
2233 doc: /* Search backward from point for match for regular expression REGEXP.
2234 Find the longest match in accord with Posix regular expression rules.
2235 Set point to the beginning of the match, and return point.
2236 The match found is the one starting last in the buffer
2237 and yet ending before the origin of the search.
2238 An optional second argument bounds the search; it is a buffer position.
2239 The match found must start at or after that position.
2240 Optional third argument, if t, means if fail just return nil (no error).
2241 If not nil and not t, move to limit of search and return nil.
2242 Optional fourth argument is repeat count--search for successive occurrences.
2243 See also the functions `match-beginning', `match-end', `match-string',
2244 and `replace-match'. */)
2245 (regexp, bound, noerror, count)
2246 Lisp_Object regexp, bound, noerror, count;
2248 return search_command (regexp, bound, noerror, count, -1, 1, 1);
2251 DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
2252 "sPosix search: ",
2253 doc: /* Search forward from point for regular expression REGEXP.
2254 Find the longest match in accord with Posix regular expression rules.
2255 Set point to the end of the occurrence found, and return point.
2256 An optional second argument bounds the search; it is a buffer position.
2257 The match found must not extend after that position.
2258 Optional third argument, if t, means if fail just return nil (no error).
2259 If not nil and not t, move to limit of search and return nil.
2260 Optional fourth argument is repeat count--search for successive occurrences.
2261 See also the functions `match-beginning', `match-end', `match-string',
2262 and `replace-match'. */)
2263 (regexp, bound, noerror, count)
2264 Lisp_Object regexp, bound, noerror, count;
2266 return search_command (regexp, bound, noerror, count, 1, 1, 1);
2269 DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
2270 doc: /* Replace text matched by last search with NEWTEXT.
2271 Leave point at the end of the replacement text.
2273 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
2274 Otherwise maybe capitalize the whole text, or maybe just word initials,
2275 based on the replaced text.
2276 If the replaced text has only capital letters
2277 and has at least one multiletter word, convert NEWTEXT to all caps.
2278 Otherwise if all words are capitalized in the replaced text,
2279 capitalize each word in NEWTEXT.
2281 If third arg LITERAL is non-nil, insert NEWTEXT literally.
2282 Otherwise treat `\\' as special:
2283 `\\&' in NEWTEXT means substitute original matched text.
2284 `\\N' means substitute what matched the Nth `\\(...\\)'.
2285 If Nth parens didn't match, substitute nothing.
2286 `\\\\' means insert one `\\'.
2287 Case conversion does not apply to these substitutions.
2289 FIXEDCASE and LITERAL are optional arguments.
2291 The optional fourth argument STRING can be a string to modify.
2292 This is meaningful when the previous match was done against STRING,
2293 using `string-match'. When used this way, `replace-match'
2294 creates and returns a new string made by copying STRING and replacing
2295 the part of STRING that was matched.
2297 The optional fifth argument SUBEXP specifies a subexpression;
2298 it says to replace just that subexpression with NEWTEXT,
2299 rather than replacing the entire matched text.
2300 This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
2301 `\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
2302 NEWTEXT in place of subexp N.
2303 This is useful only after a regular expression search or match,
2304 since only regular expressions have distinguished subexpressions. */)
2305 (newtext, fixedcase, literal, string, subexp)
2306 Lisp_Object newtext, fixedcase, literal, string, subexp;
2308 enum { nochange, all_caps, cap_initial } case_action;
2309 register int pos, pos_byte;
2310 int some_multiletter_word;
2311 int some_lowercase;
2312 int some_uppercase;
2313 int some_nonuppercase_initial;
2314 register int c, prevc;
2315 int sub;
2316 int opoint, newpoint;
2318 CHECK_STRING (newtext);
2320 if (! NILP (string))
2321 CHECK_STRING (string);
2323 case_action = nochange; /* We tried an initialization */
2324 /* but some C compilers blew it */
2326 if (search_regs.num_regs <= 0)
2327 error ("`replace-match' called before any match found");
2329 if (NILP (subexp))
2330 sub = 0;
2331 else
2333 CHECK_NUMBER (subexp);
2334 sub = XINT (subexp);
2335 if (sub < 0 || sub >= search_regs.num_regs)
2336 args_out_of_range (subexp, make_number (search_regs.num_regs));
2339 if (NILP (string))
2341 if (search_regs.start[sub] < BEGV
2342 || search_regs.start[sub] > search_regs.end[sub]
2343 || search_regs.end[sub] > ZV)
2344 args_out_of_range (make_number (search_regs.start[sub]),
2345 make_number (search_regs.end[sub]));
2347 else
2349 if (search_regs.start[sub] < 0
2350 || search_regs.start[sub] > search_regs.end[sub]
2351 || search_regs.end[sub] > SCHARS (string))
2352 args_out_of_range (make_number (search_regs.start[sub]),
2353 make_number (search_regs.end[sub]));
2356 if (NILP (fixedcase))
2358 /* Decide how to casify by examining the matched text. */
2359 int last;
2361 pos = search_regs.start[sub];
2362 last = search_regs.end[sub];
2364 if (NILP (string))
2365 pos_byte = CHAR_TO_BYTE (pos);
2366 else
2367 pos_byte = string_char_to_byte (string, pos);
2369 prevc = '\n';
2370 case_action = all_caps;
2372 /* some_multiletter_word is set nonzero if any original word
2373 is more than one letter long. */
2374 some_multiletter_word = 0;
2375 some_lowercase = 0;
2376 some_nonuppercase_initial = 0;
2377 some_uppercase = 0;
2379 while (pos < last)
2381 if (NILP (string))
2383 c = FETCH_CHAR_AS_MULTIBYTE (pos_byte);
2384 INC_BOTH (pos, pos_byte);
2386 else
2387 FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, pos, pos_byte);
2389 if (LOWERCASEP (c))
2391 /* Cannot be all caps if any original char is lower case */
2393 some_lowercase = 1;
2394 if (SYNTAX (prevc) != Sword)
2395 some_nonuppercase_initial = 1;
2396 else
2397 some_multiletter_word = 1;
2399 else if (UPPERCASEP (c))
2401 some_uppercase = 1;
2402 if (SYNTAX (prevc) != Sword)
2404 else
2405 some_multiletter_word = 1;
2407 else
2409 /* If the initial is a caseless word constituent,
2410 treat that like a lowercase initial. */
2411 if (SYNTAX (prevc) != Sword)
2412 some_nonuppercase_initial = 1;
2415 prevc = c;
2418 /* Convert to all caps if the old text is all caps
2419 and has at least one multiletter word. */
2420 if (! some_lowercase && some_multiletter_word)
2421 case_action = all_caps;
2422 /* Capitalize each word, if the old text has all capitalized words. */
2423 else if (!some_nonuppercase_initial && some_multiletter_word)
2424 case_action = cap_initial;
2425 else if (!some_nonuppercase_initial && some_uppercase)
2426 /* Should x -> yz, operating on X, give Yz or YZ?
2427 We'll assume the latter. */
2428 case_action = all_caps;
2429 else
2430 case_action = nochange;
2433 /* Do replacement in a string. */
2434 if (!NILP (string))
2436 Lisp_Object before, after;
2438 before = Fsubstring (string, make_number (0),
2439 make_number (search_regs.start[sub]));
2440 after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
2442 /* Substitute parts of the match into NEWTEXT
2443 if desired. */
2444 if (NILP (literal))
2446 int lastpos = 0;
2447 int lastpos_byte = 0;
2448 /* We build up the substituted string in ACCUM. */
2449 Lisp_Object accum;
2450 Lisp_Object middle;
2451 int length = SBYTES (newtext);
2453 accum = Qnil;
2455 for (pos_byte = 0, pos = 0; pos_byte < length;)
2457 int substart = -1;
2458 int subend = 0;
2459 int delbackslash = 0;
2461 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2463 if (c == '\\')
2465 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2467 if (c == '&')
2469 substart = search_regs.start[sub];
2470 subend = search_regs.end[sub];
2472 else if (c >= '1' && c <= '9')
2474 if (search_regs.start[c - '0'] >= 0
2475 && c <= search_regs.num_regs + '0')
2477 substart = search_regs.start[c - '0'];
2478 subend = search_regs.end[c - '0'];
2480 else
2482 /* If that subexp did not match,
2483 replace \\N with nothing. */
2484 substart = 0;
2485 subend = 0;
2488 else if (c == '\\')
2489 delbackslash = 1;
2490 else
2491 error ("Invalid use of `\\' in replacement text");
2493 if (substart >= 0)
2495 if (pos - 2 != lastpos)
2496 middle = substring_both (newtext, lastpos,
2497 lastpos_byte,
2498 pos - 2, pos_byte - 2);
2499 else
2500 middle = Qnil;
2501 accum = concat3 (accum, middle,
2502 Fsubstring (string,
2503 make_number (substart),
2504 make_number (subend)));
2505 lastpos = pos;
2506 lastpos_byte = pos_byte;
2508 else if (delbackslash)
2510 middle = substring_both (newtext, lastpos,
2511 lastpos_byte,
2512 pos - 1, pos_byte - 1);
2514 accum = concat2 (accum, middle);
2515 lastpos = pos;
2516 lastpos_byte = pos_byte;
2520 if (pos != lastpos)
2521 middle = substring_both (newtext, lastpos,
2522 lastpos_byte,
2523 pos, pos_byte);
2524 else
2525 middle = Qnil;
2527 newtext = concat2 (accum, middle);
2530 /* Do case substitution in NEWTEXT if desired. */
2531 if (case_action == all_caps)
2532 newtext = Fupcase (newtext);
2533 else if (case_action == cap_initial)
2534 newtext = Fupcase_initials (newtext);
2536 return concat3 (before, newtext, after);
2539 /* Record point, then move (quietly) to the start of the match. */
2540 if (PT >= search_regs.end[sub])
2541 opoint = PT - ZV;
2542 else if (PT > search_regs.start[sub])
2543 opoint = search_regs.end[sub] - ZV;
2544 else
2545 opoint = PT;
2547 /* If we want non-literal replacement,
2548 perform substitution on the replacement string. */
2549 if (NILP (literal))
2551 int length = SBYTES (newtext);
2552 unsigned char *substed;
2553 int substed_alloc_size, substed_len;
2554 int buf_multibyte = !NILP (current_buffer->enable_multibyte_characters);
2555 int str_multibyte = STRING_MULTIBYTE (newtext);
2556 Lisp_Object rev_tbl;
2557 int really_changed = 0;
2559 rev_tbl = Qnil;
2561 substed_alloc_size = length * 2 + 100;
2562 substed = (unsigned char *) xmalloc (substed_alloc_size + 1);
2563 substed_len = 0;
2565 /* Go thru NEWTEXT, producing the actual text to insert in
2566 SUBSTED while adjusting multibyteness to that of the current
2567 buffer. */
2569 for (pos_byte = 0, pos = 0; pos_byte < length;)
2571 unsigned char str[MAX_MULTIBYTE_LENGTH];
2572 unsigned char *add_stuff = NULL;
2573 int add_len = 0;
2574 int idx = -1;
2576 if (str_multibyte)
2578 FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte);
2579 if (!buf_multibyte)
2580 c = multibyte_char_to_unibyte (c, rev_tbl);
2582 else
2584 /* Note that we don't have to increment POS. */
2585 c = SREF (newtext, pos_byte++);
2586 if (buf_multibyte)
2587 c = unibyte_char_to_multibyte (c);
2590 /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2591 or set IDX to a match index, which means put that part
2592 of the buffer text into SUBSTED. */
2594 if (c == '\\')
2596 really_changed = 1;
2598 if (str_multibyte)
2600 FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext,
2601 pos, pos_byte);
2602 if (!buf_multibyte && !ASCII_CHAR_P (c))
2603 c = multibyte_char_to_unibyte (c, rev_tbl);
2605 else
2607 c = SREF (newtext, pos_byte++);
2608 if (buf_multibyte)
2609 c = unibyte_char_to_multibyte (c);
2612 if (c == '&')
2613 idx = sub;
2614 else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
2616 if (search_regs.start[c - '0'] >= 1)
2617 idx = c - '0';
2619 else if (c == '\\')
2620 add_len = 1, add_stuff = "\\";
2621 else
2623 xfree (substed);
2624 error ("Invalid use of `\\' in replacement text");
2627 else
2629 add_len = CHAR_STRING (c, str);
2630 add_stuff = str;
2633 /* If we want to copy part of a previous match,
2634 set up ADD_STUFF and ADD_LEN to point to it. */
2635 if (idx >= 0)
2637 int begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
2638 add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
2639 if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
2640 move_gap (search_regs.start[idx]);
2641 add_stuff = BYTE_POS_ADDR (begbyte);
2644 /* Now the stuff we want to add to SUBSTED
2645 is invariably ADD_LEN bytes starting at ADD_STUFF. */
2647 /* Make sure SUBSTED is big enough. */
2648 if (substed_len + add_len >= substed_alloc_size)
2650 substed_alloc_size = substed_len + add_len + 500;
2651 substed = (unsigned char *) xrealloc (substed,
2652 substed_alloc_size + 1);
2655 /* Now add to the end of SUBSTED. */
2656 if (add_stuff)
2658 bcopy (add_stuff, substed + substed_len, add_len);
2659 substed_len += add_len;
2663 if (really_changed)
2665 if (buf_multibyte)
2667 int nchars = multibyte_chars_in_text (substed, substed_len);
2669 newtext = make_multibyte_string (substed, nchars, substed_len);
2671 else
2672 newtext = make_unibyte_string (substed, substed_len);
2674 xfree (substed);
2677 /* Replace the old text with the new in the cleanest possible way. */
2678 replace_range (search_regs.start[sub], search_regs.end[sub],
2679 newtext, 1, 0, 1);
2680 newpoint = search_regs.start[sub] + SCHARS (newtext);
2682 if (case_action == all_caps)
2683 Fupcase_region (make_number (search_regs.start[sub]),
2684 make_number (newpoint));
2685 else if (case_action == cap_initial)
2686 Fupcase_initials_region (make_number (search_regs.start[sub]),
2687 make_number (newpoint));
2689 /* Adjust search data for this change. */
2691 int oldend = search_regs.end[sub];
2692 int oldstart = search_regs.start[sub];
2693 int change = newpoint - search_regs.end[sub];
2694 int i;
2696 for (i = 0; i < search_regs.num_regs; i++)
2698 if (search_regs.start[i] >= oldend)
2699 search_regs.start[i] += change;
2700 else if (search_regs.start[i] > oldstart)
2701 search_regs.start[i] = oldstart;
2702 if (search_regs.end[i] >= oldend)
2703 search_regs.end[i] += change;
2704 else if (search_regs.end[i] > oldstart)
2705 search_regs.end[i] = oldstart;
2709 /* Put point back where it was in the text. */
2710 if (opoint <= 0)
2711 TEMP_SET_PT (opoint + ZV);
2712 else
2713 TEMP_SET_PT (opoint);
2715 /* Now move point "officially" to the start of the inserted replacement. */
2716 move_if_not_intangible (newpoint);
2718 return Qnil;
2721 static Lisp_Object
2722 match_limit (num, beginningp)
2723 Lisp_Object num;
2724 int beginningp;
2726 register int n;
2728 CHECK_NUMBER (num);
2729 n = XINT (num);
2730 if (n < 0)
2731 args_out_of_range (num, make_number (0));
2732 if (search_regs.num_regs <= 0)
2733 error ("No match data, because no search succeeded");
2734 if (n >= search_regs.num_regs
2735 || search_regs.start[n] < 0)
2736 return Qnil;
2737 return (make_number ((beginningp) ? search_regs.start[n]
2738 : search_regs.end[n]));
2741 DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
2742 doc: /* Return position of start of text matched by last search.
2743 SUBEXP, a number, specifies which parenthesized expression in the last
2744 regexp.
2745 Value is nil if SUBEXPth pair didn't match, or there were less than
2746 SUBEXP pairs.
2747 Zero means the entire text matched by the whole regexp or whole string. */)
2748 (subexp)
2749 Lisp_Object subexp;
2751 return match_limit (subexp, 1);
2754 DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
2755 doc: /* Return position of end of text matched by last search.
2756 SUBEXP, a number, specifies which parenthesized expression in the last
2757 regexp.
2758 Value is nil if SUBEXPth pair didn't match, or there were less than
2759 SUBEXP pairs.
2760 Zero means the entire text matched by the whole regexp or whole string. */)
2761 (subexp)
2762 Lisp_Object subexp;
2764 return match_limit (subexp, 0);
2767 DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 3, 0,
2768 doc: /* Return a list containing all info on what the last search matched.
2769 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2770 All the elements are markers or nil (nil if the Nth pair didn't match)
2771 if the last match was on a buffer; integers or nil if a string was matched.
2772 Use `store-match-data' to reinstate the data in this list.
2774 If INTEGERS (the optional first argument) is non-nil, always use
2775 integers \(rather than markers) to represent buffer positions. In
2776 this case, and if the last match was in a buffer, the buffer will get
2777 stored as one additional element at the end of the list.
2779 If REUSE is a list, reuse it as part of the value. If REUSE is long
2780 enough to hold all the values, and if INTEGERS is non-nil, no consing
2781 is done.
2783 If optional third arg RESEAT is non-nil, any previous markers on the
2784 REUSE list will be modified to point to nowhere.
2786 Return value is undefined if the last search failed. */)
2787 (integers, reuse, reseat)
2788 Lisp_Object integers, reuse, reseat;
2790 Lisp_Object tail, prev;
2791 Lisp_Object *data;
2792 int i, len;
2794 if (!NILP (reseat))
2795 for (tail = reuse; CONSP (tail); tail = XCDR (tail))
2796 if (MARKERP (XCAR (tail)))
2798 unchain_marker (XMARKER (XCAR (tail)));
2799 XSETCAR (tail, Qnil);
2802 if (NILP (last_thing_searched))
2803 return Qnil;
2805 prev = Qnil;
2807 data = (Lisp_Object *) alloca ((2 * search_regs.num_regs + 1)
2808 * sizeof (Lisp_Object));
2810 len = 0;
2811 for (i = 0; i < search_regs.num_regs; i++)
2813 int start = search_regs.start[i];
2814 if (start >= 0)
2816 if (EQ (last_thing_searched, Qt)
2817 || ! NILP (integers))
2819 XSETFASTINT (data[2 * i], start);
2820 XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
2822 else if (BUFFERP (last_thing_searched))
2824 data[2 * i] = Fmake_marker ();
2825 Fset_marker (data[2 * i],
2826 make_number (start),
2827 last_thing_searched);
2828 data[2 * i + 1] = Fmake_marker ();
2829 Fset_marker (data[2 * i + 1],
2830 make_number (search_regs.end[i]),
2831 last_thing_searched);
2833 else
2834 /* last_thing_searched must always be Qt, a buffer, or Qnil. */
2835 abort ();
2837 len = 2 * i + 2;
2839 else
2840 data[2 * i] = data[2 * i + 1] = Qnil;
2843 if (BUFFERP (last_thing_searched) && !NILP (integers))
2845 data[len] = last_thing_searched;
2846 len++;
2849 /* If REUSE is not usable, cons up the values and return them. */
2850 if (! CONSP (reuse))
2851 return Flist (len, data);
2853 /* If REUSE is a list, store as many value elements as will fit
2854 into the elements of REUSE. */
2855 for (i = 0, tail = reuse; CONSP (tail);
2856 i++, tail = XCDR (tail))
2858 if (i < len)
2859 XSETCAR (tail, data[i]);
2860 else
2861 XSETCAR (tail, Qnil);
2862 prev = tail;
2865 /* If we couldn't fit all value elements into REUSE,
2866 cons up the rest of them and add them to the end of REUSE. */
2867 if (i < len)
2868 XSETCDR (prev, Flist (len - i, data + i));
2870 return reuse;
2873 /* Internal usage only:
2874 If RESEAT is `evaporate', put the markers back on the free list
2875 immediately. No other references to the markers must exist in this case,
2876 so it is used only internally on the unwind stack and save-match-data from
2877 Lisp. */
2879 DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 2, 0,
2880 doc: /* Set internal data on last search match from elements of LIST.
2881 LIST should have been created by calling `match-data' previously.
2883 If optional arg RESEAT is non-nil, make markers on LIST point nowhere. */)
2884 (list, reseat)
2885 register Lisp_Object list, reseat;
2887 register int i;
2888 register Lisp_Object marker;
2890 if (running_asynch_code)
2891 save_search_regs ();
2893 CHECK_LIST (list);
2895 /* Unless we find a marker with a buffer or an explicit buffer
2896 in LIST, assume that this match data came from a string. */
2897 last_thing_searched = Qt;
2899 /* Allocate registers if they don't already exist. */
2901 int length = XFASTINT (Flength (list)) / 2;
2903 if (length > search_regs.num_regs)
2905 if (search_regs.num_regs == 0)
2907 search_regs.start
2908 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2909 search_regs.end
2910 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2912 else
2914 search_regs.start
2915 = (regoff_t *) xrealloc (search_regs.start,
2916 length * sizeof (regoff_t));
2917 search_regs.end
2918 = (regoff_t *) xrealloc (search_regs.end,
2919 length * sizeof (regoff_t));
2922 for (i = search_regs.num_regs; i < length; i++)
2923 search_regs.start[i] = -1;
2925 search_regs.num_regs = length;
2928 for (i = 0; CONSP (list); i++)
2930 marker = XCAR (list);
2931 if (BUFFERP (marker))
2933 last_thing_searched = marker;
2934 break;
2936 if (i >= length)
2937 break;
2938 if (NILP (marker))
2940 search_regs.start[i] = -1;
2941 list = XCDR (list);
2943 else
2945 int from;
2946 Lisp_Object m;
2948 m = marker;
2949 if (MARKERP (marker))
2951 if (XMARKER (marker)->buffer == 0)
2952 XSETFASTINT (marker, 0);
2953 else
2954 XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
2957 CHECK_NUMBER_COERCE_MARKER (marker);
2958 from = XINT (marker);
2960 if (!NILP (reseat) && MARKERP (m))
2962 if (EQ (reseat, Qevaporate))
2963 free_marker (m);
2964 else
2965 unchain_marker (XMARKER (m));
2966 XSETCAR (list, Qnil);
2969 if ((list = XCDR (list), !CONSP (list)))
2970 break;
2972 m = marker = XCAR (list);
2974 if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
2975 XSETFASTINT (marker, 0);
2977 CHECK_NUMBER_COERCE_MARKER (marker);
2978 search_regs.start[i] = from;
2979 search_regs.end[i] = XINT (marker);
2981 if (!NILP (reseat) && MARKERP (m))
2983 if (EQ (reseat, Qevaporate))
2984 free_marker (m);
2985 else
2986 unchain_marker (XMARKER (m));
2987 XSETCAR (list, Qnil);
2990 list = XCDR (list);
2993 for (; i < search_regs.num_regs; i++)
2994 search_regs.start[i] = -1;
2997 return Qnil;
3000 /* If non-zero the match data have been saved in saved_search_regs
3001 during the execution of a sentinel or filter. */
3002 static int search_regs_saved;
3003 static struct re_registers saved_search_regs;
3004 static Lisp_Object saved_last_thing_searched;
3006 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
3007 if asynchronous code (filter or sentinel) is running. */
3008 static void
3009 save_search_regs ()
3011 if (!search_regs_saved)
3013 saved_search_regs.num_regs = search_regs.num_regs;
3014 saved_search_regs.start = search_regs.start;
3015 saved_search_regs.end = search_regs.end;
3016 saved_last_thing_searched = last_thing_searched;
3017 last_thing_searched = Qnil;
3018 search_regs.num_regs = 0;
3019 search_regs.start = 0;
3020 search_regs.end = 0;
3022 search_regs_saved = 1;
3026 /* Called upon exit from filters and sentinels. */
3027 void
3028 restore_search_regs ()
3030 if (search_regs_saved)
3032 if (search_regs.num_regs > 0)
3034 xfree (search_regs.start);
3035 xfree (search_regs.end);
3037 search_regs.num_regs = saved_search_regs.num_regs;
3038 search_regs.start = saved_search_regs.start;
3039 search_regs.end = saved_search_regs.end;
3040 last_thing_searched = saved_last_thing_searched;
3041 saved_last_thing_searched = Qnil;
3042 search_regs_saved = 0;
3046 static Lisp_Object
3047 unwind_set_match_data (list)
3048 Lisp_Object list;
3050 /* It is safe to free (evaporate) the markers immediately. */
3051 return Fset_match_data (list, Qevaporate);
3054 /* Called to unwind protect the match data. */
3055 void
3056 record_unwind_save_match_data ()
3058 record_unwind_protect (unwind_set_match_data,
3059 Fmatch_data (Qnil, Qnil, Qnil));
3062 /* Quote a string to inactivate reg-expr chars */
3064 DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
3065 doc: /* Return a regexp string which matches exactly STRING and nothing else. */)
3066 (string)
3067 Lisp_Object string;
3069 register unsigned char *in, *out, *end;
3070 register unsigned char *temp;
3071 int backslashes_added = 0;
3073 CHECK_STRING (string);
3075 temp = (unsigned char *) alloca (SBYTES (string) * 2);
3077 /* Now copy the data into the new string, inserting escapes. */
3079 in = SDATA (string);
3080 end = in + SBYTES (string);
3081 out = temp;
3083 for (; in != end; in++)
3085 if (*in == '['
3086 || *in == '*' || *in == '.' || *in == '\\'
3087 || *in == '?' || *in == '+'
3088 || *in == '^' || *in == '$')
3089 *out++ = '\\', backslashes_added++;
3090 *out++ = *in;
3093 return make_specified_string (temp,
3094 SCHARS (string) + backslashes_added,
3095 out - temp,
3096 STRING_MULTIBYTE (string));
3099 void
3100 syms_of_search ()
3102 register int i;
3104 for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
3106 searchbufs[i].buf.allocated = 100;
3107 searchbufs[i].buf.buffer = (unsigned char *) xmalloc (100);
3108 searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
3109 searchbufs[i].regexp = Qnil;
3110 searchbufs[i].whitespace_regexp = Qnil;
3111 searchbufs[i].syntax_table = Qnil;
3112 staticpro (&searchbufs[i].regexp);
3113 staticpro (&searchbufs[i].whitespace_regexp);
3114 staticpro (&searchbufs[i].syntax_table);
3115 searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
3117 searchbuf_head = &searchbufs[0];
3119 Qsearch_failed = intern ("search-failed");
3120 staticpro (&Qsearch_failed);
3121 Qinvalid_regexp = intern ("invalid-regexp");
3122 staticpro (&Qinvalid_regexp);
3124 Fput (Qsearch_failed, Qerror_conditions,
3125 Fcons (Qsearch_failed, Fcons (Qerror, Qnil)));
3126 Fput (Qsearch_failed, Qerror_message,
3127 build_string ("Search failed"));
3129 Fput (Qinvalid_regexp, Qerror_conditions,
3130 Fcons (Qinvalid_regexp, Fcons (Qerror, Qnil)));
3131 Fput (Qinvalid_regexp, Qerror_message,
3132 build_string ("Invalid regexp"));
3134 last_thing_searched = Qnil;
3135 staticpro (&last_thing_searched);
3137 saved_last_thing_searched = Qnil;
3138 staticpro (&saved_last_thing_searched);
3140 DEFVAR_LISP ("search-spaces-regexp", &Vsearch_spaces_regexp,
3141 doc: /* Regexp to substitute for bunches of spaces in regexp search.
3142 Some commands use this for user-specified regexps.
3143 Spaces that occur inside character classes or repetition operators
3144 or other such regexp constructs are not replaced with this.
3145 A value of nil (which is the normal value) means treat spaces literally. */);
3146 Vsearch_spaces_regexp = Qnil;
3148 defsubr (&Slooking_at);
3149 defsubr (&Sposix_looking_at);
3150 defsubr (&Sstring_match);
3151 defsubr (&Sposix_string_match);
3152 defsubr (&Ssearch_forward);
3153 defsubr (&Ssearch_backward);
3154 defsubr (&Sword_search_forward);
3155 defsubr (&Sword_search_backward);
3156 defsubr (&Sre_search_forward);
3157 defsubr (&Sre_search_backward);
3158 defsubr (&Sposix_search_forward);
3159 defsubr (&Sposix_search_backward);
3160 defsubr (&Sreplace_match);
3161 defsubr (&Smatch_beginning);
3162 defsubr (&Smatch_end);
3163 defsubr (&Smatch_data);
3164 defsubr (&Sset_match_data);
3165 defsubr (&Sregexp_quote);
3168 /* arch-tag: a6059d79-0552-4f14-a2cb-d379a4e3c78f
3169 (do not change this comment) */