1 /* String search routines for GNU Emacs.
2 Copyright (C) 1985, 1986, 1987, 1993, 1994, 1997, 1998, 1999, 2002, 2003,
3 2004, 2005, 2006 Free Software Foundation, Inc.
5 This file is part of GNU Emacs.
7 GNU Emacs is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GNU Emacs is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU Emacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20 Boston, MA 02110-1301, USA. */
29 #include "region-cache.h"
31 #include "blockinput.h"
32 #include "intervals.h"
34 #include <sys/types.h>
37 #define REGEXP_CACHE_SIZE 20
39 /* If the regexp is non-nil, then the buffer contains the compiled form
40 of that regexp, suitable for searching. */
43 struct regexp_cache
*next
;
44 Lisp_Object regexp
, whitespace_regexp
;
45 struct re_pattern_buffer buf
;
47 /* Nonzero means regexp was compiled to do full POSIX backtracking. */
51 /* The instances of that struct. */
52 struct regexp_cache searchbufs
[REGEXP_CACHE_SIZE
];
54 /* The head of the linked list; points to the most recently used buffer. */
55 struct regexp_cache
*searchbuf_head
;
58 /* Every call to re_match, etc., must pass &search_regs as the regs
59 argument unless you can show it is unnecessary (i.e., if re_match
60 is certainly going to be called again before region-around-match
63 Since the registers are now dynamically allocated, we need to make
64 sure not to refer to the Nth register before checking that it has
65 been allocated by checking search_regs.num_regs.
67 The regex code keeps track of whether it has allocated the search
68 buffer using bits in the re_pattern_buffer. This means that whenever
69 you compile a new pattern, it completely forgets whether it has
70 allocated any registers, and will allocate new registers the next
71 time you call a searching or matching function. Therefore, we need
72 to call re_set_registers after compiling a new pattern or after
73 setting the match registers, so that the regex functions will be
74 able to free or re-allocate it properly. */
75 static struct re_registers search_regs
;
77 /* The buffer in which the last search was performed, or
78 Qt if the last search was done in a string;
79 Qnil if no searching has been done yet. */
80 static Lisp_Object last_thing_searched
;
82 /* error condition signaled when regexp compile_pattern fails */
84 Lisp_Object Qinvalid_regexp
;
86 /* Error condition used for failing searches */
87 Lisp_Object Qsearch_failed
;
89 Lisp_Object Vsearch_spaces_regexp
;
91 static void set_search_regs ();
92 static void save_search_regs ();
93 static int simple_search ();
94 static int boyer_moore ();
95 static int search_buffer ();
96 static void matcher_overflow () NO_RETURN
;
101 error ("Stack overflow in regexp matcher");
104 /* Compile a regexp and signal a Lisp error if anything goes wrong.
105 PATTERN is the pattern to compile.
106 CP is the place to put the result.
107 TRANSLATE is a translation table for ignoring case, or nil for none.
108 REGP is the structure that says where to store the "register"
109 values that will result from matching this pattern.
110 If it is 0, we should compile the pattern not to record any
111 subexpression bounds.
112 POSIX is nonzero if we want full backtracking (POSIX style)
113 for this pattern. 0 means backtrack only enough to get a valid match.
114 MULTIBYTE is nonzero if we want to handle multibyte characters in
115 PATTERN. 0 means all multibyte characters are recognized just as
116 sequences of binary data.
118 The behavior also depends on Vsearch_spaces_regexp. */
121 compile_pattern_1 (cp
, pattern
, translate
, regp
, posix
, multibyte
)
122 struct regexp_cache
*cp
;
124 Lisp_Object translate
;
125 struct re_registers
*regp
;
129 unsigned char *raw_pattern
;
130 int raw_pattern_size
;
134 /* MULTIBYTE says whether the text to be searched is multibyte.
135 We must convert PATTERN to match that, or we will not really
136 find things right. */
138 if (multibyte
== STRING_MULTIBYTE (pattern
))
140 raw_pattern
= (unsigned char *) SDATA (pattern
);
141 raw_pattern_size
= SBYTES (pattern
);
145 raw_pattern_size
= count_size_as_multibyte (SDATA (pattern
),
147 raw_pattern
= (unsigned char *) alloca (raw_pattern_size
+ 1);
148 copy_text (SDATA (pattern
), raw_pattern
,
149 SCHARS (pattern
), 0, 1);
153 /* Converting multibyte to single-byte.
155 ??? Perhaps this conversion should be done in a special way
156 by subtracting nonascii-insert-offset from each non-ASCII char,
157 so that only the multibyte chars which really correspond to
158 the chosen single-byte character set can possibly match. */
159 raw_pattern_size
= SCHARS (pattern
);
160 raw_pattern
= (unsigned char *) alloca (raw_pattern_size
+ 1);
161 copy_text (SDATA (pattern
), raw_pattern
,
162 SBYTES (pattern
), 1, 0);
166 cp
->buf
.translate
= (! NILP (translate
) ? translate
: make_number (0));
168 cp
->buf
.multibyte
= multibyte
;
169 cp
->whitespace_regexp
= Vsearch_spaces_regexp
;
171 old
= re_set_syntax (RE_SYNTAX_EMACS
172 | (posix
? 0 : RE_NO_POSIX_BACKTRACKING
));
174 re_set_whitespace_regexp (NILP (Vsearch_spaces_regexp
) ? NULL
175 : SDATA (Vsearch_spaces_regexp
));
177 val
= (char *) re_compile_pattern ((char *)raw_pattern
,
178 raw_pattern_size
, &cp
->buf
);
180 re_set_whitespace_regexp (NULL
);
185 xsignal1 (Qinvalid_regexp
, build_string (val
));
187 cp
->regexp
= Fcopy_sequence (pattern
);
190 /* Shrink each compiled regexp buffer in the cache
191 to the size actually used right now.
192 This is called from garbage collection. */
195 shrink_regexp_cache ()
197 struct regexp_cache
*cp
;
199 for (cp
= searchbuf_head
; cp
!= 0; cp
= cp
->next
)
201 cp
->buf
.allocated
= cp
->buf
.used
;
203 = (unsigned char *) xrealloc (cp
->buf
.buffer
, cp
->buf
.used
);
207 /* Compile a regexp if necessary, but first check to see if there's one in
209 PATTERN is the pattern to compile.
210 TRANSLATE is a translation table for ignoring case, or nil for none.
211 REGP is the structure that says where to store the "register"
212 values that will result from matching this pattern.
213 If it is 0, we should compile the pattern not to record any
214 subexpression bounds.
215 POSIX is nonzero if we want full backtracking (POSIX style)
216 for this pattern. 0 means backtrack only enough to get a valid match. */
218 struct re_pattern_buffer
*
219 compile_pattern (pattern
, regp
, translate
, posix
, multibyte
)
221 struct re_registers
*regp
;
222 Lisp_Object translate
;
223 int posix
, multibyte
;
225 struct regexp_cache
*cp
, **cpp
;
227 for (cpp
= &searchbuf_head
; ; cpp
= &cp
->next
)
230 /* Entries are initialized to nil, and may be set to nil by
231 compile_pattern_1 if the pattern isn't valid. Don't apply
232 string accessors in those cases. However, compile_pattern_1
233 is only applied to the cache entry we pick here to reuse. So
234 nil should never appear before a non-nil entry. */
235 if (NILP (cp
->regexp
))
237 if (SCHARS (cp
->regexp
) == SCHARS (pattern
)
238 && STRING_MULTIBYTE (cp
->regexp
) == STRING_MULTIBYTE (pattern
)
239 && !NILP (Fstring_equal (cp
->regexp
, pattern
))
240 && EQ (cp
->buf
.translate
, (! NILP (translate
) ? translate
: make_number (0)))
241 && cp
->posix
== posix
242 && cp
->buf
.multibyte
== multibyte
243 && !NILP (Fequal (cp
->whitespace_regexp
, Vsearch_spaces_regexp
)))
246 /* If we're at the end of the cache, compile into the nil cell
247 we found, or the last (least recently used) cell with a
252 compile_pattern_1 (cp
, pattern
, translate
, regp
, posix
, multibyte
);
257 /* When we get here, cp (aka *cpp) contains the compiled pattern,
258 either because we found it in the cache or because we just compiled it.
259 Move it to the front of the queue to mark it as most recently used. */
261 cp
->next
= searchbuf_head
;
264 /* Advise the searching functions about the space we have allocated
265 for register data. */
267 re_set_registers (&cp
->buf
, regp
, regp
->num_regs
, regp
->start
, regp
->end
);
274 looking_at_1 (string
, posix
)
279 unsigned char *p1
, *p2
;
282 struct re_pattern_buffer
*bufp
;
284 if (running_asynch_code
)
287 CHECK_STRING (string
);
288 bufp
= compile_pattern (string
, &search_regs
,
289 (!NILP (current_buffer
->case_fold_search
)
290 ? current_buffer
->case_canon_table
: Qnil
),
292 !NILP (current_buffer
->enable_multibyte_characters
));
295 QUIT
; /* Do a pending quit right away, to avoid paradoxical behavior */
297 /* Get pointers and sizes of the two strings
298 that make up the visible portion of the buffer. */
301 s1
= GPT_BYTE
- BEGV_BYTE
;
303 s2
= ZV_BYTE
- GPT_BYTE
;
307 s2
= ZV_BYTE
- BEGV_BYTE
;
312 s1
= ZV_BYTE
- BEGV_BYTE
;
316 re_match_object
= Qnil
;
318 i
= re_match_2 (bufp
, (char *) p1
, s1
, (char *) p2
, s2
,
319 PT_BYTE
- BEGV_BYTE
, &search_regs
,
320 ZV_BYTE
- BEGV_BYTE
);
326 val
= (0 <= i
? Qt
: Qnil
);
328 for (i
= 0; i
< search_regs
.num_regs
; i
++)
329 if (search_regs
.start
[i
] >= 0)
332 = BYTE_TO_CHAR (search_regs
.start
[i
] + BEGV_BYTE
);
334 = BYTE_TO_CHAR (search_regs
.end
[i
] + BEGV_BYTE
);
336 XSETBUFFER (last_thing_searched
, current_buffer
);
340 DEFUN ("looking-at", Flooking_at
, Slooking_at
, 1, 1, 0,
341 doc
: /* Return t if text after point matches regular expression REGEXP.
342 This function modifies the match data that `match-beginning',
343 `match-end' and `match-data' access; save and restore the match
344 data if you want to preserve them. */)
348 return looking_at_1 (regexp
, 0);
351 DEFUN ("posix-looking-at", Fposix_looking_at
, Sposix_looking_at
, 1, 1, 0,
352 doc
: /* Return t if text after point matches regular expression REGEXP.
353 Find the longest match, in accord with Posix regular expression rules.
354 This function modifies the match data that `match-beginning',
355 `match-end' and `match-data' access; save and restore the match
356 data if you want to preserve them. */)
360 return looking_at_1 (regexp
, 1);
364 string_match_1 (regexp
, string
, start
, posix
)
365 Lisp_Object regexp
, string
, start
;
369 struct re_pattern_buffer
*bufp
;
373 if (running_asynch_code
)
376 CHECK_STRING (regexp
);
377 CHECK_STRING (string
);
380 pos
= 0, pos_byte
= 0;
383 int len
= SCHARS (string
);
385 CHECK_NUMBER (start
);
387 if (pos
< 0 && -pos
<= len
)
389 else if (0 > pos
|| pos
> len
)
390 args_out_of_range (string
, start
);
391 pos_byte
= string_char_to_byte (string
, pos
);
394 bufp
= compile_pattern (regexp
, &search_regs
,
395 (!NILP (current_buffer
->case_fold_search
)
396 ? current_buffer
->case_canon_table
: Qnil
),
398 STRING_MULTIBYTE (string
));
400 re_match_object
= string
;
402 val
= re_search (bufp
, (char *) SDATA (string
),
403 SBYTES (string
), pos_byte
,
404 SBYTES (string
) - pos_byte
,
407 last_thing_searched
= Qt
;
410 if (val
< 0) return Qnil
;
412 for (i
= 0; i
< search_regs
.num_regs
; i
++)
413 if (search_regs
.start
[i
] >= 0)
416 = string_byte_to_char (string
, search_regs
.start
[i
]);
418 = string_byte_to_char (string
, search_regs
.end
[i
]);
421 return make_number (string_byte_to_char (string
, val
));
424 DEFUN ("string-match", Fstring_match
, Sstring_match
, 2, 3, 0,
425 doc
: /* Return index of start of first match for REGEXP in STRING, or nil.
426 Matching ignores case if `case-fold-search' is non-nil.
427 If third arg START is non-nil, start search at that index in STRING.
428 For index of first char beyond the match, do (match-end 0).
429 `match-end' and `match-beginning' also give indices of substrings
430 matched by parenthesis constructs in the pattern.
432 You can use the function `match-string' to extract the substrings
433 matched by the parenthesis constructions in REGEXP. */)
434 (regexp
, string
, start
)
435 Lisp_Object regexp
, string
, start
;
437 return string_match_1 (regexp
, string
, start
, 0);
440 DEFUN ("posix-string-match", Fposix_string_match
, Sposix_string_match
, 2, 3, 0,
441 doc
: /* Return index of start of first match for REGEXP in STRING, or nil.
442 Find the longest match, in accord with Posix regular expression rules.
443 Case is ignored if `case-fold-search' is non-nil in the current buffer.
444 If third arg START is non-nil, start search at that index in STRING.
445 For index of first char beyond the match, do (match-end 0).
446 `match-end' and `match-beginning' also give indices of substrings
447 matched by parenthesis constructs in the pattern. */)
448 (regexp
, string
, start
)
449 Lisp_Object regexp
, string
, start
;
451 return string_match_1 (regexp
, string
, start
, 1);
454 /* Match REGEXP against STRING, searching all of STRING,
455 and return the index of the match, or negative on failure.
456 This does not clobber the match data. */
459 fast_string_match (regexp
, string
)
460 Lisp_Object regexp
, string
;
463 struct re_pattern_buffer
*bufp
;
465 bufp
= compile_pattern (regexp
, 0, Qnil
,
466 0, STRING_MULTIBYTE (string
));
468 re_match_object
= string
;
470 val
= re_search (bufp
, (char *) SDATA (string
),
477 /* Match REGEXP against STRING, searching all of STRING ignoring case,
478 and return the index of the match, or negative on failure.
479 This does not clobber the match data.
480 We assume that STRING contains single-byte characters. */
482 extern Lisp_Object Vascii_downcase_table
;
485 fast_c_string_match_ignore_case (regexp
, string
)
490 struct re_pattern_buffer
*bufp
;
491 int len
= strlen (string
);
493 regexp
= string_make_unibyte (regexp
);
494 re_match_object
= Qt
;
495 bufp
= compile_pattern (regexp
, 0,
496 Vascii_canon_table
, 0,
499 val
= re_search (bufp
, string
, len
, 0, len
, 0);
504 /* Like fast_string_match but ignore case. */
507 fast_string_match_ignore_case (regexp
, string
)
508 Lisp_Object regexp
, string
;
511 struct re_pattern_buffer
*bufp
;
513 bufp
= compile_pattern (regexp
, 0, Vascii_canon_table
,
514 0, STRING_MULTIBYTE (string
));
516 re_match_object
= string
;
518 val
= re_search (bufp
, (char *) SDATA (string
),
525 /* The newline cache: remembering which sections of text have no newlines. */
527 /* If the user has requested newline caching, make sure it's on.
528 Otherwise, make sure it's off.
529 This is our cheezy way of associating an action with the change of
530 state of a buffer-local variable. */
532 newline_cache_on_off (buf
)
535 if (NILP (buf
->cache_long_line_scans
))
537 /* It should be off. */
538 if (buf
->newline_cache
)
540 free_region_cache (buf
->newline_cache
);
541 buf
->newline_cache
= 0;
546 /* It should be on. */
547 if (buf
->newline_cache
== 0)
548 buf
->newline_cache
= new_region_cache ();
553 /* Search for COUNT instances of the character TARGET between START and END.
555 If COUNT is positive, search forwards; END must be >= START.
556 If COUNT is negative, search backwards for the -COUNTth instance;
557 END must be <= START.
558 If COUNT is zero, do anything you please; run rogue, for all I care.
560 If END is zero, use BEGV or ZV instead, as appropriate for the
561 direction indicated by COUNT.
563 If we find COUNT instances, set *SHORTAGE to zero, and return the
564 position past the COUNTth match. Note that for reverse motion
565 this is not the same as the usual convention for Emacs motion commands.
567 If we don't find COUNT instances before reaching END, set *SHORTAGE
568 to the number of TARGETs left unfound, and return END.
570 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
571 except when inside redisplay. */
574 scan_buffer (target
, start
, end
, count
, shortage
, allow_quit
)
581 struct region_cache
*newline_cache
;
592 if (! end
) end
= BEGV
;
595 newline_cache_on_off (current_buffer
);
596 newline_cache
= current_buffer
->newline_cache
;
601 immediate_quit
= allow_quit
;
606 /* Our innermost scanning loop is very simple; it doesn't know
607 about gaps, buffer ends, or the newline cache. ceiling is
608 the position of the last character before the next such
609 obstacle --- the last character the dumb search loop should
611 int ceiling_byte
= CHAR_TO_BYTE (end
) - 1;
612 int start_byte
= CHAR_TO_BYTE (start
);
615 /* If we're looking for a newline, consult the newline cache
616 to see where we can avoid some scanning. */
617 if (target
== '\n' && newline_cache
)
621 while (region_cache_forward
622 (current_buffer
, newline_cache
, start_byte
, &next_change
))
623 start_byte
= next_change
;
624 immediate_quit
= allow_quit
;
626 /* START should never be after END. */
627 if (start_byte
> ceiling_byte
)
628 start_byte
= ceiling_byte
;
630 /* Now the text after start is an unknown region, and
631 next_change is the position of the next known region. */
632 ceiling_byte
= min (next_change
- 1, ceiling_byte
);
635 /* The dumb loop can only scan text stored in contiguous
636 bytes. BUFFER_CEILING_OF returns the last character
637 position that is contiguous, so the ceiling is the
638 position after that. */
639 tem
= BUFFER_CEILING_OF (start_byte
);
640 ceiling_byte
= min (tem
, ceiling_byte
);
643 /* The termination address of the dumb loop. */
644 register unsigned char *ceiling_addr
645 = BYTE_POS_ADDR (ceiling_byte
) + 1;
646 register unsigned char *cursor
647 = BYTE_POS_ADDR (start_byte
);
648 unsigned char *base
= cursor
;
650 while (cursor
< ceiling_addr
)
652 unsigned char *scan_start
= cursor
;
655 while (*cursor
!= target
&& ++cursor
< ceiling_addr
)
658 /* If we're looking for newlines, cache the fact that
659 the region from start to cursor is free of them. */
660 if (target
== '\n' && newline_cache
)
661 know_region_cache (current_buffer
, newline_cache
,
662 start_byte
+ scan_start
- base
,
663 start_byte
+ cursor
- base
);
665 /* Did we find the target character? */
666 if (cursor
< ceiling_addr
)
671 return BYTE_TO_CHAR (start_byte
+ cursor
- base
+ 1);
677 start
= BYTE_TO_CHAR (start_byte
+ cursor
- base
);
683 /* The last character to check before the next obstacle. */
684 int ceiling_byte
= CHAR_TO_BYTE (end
);
685 int start_byte
= CHAR_TO_BYTE (start
);
688 /* Consult the newline cache, if appropriate. */
689 if (target
== '\n' && newline_cache
)
693 while (region_cache_backward
694 (current_buffer
, newline_cache
, start_byte
, &next_change
))
695 start_byte
= next_change
;
696 immediate_quit
= allow_quit
;
698 /* Start should never be at or before end. */
699 if (start_byte
<= ceiling_byte
)
700 start_byte
= ceiling_byte
+ 1;
702 /* Now the text before start is an unknown region, and
703 next_change is the position of the next known region. */
704 ceiling_byte
= max (next_change
, ceiling_byte
);
707 /* Stop scanning before the gap. */
708 tem
= BUFFER_FLOOR_OF (start_byte
- 1);
709 ceiling_byte
= max (tem
, ceiling_byte
);
712 /* The termination address of the dumb loop. */
713 register unsigned char *ceiling_addr
= BYTE_POS_ADDR (ceiling_byte
);
714 register unsigned char *cursor
= BYTE_POS_ADDR (start_byte
- 1);
715 unsigned char *base
= cursor
;
717 while (cursor
>= ceiling_addr
)
719 unsigned char *scan_start
= cursor
;
721 while (*cursor
!= target
&& --cursor
>= ceiling_addr
)
724 /* If we're looking for newlines, cache the fact that
725 the region from after the cursor to start is free of them. */
726 if (target
== '\n' && newline_cache
)
727 know_region_cache (current_buffer
, newline_cache
,
728 start_byte
+ cursor
- base
,
729 start_byte
+ scan_start
- base
);
731 /* Did we find the target character? */
732 if (cursor
>= ceiling_addr
)
737 return BYTE_TO_CHAR (start_byte
+ cursor
- base
);
743 start
= BYTE_TO_CHAR (start_byte
+ cursor
- base
);
749 *shortage
= count
* direction
;
753 /* Search for COUNT instances of a line boundary, which means either a
754 newline or (if selective display enabled) a carriage return.
755 Start at START. If COUNT is negative, search backwards.
757 We report the resulting position by calling TEMP_SET_PT_BOTH.
759 If we find COUNT instances. we position after (always after,
760 even if scanning backwards) the COUNTth match, and return 0.
762 If we don't find COUNT instances before reaching the end of the
763 buffer (or the beginning, if scanning backwards), we return
764 the number of line boundaries left unfound, and position at
765 the limit we bumped up against.
767 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
768 except in special cases. */
771 scan_newline (start
, start_byte
, limit
, limit_byte
, count
, allow_quit
)
772 int start
, start_byte
;
773 int limit
, limit_byte
;
777 int direction
= ((count
> 0) ? 1 : -1);
779 register unsigned char *cursor
;
782 register int ceiling
;
783 register unsigned char *ceiling_addr
;
785 int old_immediate_quit
= immediate_quit
;
787 /* The code that follows is like scan_buffer
788 but checks for either newline or carriage return. */
793 start_byte
= CHAR_TO_BYTE (start
);
797 while (start_byte
< limit_byte
)
799 ceiling
= BUFFER_CEILING_OF (start_byte
);
800 ceiling
= min (limit_byte
- 1, ceiling
);
801 ceiling_addr
= BYTE_POS_ADDR (ceiling
) + 1;
802 base
= (cursor
= BYTE_POS_ADDR (start_byte
));
805 while (*cursor
!= '\n' && ++cursor
!= ceiling_addr
)
808 if (cursor
!= ceiling_addr
)
812 immediate_quit
= old_immediate_quit
;
813 start_byte
= start_byte
+ cursor
- base
+ 1;
814 start
= BYTE_TO_CHAR (start_byte
);
815 TEMP_SET_PT_BOTH (start
, start_byte
);
819 if (++cursor
== ceiling_addr
)
825 start_byte
+= cursor
- base
;
830 while (start_byte
> limit_byte
)
832 ceiling
= BUFFER_FLOOR_OF (start_byte
- 1);
833 ceiling
= max (limit_byte
, ceiling
);
834 ceiling_addr
= BYTE_POS_ADDR (ceiling
) - 1;
835 base
= (cursor
= BYTE_POS_ADDR (start_byte
- 1) + 1);
838 while (--cursor
!= ceiling_addr
&& *cursor
!= '\n')
841 if (cursor
!= ceiling_addr
)
845 immediate_quit
= old_immediate_quit
;
846 /* Return the position AFTER the match we found. */
847 start_byte
= start_byte
+ cursor
- base
+ 1;
848 start
= BYTE_TO_CHAR (start_byte
);
849 TEMP_SET_PT_BOTH (start
, start_byte
);
856 /* Here we add 1 to compensate for the last decrement
857 of CURSOR, which took it past the valid range. */
858 start_byte
+= cursor
- base
+ 1;
862 TEMP_SET_PT_BOTH (limit
, limit_byte
);
863 immediate_quit
= old_immediate_quit
;
865 return count
* direction
;
869 find_next_newline_no_quit (from
, cnt
)
870 register int from
, cnt
;
872 return scan_buffer ('\n', from
, 0, cnt
, (int *) 0, 0);
875 /* Like find_next_newline, but returns position before the newline,
876 not after, and only search up to TO. This isn't just
877 find_next_newline (...)-1, because you might hit TO. */
880 find_before_next_newline (from
, to
, cnt
)
884 int pos
= scan_buffer ('\n', from
, to
, cnt
, &shortage
, 1);
892 /* Subroutines of Lisp buffer search functions. */
895 search_command (string
, bound
, noerror
, count
, direction
, RE
, posix
)
896 Lisp_Object string
, bound
, noerror
, count
;
907 CHECK_NUMBER (count
);
911 CHECK_STRING (string
);
915 lim
= ZV
, lim_byte
= ZV_BYTE
;
917 lim
= BEGV
, lim_byte
= BEGV_BYTE
;
921 CHECK_NUMBER_COERCE_MARKER (bound
);
923 if (n
> 0 ? lim
< PT
: lim
> PT
)
924 error ("Invalid search bound (wrong side of point)");
926 lim
= ZV
, lim_byte
= ZV_BYTE
;
928 lim
= BEGV
, lim_byte
= BEGV_BYTE
;
930 lim_byte
= CHAR_TO_BYTE (lim
);
933 np
= search_buffer (string
, PT
, PT_BYTE
, lim
, lim_byte
, n
, RE
,
934 (!NILP (current_buffer
->case_fold_search
)
935 ? current_buffer
->case_canon_table
937 (!NILP (current_buffer
->case_fold_search
)
938 ? current_buffer
->case_eqv_table
944 xsignal1 (Qsearch_failed
, string
);
946 if (!EQ (noerror
, Qt
))
948 if (lim
< BEGV
|| lim
> ZV
)
950 SET_PT_BOTH (lim
, lim_byte
);
952 #if 0 /* This would be clean, but maybe programs depend on
953 a value of nil here. */
961 if (np
< BEGV
|| np
> ZV
)
966 return make_number (np
);
969 /* Return 1 if REGEXP it matches just one constant string. */
972 trivial_regexp_p (regexp
)
975 int len
= SBYTES (regexp
);
976 unsigned char *s
= SDATA (regexp
);
981 case '.': case '*': case '+': case '?': case '[': case '^': case '$':
988 case '|': case '(': case ')': case '`': case '\'': case 'b':
989 case 'B': case '<': case '>': case 'w': case 'W': case 's':
990 case 'S': case '=': case '{': case '}': case '_':
991 case 'c': case 'C': /* for categoryspec and notcategoryspec */
992 case '1': case '2': case '3': case '4': case '5':
993 case '6': case '7': case '8': case '9':
1001 /* Search for the n'th occurrence of STRING in the current buffer,
1002 starting at position POS and stopping at position LIM,
1003 treating STRING as a literal string if RE is false or as
1004 a regular expression if RE is true.
1006 If N is positive, searching is forward and LIM must be greater than POS.
1007 If N is negative, searching is backward and LIM must be less than POS.
1009 Returns -x if x occurrences remain to be found (x > 0),
1010 or else the position at the beginning of the Nth occurrence
1011 (if searching backward) or the end (if searching forward).
1013 POSIX is nonzero if we want full backtracking (POSIX style)
1014 for this pattern. 0 means backtrack only enough to get a valid match. */
1016 #define TRANSLATE(out, trt, d) \
1022 temp = Faref (trt, make_number (d)); \
1023 if (INTEGERP (temp)) \
1024 out = XINT (temp); \
1034 search_buffer (string
, pos
, pos_byte
, lim
, lim_byte
, n
,
1035 RE
, trt
, inverse_trt
, posix
)
1044 Lisp_Object inverse_trt
;
1047 int len
= SCHARS (string
);
1048 int len_byte
= SBYTES (string
);
1051 if (running_asynch_code
)
1052 save_search_regs ();
1054 /* Searching 0 times means don't move. */
1055 /* Null string is found at starting position. */
1056 if (len
== 0 || n
== 0)
1058 set_search_regs (pos_byte
, 0);
1062 if (RE
&& !(trivial_regexp_p (string
) && NILP (Vsearch_spaces_regexp
)))
1064 unsigned char *p1
, *p2
;
1066 struct re_pattern_buffer
*bufp
;
1068 bufp
= compile_pattern (string
, &search_regs
, trt
, posix
,
1069 !NILP (current_buffer
->enable_multibyte_characters
));
1071 immediate_quit
= 1; /* Quit immediately if user types ^G,
1072 because letting this function finish
1073 can take too long. */
1074 QUIT
; /* Do a pending quit right away,
1075 to avoid paradoxical behavior */
1076 /* Get pointers and sizes of the two strings
1077 that make up the visible portion of the buffer. */
1080 s1
= GPT_BYTE
- BEGV_BYTE
;
1082 s2
= ZV_BYTE
- GPT_BYTE
;
1086 s2
= ZV_BYTE
- BEGV_BYTE
;
1091 s1
= ZV_BYTE
- BEGV_BYTE
;
1094 re_match_object
= Qnil
;
1099 val
= re_search_2 (bufp
, (char *) p1
, s1
, (char *) p2
, s2
,
1100 pos_byte
- BEGV_BYTE
, lim_byte
- pos_byte
,
1102 /* Don't allow match past current point */
1103 pos_byte
- BEGV_BYTE
);
1106 matcher_overflow ();
1110 pos_byte
= search_regs
.start
[0] + BEGV_BYTE
;
1111 for (i
= 0; i
< search_regs
.num_regs
; i
++)
1112 if (search_regs
.start
[i
] >= 0)
1114 search_regs
.start
[i
]
1115 = BYTE_TO_CHAR (search_regs
.start
[i
] + BEGV_BYTE
);
1117 = BYTE_TO_CHAR (search_regs
.end
[i
] + BEGV_BYTE
);
1119 XSETBUFFER (last_thing_searched
, current_buffer
);
1120 /* Set pos to the new position. */
1121 pos
= search_regs
.start
[0];
1133 val
= re_search_2 (bufp
, (char *) p1
, s1
, (char *) p2
, s2
,
1134 pos_byte
- BEGV_BYTE
, lim_byte
- pos_byte
,
1136 lim_byte
- BEGV_BYTE
);
1139 matcher_overflow ();
1143 pos_byte
= search_regs
.end
[0] + BEGV_BYTE
;
1144 for (i
= 0; i
< search_regs
.num_regs
; i
++)
1145 if (search_regs
.start
[i
] >= 0)
1147 search_regs
.start
[i
]
1148 = BYTE_TO_CHAR (search_regs
.start
[i
] + BEGV_BYTE
);
1150 = BYTE_TO_CHAR (search_regs
.end
[i
] + BEGV_BYTE
);
1152 XSETBUFFER (last_thing_searched
, current_buffer
);
1153 pos
= search_regs
.end
[0];
1165 else /* non-RE case */
1167 unsigned char *raw_pattern
, *pat
;
1168 int raw_pattern_size
;
1169 int raw_pattern_size_byte
;
1170 unsigned char *patbuf
;
1171 int multibyte
= !NILP (current_buffer
->enable_multibyte_characters
);
1172 unsigned char *base_pat
;
1173 /* Set to positive if we find a non-ASCII char that need
1174 translation. Otherwise set to zero later. */
1175 int charset_base
= -1;
1176 int boyer_moore_ok
= 1;
1178 /* MULTIBYTE says whether the text to be searched is multibyte.
1179 We must convert PATTERN to match that, or we will not really
1180 find things right. */
1182 if (multibyte
== STRING_MULTIBYTE (string
))
1184 raw_pattern
= (unsigned char *) SDATA (string
);
1185 raw_pattern_size
= SCHARS (string
);
1186 raw_pattern_size_byte
= SBYTES (string
);
1190 raw_pattern_size
= SCHARS (string
);
1191 raw_pattern_size_byte
1192 = count_size_as_multibyte (SDATA (string
),
1194 raw_pattern
= (unsigned char *) alloca (raw_pattern_size_byte
+ 1);
1195 copy_text (SDATA (string
), raw_pattern
,
1196 SCHARS (string
), 0, 1);
1200 /* Converting multibyte to single-byte.
1202 ??? Perhaps this conversion should be done in a special way
1203 by subtracting nonascii-insert-offset from each non-ASCII char,
1204 so that only the multibyte chars which really correspond to
1205 the chosen single-byte character set can possibly match. */
1206 raw_pattern_size
= SCHARS (string
);
1207 raw_pattern_size_byte
= SCHARS (string
);
1208 raw_pattern
= (unsigned char *) alloca (raw_pattern_size
+ 1);
1209 copy_text (SDATA (string
), raw_pattern
,
1210 SBYTES (string
), 1, 0);
1213 /* Copy and optionally translate the pattern. */
1214 len
= raw_pattern_size
;
1215 len_byte
= raw_pattern_size_byte
;
1216 patbuf
= (unsigned char *) alloca (len_byte
);
1218 base_pat
= raw_pattern
;
1221 /* Fill patbuf by translated characters in STRING while
1222 checking if we can use boyer-moore search. If TRT is
1223 non-nil, we can use boyer-moore search only if TRT can be
1224 represented by the byte array of 256 elements. For that,
1225 all non-ASCII case-equivalents of all case-senstive
1226 characters in STRING must belong to the same charset and
1231 unsigned char str_base
[MAX_MULTIBYTE_LENGTH
], *str
;
1232 int c
, translated
, inverse
;
1233 int in_charlen
, charlen
;
1235 /* If we got here and the RE flag is set, it's because we're
1236 dealing with a regexp known to be trivial, so the backslash
1237 just quotes the next character. */
1238 if (RE
&& *base_pat
== '\\')
1246 c
= STRING_CHAR_AND_LENGTH (base_pat
, len_byte
, in_charlen
);
1251 charlen
= in_charlen
;
1255 /* Translate the character. */
1256 TRANSLATE (translated
, trt
, c
);
1257 charlen
= CHAR_STRING (translated
, str_base
);
1260 /* Check if C has any other case-equivalents. */
1261 TRANSLATE (inverse
, inverse_trt
, c
);
1262 /* If so, check if we can use boyer-moore. */
1263 if (c
!= inverse
&& boyer_moore_ok
)
1265 /* Check if all equivalents belong to the same
1266 charset & row. Note that the check of C
1267 itself is done by the last iteration. Note
1268 also that we don't have to check ASCII
1269 characters because boyer-moore search can
1270 always handle their translation. */
1273 if (ASCII_BYTE_P (inverse
))
1275 if (charset_base
> 0)
1282 else if (SINGLE_BYTE_CHAR_P (inverse
))
1284 /* Boyer-moore search can't handle a
1285 translation of an eight-bit
1290 else if (charset_base
< 0)
1291 charset_base
= inverse
& ~CHAR_FIELD3_MASK
;
1292 else if ((inverse
& ~CHAR_FIELD3_MASK
)
1300 TRANSLATE (inverse
, inverse_trt
, inverse
);
1304 if (charset_base
< 0)
1307 /* Store this character into the translated pattern. */
1308 bcopy (str
, pat
, charlen
);
1310 base_pat
+= in_charlen
;
1311 len_byte
-= in_charlen
;
1316 /* Unibyte buffer. */
1322 /* If we got here and the RE flag is set, it's because we're
1323 dealing with a regexp known to be trivial, so the backslash
1324 just quotes the next character. */
1325 if (RE
&& *base_pat
== '\\')
1332 TRANSLATE (translated
, trt
, c
);
1333 *pat
++ = translated
;
1337 len_byte
= pat
- patbuf
;
1338 len
= raw_pattern_size
;
1339 pat
= base_pat
= patbuf
;
1342 return boyer_moore (n
, pat
, len
, len_byte
, trt
, inverse_trt
,
1343 pos
, pos_byte
, lim
, lim_byte
,
1346 return simple_search (n
, pat
, len
, len_byte
, trt
,
1347 pos
, pos_byte
, lim
, lim_byte
);
1351 /* Do a simple string search N times for the string PAT,
1352 whose length is LEN/LEN_BYTE,
1353 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1354 TRT is the translation table.
1356 Return the character position where the match is found.
1357 Otherwise, if M matches remained to be found, return -M.
1359 This kind of search works regardless of what is in PAT and
1360 regardless of what is in TRT. It is used in cases where
1361 boyer_moore cannot work. */
1364 simple_search (n
, pat
, len
, len_byte
, trt
, pos
, pos_byte
, lim
, lim_byte
)
1372 int multibyte
= ! NILP (current_buffer
->enable_multibyte_characters
);
1373 int forward
= n
> 0;
1375 if (lim
> pos
&& multibyte
)
1380 /* Try matching at position POS. */
1382 int this_pos_byte
= pos_byte
;
1384 int this_len_byte
= len_byte
;
1385 unsigned char *p
= pat
;
1386 if (pos
+ len
> lim
)
1389 while (this_len
> 0)
1391 int charlen
, buf_charlen
;
1394 pat_ch
= STRING_CHAR_AND_LENGTH (p
, this_len_byte
, charlen
);
1395 buf_ch
= STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte
),
1396 ZV_BYTE
- this_pos_byte
,
1398 TRANSLATE (buf_ch
, trt
, buf_ch
);
1400 if (buf_ch
!= pat_ch
)
1403 this_len_byte
-= charlen
;
1407 this_pos_byte
+= buf_charlen
;
1414 pos_byte
+= len_byte
;
1418 INC_BOTH (pos
, pos_byte
);
1428 /* Try matching at position POS. */
1431 unsigned char *p
= pat
;
1433 if (pos
+ len
> lim
)
1436 while (this_len
> 0)
1439 int buf_ch
= FETCH_BYTE (this_pos
);
1440 TRANSLATE (buf_ch
, trt
, buf_ch
);
1442 if (buf_ch
!= pat_ch
)
1460 /* Backwards search. */
1461 else if (lim
< pos
&& multibyte
)
1466 /* Try matching at position POS. */
1467 int this_pos
= pos
- len
;
1468 int this_pos_byte
= pos_byte
- len_byte
;
1470 int this_len_byte
= len_byte
;
1471 unsigned char *p
= pat
;
1473 if (pos
- len
< lim
)
1476 while (this_len
> 0)
1478 int charlen
, buf_charlen
;
1481 pat_ch
= STRING_CHAR_AND_LENGTH (p
, this_len_byte
, charlen
);
1482 buf_ch
= STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte
),
1483 ZV_BYTE
- this_pos_byte
,
1485 TRANSLATE (buf_ch
, trt
, buf_ch
);
1487 if (buf_ch
!= pat_ch
)
1490 this_len_byte
-= charlen
;
1493 this_pos_byte
+= buf_charlen
;
1500 pos_byte
-= len_byte
;
1504 DEC_BOTH (pos
, pos_byte
);
1514 /* Try matching at position POS. */
1515 int this_pos
= pos
- len
;
1517 unsigned char *p
= pat
;
1519 if (pos
- len
< lim
)
1522 while (this_len
> 0)
1525 int buf_ch
= FETCH_BYTE (this_pos
);
1526 TRANSLATE (buf_ch
, trt
, buf_ch
);
1528 if (buf_ch
!= pat_ch
)
1550 set_search_regs ((multibyte
? pos_byte
: pos
) - len_byte
, len_byte
);
1552 set_search_regs (multibyte
? pos_byte
: pos
, len_byte
);
1562 /* Do Boyer-Moore search N times for the string BASE_PAT,
1563 whose length is LEN/LEN_BYTE,
1564 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1565 DIRECTION says which direction we search in.
1566 TRT and INVERSE_TRT are translation tables.
1567 Characters in PAT are already translated by TRT.
1569 This kind of search works if all the characters in BASE_PAT that
1570 have nontrivial translation are the same aside from the last byte.
1571 This makes it possible to translate just the last byte of a
1572 character, and do so after just a simple test of the context.
1573 CHARSET_BASE is nonzero iff there is such a non-ASCII character.
1575 If that criterion is not satisfied, do not call this function. */
1578 boyer_moore (n
, base_pat
, len
, len_byte
, trt
, inverse_trt
,
1579 pos
, pos_byte
, lim
, lim_byte
, charset_base
)
1581 unsigned char *base_pat
;
1584 Lisp_Object inverse_trt
;
1589 int direction
= ((n
> 0) ? 1 : -1);
1590 register int dirlen
;
1591 int infinity
, limit
, stride_for_teases
= 0;
1592 register int *BM_tab
;
1594 register unsigned char *cursor
, *p_limit
;
1596 unsigned char *pat
, *pat_end
;
1597 int multibyte
= ! NILP (current_buffer
->enable_multibyte_characters
);
1599 unsigned char simple_translate
[0400];
1600 /* These are set to the preceding bytes of a byte to be translated
1601 if charset_base is nonzero. As the maximum byte length of a
1602 multibyte character is 4, we have to check at most three previous
1604 int translate_prev_byte1
= 0;
1605 int translate_prev_byte2
= 0;
1606 int translate_prev_byte3
= 0;
1609 int BM_tab_space
[0400];
1610 BM_tab
= &BM_tab_space
[0];
1612 BM_tab
= (int *) alloca (0400 * sizeof (int));
1614 /* The general approach is that we are going to maintain that we know */
1615 /* the first (closest to the present position, in whatever direction */
1616 /* we're searching) character that could possibly be the last */
1617 /* (furthest from present position) character of a valid match. We */
1618 /* advance the state of our knowledge by looking at that character */
1619 /* and seeing whether it indeed matches the last character of the */
1620 /* pattern. If it does, we take a closer look. If it does not, we */
1621 /* move our pointer (to putative last characters) as far as is */
1622 /* logically possible. This amount of movement, which I call a */
1623 /* stride, will be the length of the pattern if the actual character */
1624 /* appears nowhere in the pattern, otherwise it will be the distance */
1625 /* from the last occurrence of that character to the end of the */
1627 /* As a coding trick, an enormous stride is coded into the table for */
1628 /* characters that match the last character. This allows use of only */
1629 /* a single test, a test for having gone past the end of the */
1630 /* permissible match region, to test for both possible matches (when */
1631 /* the stride goes past the end immediately) and failure to */
1632 /* match (where you get nudged past the end one stride at a time). */
1634 /* Here we make a "mickey mouse" BM table. The stride of the search */
1635 /* is determined only by the last character of the putative match. */
1636 /* If that character does not match, we will stride the proper */
1637 /* distance to propose a match that superimposes it on the last */
1638 /* instance of a character that matches it (per trt), or misses */
1639 /* it entirely if there is none. */
1641 dirlen
= len_byte
* direction
;
1642 infinity
= dirlen
- (lim_byte
+ pos_byte
+ len_byte
+ len_byte
) * direction
;
1644 /* Record position after the end of the pattern. */
1645 pat_end
= base_pat
+ len_byte
;
1646 /* BASE_PAT points to a character that we start scanning from.
1647 It is the first character in a forward search,
1648 the last character in a backward search. */
1650 base_pat
= pat_end
- 1;
1652 BM_tab_base
= BM_tab
;
1654 j
= dirlen
; /* to get it in a register */
1655 /* A character that does not appear in the pattern induces a */
1656 /* stride equal to the pattern length. */
1657 while (BM_tab_base
!= BM_tab
)
1665 /* We use this for translation, instead of TRT itself.
1666 We fill this in to handle the characters that actually
1667 occur in the pattern. Others don't matter anyway! */
1668 bzero (simple_translate
, sizeof simple_translate
);
1669 for (i
= 0; i
< 0400; i
++)
1670 simple_translate
[i
] = i
;
1674 /* Setup translate_prev_byte1/2/3 from CHARSET_BASE. Only a
1675 byte following them are the target of translation. */
1676 int sample_char
= charset_base
| 0x20;
1677 unsigned char str
[MAX_MULTIBYTE_LENGTH
];
1678 int len
= CHAR_STRING (sample_char
, str
);
1680 translate_prev_byte1
= str
[len
- 2];
1683 translate_prev_byte2
= str
[len
- 3];
1685 translate_prev_byte3
= str
[len
- 4];
1690 while (i
!= infinity
)
1692 unsigned char *ptr
= base_pat
+ i
;
1698 /* If the byte currently looking at is the last of a
1699 character to check case-equivalents, set CH to that
1700 character. An ASCII character and a non-ASCII character
1701 matching with CHARSET_BASE are to be checked. */
1704 if (ASCII_BYTE_P (*ptr
) || ! multibyte
)
1706 else if (charset_base
1707 && ((pat_end
- ptr
) == 1 || CHAR_HEAD_P (ptr
[1])))
1709 unsigned char *charstart
= ptr
- 1;
1711 while (! (CHAR_HEAD_P (*charstart
)))
1713 ch
= STRING_CHAR (charstart
, ptr
- charstart
+ 1);
1714 if (charset_base
!= (ch
& ~CHAR_FIELD3_MASK
))
1719 j
= ((unsigned char) ch
) | 0200;
1724 stride_for_teases
= BM_tab
[j
];
1726 BM_tab
[j
] = dirlen
- i
;
1727 /* A translation table is accompanied by its inverse -- see */
1728 /* comment following downcase_table for details */
1731 int starting_ch
= ch
;
1736 TRANSLATE (ch
, inverse_trt
, ch
);
1738 j
= ((unsigned char) ch
) | 0200;
1740 j
= (unsigned char) ch
;
1742 /* For all the characters that map into CH,
1743 set up simple_translate to map the last byte
1745 simple_translate
[j
] = starting_j
;
1746 if (ch
== starting_ch
)
1748 BM_tab
[j
] = dirlen
- i
;
1757 stride_for_teases
= BM_tab
[j
];
1758 BM_tab
[j
] = dirlen
- i
;
1760 /* stride_for_teases tells how much to stride if we get a */
1761 /* match on the far character but are subsequently */
1762 /* disappointed, by recording what the stride would have been */
1763 /* for that character if the last character had been */
1766 infinity
= dirlen
- infinity
;
1767 pos_byte
+= dirlen
- ((direction
> 0) ? direction
: 0);
1768 /* loop invariant - POS_BYTE points at where last char (first
1769 char if reverse) of pattern would align in a possible match. */
1773 unsigned char *tail_end_ptr
;
1775 /* It's been reported that some (broken) compiler thinks that
1776 Boolean expressions in an arithmetic context are unsigned.
1777 Using an explicit ?1:0 prevents this. */
1778 if ((lim_byte
- pos_byte
- ((direction
> 0) ? 1 : 0)) * direction
1780 return (n
* (0 - direction
));
1781 /* First we do the part we can by pointers (maybe nothing) */
1784 limit
= pos_byte
- dirlen
+ direction
;
1787 limit
= BUFFER_CEILING_OF (limit
);
1788 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1789 can take on without hitting edge of buffer or the gap. */
1790 limit
= min (limit
, pos_byte
+ 20000);
1791 limit
= min (limit
, lim_byte
- 1);
1795 limit
= BUFFER_FLOOR_OF (limit
);
1796 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1797 can take on without hitting edge of buffer or the gap. */
1798 limit
= max (limit
, pos_byte
- 20000);
1799 limit
= max (limit
, lim_byte
);
1801 tail_end
= BUFFER_CEILING_OF (pos_byte
) + 1;
1802 tail_end_ptr
= BYTE_POS_ADDR (tail_end
);
1804 if ((limit
- pos_byte
) * direction
> 20)
1808 p_limit
= BYTE_POS_ADDR (limit
);
1809 p2
= (cursor
= BYTE_POS_ADDR (pos_byte
));
1810 /* In this loop, pos + cursor - p2 is the surrogate for pos */
1811 while (1) /* use one cursor setting as long as i can */
1813 if (direction
> 0) /* worth duplicating */
1815 /* Use signed comparison if appropriate
1816 to make cursor+infinity sure to be > p_limit.
1817 Assuming that the buffer lies in a range of addresses
1818 that are all "positive" (as ints) or all "negative",
1819 either kind of comparison will work as long
1820 as we don't step by infinity. So pick the kind
1821 that works when we do step by infinity. */
1822 if ((EMACS_INT
) (p_limit
+ infinity
) > (EMACS_INT
) p_limit
)
1823 while ((EMACS_INT
) cursor
<= (EMACS_INT
) p_limit
)
1824 cursor
+= BM_tab
[*cursor
];
1826 while ((EMACS_UINT
) cursor
<= (EMACS_UINT
) p_limit
)
1827 cursor
+= BM_tab
[*cursor
];
1831 if ((EMACS_INT
) (p_limit
+ infinity
) < (EMACS_INT
) p_limit
)
1832 while ((EMACS_INT
) cursor
>= (EMACS_INT
) p_limit
)
1833 cursor
+= BM_tab
[*cursor
];
1835 while ((EMACS_UINT
) cursor
>= (EMACS_UINT
) p_limit
)
1836 cursor
+= BM_tab
[*cursor
];
1838 /* If you are here, cursor is beyond the end of the searched region. */
1839 /* This can happen if you match on the far character of the pattern, */
1840 /* because the "stride" of that character is infinity, a number able */
1841 /* to throw you well beyond the end of the search. It can also */
1842 /* happen if you fail to match within the permitted region and would */
1843 /* otherwise try a character beyond that region */
1844 if ((cursor
- p_limit
) * direction
<= len_byte
)
1845 break; /* a small overrun is genuine */
1846 cursor
-= infinity
; /* large overrun = hit */
1847 i
= dirlen
- direction
;
1850 while ((i
-= direction
) + direction
!= 0)
1853 cursor
-= direction
;
1854 /* Translate only the last byte of a character. */
1856 || ((cursor
== tail_end_ptr
1857 || CHAR_HEAD_P (cursor
[1]))
1858 && (CHAR_HEAD_P (cursor
[0])
1859 /* Check if this is the last byte of
1860 a translable character. */
1861 || (translate_prev_byte1
== cursor
[-1]
1862 && (CHAR_HEAD_P (translate_prev_byte1
)
1863 || (translate_prev_byte2
== cursor
[-2]
1864 && (CHAR_HEAD_P (translate_prev_byte2
)
1865 || (translate_prev_byte3
== cursor
[-3]))))))))
1866 ch
= simple_translate
[*cursor
];
1875 while ((i
-= direction
) + direction
!= 0)
1877 cursor
-= direction
;
1878 if (pat
[i
] != *cursor
)
1882 cursor
+= dirlen
- i
- direction
; /* fix cursor */
1883 if (i
+ direction
== 0)
1887 cursor
-= direction
;
1889 position
= pos_byte
+ cursor
- p2
+ ((direction
> 0)
1890 ? 1 - len_byte
: 0);
1891 set_search_regs (position
, len_byte
);
1893 if ((n
-= direction
) != 0)
1894 cursor
+= dirlen
; /* to resume search */
1896 return ((direction
> 0)
1897 ? search_regs
.end
[0] : search_regs
.start
[0]);
1900 cursor
+= stride_for_teases
; /* <sigh> we lose - */
1902 pos_byte
+= cursor
- p2
;
1905 /* Now we'll pick up a clump that has to be done the hard */
1906 /* way because it covers a discontinuity */
1908 limit
= ((direction
> 0)
1909 ? BUFFER_CEILING_OF (pos_byte
- dirlen
+ 1)
1910 : BUFFER_FLOOR_OF (pos_byte
- dirlen
- 1));
1911 limit
= ((direction
> 0)
1912 ? min (limit
+ len_byte
, lim_byte
- 1)
1913 : max (limit
- len_byte
, lim_byte
));
1914 /* LIMIT is now the last value POS_BYTE can have
1915 and still be valid for a possible match. */
1918 /* This loop can be coded for space rather than */
1919 /* speed because it will usually run only once. */
1920 /* (the reach is at most len + 21, and typically */
1921 /* does not exceed len) */
1922 while ((limit
- pos_byte
) * direction
>= 0)
1923 pos_byte
+= BM_tab
[FETCH_BYTE (pos_byte
)];
1924 /* now run the same tests to distinguish going off the */
1925 /* end, a match or a phony match. */
1926 if ((pos_byte
- limit
) * direction
<= len_byte
)
1927 break; /* ran off the end */
1928 /* Found what might be a match.
1929 Set POS_BYTE back to last (first if reverse) pos. */
1930 pos_byte
-= infinity
;
1931 i
= dirlen
- direction
;
1932 while ((i
-= direction
) + direction
!= 0)
1936 pos_byte
-= direction
;
1937 ptr
= BYTE_POS_ADDR (pos_byte
);
1938 /* Translate only the last byte of a character. */
1940 || ((ptr
== tail_end_ptr
1941 || CHAR_HEAD_P (ptr
[1]))
1942 && (CHAR_HEAD_P (ptr
[0])
1943 /* Check if this is the last byte of a
1944 translable character. */
1945 || (translate_prev_byte1
== ptr
[-1]
1946 && (CHAR_HEAD_P (translate_prev_byte1
)
1947 || (translate_prev_byte2
== ptr
[-2]
1948 && (CHAR_HEAD_P (translate_prev_byte2
)
1949 || translate_prev_byte3
== ptr
[-3])))))))
1950 ch
= simple_translate
[*ptr
];
1956 /* Above loop has moved POS_BYTE part or all the way
1957 back to the first pos (last pos if reverse).
1958 Set it once again at the last (first if reverse) char. */
1959 pos_byte
+= dirlen
- i
- direction
;
1960 if (i
+ direction
== 0)
1963 pos_byte
-= direction
;
1965 position
= pos_byte
+ ((direction
> 0) ? 1 - len_byte
: 0);
1967 set_search_regs (position
, len_byte
);
1969 if ((n
-= direction
) != 0)
1970 pos_byte
+= dirlen
; /* to resume search */
1972 return ((direction
> 0)
1973 ? search_regs
.end
[0] : search_regs
.start
[0]);
1976 pos_byte
+= stride_for_teases
;
1979 /* We have done one clump. Can we continue? */
1980 if ((lim_byte
- pos_byte
) * direction
< 0)
1981 return ((0 - n
) * direction
);
1983 return BYTE_TO_CHAR (pos_byte
);
1986 /* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
1987 for the overall match just found in the current buffer.
1988 Also clear out the match data for registers 1 and up. */
1991 set_search_regs (beg_byte
, nbytes
)
1992 int beg_byte
, nbytes
;
1996 /* Make sure we have registers in which to store
1997 the match position. */
1998 if (search_regs
.num_regs
== 0)
2000 search_regs
.start
= (regoff_t
*) xmalloc (2 * sizeof (regoff_t
));
2001 search_regs
.end
= (regoff_t
*) xmalloc (2 * sizeof (regoff_t
));
2002 search_regs
.num_regs
= 2;
2005 /* Clear out the other registers. */
2006 for (i
= 1; i
< search_regs
.num_regs
; i
++)
2008 search_regs
.start
[i
] = -1;
2009 search_regs
.end
[i
] = -1;
2012 search_regs
.start
[0] = BYTE_TO_CHAR (beg_byte
);
2013 search_regs
.end
[0] = BYTE_TO_CHAR (beg_byte
+ nbytes
);
2014 XSETBUFFER (last_thing_searched
, current_buffer
);
2017 /* Given a string of words separated by word delimiters,
2018 compute a regexp that matches those exact words
2019 separated by arbitrary punctuation. */
2025 register unsigned char *p
, *o
;
2026 register int i
, i_byte
, len
, punct_count
= 0, word_count
= 0;
2031 CHECK_STRING (string
);
2033 len
= SCHARS (string
);
2035 for (i
= 0, i_byte
= 0; i
< len
; )
2039 FETCH_STRING_CHAR_ADVANCE (c
, string
, i
, i_byte
);
2041 if (SYNTAX (c
) != Sword
)
2044 if (i
> 0 && SYNTAX (prev_c
) == Sword
)
2051 if (SYNTAX (prev_c
) == Sword
)
2054 return empty_string
;
2056 adjust
= - punct_count
+ 5 * (word_count
- 1) + 4;
2057 if (STRING_MULTIBYTE (string
))
2058 val
= make_uninit_multibyte_string (len
+ adjust
,
2062 val
= make_uninit_string (len
+ adjust
);
2069 for (i
= 0, i_byte
= 0; i
< len
; )
2072 int i_byte_orig
= i_byte
;
2074 FETCH_STRING_CHAR_ADVANCE (c
, string
, i
, i_byte
);
2076 if (SYNTAX (c
) == Sword
)
2078 bcopy (SDATA (string
) + i_byte_orig
, o
,
2079 i_byte
- i_byte_orig
);
2080 o
+= i_byte
- i_byte_orig
;
2082 else if (i
> 0 && SYNTAX (prev_c
) == Sword
&& --word_count
)
2100 DEFUN ("search-backward", Fsearch_backward
, Ssearch_backward
, 1, 4,
2101 "MSearch backward: ",
2102 doc
: /* Search backward from point for STRING.
2103 Set point to the beginning of the occurrence found, and return point.
2104 An optional second argument bounds the search; it is a buffer position.
2105 The match found must not extend before that position.
2106 Optional third argument, if t, means if fail just return nil (no error).
2107 If not nil and not t, position at limit of search and return nil.
2108 Optional fourth argument is repeat count--search for successive occurrences.
2110 Search case-sensitivity is determined by the value of the variable
2111 `case-fold-search', which see.
2113 See also the functions `match-beginning', `match-end' and `replace-match'. */)
2114 (string
, bound
, noerror
, count
)
2115 Lisp_Object string
, bound
, noerror
, count
;
2117 return search_command (string
, bound
, noerror
, count
, -1, 0, 0);
2120 DEFUN ("search-forward", Fsearch_forward
, Ssearch_forward
, 1, 4, "MSearch: ",
2121 doc
: /* Search forward from point for STRING.
2122 Set point to the end of the occurrence found, and return point.
2123 An optional second argument bounds the search; it is a buffer position.
2124 The match found must not extend after that position. nil is equivalent
2126 Optional third argument, if t, means if fail just return nil (no error).
2127 If not nil and not t, move to limit of search and return nil.
2128 Optional fourth argument is repeat count--search for successive occurrences.
2130 Search case-sensitivity is determined by the value of the variable
2131 `case-fold-search', which see.
2133 See also the functions `match-beginning', `match-end' and `replace-match'. */)
2134 (string
, bound
, noerror
, count
)
2135 Lisp_Object string
, bound
, noerror
, count
;
2137 return search_command (string
, bound
, noerror
, count
, 1, 0, 0);
2140 DEFUN ("word-search-backward", Fword_search_backward
, Sword_search_backward
, 1, 4,
2141 "sWord search backward: ",
2142 doc
: /* Search backward from point for STRING, ignoring differences in punctuation.
2143 Set point to the beginning of the occurrence found, and return point.
2144 An optional second argument bounds the search; it is a buffer position.
2145 The match found must not extend before that position.
2146 Optional third argument, if t, means if fail just return nil (no error).
2147 If not nil and not t, move to limit of search and return nil.
2148 Optional fourth argument is repeat count--search for successive occurrences. */)
2149 (string
, bound
, noerror
, count
)
2150 Lisp_Object string
, bound
, noerror
, count
;
2152 return search_command (wordify (string
), bound
, noerror
, count
, -1, 1, 0);
2155 DEFUN ("word-search-forward", Fword_search_forward
, Sword_search_forward
, 1, 4,
2157 doc
: /* Search forward from point for STRING, ignoring differences in punctuation.
2158 Set point to the end of the occurrence found, and return point.
2159 An optional second argument bounds the search; it is a buffer position.
2160 The match found must not extend after that position.
2161 Optional third argument, if t, means if fail just return nil (no error).
2162 If not nil and not t, move to limit of search and return nil.
2163 Optional fourth argument is repeat count--search for successive occurrences. */)
2164 (string
, bound
, noerror
, count
)
2165 Lisp_Object string
, bound
, noerror
, count
;
2167 return search_command (wordify (string
), bound
, noerror
, count
, 1, 1, 0);
2170 DEFUN ("re-search-backward", Fre_search_backward
, Sre_search_backward
, 1, 4,
2171 "sRE search backward: ",
2172 doc
: /* Search backward from point for match for regular expression REGEXP.
2173 Set point to the beginning of the match, and return point.
2174 The match found is the one starting last in the buffer
2175 and yet ending before the origin of the search.
2176 An optional second argument bounds the search; it is a buffer position.
2177 The match found must start at or after that position.
2178 Optional third argument, if t, means if fail just return nil (no error).
2179 If not nil and not t, move to limit of search and return nil.
2180 Optional fourth argument is repeat count--search for successive occurrences.
2181 See also the functions `match-beginning', `match-end', `match-string',
2182 and `replace-match'. */)
2183 (regexp
, bound
, noerror
, count
)
2184 Lisp_Object regexp
, bound
, noerror
, count
;
2186 return search_command (regexp
, bound
, noerror
, count
, -1, 1, 0);
2189 DEFUN ("re-search-forward", Fre_search_forward
, Sre_search_forward
, 1, 4,
2191 doc
: /* Search forward from point for regular expression REGEXP.
2192 Set point to the end of the occurrence found, and return point.
2193 An optional second argument bounds the search; it is a buffer position.
2194 The match found must not extend after that position.
2195 Optional third argument, if t, means if fail just return nil (no error).
2196 If not nil and not t, move to limit of search and return nil.
2197 Optional fourth argument is repeat count--search for successive occurrences.
2198 See also the functions `match-beginning', `match-end', `match-string',
2199 and `replace-match'. */)
2200 (regexp
, bound
, noerror
, count
)
2201 Lisp_Object regexp
, bound
, noerror
, count
;
2203 return search_command (regexp
, bound
, noerror
, count
, 1, 1, 0);
2206 DEFUN ("posix-search-backward", Fposix_search_backward
, Sposix_search_backward
, 1, 4,
2207 "sPosix search backward: ",
2208 doc
: /* Search backward from point for match for regular expression REGEXP.
2209 Find the longest match in accord with Posix regular expression rules.
2210 Set point to the beginning of the match, and return point.
2211 The match found is the one starting last in the buffer
2212 and yet ending before the origin of the search.
2213 An optional second argument bounds the search; it is a buffer position.
2214 The match found must start at or after that position.
2215 Optional third argument, if t, means if fail just return nil (no error).
2216 If not nil and not t, move to limit of search and return nil.
2217 Optional fourth argument is repeat count--search for successive occurrences.
2218 See also the functions `match-beginning', `match-end', `match-string',
2219 and `replace-match'. */)
2220 (regexp
, bound
, noerror
, count
)
2221 Lisp_Object regexp
, bound
, noerror
, count
;
2223 return search_command (regexp
, bound
, noerror
, count
, -1, 1, 1);
2226 DEFUN ("posix-search-forward", Fposix_search_forward
, Sposix_search_forward
, 1, 4,
2228 doc
: /* Search forward from point for regular expression REGEXP.
2229 Find the longest match in accord with Posix regular expression rules.
2230 Set point to the end of the occurrence found, and return point.
2231 An optional second argument bounds the search; it is a buffer position.
2232 The match found must not extend after that position.
2233 Optional third argument, if t, means if fail just return nil (no error).
2234 If not nil and not t, move to limit of search and return nil.
2235 Optional fourth argument is repeat count--search for successive occurrences.
2236 See also the functions `match-beginning', `match-end', `match-string',
2237 and `replace-match'. */)
2238 (regexp
, bound
, noerror
, count
)
2239 Lisp_Object regexp
, bound
, noerror
, count
;
2241 return search_command (regexp
, bound
, noerror
, count
, 1, 1, 1);
2244 DEFUN ("replace-match", Freplace_match
, Sreplace_match
, 1, 5, 0,
2245 doc
: /* Replace text matched by last search with NEWTEXT.
2246 Leave point at the end of the replacement text.
2248 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
2249 Otherwise maybe capitalize the whole text, or maybe just word initials,
2250 based on the replaced text.
2251 If the replaced text has only capital letters
2252 and has at least one multiletter word, convert NEWTEXT to all caps.
2253 Otherwise if all words are capitalized in the replaced text,
2254 capitalize each word in NEWTEXT.
2256 If third arg LITERAL is non-nil, insert NEWTEXT literally.
2257 Otherwise treat `\\' as special:
2258 `\\&' in NEWTEXT means substitute original matched text.
2259 `\\N' means substitute what matched the Nth `\\(...\\)'.
2260 If Nth parens didn't match, substitute nothing.
2261 `\\\\' means insert one `\\'.
2262 Case conversion does not apply to these substitutions.
2264 FIXEDCASE and LITERAL are optional arguments.
2266 The optional fourth argument STRING can be a string to modify.
2267 This is meaningful when the previous match was done against STRING,
2268 using `string-match'. When used this way, `replace-match'
2269 creates and returns a new string made by copying STRING and replacing
2270 the part of STRING that was matched.
2272 The optional fifth argument SUBEXP specifies a subexpression;
2273 it says to replace just that subexpression with NEWTEXT,
2274 rather than replacing the entire matched text.
2275 This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
2276 `\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
2277 NEWTEXT in place of subexp N.
2278 This is useful only after a regular expression search or match,
2279 since only regular expressions have distinguished subexpressions. */)
2280 (newtext
, fixedcase
, literal
, string
, subexp
)
2281 Lisp_Object newtext
, fixedcase
, literal
, string
, subexp
;
2283 enum { nochange
, all_caps
, cap_initial
} case_action
;
2284 register int pos
, pos_byte
;
2285 int some_multiletter_word
;
2288 int some_nonuppercase_initial
;
2289 register int c
, prevc
;
2291 int opoint
, newpoint
;
2293 CHECK_STRING (newtext
);
2295 if (! NILP (string
))
2296 CHECK_STRING (string
);
2298 case_action
= nochange
; /* We tried an initialization */
2299 /* but some C compilers blew it */
2301 if (search_regs
.num_regs
<= 0)
2302 error ("`replace-match' called before any match found");
2308 CHECK_NUMBER (subexp
);
2309 sub
= XINT (subexp
);
2310 if (sub
< 0 || sub
>= search_regs
.num_regs
)
2311 args_out_of_range (subexp
, make_number (search_regs
.num_regs
));
2316 if (search_regs
.start
[sub
] < BEGV
2317 || search_regs
.start
[sub
] > search_regs
.end
[sub
]
2318 || search_regs
.end
[sub
] > ZV
)
2319 args_out_of_range (make_number (search_regs
.start
[sub
]),
2320 make_number (search_regs
.end
[sub
]));
2324 if (search_regs
.start
[sub
] < 0
2325 || search_regs
.start
[sub
] > search_regs
.end
[sub
]
2326 || search_regs
.end
[sub
] > SCHARS (string
))
2327 args_out_of_range (make_number (search_regs
.start
[sub
]),
2328 make_number (search_regs
.end
[sub
]));
2331 if (NILP (fixedcase
))
2333 /* Decide how to casify by examining the matched text. */
2336 pos
= search_regs
.start
[sub
];
2337 last
= search_regs
.end
[sub
];
2340 pos_byte
= CHAR_TO_BYTE (pos
);
2342 pos_byte
= string_char_to_byte (string
, pos
);
2345 case_action
= all_caps
;
2347 /* some_multiletter_word is set nonzero if any original word
2348 is more than one letter long. */
2349 some_multiletter_word
= 0;
2351 some_nonuppercase_initial
= 0;
2358 c
= FETCH_CHAR (pos_byte
);
2359 INC_BOTH (pos
, pos_byte
);
2362 FETCH_STRING_CHAR_ADVANCE (c
, string
, pos
, pos_byte
);
2366 /* Cannot be all caps if any original char is lower case */
2369 if (SYNTAX (prevc
) != Sword
)
2370 some_nonuppercase_initial
= 1;
2372 some_multiletter_word
= 1;
2374 else if (UPPERCASEP (c
))
2377 if (SYNTAX (prevc
) != Sword
)
2380 some_multiletter_word
= 1;
2384 /* If the initial is a caseless word constituent,
2385 treat that like a lowercase initial. */
2386 if (SYNTAX (prevc
) != Sword
)
2387 some_nonuppercase_initial
= 1;
2393 /* Convert to all caps if the old text is all caps
2394 and has at least one multiletter word. */
2395 if (! some_lowercase
&& some_multiletter_word
)
2396 case_action
= all_caps
;
2397 /* Capitalize each word, if the old text has all capitalized words. */
2398 else if (!some_nonuppercase_initial
&& some_multiletter_word
)
2399 case_action
= cap_initial
;
2400 else if (!some_nonuppercase_initial
&& some_uppercase
)
2401 /* Should x -> yz, operating on X, give Yz or YZ?
2402 We'll assume the latter. */
2403 case_action
= all_caps
;
2405 case_action
= nochange
;
2408 /* Do replacement in a string. */
2411 Lisp_Object before
, after
;
2413 before
= Fsubstring (string
, make_number (0),
2414 make_number (search_regs
.start
[sub
]));
2415 after
= Fsubstring (string
, make_number (search_regs
.end
[sub
]), Qnil
);
2417 /* Substitute parts of the match into NEWTEXT
2422 int lastpos_byte
= 0;
2423 /* We build up the substituted string in ACCUM. */
2426 int length
= SBYTES (newtext
);
2430 for (pos_byte
= 0, pos
= 0; pos_byte
< length
;)
2434 int delbackslash
= 0;
2436 FETCH_STRING_CHAR_ADVANCE (c
, newtext
, pos
, pos_byte
);
2440 FETCH_STRING_CHAR_ADVANCE (c
, newtext
, pos
, pos_byte
);
2444 substart
= search_regs
.start
[sub
];
2445 subend
= search_regs
.end
[sub
];
2447 else if (c
>= '1' && c
<= '9')
2449 if (search_regs
.start
[c
- '0'] >= 0
2450 && c
<= search_regs
.num_regs
+ '0')
2452 substart
= search_regs
.start
[c
- '0'];
2453 subend
= search_regs
.end
[c
- '0'];
2457 /* If that subexp did not match,
2458 replace \\N with nothing. */
2466 error ("Invalid use of `\\' in replacement text");
2470 if (pos
- 2 != lastpos
)
2471 middle
= substring_both (newtext
, lastpos
,
2473 pos
- 2, pos_byte
- 2);
2476 accum
= concat3 (accum
, middle
,
2478 make_number (substart
),
2479 make_number (subend
)));
2481 lastpos_byte
= pos_byte
;
2483 else if (delbackslash
)
2485 middle
= substring_both (newtext
, lastpos
,
2487 pos
- 1, pos_byte
- 1);
2489 accum
= concat2 (accum
, middle
);
2491 lastpos_byte
= pos_byte
;
2496 middle
= substring_both (newtext
, lastpos
,
2502 newtext
= concat2 (accum
, middle
);
2505 /* Do case substitution in NEWTEXT if desired. */
2506 if (case_action
== all_caps
)
2507 newtext
= Fupcase (newtext
);
2508 else if (case_action
== cap_initial
)
2509 newtext
= Fupcase_initials (newtext
);
2511 return concat3 (before
, newtext
, after
);
2514 /* Record point, then move (quietly) to the start of the match. */
2515 if (PT
>= search_regs
.end
[sub
])
2517 else if (PT
> search_regs
.start
[sub
])
2518 opoint
= search_regs
.end
[sub
] - ZV
;
2522 /* If we want non-literal replacement,
2523 perform substitution on the replacement string. */
2526 int length
= SBYTES (newtext
);
2527 unsigned char *substed
;
2528 int substed_alloc_size
, substed_len
;
2529 int buf_multibyte
= !NILP (current_buffer
->enable_multibyte_characters
);
2530 int str_multibyte
= STRING_MULTIBYTE (newtext
);
2531 Lisp_Object rev_tbl
;
2532 int really_changed
= 0;
2534 rev_tbl
= (!buf_multibyte
&& CHAR_TABLE_P (Vnonascii_translation_table
)
2535 ? Fchar_table_extra_slot (Vnonascii_translation_table
,
2539 substed_alloc_size
= length
* 2 + 100;
2540 substed
= (unsigned char *) xmalloc (substed_alloc_size
+ 1);
2543 /* Go thru NEWTEXT, producing the actual text to insert in
2544 SUBSTED while adjusting multibyteness to that of the current
2547 for (pos_byte
= 0, pos
= 0; pos_byte
< length
;)
2549 unsigned char str
[MAX_MULTIBYTE_LENGTH
];
2550 unsigned char *add_stuff
= NULL
;
2556 FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c
, newtext
, pos
, pos_byte
);
2558 c
= multibyte_char_to_unibyte (c
, rev_tbl
);
2562 /* Note that we don't have to increment POS. */
2563 c
= SREF (newtext
, pos_byte
++);
2565 c
= unibyte_char_to_multibyte (c
);
2568 /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2569 or set IDX to a match index, which means put that part
2570 of the buffer text into SUBSTED. */
2578 FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c
, newtext
,
2580 if (!buf_multibyte
&& !SINGLE_BYTE_CHAR_P (c
))
2581 c
= multibyte_char_to_unibyte (c
, rev_tbl
);
2585 c
= SREF (newtext
, pos_byte
++);
2587 c
= unibyte_char_to_multibyte (c
);
2592 else if (c
>= '1' && c
<= '9' && c
<= search_regs
.num_regs
+ '0')
2594 if (search_regs
.start
[c
- '0'] >= 1)
2598 add_len
= 1, add_stuff
= "\\";
2602 error ("Invalid use of `\\' in replacement text");
2607 add_len
= CHAR_STRING (c
, str
);
2611 /* If we want to copy part of a previous match,
2612 set up ADD_STUFF and ADD_LEN to point to it. */
2615 int begbyte
= CHAR_TO_BYTE (search_regs
.start
[idx
]);
2616 add_len
= CHAR_TO_BYTE (search_regs
.end
[idx
]) - begbyte
;
2617 if (search_regs
.start
[idx
] < GPT
&& GPT
< search_regs
.end
[idx
])
2618 move_gap (search_regs
.start
[idx
]);
2619 add_stuff
= BYTE_POS_ADDR (begbyte
);
2622 /* Now the stuff we want to add to SUBSTED
2623 is invariably ADD_LEN bytes starting at ADD_STUFF. */
2625 /* Make sure SUBSTED is big enough. */
2626 if (substed_len
+ add_len
>= substed_alloc_size
)
2628 substed_alloc_size
= substed_len
+ add_len
+ 500;
2629 substed
= (unsigned char *) xrealloc (substed
,
2630 substed_alloc_size
+ 1);
2633 /* Now add to the end of SUBSTED. */
2636 bcopy (add_stuff
, substed
+ substed_len
, add_len
);
2637 substed_len
+= add_len
;
2645 int nchars
= multibyte_chars_in_text (substed
, substed_len
);
2647 newtext
= make_multibyte_string (substed
, nchars
, substed_len
);
2650 newtext
= make_unibyte_string (substed
, substed_len
);
2655 /* Replace the old text with the new in the cleanest possible way. */
2656 replace_range (search_regs
.start
[sub
], search_regs
.end
[sub
],
2658 newpoint
= search_regs
.start
[sub
] + SCHARS (newtext
);
2660 if (case_action
== all_caps
)
2661 Fupcase_region (make_number (search_regs
.start
[sub
]),
2662 make_number (newpoint
));
2663 else if (case_action
== cap_initial
)
2664 Fupcase_initials_region (make_number (search_regs
.start
[sub
]),
2665 make_number (newpoint
));
2667 /* Adjust search data for this change. */
2669 int oldend
= search_regs
.end
[sub
];
2670 int oldstart
= search_regs
.start
[sub
];
2671 int change
= newpoint
- search_regs
.end
[sub
];
2674 for (i
= 0; i
< search_regs
.num_regs
; i
++)
2676 if (search_regs
.start
[i
] >= oldend
)
2677 search_regs
.start
[i
] += change
;
2678 else if (search_regs
.start
[i
] > oldstart
)
2679 search_regs
.start
[i
] = oldstart
;
2680 if (search_regs
.end
[i
] >= oldend
)
2681 search_regs
.end
[i
] += change
;
2682 else if (search_regs
.end
[i
] > oldstart
)
2683 search_regs
.end
[i
] = oldstart
;
2687 /* Put point back where it was in the text. */
2689 TEMP_SET_PT (opoint
+ ZV
);
2691 TEMP_SET_PT (opoint
);
2693 /* Now move point "officially" to the start of the inserted replacement. */
2694 move_if_not_intangible (newpoint
);
2700 match_limit (num
, beginningp
)
2709 args_out_of_range (num
, make_number (0));
2710 if (search_regs
.num_regs
<= 0)
2711 error ("No match data, because no search succeeded");
2712 if (n
>= search_regs
.num_regs
2713 || search_regs
.start
[n
] < 0)
2715 return (make_number ((beginningp
) ? search_regs
.start
[n
]
2716 : search_regs
.end
[n
]));
2719 DEFUN ("match-beginning", Fmatch_beginning
, Smatch_beginning
, 1, 1, 0,
2720 doc
: /* Return position of start of text matched by last search.
2721 SUBEXP, a number, specifies which parenthesized expression in the last
2723 Value is nil if SUBEXPth pair didn't match, or there were less than
2725 Zero means the entire text matched by the whole regexp or whole string. */)
2729 return match_limit (subexp
, 1);
2732 DEFUN ("match-end", Fmatch_end
, Smatch_end
, 1, 1, 0,
2733 doc
: /* Return position of end of text matched by last search.
2734 SUBEXP, a number, specifies which parenthesized expression in the last
2736 Value is nil if SUBEXPth pair didn't match, or there were less than
2738 Zero means the entire text matched by the whole regexp or whole string. */)
2742 return match_limit (subexp
, 0);
2745 DEFUN ("match-data", Fmatch_data
, Smatch_data
, 0, 3, 0,
2746 doc
: /* Return a list containing all info on what the last search matched.
2747 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2748 All the elements are markers or nil (nil if the Nth pair didn't match)
2749 if the last match was on a buffer; integers or nil if a string was matched.
2750 Use `store-match-data' to reinstate the data in this list.
2752 If INTEGERS (the optional first argument) is non-nil, always use
2753 integers \(rather than markers) to represent buffer positions. In
2754 this case, and if the last match was in a buffer, the buffer will get
2755 stored as one additional element at the end of the list.
2757 If REUSE is a list, reuse it as part of the value. If REUSE is long
2758 enough to hold all the values, and if INTEGERS is non-nil, no consing
2761 If optional third arg RESEAT is non-nil, any previous markers on the
2762 REUSE list will be modified to point to nowhere.
2764 Return value is undefined if the last search failed. */)
2765 (integers
, reuse
, reseat
)
2766 Lisp_Object integers
, reuse
, reseat
;
2768 Lisp_Object tail
, prev
;
2773 for (tail
= reuse
; CONSP (tail
); tail
= XCDR (tail
))
2774 if (MARKERP (XCAR (tail
)))
2776 unchain_marker (XMARKER (XCAR (tail
)));
2777 XSETCAR (tail
, Qnil
);
2780 if (NILP (last_thing_searched
))
2785 data
= (Lisp_Object
*) alloca ((2 * search_regs
.num_regs
+ 1)
2786 * sizeof (Lisp_Object
));
2789 for (i
= 0; i
< search_regs
.num_regs
; i
++)
2791 int start
= search_regs
.start
[i
];
2794 if (EQ (last_thing_searched
, Qt
)
2795 || ! NILP (integers
))
2797 XSETFASTINT (data
[2 * i
], start
);
2798 XSETFASTINT (data
[2 * i
+ 1], search_regs
.end
[i
]);
2800 else if (BUFFERP (last_thing_searched
))
2802 data
[2 * i
] = Fmake_marker ();
2803 Fset_marker (data
[2 * i
],
2804 make_number (start
),
2805 last_thing_searched
);
2806 data
[2 * i
+ 1] = Fmake_marker ();
2807 Fset_marker (data
[2 * i
+ 1],
2808 make_number (search_regs
.end
[i
]),
2809 last_thing_searched
);
2812 /* last_thing_searched must always be Qt, a buffer, or Qnil. */
2818 data
[2 * i
] = data
[2 * i
+ 1] = Qnil
;
2821 if (BUFFERP (last_thing_searched
) && !NILP (integers
))
2823 data
[len
] = last_thing_searched
;
2827 /* If REUSE is not usable, cons up the values and return them. */
2828 if (! CONSP (reuse
))
2829 return Flist (len
, data
);
2831 /* If REUSE is a list, store as many value elements as will fit
2832 into the elements of REUSE. */
2833 for (i
= 0, tail
= reuse
; CONSP (tail
);
2834 i
++, tail
= XCDR (tail
))
2837 XSETCAR (tail
, data
[i
]);
2839 XSETCAR (tail
, Qnil
);
2843 /* If we couldn't fit all value elements into REUSE,
2844 cons up the rest of them and add them to the end of REUSE. */
2846 XSETCDR (prev
, Flist (len
- i
, data
+ i
));
2851 /* Internal usage only:
2852 If RESEAT is `evaporate', put the markers back on the free list
2853 immediately. No other references to the markers must exist in this case,
2854 so it is used only internally on the unwind stack and save-match-data from
2857 DEFUN ("set-match-data", Fset_match_data
, Sset_match_data
, 1, 2, 0,
2858 doc
: /* Set internal data on last search match from elements of LIST.
2859 LIST should have been created by calling `match-data' previously.
2861 If optional arg RESEAT is non-nil, make markers on LIST point nowhere. */)
2863 register Lisp_Object list
, reseat
;
2866 register Lisp_Object marker
;
2868 if (running_asynch_code
)
2869 save_search_regs ();
2873 /* Unless we find a marker with a buffer or an explicit buffer
2874 in LIST, assume that this match data came from a string. */
2875 last_thing_searched
= Qt
;
2877 /* Allocate registers if they don't already exist. */
2879 int length
= XFASTINT (Flength (list
)) / 2;
2881 if (length
> search_regs
.num_regs
)
2883 if (search_regs
.num_regs
== 0)
2886 = (regoff_t
*) xmalloc (length
* sizeof (regoff_t
));
2888 = (regoff_t
*) xmalloc (length
* sizeof (regoff_t
));
2893 = (regoff_t
*) xrealloc (search_regs
.start
,
2894 length
* sizeof (regoff_t
));
2896 = (regoff_t
*) xrealloc (search_regs
.end
,
2897 length
* sizeof (regoff_t
));
2900 for (i
= search_regs
.num_regs
; i
< length
; i
++)
2901 search_regs
.start
[i
] = -1;
2903 search_regs
.num_regs
= length
;
2906 for (i
= 0; CONSP (list
); i
++)
2908 marker
= XCAR (list
);
2909 if (BUFFERP (marker
))
2911 last_thing_searched
= marker
;
2918 search_regs
.start
[i
] = -1;
2927 if (MARKERP (marker
))
2929 if (XMARKER (marker
)->buffer
== 0)
2930 XSETFASTINT (marker
, 0);
2932 XSETBUFFER (last_thing_searched
, XMARKER (marker
)->buffer
);
2935 CHECK_NUMBER_COERCE_MARKER (marker
);
2936 from
= XINT (marker
);
2938 if (!NILP (reseat
) && MARKERP (m
))
2940 if (EQ (reseat
, Qevaporate
))
2943 unchain_marker (XMARKER (m
));
2944 XSETCAR (list
, Qnil
);
2947 if ((list
= XCDR (list
), !CONSP (list
)))
2950 m
= marker
= XCAR (list
);
2952 if (MARKERP (marker
) && XMARKER (marker
)->buffer
== 0)
2953 XSETFASTINT (marker
, 0);
2955 CHECK_NUMBER_COERCE_MARKER (marker
);
2956 search_regs
.start
[i
] = from
;
2957 search_regs
.end
[i
] = XINT (marker
);
2959 if (!NILP (reseat
) && MARKERP (m
))
2961 if (EQ (reseat
, Qevaporate
))
2964 unchain_marker (XMARKER (m
));
2965 XSETCAR (list
, Qnil
);
2971 for (; i
< search_regs
.num_regs
; i
++)
2972 search_regs
.start
[i
] = -1;
2978 /* If non-zero the match data have been saved in saved_search_regs
2979 during the execution of a sentinel or filter. */
2980 static int search_regs_saved
;
2981 static struct re_registers saved_search_regs
;
2982 static Lisp_Object saved_last_thing_searched
;
2984 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
2985 if asynchronous code (filter or sentinel) is running. */
2989 if (!search_regs_saved
)
2991 saved_search_regs
.num_regs
= search_regs
.num_regs
;
2992 saved_search_regs
.start
= search_regs
.start
;
2993 saved_search_regs
.end
= search_regs
.end
;
2994 saved_last_thing_searched
= last_thing_searched
;
2995 last_thing_searched
= Qnil
;
2996 search_regs
.num_regs
= 0;
2997 search_regs
.start
= 0;
2998 search_regs
.end
= 0;
3000 search_regs_saved
= 1;
3004 /* Called upon exit from filters and sentinels. */
3006 restore_search_regs ()
3008 if (search_regs_saved
)
3010 if (search_regs
.num_regs
> 0)
3012 xfree (search_regs
.start
);
3013 xfree (search_regs
.end
);
3015 search_regs
.num_regs
= saved_search_regs
.num_regs
;
3016 search_regs
.start
= saved_search_regs
.start
;
3017 search_regs
.end
= saved_search_regs
.end
;
3018 last_thing_searched
= saved_last_thing_searched
;
3019 saved_last_thing_searched
= Qnil
;
3020 search_regs_saved
= 0;
3025 unwind_set_match_data (list
)
3028 /* It is safe to free (evaporate) the markers immediately. */
3029 return Fset_match_data (list
, Qevaporate
);
3032 /* Called to unwind protect the match data. */
3034 record_unwind_save_match_data ()
3036 record_unwind_protect (unwind_set_match_data
,
3037 Fmatch_data (Qnil
, Qnil
, Qnil
));
3040 /* Quote a string to inactivate reg-expr chars */
3042 DEFUN ("regexp-quote", Fregexp_quote
, Sregexp_quote
, 1, 1, 0,
3043 doc
: /* Return a regexp string which matches exactly STRING and nothing else. */)
3047 register unsigned char *in
, *out
, *end
;
3048 register unsigned char *temp
;
3049 int backslashes_added
= 0;
3051 CHECK_STRING (string
);
3053 temp
= (unsigned char *) alloca (SBYTES (string
) * 2);
3055 /* Now copy the data into the new string, inserting escapes. */
3057 in
= SDATA (string
);
3058 end
= in
+ SBYTES (string
);
3061 for (; in
!= end
; in
++)
3064 || *in
== '*' || *in
== '.' || *in
== '\\'
3065 || *in
== '?' || *in
== '+'
3066 || *in
== '^' || *in
== '$')
3067 *out
++ = '\\', backslashes_added
++;
3071 return make_specified_string (temp
,
3072 SCHARS (string
) + backslashes_added
,
3074 STRING_MULTIBYTE (string
));
3082 for (i
= 0; i
< REGEXP_CACHE_SIZE
; ++i
)
3084 searchbufs
[i
].buf
.allocated
= 100;
3085 searchbufs
[i
].buf
.buffer
= (unsigned char *) xmalloc (100);
3086 searchbufs
[i
].buf
.fastmap
= searchbufs
[i
].fastmap
;
3087 searchbufs
[i
].regexp
= Qnil
;
3088 searchbufs
[i
].whitespace_regexp
= Qnil
;
3089 staticpro (&searchbufs
[i
].regexp
);
3090 staticpro (&searchbufs
[i
].whitespace_regexp
);
3091 searchbufs
[i
].next
= (i
== REGEXP_CACHE_SIZE
-1 ? 0 : &searchbufs
[i
+1]);
3093 searchbuf_head
= &searchbufs
[0];
3095 Qsearch_failed
= intern ("search-failed");
3096 staticpro (&Qsearch_failed
);
3097 Qinvalid_regexp
= intern ("invalid-regexp");
3098 staticpro (&Qinvalid_regexp
);
3100 Fput (Qsearch_failed
, Qerror_conditions
,
3101 Fcons (Qsearch_failed
, Fcons (Qerror
, Qnil
)));
3102 Fput (Qsearch_failed
, Qerror_message
,
3103 build_string ("Search failed"));
3105 Fput (Qinvalid_regexp
, Qerror_conditions
,
3106 Fcons (Qinvalid_regexp
, Fcons (Qerror
, Qnil
)));
3107 Fput (Qinvalid_regexp
, Qerror_message
,
3108 build_string ("Invalid regexp"));
3110 last_thing_searched
= Qnil
;
3111 staticpro (&last_thing_searched
);
3113 saved_last_thing_searched
= Qnil
;
3114 staticpro (&saved_last_thing_searched
);
3116 DEFVAR_LISP ("search-spaces-regexp", &Vsearch_spaces_regexp
,
3117 doc
: /* Regexp to substitute for bunches of spaces in regexp search.
3118 Some commands use this for user-specified regexps.
3119 Spaces that occur inside character classes or repetition operators
3120 or other such regexp constructs are not replaced with this.
3121 A value of nil (which is the normal value) means treat spaces literally. */);
3122 Vsearch_spaces_regexp
= Qnil
;
3124 defsubr (&Slooking_at
);
3125 defsubr (&Sposix_looking_at
);
3126 defsubr (&Sstring_match
);
3127 defsubr (&Sposix_string_match
);
3128 defsubr (&Ssearch_forward
);
3129 defsubr (&Ssearch_backward
);
3130 defsubr (&Sword_search_forward
);
3131 defsubr (&Sword_search_backward
);
3132 defsubr (&Sre_search_forward
);
3133 defsubr (&Sre_search_backward
);
3134 defsubr (&Sposix_search_forward
);
3135 defsubr (&Sposix_search_backward
);
3136 defsubr (&Sreplace_match
);
3137 defsubr (&Smatch_beginning
);
3138 defsubr (&Smatch_end
);
3139 defsubr (&Smatch_data
);
3140 defsubr (&Sset_match_data
);
3141 defsubr (&Sregexp_quote
);
3144 /* arch-tag: a6059d79-0552-4f14-a2cb-d379a4e3c78f
3145 (do not change this comment) */