1 /* String search routines for GNU Emacs.
2 Copyright (C) 1985, 1986, 1987, 1993, 1994, 1997, 1998, 1999, 2002, 2003,
3 2004, 2005, 2006 Free Software Foundation, Inc.
5 This file is part of GNU Emacs.
7 GNU Emacs is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GNU Emacs is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU Emacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20 Boston, MA 02110-1301, USA. */
29 #include "region-cache.h"
31 #include "blockinput.h"
32 #include "intervals.h"
34 #include <sys/types.h>
37 #define REGEXP_CACHE_SIZE 20
39 /* If the regexp is non-nil, then the buffer contains the compiled form
40 of that regexp, suitable for searching. */
43 struct regexp_cache
*next
;
44 Lisp_Object regexp
, whitespace_regexp
;
45 struct re_pattern_buffer buf
;
47 /* Nonzero means regexp was compiled to do full POSIX backtracking. */
51 /* The instances of that struct. */
52 struct regexp_cache searchbufs
[REGEXP_CACHE_SIZE
];
54 /* The head of the linked list; points to the most recently used buffer. */
55 struct regexp_cache
*searchbuf_head
;
58 /* Every call to re_match, etc., must pass &search_regs as the regs
59 argument unless you can show it is unnecessary (i.e., if re_match
60 is certainly going to be called again before region-around-match
63 Since the registers are now dynamically allocated, we need to make
64 sure not to refer to the Nth register before checking that it has
65 been allocated by checking search_regs.num_regs.
67 The regex code keeps track of whether it has allocated the search
68 buffer using bits in the re_pattern_buffer. This means that whenever
69 you compile a new pattern, it completely forgets whether it has
70 allocated any registers, and will allocate new registers the next
71 time you call a searching or matching function. Therefore, we need
72 to call re_set_registers after compiling a new pattern or after
73 setting the match registers, so that the regex functions will be
74 able to free or re-allocate it properly. */
75 static struct re_registers search_regs
;
77 /* The buffer in which the last search was performed, or
78 Qt if the last search was done in a string;
79 Qnil if no searching has been done yet. */
80 static Lisp_Object last_thing_searched
;
82 /* error condition signaled when regexp compile_pattern fails */
84 Lisp_Object Qinvalid_regexp
;
86 Lisp_Object Vsearch_spaces_regexp
;
88 static void set_search_regs ();
89 static void save_search_regs ();
90 static int simple_search ();
91 static int boyer_moore ();
92 static int search_buffer ();
97 error ("Stack overflow in regexp matcher");
100 /* Compile a regexp and signal a Lisp error if anything goes wrong.
101 PATTERN is the pattern to compile.
102 CP is the place to put the result.
103 TRANSLATE is a translation table for ignoring case, or nil for none.
104 REGP is the structure that says where to store the "register"
105 values that will result from matching this pattern.
106 If it is 0, we should compile the pattern not to record any
107 subexpression bounds.
108 POSIX is nonzero if we want full backtracking (POSIX style)
109 for this pattern. 0 means backtrack only enough to get a valid match.
110 MULTIBYTE is nonzero if we want to handle multibyte characters in
111 PATTERN. 0 means all multibyte characters are recognized just as
112 sequences of binary data.
114 The behavior also depends on Vsearch_spaces_regexp. */
117 compile_pattern_1 (cp
, pattern
, translate
, regp
, posix
, multibyte
)
118 struct regexp_cache
*cp
;
120 Lisp_Object translate
;
121 struct re_registers
*regp
;
125 unsigned char *raw_pattern
;
126 int raw_pattern_size
;
130 /* MULTIBYTE says whether the text to be searched is multibyte.
131 We must convert PATTERN to match that, or we will not really
132 find things right. */
134 if (multibyte
== STRING_MULTIBYTE (pattern
))
136 raw_pattern
= (unsigned char *) SDATA (pattern
);
137 raw_pattern_size
= SBYTES (pattern
);
141 raw_pattern_size
= count_size_as_multibyte (SDATA (pattern
),
143 raw_pattern
= (unsigned char *) alloca (raw_pattern_size
+ 1);
144 copy_text (SDATA (pattern
), raw_pattern
,
145 SCHARS (pattern
), 0, 1);
149 /* Converting multibyte to single-byte.
151 ??? Perhaps this conversion should be done in a special way
152 by subtracting nonascii-insert-offset from each non-ASCII char,
153 so that only the multibyte chars which really correspond to
154 the chosen single-byte character set can possibly match. */
155 raw_pattern_size
= SCHARS (pattern
);
156 raw_pattern
= (unsigned char *) alloca (raw_pattern_size
+ 1);
157 copy_text (SDATA (pattern
), raw_pattern
,
158 SBYTES (pattern
), 1, 0);
162 cp
->buf
.translate
= (! NILP (translate
) ? translate
: make_number (0));
164 cp
->buf
.multibyte
= multibyte
;
165 cp
->whitespace_regexp
= Vsearch_spaces_regexp
;
167 old
= re_set_syntax (RE_SYNTAX_EMACS
168 | (posix
? 0 : RE_NO_POSIX_BACKTRACKING
));
170 re_set_whitespace_regexp (NILP (Vsearch_spaces_regexp
) ? NULL
171 : SDATA (Vsearch_spaces_regexp
));
173 val
= (char *) re_compile_pattern ((char *)raw_pattern
,
174 raw_pattern_size
, &cp
->buf
);
176 re_set_whitespace_regexp (NULL
);
181 Fsignal (Qinvalid_regexp
, Fcons (build_string (val
), Qnil
));
183 cp
->regexp
= Fcopy_sequence (pattern
);
186 /* Shrink each compiled regexp buffer in the cache
187 to the size actually used right now.
188 This is called from garbage collection. */
191 shrink_regexp_cache ()
193 struct regexp_cache
*cp
;
195 for (cp
= searchbuf_head
; cp
!= 0; cp
= cp
->next
)
197 cp
->buf
.allocated
= cp
->buf
.used
;
199 = (unsigned char *) xrealloc (cp
->buf
.buffer
, cp
->buf
.used
);
203 /* Compile a regexp if necessary, but first check to see if there's one in
205 PATTERN is the pattern to compile.
206 TRANSLATE is a translation table for ignoring case, or nil for none.
207 REGP is the structure that says where to store the "register"
208 values that will result from matching this pattern.
209 If it is 0, we should compile the pattern not to record any
210 subexpression bounds.
211 POSIX is nonzero if we want full backtracking (POSIX style)
212 for this pattern. 0 means backtrack only enough to get a valid match. */
214 struct re_pattern_buffer
*
215 compile_pattern (pattern
, regp
, translate
, posix
, multibyte
)
217 struct re_registers
*regp
;
218 Lisp_Object translate
;
219 int posix
, multibyte
;
221 struct regexp_cache
*cp
, **cpp
;
223 for (cpp
= &searchbuf_head
; ; cpp
= &cp
->next
)
226 /* Entries are initialized to nil, and may be set to nil by
227 compile_pattern_1 if the pattern isn't valid. Don't apply
228 string accessors in those cases. However, compile_pattern_1
229 is only applied to the cache entry we pick here to reuse. So
230 nil should never appear before a non-nil entry. */
231 if (NILP (cp
->regexp
))
233 if (SCHARS (cp
->regexp
) == SCHARS (pattern
)
234 && STRING_MULTIBYTE (cp
->regexp
) == STRING_MULTIBYTE (pattern
)
235 && !NILP (Fstring_equal (cp
->regexp
, pattern
))
236 && EQ (cp
->buf
.translate
, (! NILP (translate
) ? translate
: make_number (0)))
237 && cp
->posix
== posix
238 && cp
->buf
.multibyte
== multibyte
239 && !NILP (Fequal (cp
->whitespace_regexp
, Vsearch_spaces_regexp
)))
242 /* If we're at the end of the cache, compile into the nil cell
243 we found, or the last (least recently used) cell with a
248 compile_pattern_1 (cp
, pattern
, translate
, regp
, posix
, multibyte
);
253 /* When we get here, cp (aka *cpp) contains the compiled pattern,
254 either because we found it in the cache or because we just compiled it.
255 Move it to the front of the queue to mark it as most recently used. */
257 cp
->next
= searchbuf_head
;
260 /* Advise the searching functions about the space we have allocated
261 for register data. */
263 re_set_registers (&cp
->buf
, regp
, regp
->num_regs
, regp
->start
, regp
->end
);
268 /* Error condition used for failing searches */
269 Lisp_Object Qsearch_failed
;
275 Fsignal (Qsearch_failed
, Fcons (arg
, Qnil
));
280 looking_at_1 (string
, posix
)
285 unsigned char *p1
, *p2
;
288 struct re_pattern_buffer
*bufp
;
290 if (running_asynch_code
)
293 CHECK_STRING (string
);
294 bufp
= compile_pattern (string
, &search_regs
,
295 (!NILP (current_buffer
->case_fold_search
)
296 ? current_buffer
->case_canon_table
: Qnil
),
298 !NILP (current_buffer
->enable_multibyte_characters
));
301 QUIT
; /* Do a pending quit right away, to avoid paradoxical behavior */
303 /* Get pointers and sizes of the two strings
304 that make up the visible portion of the buffer. */
307 s1
= GPT_BYTE
- BEGV_BYTE
;
309 s2
= ZV_BYTE
- GPT_BYTE
;
313 s2
= ZV_BYTE
- BEGV_BYTE
;
318 s1
= ZV_BYTE
- BEGV_BYTE
;
322 re_match_object
= Qnil
;
324 i
= re_match_2 (bufp
, (char *) p1
, s1
, (char *) p2
, s2
,
325 PT_BYTE
- BEGV_BYTE
, &search_regs
,
326 ZV_BYTE
- BEGV_BYTE
);
332 val
= (0 <= i
? Qt
: Qnil
);
334 for (i
= 0; i
< search_regs
.num_regs
; i
++)
335 if (search_regs
.start
[i
] >= 0)
338 = BYTE_TO_CHAR (search_regs
.start
[i
] + BEGV_BYTE
);
340 = BYTE_TO_CHAR (search_regs
.end
[i
] + BEGV_BYTE
);
342 XSETBUFFER (last_thing_searched
, current_buffer
);
346 DEFUN ("looking-at", Flooking_at
, Slooking_at
, 1, 1, 0,
347 doc
: /* Return t if text after point matches regular expression REGEXP.
348 This function modifies the match data that `match-beginning',
349 `match-end' and `match-data' access; save and restore the match
350 data if you want to preserve them. */)
354 return looking_at_1 (regexp
, 0);
357 DEFUN ("posix-looking-at", Fposix_looking_at
, Sposix_looking_at
, 1, 1, 0,
358 doc
: /* Return t if text after point matches regular expression REGEXP.
359 Find the longest match, in accord with Posix regular expression rules.
360 This function modifies the match data that `match-beginning',
361 `match-end' and `match-data' access; save and restore the match
362 data if you want to preserve them. */)
366 return looking_at_1 (regexp
, 1);
370 string_match_1 (regexp
, string
, start
, posix
)
371 Lisp_Object regexp
, string
, start
;
375 struct re_pattern_buffer
*bufp
;
379 if (running_asynch_code
)
382 CHECK_STRING (regexp
);
383 CHECK_STRING (string
);
386 pos
= 0, pos_byte
= 0;
389 int len
= SCHARS (string
);
391 CHECK_NUMBER (start
);
393 if (pos
< 0 && -pos
<= len
)
395 else if (0 > pos
|| pos
> len
)
396 args_out_of_range (string
, start
);
397 pos_byte
= string_char_to_byte (string
, pos
);
400 bufp
= compile_pattern (regexp
, &search_regs
,
401 (!NILP (current_buffer
->case_fold_search
)
402 ? current_buffer
->case_canon_table
: Qnil
),
404 STRING_MULTIBYTE (string
));
406 re_match_object
= string
;
408 val
= re_search (bufp
, (char *) SDATA (string
),
409 SBYTES (string
), pos_byte
,
410 SBYTES (string
) - pos_byte
,
413 last_thing_searched
= Qt
;
416 if (val
< 0) return Qnil
;
418 for (i
= 0; i
< search_regs
.num_regs
; i
++)
419 if (search_regs
.start
[i
] >= 0)
422 = string_byte_to_char (string
, search_regs
.start
[i
]);
424 = string_byte_to_char (string
, search_regs
.end
[i
]);
427 return make_number (string_byte_to_char (string
, val
));
430 DEFUN ("string-match", Fstring_match
, Sstring_match
, 2, 3, 0,
431 doc
: /* Return index of start of first match for REGEXP in STRING, or nil.
432 Matching ignores case if `case-fold-search' is non-nil.
433 If third arg START is non-nil, start search at that index in STRING.
434 For index of first char beyond the match, do (match-end 0).
435 `match-end' and `match-beginning' also give indices of substrings
436 matched by parenthesis constructs in the pattern.
438 You can use the function `match-string' to extract the substrings
439 matched by the parenthesis constructions in REGEXP. */)
440 (regexp
, string
, start
)
441 Lisp_Object regexp
, string
, start
;
443 return string_match_1 (regexp
, string
, start
, 0);
446 DEFUN ("posix-string-match", Fposix_string_match
, Sposix_string_match
, 2, 3, 0,
447 doc
: /* Return index of start of first match for REGEXP in STRING, or nil.
448 Find the longest match, in accord with Posix regular expression rules.
449 Case is ignored if `case-fold-search' is non-nil in the current buffer.
450 If third arg START is non-nil, start search at that index in STRING.
451 For index of first char beyond the match, do (match-end 0).
452 `match-end' and `match-beginning' also give indices of substrings
453 matched by parenthesis constructs in the pattern. */)
454 (regexp
, string
, start
)
455 Lisp_Object regexp
, string
, start
;
457 return string_match_1 (regexp
, string
, start
, 1);
460 /* Match REGEXP against STRING, searching all of STRING,
461 and return the index of the match, or negative on failure.
462 This does not clobber the match data. */
465 fast_string_match (regexp
, string
)
466 Lisp_Object regexp
, string
;
469 struct re_pattern_buffer
*bufp
;
471 bufp
= compile_pattern (regexp
, 0, Qnil
,
472 0, STRING_MULTIBYTE (string
));
474 re_match_object
= string
;
476 val
= re_search (bufp
, (char *) SDATA (string
),
483 /* Match REGEXP against STRING, searching all of STRING ignoring case,
484 and return the index of the match, or negative on failure.
485 This does not clobber the match data.
486 We assume that STRING contains single-byte characters. */
488 extern Lisp_Object Vascii_downcase_table
;
491 fast_c_string_match_ignore_case (regexp
, string
)
496 struct re_pattern_buffer
*bufp
;
497 int len
= strlen (string
);
499 regexp
= string_make_unibyte (regexp
);
500 re_match_object
= Qt
;
501 bufp
= compile_pattern (regexp
, 0,
502 Vascii_canon_table
, 0,
505 val
= re_search (bufp
, string
, len
, 0, len
, 0);
510 /* Like fast_string_match but ignore case. */
513 fast_string_match_ignore_case (regexp
, string
)
514 Lisp_Object regexp
, string
;
517 struct re_pattern_buffer
*bufp
;
519 bufp
= compile_pattern (regexp
, 0, Vascii_canon_table
,
520 0, STRING_MULTIBYTE (string
));
522 re_match_object
= string
;
524 val
= re_search (bufp
, (char *) SDATA (string
),
531 /* The newline cache: remembering which sections of text have no newlines. */
533 /* If the user has requested newline caching, make sure it's on.
534 Otherwise, make sure it's off.
535 This is our cheesy way of associating an action with the change of
536 state of a buffer-local variable. */
538 newline_cache_on_off (buf
)
541 if (NILP (buf
->cache_long_line_scans
))
543 /* It should be off. */
544 if (buf
->newline_cache
)
546 free_region_cache (buf
->newline_cache
);
547 buf
->newline_cache
= 0;
552 /* It should be on. */
553 if (buf
->newline_cache
== 0)
554 buf
->newline_cache
= new_region_cache ();
559 /* Search for COUNT instances of the character TARGET between START and END.
561 If COUNT is positive, search forwards; END must be >= START.
562 If COUNT is negative, search backwards for the -COUNTth instance;
563 END must be <= START.
564 If COUNT is zero, do anything you please; run rogue, for all I care.
566 If END is zero, use BEGV or ZV instead, as appropriate for the
567 direction indicated by COUNT.
569 If we find COUNT instances, set *SHORTAGE to zero, and return the
570 position past the COUNTth match. Note that for reverse motion
571 this is not the same as the usual convention for Emacs motion commands.
573 If we don't find COUNT instances before reaching END, set *SHORTAGE
574 to the number of TARGETs left unfound, and return END.
576 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
577 except when inside redisplay. */
580 scan_buffer (target
, start
, end
, count
, shortage
, allow_quit
)
587 struct region_cache
*newline_cache
;
598 if (! end
) end
= BEGV
;
601 newline_cache_on_off (current_buffer
);
602 newline_cache
= current_buffer
->newline_cache
;
607 immediate_quit
= allow_quit
;
612 /* Our innermost scanning loop is very simple; it doesn't know
613 about gaps, buffer ends, or the newline cache. ceiling is
614 the position of the last character before the next such
615 obstacle --- the last character the dumb search loop should
617 int ceiling_byte
= CHAR_TO_BYTE (end
) - 1;
618 int start_byte
= CHAR_TO_BYTE (start
);
621 /* If we're looking for a newline, consult the newline cache
622 to see where we can avoid some scanning. */
623 if (target
== '\n' && newline_cache
)
627 while (region_cache_forward
628 (current_buffer
, newline_cache
, start_byte
, &next_change
))
629 start_byte
= next_change
;
630 immediate_quit
= allow_quit
;
632 /* START should never be after END. */
633 if (start_byte
> ceiling_byte
)
634 start_byte
= ceiling_byte
;
636 /* Now the text after start is an unknown region, and
637 next_change is the position of the next known region. */
638 ceiling_byte
= min (next_change
- 1, ceiling_byte
);
641 /* The dumb loop can only scan text stored in contiguous
642 bytes. BUFFER_CEILING_OF returns the last character
643 position that is contiguous, so the ceiling is the
644 position after that. */
645 tem
= BUFFER_CEILING_OF (start_byte
);
646 ceiling_byte
= min (tem
, ceiling_byte
);
649 /* The termination address of the dumb loop. */
650 register unsigned char *ceiling_addr
651 = BYTE_POS_ADDR (ceiling_byte
) + 1;
652 register unsigned char *cursor
653 = BYTE_POS_ADDR (start_byte
);
654 unsigned char *base
= cursor
;
656 while (cursor
< ceiling_addr
)
658 unsigned char *scan_start
= cursor
;
661 while (*cursor
!= target
&& ++cursor
< ceiling_addr
)
664 /* If we're looking for newlines, cache the fact that
665 the region from start to cursor is free of them. */
666 if (target
== '\n' && newline_cache
)
667 know_region_cache (current_buffer
, newline_cache
,
668 start_byte
+ scan_start
- base
,
669 start_byte
+ cursor
- base
);
671 /* Did we find the target character? */
672 if (cursor
< ceiling_addr
)
677 return BYTE_TO_CHAR (start_byte
+ cursor
- base
+ 1);
683 start
= BYTE_TO_CHAR (start_byte
+ cursor
- base
);
689 /* The last character to check before the next obstacle. */
690 int ceiling_byte
= CHAR_TO_BYTE (end
);
691 int start_byte
= CHAR_TO_BYTE (start
);
694 /* Consult the newline cache, if appropriate. */
695 if (target
== '\n' && newline_cache
)
699 while (region_cache_backward
700 (current_buffer
, newline_cache
, start_byte
, &next_change
))
701 start_byte
= next_change
;
702 immediate_quit
= allow_quit
;
704 /* Start should never be at or before end. */
705 if (start_byte
<= ceiling_byte
)
706 start_byte
= ceiling_byte
+ 1;
708 /* Now the text before start is an unknown region, and
709 next_change is the position of the next known region. */
710 ceiling_byte
= max (next_change
, ceiling_byte
);
713 /* Stop scanning before the gap. */
714 tem
= BUFFER_FLOOR_OF (start_byte
- 1);
715 ceiling_byte
= max (tem
, ceiling_byte
);
718 /* The termination address of the dumb loop. */
719 register unsigned char *ceiling_addr
= BYTE_POS_ADDR (ceiling_byte
);
720 register unsigned char *cursor
= BYTE_POS_ADDR (start_byte
- 1);
721 unsigned char *base
= cursor
;
723 while (cursor
>= ceiling_addr
)
725 unsigned char *scan_start
= cursor
;
727 while (*cursor
!= target
&& --cursor
>= ceiling_addr
)
730 /* If we're looking for newlines, cache the fact that
731 the region from after the cursor to start is free of them. */
732 if (target
== '\n' && newline_cache
)
733 know_region_cache (current_buffer
, newline_cache
,
734 start_byte
+ cursor
- base
,
735 start_byte
+ scan_start
- base
);
737 /* Did we find the target character? */
738 if (cursor
>= ceiling_addr
)
743 return BYTE_TO_CHAR (start_byte
+ cursor
- base
);
749 start
= BYTE_TO_CHAR (start_byte
+ cursor
- base
);
755 *shortage
= count
* direction
;
759 /* Search for COUNT instances of a line boundary, which means either a
760 newline or (if selective display enabled) a carriage return.
761 Start at START. If COUNT is negative, search backwards.
763 We report the resulting position by calling TEMP_SET_PT_BOTH.
765 If we find COUNT instances, we position after (always after,
766 even if scanning backwards) the COUNTth match, and return 0.
768 If we don't find COUNT instances before reaching the end of the
769 buffer (or the beginning, if scanning backwards), we return
770 the number of line boundaries left unfound, and position at
771 the limit we bumped up against.
773 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
774 except in special cases. */
777 scan_newline (start
, start_byte
, limit
, limit_byte
, count
, allow_quit
)
778 int start
, start_byte
;
779 int limit
, limit_byte
;
783 int direction
= ((count
> 0) ? 1 : -1);
785 register unsigned char *cursor
;
788 register int ceiling
;
789 register unsigned char *ceiling_addr
;
791 int old_immediate_quit
= immediate_quit
;
793 /* The code that follows is like scan_buffer
794 but checks for either newline or carriage return. */
799 start_byte
= CHAR_TO_BYTE (start
);
803 while (start_byte
< limit_byte
)
805 ceiling
= BUFFER_CEILING_OF (start_byte
);
806 ceiling
= min (limit_byte
- 1, ceiling
);
807 ceiling_addr
= BYTE_POS_ADDR (ceiling
) + 1;
808 base
= (cursor
= BYTE_POS_ADDR (start_byte
));
811 while (*cursor
!= '\n' && ++cursor
!= ceiling_addr
)
814 if (cursor
!= ceiling_addr
)
818 immediate_quit
= old_immediate_quit
;
819 start_byte
= start_byte
+ cursor
- base
+ 1;
820 start
= BYTE_TO_CHAR (start_byte
);
821 TEMP_SET_PT_BOTH (start
, start_byte
);
825 if (++cursor
== ceiling_addr
)
831 start_byte
+= cursor
- base
;
836 while (start_byte
> limit_byte
)
838 ceiling
= BUFFER_FLOOR_OF (start_byte
- 1);
839 ceiling
= max (limit_byte
, ceiling
);
840 ceiling_addr
= BYTE_POS_ADDR (ceiling
) - 1;
841 base
= (cursor
= BYTE_POS_ADDR (start_byte
- 1) + 1);
844 while (--cursor
!= ceiling_addr
&& *cursor
!= '\n')
847 if (cursor
!= ceiling_addr
)
851 immediate_quit
= old_immediate_quit
;
852 /* Return the position AFTER the match we found. */
853 start_byte
= start_byte
+ cursor
- base
+ 1;
854 start
= BYTE_TO_CHAR (start_byte
);
855 TEMP_SET_PT_BOTH (start
, start_byte
);
862 /* Here we add 1 to compensate for the last decrement
863 of CURSOR, which took it past the valid range. */
864 start_byte
+= cursor
- base
+ 1;
868 TEMP_SET_PT_BOTH (limit
, limit_byte
);
869 immediate_quit
= old_immediate_quit
;
871 return count
* direction
;
875 find_next_newline_no_quit (from
, cnt
)
876 register int from
, cnt
;
878 return scan_buffer ('\n', from
, 0, cnt
, (int *) 0, 0);
881 /* Like find_next_newline, but returns position before the newline,
882 not after, and only search up to TO. This isn't just
883 find_next_newline (...)-1, because you might hit TO. */
886 find_before_next_newline (from
, to
, cnt
)
890 int pos
= scan_buffer ('\n', from
, to
, cnt
, &shortage
, 1);
898 /* Subroutines of Lisp buffer search functions. */
901 search_command (string
, bound
, noerror
, count
, direction
, RE
, posix
)
902 Lisp_Object string
, bound
, noerror
, count
;
913 CHECK_NUMBER (count
);
917 CHECK_STRING (string
);
921 lim
= ZV
, lim_byte
= ZV_BYTE
;
923 lim
= BEGV
, lim_byte
= BEGV_BYTE
;
927 CHECK_NUMBER_COERCE_MARKER (bound
);
929 if (n
> 0 ? lim
< PT
: lim
> PT
)
930 error ("Invalid search bound (wrong side of point)");
932 lim
= ZV
, lim_byte
= ZV_BYTE
;
934 lim
= BEGV
, lim_byte
= BEGV_BYTE
;
936 lim_byte
= CHAR_TO_BYTE (lim
);
939 np
= search_buffer (string
, PT
, PT_BYTE
, lim
, lim_byte
, n
, RE
,
940 (!NILP (current_buffer
->case_fold_search
)
941 ? current_buffer
->case_canon_table
943 (!NILP (current_buffer
->case_fold_search
)
944 ? current_buffer
->case_eqv_table
950 return signal_failure (string
);
951 if (!EQ (noerror
, Qt
))
953 if (lim
< BEGV
|| lim
> ZV
)
955 SET_PT_BOTH (lim
, lim_byte
);
957 #if 0 /* This would be clean, but maybe programs depend on
958 a value of nil here. */
966 if (np
< BEGV
|| np
> ZV
)
971 return make_number (np
);
974 /* Return 1 if REGEXP matches just one constant string. */
977 trivial_regexp_p (regexp
)
980 int len
= SBYTES (regexp
);
981 unsigned char *s
= SDATA (regexp
);
986 case '.': case '*': case '+': case '?': case '[': case '^': case '$':
993 case '|': case '(': case ')': case '`': case '\'': case 'b':
994 case 'B': case '<': case '>': case 'w': case 'W': case 's':
995 case 'S': case '=': case '{': case '}': case '_':
996 case 'c': case 'C': /* for categoryspec and notcategoryspec */
997 case '1': case '2': case '3': case '4': case '5':
998 case '6': case '7': case '8': case '9':
1006 /* Search for the n'th occurrence of STRING in the current buffer,
1007 starting at position POS and stopping at position LIM,
1008 treating STRING as a literal string if RE is false or as
1009 a regular expression if RE is true.
1011 If N is positive, searching is forward and LIM must be greater than POS.
1012 If N is negative, searching is backward and LIM must be less than POS.
1014 Returns -x if x occurrences remain to be found (x > 0),
1015 or else the position at the beginning of the Nth occurrence
1016 (if searching backward) or the end (if searching forward).
1018 POSIX is nonzero if we want full backtracking (POSIX style)
1019 for this pattern. 0 means backtrack only enough to get a valid match. */
1021 #define TRANSLATE(out, trt, d) \
1027 temp = Faref (trt, make_number (d)); \
1028 if (INTEGERP (temp)) \
1029 out = XINT (temp); \
1039 search_buffer (string
, pos
, pos_byte
, lim
, lim_byte
, n
,
1040 RE
, trt
, inverse_trt
, posix
)
1049 Lisp_Object inverse_trt
;
1052 int len
= SCHARS (string
);
1053 int len_byte
= SBYTES (string
);
1056 if (running_asynch_code
)
1057 save_search_regs ();
1059 /* Searching 0 times means don't move. */
1060 /* Null string is found at starting position. */
1061 if (len
== 0 || n
== 0)
1063 set_search_regs (pos_byte
, 0);
1067 if (RE
&& !(trivial_regexp_p (string
) && NILP (Vsearch_spaces_regexp
)))
1069 unsigned char *p1
, *p2
;
1071 struct re_pattern_buffer
*bufp
;
1073 bufp
= compile_pattern (string
, &search_regs
, trt
, posix
,
1074 !NILP (current_buffer
->enable_multibyte_characters
));
1076 immediate_quit
= 1; /* Quit immediately if user types ^G,
1077 because letting this function finish
1078 can take too long. */
1079 QUIT
; /* Do a pending quit right away,
1080 to avoid paradoxical behavior */
1081 /* Get pointers and sizes of the two strings
1082 that make up the visible portion of the buffer. */
1085 s1
= GPT_BYTE
- BEGV_BYTE
;
1087 s2
= ZV_BYTE
- GPT_BYTE
;
1091 s2
= ZV_BYTE
- BEGV_BYTE
;
1096 s1
= ZV_BYTE
- BEGV_BYTE
;
1099 re_match_object
= Qnil
;
1104 val
= re_search_2 (bufp
, (char *) p1
, s1
, (char *) p2
, s2
,
1105 pos_byte
- BEGV_BYTE
, lim_byte
- pos_byte
,
1107 /* Don't allow match past current point */
1108 pos_byte
- BEGV_BYTE
);
1111 matcher_overflow ();
1115 pos_byte
= search_regs
.start
[0] + BEGV_BYTE
;
1116 for (i
= 0; i
< search_regs
.num_regs
; i
++)
1117 if (search_regs
.start
[i
] >= 0)
1119 search_regs
.start
[i
]
1120 = BYTE_TO_CHAR (search_regs
.start
[i
] + BEGV_BYTE
);
1122 = BYTE_TO_CHAR (search_regs
.end
[i
] + BEGV_BYTE
);
1124 XSETBUFFER (last_thing_searched
, current_buffer
);
1125 /* Set pos to the new position. */
1126 pos
= search_regs
.start
[0];
1138 val
= re_search_2 (bufp
, (char *) p1
, s1
, (char *) p2
, s2
,
1139 pos_byte
- BEGV_BYTE
, lim_byte
- pos_byte
,
1141 lim_byte
- BEGV_BYTE
);
1144 matcher_overflow ();
1148 pos_byte
= search_regs
.end
[0] + BEGV_BYTE
;
1149 for (i
= 0; i
< search_regs
.num_regs
; i
++)
1150 if (search_regs
.start
[i
] >= 0)
1152 search_regs
.start
[i
]
1153 = BYTE_TO_CHAR (search_regs
.start
[i
] + BEGV_BYTE
);
1155 = BYTE_TO_CHAR (search_regs
.end
[i
] + BEGV_BYTE
);
1157 XSETBUFFER (last_thing_searched
, current_buffer
);
1158 pos
= search_regs
.end
[0];
1170 else /* non-RE case */
1172 unsigned char *raw_pattern
, *pat
;
1173 int raw_pattern_size
;
1174 int raw_pattern_size_byte
;
1175 unsigned char *patbuf
;
1176 int multibyte
= !NILP (current_buffer
->enable_multibyte_characters
);
1177 unsigned char *base_pat
;
1178 /* Set to positive if we find a non-ASCII char that needs
1179 translation. Otherwise set to zero later. */
1180 int charset_base
= -1;
1181 int boyer_moore_ok
= 1;
1183 /* MULTIBYTE says whether the text to be searched is multibyte.
1184 We must convert PATTERN to match that, or we will not really
1185 find things right. */
1187 if (multibyte
== STRING_MULTIBYTE (string
))
1189 raw_pattern
= (unsigned char *) SDATA (string
);
1190 raw_pattern_size
= SCHARS (string
);
1191 raw_pattern_size_byte
= SBYTES (string
);
1195 raw_pattern_size
= SCHARS (string
);
1196 raw_pattern_size_byte
1197 = count_size_as_multibyte (SDATA (string
),
1199 raw_pattern
= (unsigned char *) alloca (raw_pattern_size_byte
+ 1);
1200 copy_text (SDATA (string
), raw_pattern
,
1201 SCHARS (string
), 0, 1);
1205 /* Converting multibyte to single-byte.
1207 ??? Perhaps this conversion should be done in a special way
1208 by subtracting nonascii-insert-offset from each non-ASCII char,
1209 so that only the multibyte chars which really correspond to
1210 the chosen single-byte character set can possibly match. */
1211 raw_pattern_size
= SCHARS (string
);
1212 raw_pattern_size_byte
= SCHARS (string
);
1213 raw_pattern
= (unsigned char *) alloca (raw_pattern_size
+ 1);
1214 copy_text (SDATA (string
), raw_pattern
,
1215 SBYTES (string
), 1, 0);
1218 /* Copy and optionally translate the pattern. */
1219 len
= raw_pattern_size
;
1220 len_byte
= raw_pattern_size_byte
;
1221 patbuf
= (unsigned char *) alloca (len_byte
);
1223 base_pat
= raw_pattern
;
1226 /* Fill patbuf with translated characters in STRING while
1227 checking if we can use boyer-moore search. If TRT is
1228 non-nil, we can use boyer-moore search only if TRT can be
1229 represented by the byte array of 256 elements. For that,
1230 all non-ASCII case-equivalents of all case-sensitive
1231 characters in STRING must belong to the same charset and
1236 unsigned char str_base
[MAX_MULTIBYTE_LENGTH
], *str
;
1237 int c
, translated
, inverse
;
1238 int in_charlen
, charlen
;
1240 /* If we got here and the RE flag is set, it's because we're
1241 dealing with a regexp known to be trivial, so the backslash
1242 just quotes the next character. */
1243 if (RE
&& *base_pat
== '\\')
1251 c
= STRING_CHAR_AND_LENGTH (base_pat
, len_byte
, in_charlen
);
1256 charlen
= in_charlen
;
1260 /* Translate the character. */
1261 TRANSLATE (translated
, trt
, c
);
1262 charlen
= CHAR_STRING (translated
, str_base
);
1265 /* Check if C has any other case-equivalents. */
1266 TRANSLATE (inverse
, inverse_trt
, c
);
1267 /* If so, check if we can use boyer-moore. */
1268 if (c
!= inverse
&& boyer_moore_ok
)
1270 /* Check if all equivalents belong to the same
1271 charset & row. Note that the check of C
1272 itself is done by the last iteration. Note
1273 also that we don't have to check ASCII
1274 characters because boyer-moore search can
1275 always handle their translation. */
1278 if (ASCII_BYTE_P (inverse
))
1280 if (charset_base
> 0)
1287 else if (SINGLE_BYTE_CHAR_P (inverse
))
1289 /* Boyer-moore search can't handle a
1290 translation of an eight-bit
1295 else if (charset_base
< 0)
1296 charset_base
= inverse
& ~CHAR_FIELD3_MASK
;
1297 else if ((inverse
& ~CHAR_FIELD3_MASK
)
1305 TRANSLATE (inverse
, inverse_trt
, inverse
);
1309 if (charset_base
< 0)
1312 /* Store this character into the translated pattern. */
1313 bcopy (str
, pat
, charlen
);
1315 base_pat
+= in_charlen
;
1316 len_byte
-= in_charlen
;
1321 /* Unibyte buffer. */
1327 /* If we got here and the RE flag is set, it's because we're
1328 dealing with a regexp known to be trivial, so the backslash
1329 just quotes the next character. */
1330 if (RE
&& *base_pat
== '\\')
1337 TRANSLATE (translated
, trt
, c
);
1338 *pat
++ = translated
;
1342 len_byte
= pat
- patbuf
;
1343 len
= raw_pattern_size
;
1344 pat
= base_pat
= patbuf
;
1347 return boyer_moore (n
, pat
, len
, len_byte
, trt
, inverse_trt
,
1348 pos
, pos_byte
, lim
, lim_byte
,
1351 return simple_search (n
, pat
, len
, len_byte
, trt
,
1352 pos
, pos_byte
, lim
, lim_byte
);
1356 /* Do a simple string search N times for the string PAT,
1357 whose length is LEN/LEN_BYTE,
1358 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1359 TRT is the translation table.
1361 Return the character position where the match is found.
1362 Otherwise, if M matches remained to be found, return -M.
1364 This kind of search works regardless of what is in PAT and
1365 regardless of what is in TRT. It is used in cases where
1366 boyer_moore cannot work. */
/* The sign of N selects the direction (FORWARD is N > 0).  Buffer
   characters are run through TRT with TRANSLATE before being compared
   with the pattern characters; the pattern itself is compared as
   given.  The match bounds are recorded with set_search_regs near the
   end of the function.  */
1369 simple_search (n
, pat
, len
, len_byte
, trt
, pos
, pos_byte
, lim
, lim_byte
)
/* Nonzero when the current buffer has multibyte characters enabled.  */
1377 int multibyte
= ! NILP (current_buffer
->enable_multibyte_characters
);
/* Nonzero for a forward search.  */
1378 int forward
= n
> 0;
/* Forward search in a multibyte buffer: compare character by character.  */
1380 if (lim
> pos
&& multibyte
)
1385 /* Try matching at position POS. */
1387 int this_pos_byte
= pos_byte
;
1389 int this_len_byte
= len_byte
;
1390 unsigned char *p
= pat
;
1391 if (pos
+ len
> lim
)
1394 while (this_len
> 0)
1396 int charlen
, buf_charlen
;
/* Decode the next pattern character (CHARLEN gets its byte length)
   and the next buffer character.  */
1399 pat_ch
= STRING_CHAR_AND_LENGTH (p
, this_len_byte
, charlen
);
1400 buf_ch
= STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte
),
1401 ZV_BYTE
- this_pos_byte
,
/* Only the buffer character is translated here.  */
1403 TRANSLATE (buf_ch
, trt
, buf_ch
);
1405 if (buf_ch
!= pat_ch
)
1408 this_len_byte
-= charlen
;
1412 this_pos_byte
+= buf_charlen
;
1419 pos_byte
+= len_byte
;
1423 INC_BOTH (pos
, pos_byte
);
1433 /* Try matching at position POS. */
1436 unsigned char *p
= pat
;
1438 if (pos
+ len
> lim
)
1441 while (this_len
> 0)
/* Byte-wise comparison: the buffer byte is fetched with FETCH_BYTE
   and translated before the test.  */
1444 int buf_ch
= FETCH_BYTE (this_pos
);
1445 TRANSLATE (buf_ch
, trt
, buf_ch
);
1447 if (buf_ch
!= pat_ch
)
1465 /* Backwards search. */
1466 else if (lim
< pos
&& multibyte
)
1471 /* Try matching at position POS. */
/* A backward candidate ends at POS, so it begins LEN characters
   (LEN_BYTE bytes) earlier.  */
1472 int this_pos
= pos
- len
;
1473 int this_pos_byte
= pos_byte
- len_byte
;
1475 int this_len_byte
= len_byte
;
1476 unsigned char *p
= pat
;
1478 if (pos
- len
< lim
)
1481 while (this_len
> 0)
1483 int charlen
, buf_charlen
;
1486 pat_ch
= STRING_CHAR_AND_LENGTH (p
, this_len_byte
, charlen
);
1487 buf_ch
= STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte
),
1488 ZV_BYTE
- this_pos_byte
,
1490 TRANSLATE (buf_ch
, trt
, buf_ch
);
1492 if (buf_ch
!= pat_ch
)
1495 this_len_byte
-= charlen
;
1498 this_pos_byte
+= buf_charlen
;
1505 pos_byte
-= len_byte
;
1509 DEC_BOTH (pos
, pos_byte
);
1519 /* Try matching at position POS. */
1520 int this_pos
= pos
- len
;
1522 unsigned char *p
= pat
;
1524 if (pos
- len
< lim
)
1527 while (this_len
> 0)
1530 int buf_ch
= FETCH_BYTE (this_pos
);
1531 TRANSLATE (buf_ch
, trt
, buf_ch
);
1533 if (buf_ch
!= pat_ch
)
/* Record the match bounds as a byte position plus LEN_BYTE.  The
   start is either the current position minus LEN_BYTE or the current
   position itself; POS is used in place of POS_BYTE when the buffer is
   not multibyte (presumably because the two coincide there --
   confirm).  */
1555 set_search_regs ((multibyte
? pos_byte
: pos
) - len_byte
, len_byte
);
1557 set_search_regs (multibyte
? pos_byte
: pos
, len_byte
);
1567 /* Do Boyer-Moore search N times for the string BASE_PAT,
1568 whose length is LEN/LEN_BYTE,
1569 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1570 The sign of N says which direction we search in.
1571 TRT and INVERSE_TRT are translation tables.
1572 Characters in BASE_PAT are already translated by TRT.
1574 This kind of search works if all the characters in BASE_PAT that
1575 have nontrivial translation are the same aside from the last byte.
1576 This makes it possible to translate just the last byte of a
1577 character, and do so after just a simple test of the context.
1578 CHARSET_BASE is nonzero iff there is such a non-ASCII character.
1580 If that criterion is not satisfied, do not call this function. */
/* Outline of the code below: a 0400-entry stride table (BM_tab) and a
   byte-level translation table (simple_translate) are filled in from
   the pattern.  The buffer is then scanned one stretch at a time: a
   pointer-based loop is used when more than 20 bytes can be covered
   before LIMIT, otherwise a FETCH_BYTE loop handles the stretch.  Each
   confirmed match is recorded with set_search_regs.  */
1583 boyer_moore (n
, base_pat
, len
, len_byte
, trt
, inverse_trt
,
1584 pos
, pos_byte
, lim
, lim_byte
, charset_base
)
1586 unsigned char *base_pat
;
1589 Lisp_Object inverse_trt
;
/* +1 for a forward search (N > 0), -1 otherwise.  */
1594 int direction
= ((n
> 0) ? 1 : -1);
1595 register int dirlen
;
1596 int infinity
, limit
, stride_for_teases
= 0;
1597 register int *BM_tab
;
1599 register unsigned char *cursor
, *p_limit
;
1601 unsigned char *pat
, *pat_end
;
1602 int multibyte
= ! NILP (current_buffer
->enable_multibyte_characters
);
1604 unsigned char simple_translate
[0400];
1605 /* These are set to the preceding bytes of a byte to be translated
1606 if charset_base is nonzero. As the maximum byte length of a
1607 multibyte character is 4, we have to check at most three previous
1609 int translate_prev_byte1
= 0;
1610 int translate_prev_byte2
= 0;
1611 int translate_prev_byte3
= 0;
1614 int BM_tab_space
[0400];
1615 BM_tab
= &BM_tab_space
[0];
1617 BM_tab
= (int *) alloca (0400 * sizeof (int));
1619 /* The general approach is that we are going to maintain that we know */
1620 /* the first (closest to the present position, in whatever direction */
1621 /* we're searching) character that could possibly be the last */
1622 /* (furthest from present position) character of a valid match. We */
1623 /* advance the state of our knowledge by looking at that character */
1624 /* and seeing whether it indeed matches the last character of the */
1625 /* pattern. If it does, we take a closer look. If it does not, we */
1626 /* move our pointer (to putative last characters) as far as is */
1627 /* logically possible. This amount of movement, which I call a */
1628 /* stride, will be the length of the pattern if the actual character */
1629 /* appears nowhere in the pattern, otherwise it will be the distance */
1630 /* from the last occurrence of that character to the end of the */
1632 /* As a coding trick, an enormous stride is coded into the table for */
1633 /* characters that match the last character. This allows use of only */
1634 /* a single test, a test for having gone past the end of the */
1635 /* permissible match region, to test for both possible matches (when */
1636 /* the stride goes past the end immediately) and failure to */
1637 /* match (where you get nudged past the end one stride at a time). */
1639 /* Here we make a "mickey mouse" BM table. The stride of the search */
1640 /* is determined only by the last character of the putative match. */
1641 /* If that character does not match, we will stride the proper */
1642 /* distance to propose a match that superimposes it on the last */
1643 /* instance of a character that matches it (per trt), or misses */
1644 /* it entirely if there is none. */
/* Pattern length in bytes, signed by the search direction.  */
1646 dirlen
= len_byte
* direction
;
1647 infinity
= dirlen
- (lim_byte
+ pos_byte
+ len_byte
+ len_byte
) * direction
;
1649 /* Record position after the end of the pattern. */
1650 pat_end
= base_pat
+ len_byte
;
1651 /* BASE_PAT points to a character that we start scanning from.
1652 It is the first character in a forward search,
1653 the last character in a backward search. */
1655 base_pat
= pat_end
- 1;
1657 BM_tab_base
= BM_tab
;
1659 j
= dirlen
; /* to get it in a register */
1660 /* A character that does not appear in the pattern induces a */
1661 /* stride equal to the pattern length. */
1662 while (BM_tab_base
!= BM_tab
)
1670 /* We use this for translation, instead of TRT itself.
1671 We fill this in to handle the characters that actually
1672 occur in the pattern. Others don't matter anyway! */
1673 bzero (simple_translate
, sizeof simple_translate
);
1674 for (i
= 0; i
< 0400; i
++)
1675 simple_translate
[i
] = i
;
1679 /* Set up translate_prev_byte1/2/3 from CHARSET_BASE. Only the
1680 byte following them is the target of translation. */
1681 int sample_char
= charset_base
| 0x20;
1682 unsigned char str
[MAX_MULTIBYTE_LENGTH
];
1683 int len
= CHAR_STRING (sample_char
, str
);
1685 translate_prev_byte1
= str
[len
- 2];
1688 translate_prev_byte2
= str
[len
- 3];
1690 translate_prev_byte3
= str
[len
- 4];
1695 while (i
!= infinity
)
1697 unsigned char *ptr
= base_pat
+ i
;
1703 /* If the byte currently looking at is the last of a
1704 character to check case-equivalents, set CH to that
1705 character. An ASCII character and a non-ASCII character
1706 matching with CHARSET_BASE are to be checked. */
1709 if (ASCII_BYTE_P (*ptr
) || ! multibyte
)
1711 else if (charset_base
1712 && ((pat_end
- ptr
) == 1 || CHAR_HEAD_P (ptr
[1])))
1714 unsigned char *charstart
= ptr
- 1;
1716 while (! (CHAR_HEAD_P (*charstart
)))
1718 ch
= STRING_CHAR (charstart
, ptr
- charstart
+ 1);
1719 if (charset_base
!= (ch
& ~CHAR_FIELD3_MASK
))
1724 j
= ((unsigned char) ch
) | 0200;
1729 stride_for_teases
= BM_tab
[j
];
1731 BM_tab
[j
] = dirlen
- i
;
1732 /* A translation table is accompanied by its inverse -- see */
1733 /* comment following downcase_table for details */
1736 int starting_ch
= ch
;
1741 TRANSLATE (ch
, inverse_trt
, ch
);
1743 j
= ((unsigned char) ch
) | 0200;
1745 j
= (unsigned char) ch
;
1747 /* For all the characters that map into CH,
1748 set up simple_translate to map the last byte
1750 simple_translate
[j
] = starting_j
;
1751 if (ch
== starting_ch
)
1753 BM_tab
[j
] = dirlen
- i
;
1762 stride_for_teases
= BM_tab
[j
];
1763 BM_tab
[j
] = dirlen
- i
;
1765 /* stride_for_teases tells how much to stride if we get a */
1766 /* match on the far character but are subsequently */
1767 /* disappointed, by recording what the stride would have been */
1768 /* for that character if the last character had been */
1771 infinity
= dirlen
- infinity
;
1772 pos_byte
+= dirlen
- ((direction
> 0) ? direction
: 0);
1773 /* loop invariant - POS_BYTE points at where last char (first
1774 char if reverse) of pattern would align in a possible match. */
1778 unsigned char *tail_end_ptr
;
1780 /* It's been reported that some (broken) compiler thinks that
1781 Boolean expressions in an arithmetic context are unsigned.
1782 Using an explicit ?1:0 prevents this. */
1783 if ((lim_byte
- pos_byte
- ((direction
> 0) ? 1 : 0)) * direction
1785 return (n
* (0 - direction
));
1786 /* First we do the part we can by pointers (maybe nothing) */
1789 limit
= pos_byte
- dirlen
+ direction
;
1792 limit
= BUFFER_CEILING_OF (limit
);
1793 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1794 can take on without hitting edge of buffer or the gap. */
1795 limit
= min (limit
, pos_byte
+ 20000);
1796 limit
= min (limit
, lim_byte
- 1);
1800 limit
= BUFFER_FLOOR_OF (limit
);
1801 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1802 can take on without hitting edge of buffer or the gap. */
1803 limit
= max (limit
, pos_byte
- 20000);
1804 limit
= max (limit
, lim_byte
);
1806 tail_end
= BUFFER_CEILING_OF (pos_byte
) + 1;
1807 tail_end_ptr
= BYTE_POS_ADDR (tail_end
);
/* Take the pointer-based loop only when more than 20 bytes lie
   between POS_BYTE and LIMIT in the search direction.  */
1809 if ((limit
- pos_byte
) * direction
> 20)
1813 p_limit
= BYTE_POS_ADDR (limit
);
1814 p2
= (cursor
= BYTE_POS_ADDR (pos_byte
));
1815 /* In this loop, pos + cursor - p2 is the surrogate for pos */
1816 while (1) /* use one cursor setting as long as i can */
1818 if (direction
> 0) /* worth duplicating */
1820 /* Use signed comparison if appropriate
1821 to make cursor+infinity sure to be > p_limit.
1822 Assuming that the buffer lies in a range of addresses
1823 that are all "positive" (as ints) or all "negative",
1824 either kind of comparison will work as long
1825 as we don't step by infinity. So pick the kind
1826 that works when we do step by infinity. */
1827 if ((EMACS_INT
) (p_limit
+ infinity
) > (EMACS_INT
) p_limit
)
1828 while ((EMACS_INT
) cursor
<= (EMACS_INT
) p_limit
)
1829 cursor
+= BM_tab
[*cursor
];
1831 while ((EMACS_UINT
) cursor
<= (EMACS_UINT
) p_limit
)
1832 cursor
+= BM_tab
[*cursor
];
1836 if ((EMACS_INT
) (p_limit
+ infinity
) < (EMACS_INT
) p_limit
)
1837 while ((EMACS_INT
) cursor
>= (EMACS_INT
) p_limit
)
1838 cursor
+= BM_tab
[*cursor
];
1840 while ((EMACS_UINT
) cursor
>= (EMACS_UINT
) p_limit
)
1841 cursor
+= BM_tab
[*cursor
];
1843 /* If you are here, cursor is beyond the end of the searched region. */
1844 /* This can happen if you match on the far character of the pattern, */
1845 /* because the "stride" of that character is infinity, a number able */
1846 /* to throw you well beyond the end of the search. It can also */
1847 /* happen if you fail to match within the permitted region and would */
1848 /* otherwise try a character beyond that region */
1849 if ((cursor
- p_limit
) * direction
<= len_byte
)
1850 break; /* a small overrun is genuine */
1851 cursor
-= infinity
; /* large overrun = hit */
1852 i
= dirlen
- direction
;
1855 while ((i
-= direction
) + direction
!= 0)
1858 cursor
-= direction
;
1859 /* Translate only the last byte of a character. */
1861 || ((cursor
== tail_end_ptr
1862 || CHAR_HEAD_P (cursor
[1]))
1863 && (CHAR_HEAD_P (cursor
[0])
1864 /* Check if this is the last byte of
1865 a translatable character. */
1866 || (translate_prev_byte1
== cursor
[-1]
1867 && (CHAR_HEAD_P (translate_prev_byte1
)
1868 || (translate_prev_byte2
== cursor
[-2]
1869 && (CHAR_HEAD_P (translate_prev_byte2
)
1870 || (translate_prev_byte3
== cursor
[-3]))))))))
1871 ch
= simple_translate
[*cursor
];
1880 while ((i
-= direction
) + direction
!= 0)
1882 cursor
-= direction
;
1883 if (pat
[i
] != *cursor
)
1887 cursor
+= dirlen
- i
- direction
; /* fix cursor */
1888 if (i
+ direction
== 0)
1892 cursor
-= direction
;
1894 position
= pos_byte
+ cursor
- p2
+ ((direction
> 0)
1895 ? 1 - len_byte
: 0);
1896 set_search_regs (position
, len_byte
);
1898 if ((n
-= direction
) != 0)
1899 cursor
+= dirlen
; /* to resume search */
1901 return ((direction
> 0)
1902 ? search_regs
.end
[0] : search_regs
.start
[0]);
1905 cursor
+= stride_for_teases
; /* <sigh> we lose - */
1907 pos_byte
+= cursor
- p2
;
1910 /* Now we'll pick up a clump that has to be done the hard */
1911 /* way because it covers a discontinuity */
1913 limit
= ((direction
> 0)
1914 ? BUFFER_CEILING_OF (pos_byte
- dirlen
+ 1)
1915 : BUFFER_FLOOR_OF (pos_byte
- dirlen
- 1));
1916 limit
= ((direction
> 0)
1917 ? min (limit
+ len_byte
, lim_byte
- 1)
1918 : max (limit
- len_byte
, lim_byte
));
1919 /* LIMIT is now the last value POS_BYTE can have
1920 and still be valid for a possible match. */
1923 /* This loop can be coded for space rather than */
1924 /* speed because it will usually run only once. */
1925 /* (the reach is at most len + 21, and typically */
1926 /* does not exceed len) */
1927 while ((limit
- pos_byte
) * direction
>= 0)
1928 pos_byte
+= BM_tab
[FETCH_BYTE (pos_byte
)];
1929 /* now run the same tests to distinguish going off the */
1930 /* end, a match or a phony match. */
1931 if ((pos_byte
- limit
) * direction
<= len_byte
)
1932 break; /* ran off the end */
1933 /* Found what might be a match.
1934 Set POS_BYTE back to last (first if reverse) pos. */
1935 pos_byte
-= infinity
;
1936 i
= dirlen
- direction
;
1937 while ((i
-= direction
) + direction
!= 0)
1941 pos_byte
-= direction
;
1942 ptr
= BYTE_POS_ADDR (pos_byte
);
1943 /* Translate only the last byte of a character. */
1945 || ((ptr
== tail_end_ptr
1946 || CHAR_HEAD_P (ptr
[1]))
1947 && (CHAR_HEAD_P (ptr
[0])
1948 /* Check if this is the last byte of a
1949 translatable character. */
1950 || (translate_prev_byte1
== ptr
[-1]
1951 && (CHAR_HEAD_P (translate_prev_byte1
)
1952 || (translate_prev_byte2
== ptr
[-2]
1953 && (CHAR_HEAD_P (translate_prev_byte2
)
1954 || translate_prev_byte3
== ptr
[-3])))))))
1955 ch
= simple_translate
[*ptr
];
1961 /* Above loop has moved POS_BYTE part or all the way
1962 back to the first pos (last pos if reverse).
1963 Set it once again at the last (first if reverse) char. */
1964 pos_byte
+= dirlen
- i
- direction
;
1965 if (i
+ direction
== 0)
1968 pos_byte
-= direction
;
1970 position
= pos_byte
+ ((direction
> 0) ? 1 - len_byte
: 0);
1972 set_search_regs (position
, len_byte
);
1974 if ((n
-= direction
) != 0)
1975 pos_byte
+= dirlen
; /* to resume search */
1977 return ((direction
> 0)
1978 ? search_regs
.end
[0] : search_regs
.start
[0]);
1981 pos_byte
+= stride_for_teases
;
1984 /* We have done one clump. Can we continue? */
1985 if ((lim_byte
- pos_byte
) * direction
< 0)
1986 return ((0 - n
) * direction
);
1988 return BYTE_TO_CHAR (pos_byte
);
1991 /* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
1992 for the overall match just found in the current buffer.
1993 Also clear out the match data for registers 1 and up. */
/* BEG_BYTE and NBYTES are byte quantities; the values stored in
   search_regs are converted to character positions with BYTE_TO_CHAR.
   last_thing_searched is set to the current buffer as a side effect.  */
1996 set_search_regs (beg_byte
, nbytes
)
1997 int beg_byte
, nbytes
;
2001 /* Make sure we have registers in which to store
2002 the match position. */
2003 if (search_regs
.num_regs
== 0)
/* Nothing allocated yet: make room for two registers.
   NOTE(review): the xmalloc results are used unchecked; presumably
   xmalloc does not return on failure -- confirm.  */
2005 search_regs
.start
= (regoff_t
*) xmalloc (2 * sizeof (regoff_t
));
2006 search_regs
.end
= (regoff_t
*) xmalloc (2 * sizeof (regoff_t
));
2007 search_regs
.num_regs
= 2;
2010 /* Clear out the other registers. */
2011 for (i
= 1; i
< search_regs
.num_regs
; i
++)
2013 search_regs
.start
[i
] = -1;
2014 search_regs
.end
[i
] = -1;
/* Register 0 describes the whole match.  */
2017 search_regs
.start
[0] = BYTE_TO_CHAR (beg_byte
);
2018 search_regs
.end
[0] = BYTE_TO_CHAR (beg_byte
+ nbytes
);
2019 XSETBUFFER (last_thing_searched
, current_buffer
);
2022 /* Given a string of words separated by word delimiters,
2023 compute a regexp that matches those exact words
2024 separated by arbitrary punctuation. */
/* The visible code makes two passes over STRING.  The first pass
   classifies each character with SYNTAX (c) against Sword; it
   presumably maintains PUNCT_COUNT and WORD_COUNT (the updates are not
   visible here -- confirm).  The result string is then allocated with
   LEN + ADJUST characters, multibyte if STRING is multibyte, and the
   second pass copies word-constituent characters into it.  */
2030 register unsigned char *p
, *o
;
2031 register int i
, i_byte
, len
, punct_count
= 0, word_count
= 0;
2036 CHECK_STRING (string
);
2038 len
= SCHARS (string
);
/* First pass over the characters of STRING.  */
2040 for (i
= 0, i_byte
= 0; i
< len
; )
2044 FETCH_STRING_CHAR_ADVANCE (c
, string
, i
, i_byte
);
2046 if (SYNTAX (c
) != Sword
)
2049 if (i
> 0 && SYNTAX (prev_c
) == Sword
)
2056 if (SYNTAX (prev_c
) == Sword
)
2059 return empty_string
;
/* Size of the result relative to LEN: PUNCT_COUNT characters fewer,
   plus 5 per gap between words and 4 more.  Presumably this budgets
   for the regexp text emitted between and around the words -- TODO
   confirm against the emitting code, which is not visible here.  */
2061 adjust
= - punct_count
+ 5 * (word_count
- 1) + 4;
2062 if (STRING_MULTIBYTE (string
))
2063 val
= make_uninit_multibyte_string (len
+ adjust
,
2067 val
= make_uninit_string (len
+ adjust
);
/* Second pass: build the result through the output pointer O.  */
2074 for (i
= 0, i_byte
= 0; i
< len
; )
2077 int i_byte_orig
= i_byte
;
2079 FETCH_STRING_CHAR_ADVANCE (c
, string
, i
, i_byte
);
2081 if (SYNTAX (c
) == Sword
)
/* Word constituent: copy its bytes through unchanged.  */
2083 bcopy (SDATA (string
) + i_byte_orig
, o
,
2084 i_byte
- i_byte_orig
);
2085 o
+= i_byte
- i_byte_orig
;
2087 else if (i
> 0 && SYNTAX (prev_c
) == Sword
&& --word_count
)
/* Lisp primitive `search-backward'.  Passes its four arguments straight
   to search_command followed by the constants -1, 0, 0.  Judging from
   the other search primitives in this file these look like a direction
   (-1 backward, 1 forward), a regexp flag and a POSIX flag -- confirm
   against search_command.  */
2105 DEFUN ("search-backward", Fsearch_backward
, Ssearch_backward
, 1, 4,
2106 "MSearch backward: ",
2107 doc
: /* Search backward from point for STRING.
2108 Set point to the beginning of the occurrence found, and return point.
2109 An optional second argument bounds the search; it is a buffer position.
2110 The match found must not extend before that position.
2111 Optional third argument, if t, means if fail just return nil (no error).
2112 If not nil and not t, position at limit of search and return nil.
2113 Optional fourth argument is repeat count--search for successive occurrences.
2115 Search case-sensitivity is determined by the value of the variable
2116 `case-fold-search', which see.
2118 See also the functions `match-beginning', `match-end' and `replace-match'. */)
2119 (string
, bound
, noerror
, count
)
2120 Lisp_Object string
, bound
, noerror
, count
;
2122 return search_command (string
, bound
, noerror
, count
, -1, 0, 0);
/* Lisp primitive `search-forward'.  Passes its four arguments straight
   to search_command followed by the constants 1, 0, 0.  Judging from
   the other search primitives in this file these look like a direction
   (1 forward, -1 backward), a regexp flag and a POSIX flag -- confirm
   against search_command.  */
2125 DEFUN ("search-forward", Fsearch_forward
, Ssearch_forward
, 1, 4, "MSearch: ",
2126 doc
: /* Search forward from point for STRING.
2127 Set point to the end of the occurrence found, and return point.
2128 An optional second argument bounds the search; it is a buffer position.
2129 The match found must not extend after that position. nil is equivalent
2131 Optional third argument, if t, means if fail just return nil (no error).
2132 If not nil and not t, move to limit of search and return nil.
2133 Optional fourth argument is repeat count--search for successive occurrences.
2135 Search case-sensitivity is determined by the value of the variable
2136 `case-fold-search', which see.
2138 See also the functions `match-beginning', `match-end' and `replace-match'. */)
2139 (string
, bound
, noerror
, count
)
2140 Lisp_Object string
, bound
, noerror
, count
;
2142 return search_command (string
, bound
, noerror
, count
, 1, 0, 0);
/* Lisp primitive `word-search-backward'.  STRING is first converted
   with wordify, and the result is handed to search_command with the
   trailing constants -1, 1, 0 -- the same constants the backward regexp
   search primitive passes.  Presumably they mean direction, regexp flag
   and POSIX flag; confirm against search_command.  */
2145 DEFUN ("word-search-backward", Fword_search_backward
, Sword_search_backward
, 1, 4,
2146 "sWord search backward: ",
2147 doc
: /* Search backward from point for STRING, ignoring differences in punctuation.
2148 Set point to the beginning of the occurrence found, and return point.
2149 An optional second argument bounds the search; it is a buffer position.
2150 The match found must not extend before that position.
2151 Optional third argument, if t, means if fail just return nil (no error).
2152 If not nil and not t, move to limit of search and return nil.
2153 Optional fourth argument is repeat count--search for successive occurrences. */)
2154 (string
, bound
, noerror
, count
)
2155 Lisp_Object string
, bound
, noerror
, count
;
2157 return search_command (wordify (string
), bound
, noerror
, count
, -1, 1, 0);
/* Lisp primitive `word-search-forward'.  STRING is first converted
   with wordify, and the result is handed to search_command with the
   trailing constants 1, 1, 0 -- the same constants the forward regexp
   search primitive passes.  Presumably they mean direction, regexp flag
   and POSIX flag; confirm against search_command.  */
2160 DEFUN ("word-search-forward", Fword_search_forward
, Sword_search_forward
, 1, 4,
2162 doc
: /* Search forward from point for STRING, ignoring differences in punctuation.
2163 Set point to the end of the occurrence found, and return point.
2164 An optional second argument bounds the search; it is a buffer position.
2165 The match found must not extend after that position.
2166 Optional third argument, if t, means if fail just return nil (no error).
2167 If not nil and not t, move to limit of search and return nil.
2168 Optional fourth argument is repeat count--search for successive occurrences. */)
2169 (string
, bound
, noerror
, count
)
2170 Lisp_Object string
, bound
, noerror
, count
;
2172 return search_command (wordify (string
), bound
, noerror
, count
, 1, 1, 0);
/* Lisp primitive `re-search-backward'.  Passes REGEXP and the other
   arguments to search_command with the trailing constants -1, 1, 0.
   Compared with the plain string search primitives the middle constant
   is 1 here, and compared with the POSIX primitives the last one is 0;
   presumably direction, regexp flag and POSIX flag -- confirm against
   search_command.  */
2175 DEFUN ("re-search-backward", Fre_search_backward
, Sre_search_backward
, 1, 4,
2176 "sRE search backward: ",
2177 doc
: /* Search backward from point for match for regular expression REGEXP.
2178 Set point to the beginning of the match, and return point.
2179 The match found is the one starting last in the buffer
2180 and yet ending before the origin of the search.
2181 An optional second argument bounds the search; it is a buffer position.
2182 The match found must start at or after that position.
2183 Optional third argument, if t, means if fail just return nil (no error).
2184 If not nil and not t, move to limit of search and return nil.
2185 Optional fourth argument is repeat count--search for successive occurrences.
2186 See also the functions `match-beginning', `match-end', `match-string',
2187 and `replace-match'. */)
2188 (regexp
, bound
, noerror
, count
)
2189 Lisp_Object regexp
, bound
, noerror
, count
;
2191 return search_command (regexp
, bound
, noerror
, count
, -1, 1, 0);
/* Lisp primitive `re-search-forward'.  Passes REGEXP and the other
   arguments to search_command with the trailing constants 1, 1, 0.
   Compared with the plain string search primitives the middle constant
   is 1 here, and compared with the POSIX primitives the last one is 0;
   presumably direction, regexp flag and POSIX flag -- confirm against
   search_command.  */
2194 DEFUN ("re-search-forward", Fre_search_forward
, Sre_search_forward
, 1, 4,
2196 doc
: /* Search forward from point for regular expression REGEXP.
2197 Set point to the end of the occurrence found, and return point.
2198 An optional second argument bounds the search; it is a buffer position.
2199 The match found must not extend after that position.
2200 Optional third argument, if t, means if fail just return nil (no error).
2201 If not nil and not t, move to limit of search and return nil.
2202 Optional fourth argument is repeat count--search for successive occurrences.
2203 See also the functions `match-beginning', `match-end', `match-string',
2204 and `replace-match'. */)
2205 (regexp
, bound
, noerror
, count
)
2206 Lisp_Object regexp
, bound
, noerror
, count
;
2208 return search_command (regexp
, bound
, noerror
, count
, 1, 1, 0);
/* Lisp primitive `posix-search-backward'.  Identical in shape to the
   backward regexp search primitive except that the last constant given
   to search_command is 1 (the call passes -1, 1, 1).  That last
   argument presumably requests POSIX matching -- confirm against
   search_command.  */
2211 DEFUN ("posix-search-backward", Fposix_search_backward
, Sposix_search_backward
, 1, 4,
2212 "sPosix search backward: ",
2213 doc
: /* Search backward from point for match for regular expression REGEXP.
2214 Find the longest match in accord with Posix regular expression rules.
2215 Set point to the beginning of the match, and return point.
2216 The match found is the one starting last in the buffer
2217 and yet ending before the origin of the search.
2218 An optional second argument bounds the search; it is a buffer position.
2219 The match found must start at or after that position.
2220 Optional third argument, if t, means if fail just return nil (no error).
2221 If not nil and not t, move to limit of search and return nil.
2222 Optional fourth argument is repeat count--search for successive occurrences.
2223 See also the functions `match-beginning', `match-end', `match-string',
2224 and `replace-match'. */)
2225 (regexp
, bound
, noerror
, count
)
2226 Lisp_Object regexp
, bound
, noerror
, count
;
2228 return search_command (regexp
, bound
, noerror
, count
, -1, 1, 1);
/* Lisp primitive `posix-search-forward'.  Identical in shape to the
   forward regexp search primitive except that the last constant given
   to search_command is 1 (the call passes 1, 1, 1).  That last argument
   presumably requests POSIX matching -- confirm against
   search_command.  */
2231 DEFUN ("posix-search-forward", Fposix_search_forward
, Sposix_search_forward
, 1, 4,
2233 doc
: /* Search forward from point for regular expression REGEXP.
2234 Find the longest match in accord with Posix regular expression rules.
2235 Set point to the end of the occurrence found, and return point.
2236 An optional second argument bounds the search; it is a buffer position.
2237 The match found must not extend after that position.
2238 Optional third argument, if t, means if fail just return nil (no error).
2239 If not nil and not t, move to limit of search and return nil.
2240 Optional fourth argument is repeat count--search for successive occurrences.
2241 See also the functions `match-beginning', `match-end', `match-string',
2242 and `replace-match'. */)
2243 (regexp
, bound
, noerror
, count
)
2244 Lisp_Object regexp
, bound
, noerror
, count
;
2246 return search_command (regexp
, bound
, noerror
, count
, 1, 1, 1);
2249 DEFUN ("replace-match", Freplace_match
, Sreplace_match
, 1, 5, 0,
2250 doc
: /* Replace text matched by last search with NEWTEXT.
2251 Leave point at the end of the replacement text.
2253 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
2254 Otherwise maybe capitalize the whole text, or maybe just word initials,
2255 based on the replaced text.
2256 If the replaced text has only capital letters
2257 and has at least one multiletter word, convert NEWTEXT to all caps.
2258 Otherwise if all words are capitalized in the replaced text,
2259 capitalize each word in NEWTEXT.
2261 If third arg LITERAL is non-nil, insert NEWTEXT literally.
2262 Otherwise treat `\\' as special:
2263 `\\&' in NEWTEXT means substitute original matched text.
2264 `\\N' means substitute what matched the Nth `\\(...\\)'.
2265 If Nth parens didn't match, substitute nothing.
2266 `\\\\' means insert one `\\'.
2267 Case conversion does not apply to these substitutions.
2269 FIXEDCASE and LITERAL are optional arguments.
2271 The optional fourth argument STRING can be a string to modify.
2272 This is meaningful when the previous match was done against STRING,
2273 using `string-match'. When used this way, `replace-match'
2274 creates and returns a new string made by copying STRING and replacing
2275 the part of STRING that was matched.
2277 The optional fifth argument SUBEXP specifies a subexpression;
2278 it says to replace just that subexpression with NEWTEXT,
2279 rather than replacing the entire matched text.
2280 This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
2281 `\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
2282 NEWTEXT in place of subexp N.
2283 This is useful only after a regular expression search or match,
2284 since only regular expressions have distinguished subexpressions. */)
2285 (newtext
, fixedcase
, literal
, string
, subexp
)
2286 Lisp_Object newtext
, fixedcase
, literal
, string
, subexp
;
2288 enum { nochange
, all_caps
, cap_initial
} case_action
;
2289 register int pos
, pos_byte
;
2290 int some_multiletter_word
;
2293 int some_nonuppercase_initial
;
2294 register int c
, prevc
;
2296 int opoint
, newpoint
;
2298 CHECK_STRING (newtext
);
2300 if (! NILP (string
))
2301 CHECK_STRING (string
);
2303 case_action
= nochange
; /* We tried an initialization */
2304 /* but some C compilers blew it */
2306 if (search_regs
.num_regs
<= 0)
2307 error ("`replace-match' called before any match found");
2313 CHECK_NUMBER (subexp
);
2314 sub
= XINT (subexp
);
2315 if (sub
< 0 || sub
>= search_regs
.num_regs
)
2316 args_out_of_range (subexp
, make_number (search_regs
.num_regs
));
2321 if (search_regs
.start
[sub
] < BEGV
2322 || search_regs
.start
[sub
] > search_regs
.end
[sub
]
2323 || search_regs
.end
[sub
] > ZV
)
2324 args_out_of_range (make_number (search_regs
.start
[sub
]),
2325 make_number (search_regs
.end
[sub
]));
2329 if (search_regs
.start
[sub
] < 0
2330 || search_regs
.start
[sub
] > search_regs
.end
[sub
]
2331 || search_regs
.end
[sub
] > SCHARS (string
))
2332 args_out_of_range (make_number (search_regs
.start
[sub
]),
2333 make_number (search_regs
.end
[sub
]));
2336 if (NILP (fixedcase
))
2338 /* Decide how to casify by examining the matched text. */
2341 pos
= search_regs
.start
[sub
];
2342 last
= search_regs
.end
[sub
];
2345 pos_byte
= CHAR_TO_BYTE (pos
);
2347 pos_byte
= string_char_to_byte (string
, pos
);
2350 case_action
= all_caps
;
2352 /* some_multiletter_word is set nonzero if any original word
2353 is more than one letter long. */
2354 some_multiletter_word
= 0;
2356 some_nonuppercase_initial
= 0;
2363 c
= FETCH_CHAR (pos_byte
);
2364 INC_BOTH (pos
, pos_byte
);
2367 FETCH_STRING_CHAR_ADVANCE (c
, string
, pos
, pos_byte
);
2371 /* Cannot be all caps if any original char is lower case */
2374 if (SYNTAX (prevc
) != Sword
)
2375 some_nonuppercase_initial
= 1;
2377 some_multiletter_word
= 1;
2379 else if (UPPERCASEP (c
))
2382 if (SYNTAX (prevc
) != Sword
)
2385 some_multiletter_word
= 1;
2389 /* If the initial is a caseless word constituent,
2390 treat that like a lowercase initial. */
2391 if (SYNTAX (prevc
) != Sword
)
2392 some_nonuppercase_initial
= 1;
2398 /* Convert to all caps if the old text is all caps
2399 and has at least one multiletter word. */
2400 if (! some_lowercase
&& some_multiletter_word
)
2401 case_action
= all_caps
;
2402 /* Capitalize each word, if the old text has all capitalized words. */
2403 else if (!some_nonuppercase_initial
&& some_multiletter_word
)
2404 case_action
= cap_initial
;
2405 else if (!some_nonuppercase_initial
&& some_uppercase
)
2406 /* Should x -> yz, operating on X, give Yz or YZ?
2407 We'll assume the latter. */
2408 case_action
= all_caps
;
2410 case_action
= nochange
;
2413 /* Do replacement in a string. */
2416 Lisp_Object before
, after
;
2418 before
= Fsubstring (string
, make_number (0),
2419 make_number (search_regs
.start
[sub
]));
2420 after
= Fsubstring (string
, make_number (search_regs
.end
[sub
]), Qnil
);
2422 /* Substitute parts of the match into NEWTEXT
2427 int lastpos_byte
= 0;
2428 /* We build up the substituted string in ACCUM. */
2431 int length
= SBYTES (newtext
);
2435 for (pos_byte
= 0, pos
= 0; pos_byte
< length
;)
2439 int delbackslash
= 0;
2441 FETCH_STRING_CHAR_ADVANCE (c
, newtext
, pos
, pos_byte
);
2445 FETCH_STRING_CHAR_ADVANCE (c
, newtext
, pos
, pos_byte
);
2449 substart
= search_regs
.start
[sub
];
2450 subend
= search_regs
.end
[sub
];
2452 else if (c
>= '1' && c
<= '9')
2454 if (search_regs
.start
[c
- '0'] >= 0
2455 && c
<= search_regs
.num_regs
+ '0')
2457 substart
= search_regs
.start
[c
- '0'];
2458 subend
= search_regs
.end
[c
- '0'];
2462 /* If that subexp did not match,
2463 replace \\N with nothing. */
2471 error ("Invalid use of `\\' in replacement text");
2475 if (pos
- 2 != lastpos
)
2476 middle
= substring_both (newtext
, lastpos
,
2478 pos
- 2, pos_byte
- 2);
2481 accum
= concat3 (accum
, middle
,
2483 make_number (substart
),
2484 make_number (subend
)));
2486 lastpos_byte
= pos_byte
;
2488 else if (delbackslash
)
2490 middle
= substring_both (newtext
, lastpos
,
2492 pos
- 1, pos_byte
- 1);
2494 accum
= concat2 (accum
, middle
);
2496 lastpos_byte
= pos_byte
;
2501 middle
= substring_both (newtext
, lastpos
,
2507 newtext
= concat2 (accum
, middle
);
2510 /* Do case substitution in NEWTEXT if desired. */
2511 if (case_action
== all_caps
)
2512 newtext
= Fupcase (newtext
);
2513 else if (case_action
== cap_initial
)
2514 newtext
= Fupcase_initials (newtext
);
2516 return concat3 (before
, newtext
, after
);
2519 /* Record point, then move (quietly) to the start of the match. */
2520 if (PT
>= search_regs
.end
[sub
])
2522 else if (PT
> search_regs
.start
[sub
])
2523 opoint
= search_regs
.end
[sub
] - ZV
;
2527 /* If we want non-literal replacement,
2528 perform substitution on the replacement string. */
2531 int length
= SBYTES (newtext
);
2532 unsigned char *substed
;
2533 int substed_alloc_size
, substed_len
;
2534 int buf_multibyte
= !NILP (current_buffer
->enable_multibyte_characters
);
2535 int str_multibyte
= STRING_MULTIBYTE (newtext
);
2536 Lisp_Object rev_tbl
;
2537 int really_changed
= 0;
2539 rev_tbl
= (!buf_multibyte
&& CHAR_TABLE_P (Vnonascii_translation_table
)
2540 ? Fchar_table_extra_slot (Vnonascii_translation_table
,
2544 substed_alloc_size
= length
* 2 + 100;
2545 substed
= (unsigned char *) xmalloc (substed_alloc_size
+ 1);
2548 /* Go thru NEWTEXT, producing the actual text to insert in
2549 SUBSTED while adjusting multibyteness to that of the current
2552 for (pos_byte
= 0, pos
= 0; pos_byte
< length
;)
2554 unsigned char str
[MAX_MULTIBYTE_LENGTH
];
2555 unsigned char *add_stuff
= NULL
;
2561 FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c
, newtext
, pos
, pos_byte
);
2563 c
= multibyte_char_to_unibyte (c
, rev_tbl
);
2567 /* Note that we don't have to increment POS. */
2568 c
= SREF (newtext
, pos_byte
++);
2570 c
= unibyte_char_to_multibyte (c
);
2573 /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2574 or set IDX to a match index, which means put that part
2575 of the buffer text into SUBSTED. */
2583 FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c
, newtext
,
2585 if (!buf_multibyte
&& !SINGLE_BYTE_CHAR_P (c
))
2586 c
= multibyte_char_to_unibyte (c
, rev_tbl
);
2590 c
= SREF (newtext
, pos_byte
++);
2592 c
= unibyte_char_to_multibyte (c
);
2597 else if (c
>= '1' && c
<= '9' && c
<= search_regs
.num_regs
+ '0')
2599 if (search_regs
.start
[c
- '0'] >= 1)
2603 add_len
= 1, add_stuff
= "\\";
2607 error ("Invalid use of `\\' in replacement text");
2612 add_len
= CHAR_STRING (c
, str
);
2616 /* If we want to copy part of a previous match,
2617 set up ADD_STUFF and ADD_LEN to point to it. */
2620 int begbyte
= CHAR_TO_BYTE (search_regs
.start
[idx
]);
2621 add_len
= CHAR_TO_BYTE (search_regs
.end
[idx
]) - begbyte
;
2622 if (search_regs
.start
[idx
] < GPT
&& GPT
< search_regs
.end
[idx
])
2623 move_gap (search_regs
.start
[idx
]);
2624 add_stuff
= BYTE_POS_ADDR (begbyte
);
2627 /* Now the stuff we want to add to SUBSTED
2628 is invariably ADD_LEN bytes starting at ADD_STUFF. */
2630 /* Make sure SUBSTED is big enough. */
2631 if (substed_len
+ add_len
>= substed_alloc_size
)
2633 substed_alloc_size
= substed_len
+ add_len
+ 500;
2634 substed
= (unsigned char *) xrealloc (substed
,
2635 substed_alloc_size
+ 1);
2638 /* Now add to the end of SUBSTED. */
2641 bcopy (add_stuff
, substed
+ substed_len
, add_len
);
2642 substed_len
+= add_len
;
2650 int nchars
= multibyte_chars_in_text (substed
, substed_len
);
2652 newtext
= make_multibyte_string (substed
, nchars
, substed_len
);
2655 newtext
= make_unibyte_string (substed
, substed_len
);
2660 /* Replace the old text with the new in the cleanest possible way. */
2661 replace_range (search_regs
.start
[sub
], search_regs
.end
[sub
],
2663 newpoint
= search_regs
.start
[sub
] + SCHARS (newtext
);
2665 if (case_action
== all_caps
)
2666 Fupcase_region (make_number (search_regs
.start
[sub
]),
2667 make_number (newpoint
));
2668 else if (case_action
== cap_initial
)
2669 Fupcase_initials_region (make_number (search_regs
.start
[sub
]),
2670 make_number (newpoint
));
2672 /* Adjust search data for this change. */
2674 int oldend
= search_regs
.end
[sub
];
2675 int oldstart
= search_regs
.start
[sub
];
2676 int change
= newpoint
- search_regs
.end
[sub
];
2679 for (i
= 0; i
< search_regs
.num_regs
; i
++)
2681 if (search_regs
.start
[i
] >= oldend
)
2682 search_regs
.start
[i
] += change
;
2683 else if (search_regs
.start
[i
] > oldstart
)
2684 search_regs
.start
[i
] = oldstart
;
2685 if (search_regs
.end
[i
] >= oldend
)
2686 search_regs
.end
[i
] += change
;
2687 else if (search_regs
.end
[i
] > oldstart
)
2688 search_regs
.end
[i
] = oldstart
;
2692 /* Put point back where it was in the text. */
2694 TEMP_SET_PT (opoint
+ ZV
);
2696 TEMP_SET_PT (opoint
);
2698 /* Now move point "officially" to the start of the inserted replacement. */
2699 move_if_not_intangible (newpoint
);
/* Return, as a Lisp integer, the start (BEGINNINGP nonzero) or the end
   (BEGINNINGP zero) recorded in search_regs for one match register.
   NOTE(review): this listing omits parts of the function (its return
   type, the parameter declarations, how N is obtained from NUM, and
   the statement controlled by the last `if'); confirm against the
   complete file.  */
2705 match_limit (num
, beginningp
)
/* Report NUM as out of range; the condition guarding this call is
   not visible here.  */
2714 args_out_of_range (num
, make_number (0));
/* With no registers allocated there is no match data to report.  */
2715 if (search_regs
.num_regs
<= 0)
2716 error ("No match data, because no search succeeded");
/* Register N either does not exist or has a negative start; other
   code in this file stores -1 there to mark an unused register.  */
2717 if (n
>= search_regs
.num_regs
2718 || search_regs
.start
[n
] < 0)
/* Normal case: pick the start or the end of register N.  */
2720 return (make_number ((beginningp
) ? search_regs
.start
[n
]
2721 : search_regs
.end
[n
]));
/* Lisp primitive `match-beginning': a thin wrapper around match_limit.  */
2724 DEFUN ("match-beginning", Fmatch_beginning
, Smatch_beginning
, 1, 1, 0,
2725 doc
: /* Return position of start of text matched by last search.
2726 SUBEXP, a number, specifies which parenthesized expression in the last
2728 Value is nil if SUBEXPth pair didn't match, or there were less than
2730 Zero means the entire text matched by the whole regexp or whole string. */)
/* The flag 1 makes match_limit return the start of the register
   rather than its end.  */
2734 return match_limit (subexp
, 1);
/* Lisp primitive `match-end': a thin wrapper around match_limit.  */
2737 DEFUN ("match-end", Fmatch_end
, Smatch_end
, 1, 1, 0,
2738 doc
: /* Return position of end of text matched by last search.
2739 SUBEXP, a number, specifies which parenthesized expression in the last
2741 Value is nil if SUBEXPth pair didn't match, or there were less than
2743 Zero means the entire text matched by the whole regexp or whole string. */)
/* The flag 0 makes match_limit return the end of the register
   rather than its start.  */
2747 return match_limit (subexp
, 0);
/* Lisp primitive `match-data': export the contents of search_regs as
   a Lisp list, optionally storing the values into the caller's REUSE
   list.  NOTE(review): several lines of this function (braces, some
   guards and declarations) are not present in this listing.  */
2750 DEFUN ("match-data", Fmatch_data
, Smatch_data
, 0, 3, 0,
2751 doc
: /* Return a list containing all info on what the last search matched.
2752 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2753 All the elements are markers or nil (nil if the Nth pair didn't match)
2754 if the last match was on a buffer; integers or nil if a string was matched.
2755 Use `store-match-data' to reinstate the data in this list.
2757 If INTEGERS (the optional first argument) is non-nil, always use
2758 integers \(rather than markers) to represent buffer positions. In
2759 this case, and if the last match was in a buffer, the buffer will get
2760 stored as one additional element at the end of the list.
2762 If REUSE is a list, reuse it as part of the value. If REUSE is long
2763 enough to hold all the values, and if INTEGERS is non-nil, no consing
2766 If optional third arg RESEAT is non-nil, any previous markers on the
2767 REUSE list will be modified to point to nowhere.
2769 Return value is undefined if the last search failed. */)
2770 (integers
, reuse
, reseat
)
2771 Lisp_Object integers
, reuse
, reseat
;
2773 Lisp_Object tail
, prev
;
/* Detach every marker found on REUSE and clear its list slot.
   Presumably this runs only when RESEAT is non-nil, as the doc
   string describes; the guard itself is not visible here.  */
2778 for (tail
= reuse
; CONSP (tail
); tail
= XCDR (tail
))
2779 if (MARKERP (XCAR (tail
)))
2781 unchain_marker (XMARKER (XCAR (tail
)));
2782 XSETCAR (tail
, Qnil
);
/* A nil last_thing_searched is tested here; the action taken in
   that case is not visible in this listing.  */
2785 if (NILP (last_thing_searched
))
/* Scratch array on the stack: two slots per register, plus one
   spare slot for the buffer element that may be appended below.  */
2790 data
= (Lisp_Object
*) alloca ((2 * search_regs
.num_regs
+ 1)
2791 * sizeof (Lisp_Object
));
/* Fill DATA with one start/end pair per register.  */
2794 for (i
= 0; i
< search_regs
.num_regs
; i
++)
2796 int start
= search_regs
.start
[i
];
/* String match (Qt), or the caller asked for integers: store the
   positions as plain integers.  */
2799 if (EQ (last_thing_searched
, Qt
)
2800 || ! NILP (integers
))
2802 XSETFASTINT (data
[2 * i
], start
);
2803 XSETFASTINT (data
[2 * i
+ 1], search_regs
.end
[i
]);
/* Buffer match: store freshly made markers set to the positions in
   the searched buffer.  */
2805 else if (BUFFERP (last_thing_searched
))
2807 data
[2 * i
] = Fmake_marker ();
2808 Fset_marker (data
[2 * i
],
2809 make_number (start
),
2810 last_thing_searched
);
2811 data
[2 * i
+ 1] = Fmake_marker ();
2812 Fset_marker (data
[2 * i
+ 1],
2813 make_number (search_regs
.end
[i
]),
2814 last_thing_searched
);
2817 /* last_thing_searched must always be Qt, a buffer, or Qnil. */
/* Both slots of the pair become nil; per the doc string this is
   the representation of a pair that did not match (the guard
   selecting this case is not visible here).  */
2823 data
[2 * i
] = data
[2 * i
+ 1] = Qnil
;
/* Integers were requested for a buffer match: the buffer itself is
   stored as an extra element at index LEN.  */
2826 if (BUFFERP (last_thing_searched
) && !NILP (integers
))
2828 data
[len
] = last_thing_searched
;
2832 /* If REUSE is not usable, cons up the values and return them. */
2833 if (! CONSP (reuse
))
2834 return Flist (len
, data
);
2836 /* If REUSE is a list, store as many value elements as will fit
2837 into the elements of REUSE. */
2838 for (i
= 0, tail
= reuse
; CONSP (tail
);
2839 i
++, tail
= XCDR (tail
))
/* Two stores are visible in the loop body: a DATA element, or nil.
   The condition choosing between them is not visible here.  */
2842 XSETCAR (tail
, data
[i
]);
2844 XSETCAR (tail
, Qnil
);
2848 /* If we couldn't fit all value elements into REUSE,
2849 cons up the rest of them and add them to the end of REUSE. */
2851 XSETCDR (prev
, Flist (len
- i
, data
+ i
));
2856 /* Internal usage only:
2857 If RESEAT is `evaporate', put the markers back on the free list
2858 immediately. No other references to the markers must exist in this case,
2859 so it is used only internally on the unwind stack and save-match-data from Lisp. */
2862 DEFUN ("set-match-data", Fset_match_data
, Sset_match_data
, 1, 2, 0,
2863 doc
: /* Set internal data on last search match from elements of LIST.
2864 LIST should have been created by calling `match-data' previously.
2866 If optional arg RESEAT is non-nil, make markers on LIST point nowhere. */)
/* Implementation notes.  The function loads search_regs and
   last_thing_searched from LIST, growing the register arrays when
   LIST describes more registers than are allocated.
   NOTE(review): several lines of this function (braces, some guards,
   assignment targets and declarations) are not present in this
   listing; comments below describe only what is visible.  */
2868 register Lisp_Object list
, reseat
;
2871 register Lisp_Object marker
;
/* When asynchronous code is running, set the current match data
   aside first.  */
2873 if (running_asynch_code
)
2874 save_search_regs ();
/* LIST must be a cons or nil.  */
2876 if (!CONSP (list
) && !NILP (list
))
2877 list
= wrong_type_argument (Qconsp
, list
);
2879 /* Unless we find a marker with a buffer or an explicit buffer
2880 in LIST, assume that this match data came from a string. */
2881 last_thing_searched
= Qt
;
2883 /* Allocate registers if they don't already exist. */
/* Each register consumes two list elements (start and end).  */
2885 int length
= XFASTINT (Flength (list
)) / 2;
2887 if (length
> search_regs
.num_regs
)
/* No registers yet: allocate fresh arrays.  The assignment targets
   are on lines not shown; presumably search_regs.start and
   search_regs.end.  */
2889 if (search_regs
.num_regs
== 0)
2892 = (regoff_t
*) xmalloc (length
* sizeof (regoff_t
));
2894 = (regoff_t
*) xmalloc (length
* sizeof (regoff_t
));
/* Otherwise the existing arrays are resized to LENGTH entries.  */
2899 = (regoff_t
*) xrealloc (search_regs
.start
,
2900 length
* sizeof (regoff_t
));
2902 = (regoff_t
*) xrealloc (search_regs
.end
,
2903 length
* sizeof (regoff_t
));
/* Mark the newly added registers as unused.  */
2906 for (i
= search_regs
.num_regs
; i
< length
; i
++)
2907 search_regs
.start
[i
] = -1;
2909 search_regs
.num_regs
= length
;
/* Walk LIST; register I is filled from a start element followed by
   an end element.  */
2912 for (i
= 0; CONSP (list
); i
++)
2914 marker
= XCAR (list
);
/* A buffer element names the buffer that was searched.  */
2915 if (BUFFERP (marker
))
2917 last_thing_searched
= marker
;
/* Register I is marked unused; the condition selecting this case
   is not visible here.  */
2924 search_regs
.start
[i
] = -1;
2933 if (MARKERP (marker
))
/* A marker with no buffer is replaced by the integer 0.  */
2935 if (XMARKER (marker
)->buffer
== 0)
2936 XSETFASTINT (marker
, 0);
/* Otherwise its buffer becomes the thing searched.  */
2938 XSETBUFFER (last_thing_searched
, XMARKER (marker
)->buffer
);
2941 CHECK_NUMBER_COERCE_MARKER (marker
);
2942 from
= XINT (marker
);
/* RESEAT handling for the start element.  M is presumably the
   original list element saved before coercion (its assignment for
   this element is not visible here).  */
2944 if (!NILP (reseat
) && MARKERP (m
))
2946 if (EQ (reseat
, Qevaporate
))
2949 unchain_marker (XMARKER (m
));
2950 XSETCAR (list
, Qnil
);
/* Step to the end element; what happens when LIST stops here is
   not visible in this listing.  */
2953 if ((list
= XCDR (list
), !CONSP (list
)))
2956 m
= marker
= XCAR (list
);
2958 if (MARKERP (marker
) && XMARKER (marker
)->buffer
== 0)
2959 XSETFASTINT (marker
, 0);
2961 CHECK_NUMBER_COERCE_MARKER (marker
);
/* Both ends are now integers: record the pair.  */
2962 search_regs
.start
[i
] = from
;
2963 search_regs
.end
[i
] = XINT (marker
);
/* RESEAT handling for the end element.  */
2965 if (!NILP (reseat
) && MARKERP (m
))
2967 if (EQ (reseat
, Qevaporate
))
2970 unchain_marker (XMARKER (m
));
2971 XSETCAR (list
, Qnil
);
/* Registers beyond those supplied by LIST are marked unused.  */
2977 for (; i
< search_regs
.num_regs
; i
++)
2978 search_regs
.start
[i
] = -1;
2984 /* If non-zero the match data have been saved in saved_search_regs
2985 during the execution of a sentinel or filter. */
2986 static int search_regs_saved
;
/* Holds the num_regs, start and end fields taken over from
   search_regs while search_regs_saved is set.  */
2987 static struct re_registers saved_search_regs
;
/* The value of last_thing_searched that goes with saved_search_regs;
   it is reset to Qnil once the data have been restored.  */
2988 static Lisp_Object saved_last_thing_searched
;
2990 /* Called from Flooking_at, Fstring_match, search_buffer, Fset_match_data
2991 if asynchronous code (filter or sentinel) is running. */
/* NOTE(review): the function header is not present in this listing;
   what follows is the visible part of the body.  */
/* Only the first call saves anything; later calls leave the
   already-saved data alone.  */
2995 if (!search_regs_saved
)
/* Hand the register arrays over to saved_search_regs by copying the
   pointers, not the contents.  */
2997 saved_search_regs
.num_regs
= search_regs
.num_regs
;
2998 saved_search_regs
.start
= search_regs
.start
;
2999 saved_search_regs
.end
= search_regs
.end
;
3000 saved_last_thing_searched
= last_thing_searched
;
/* Leave search_regs empty so it no longer refers to the saved
   arrays.  */
3001 last_thing_searched
= Qnil
;
3002 search_regs
.num_regs
= 0;
3003 search_regs
.start
= 0;
3004 search_regs
.end
= 0;
3006 search_regs_saved
= 1;
3010 /* Called upon exit from filters and sentinels. */
/* Undo save_search_regs: discard whatever match data were produced
   in the meantime and reinstate the saved data.  Does nothing when
   nothing was saved.  */
3012 restore_search_regs ()
3014 if (search_regs_saved
)
/* Free the arrays currently held by search_regs, if any registers
   are allocated.  */
3016 if (search_regs
.num_regs
> 0)
3018 xfree (search_regs
.start
);
3019 xfree (search_regs
.end
);
/* Take the saved arrays back by copying the pointers.  */
3021 search_regs
.num_regs
= saved_search_regs
.num_regs
;
3022 search_regs
.start
= saved_search_regs
.start
;
3023 search_regs
.end
= saved_search_regs
.end
;
3024 last_thing_searched
= saved_last_thing_searched
;
/* Clear the saved reference and mark the save slot free again.  */
3025 saved_last_thing_searched
= Qnil
;
3026 search_regs_saved
= 0;
/* Reinstate the match data held in LIST by calling Fset_match_data
   with RESEAT set to Qevaporate.  It is registered as an unwind
   handler by record_unwind_save_match_data.  */
3031 unwind_set_match_data (list
)
3034 /* It is safe to free (evaporate) the markers immediately. */
3035 return Fset_match_data (list
, Qevaporate
);
3038 /* Called to unwind protect the match data. */
/* Take a snapshot of the current match data with Fmatch_data (all
   three arguments nil) and register unwind_set_match_data to
   reinstate it.  */
3040 record_unwind_save_match_data ()
3042 record_unwind_protect (unwind_set_match_data
,
3043 Fmatch_data (Qnil
, Qnil
, Qnil
));
3046 /* Quote a string so that its regexp special characters match literally.  */
/* NOTE(review): a few lines of this function (parameter list,
   braces, the initialization of OUT and the copy of each input
   byte) are not present in this listing.  */
3048 DEFUN ("regexp-quote", Fregexp_quote
, Sregexp_quote
, 1, 1, 0,
3049 doc
: /* Return a regexp string which matches exactly STRING and nothing else. */)
3053 register unsigned char *in
, *out
, *end
;
3054 register unsigned char *temp
;
/* Counts inserted backslashes so the character count of the result
   can be computed below.  */
3055 int backslashes_added
= 0;
3057 CHECK_STRING (string
);
/* Stack scratch buffer of twice the input's byte length: at most
   one backslash is added per input byte.  */
3059 temp
= (unsigned char *) alloca (SBYTES (string
) * 2);
3061 /* Now copy the data into the new string, inserting escapes. */
3063 in
= SDATA (string
);
3064 end
= in
+ SBYTES (string
);
/* Scan the input byte by byte; each of the bytes tested below gets
   a backslash written in front of it.  */
3067 for (; in
!= end
; in
++)
3069 if (*in
== '[' || *in
== ']'
3070 || *in
== '*' || *in
== '.' || *in
== '\\'
3071 || *in
== '?' || *in
== '+'
3072 || *in
== '^' || *in
== '$')
3073 *out
++ = '\\', backslashes_added
++;
/* Build the result from TEMP.  The character count is the input's
   plus one per added backslash, and the result has the same
   multibyteness as STRING.  */
3077 return make_specified_string (temp
,
3078 SCHARS (string
) + backslashes_added
,
3080 STRING_MULTIBYTE (string
));
/* Start-up initialization for this file: set up the regexp cache,
   create the error symbols, initialize the match-data variables and
   register the Lisp-visible variable and primitives.
   NOTE(review): the function header is not present in this listing;
   presumably this is the body of the file's syms_of_... routine.  */
/* Set up every cache slot: a 100-byte pattern buffer, the slot's own
   fastmap, nil regexps registered with staticpro, and a link to the
   next slot (0 for the last one).  */
3088 for (i
= 0; i
< REGEXP_CACHE_SIZE
; ++i
)
3090 searchbufs
[i
].buf
.allocated
= 100;
3091 searchbufs
[i
].buf
.buffer
= (unsigned char *) xmalloc (100);
3092 searchbufs
[i
].buf
.fastmap
= searchbufs
[i
].fastmap
;
3093 searchbufs
[i
].regexp
= Qnil
;
3094 searchbufs
[i
].whitespace_regexp
= Qnil
;
3095 staticpro (&searchbufs
[i
].regexp
);
3096 staticpro (&searchbufs
[i
].whitespace_regexp
);
3097 searchbufs
[i
].next
= (i
== REGEXP_CACHE_SIZE
-1 ? 0 : &searchbufs
[i
+1]);
/* The list initially starts at slot 0.  */
3099 searchbuf_head
= &searchbufs
[0];
/* Create the two error symbols used by this file.  */
3101 Qsearch_failed
= intern ("search-failed");
3102 staticpro (&Qsearch_failed
);
3103 Qinvalid_regexp
= intern ("invalid-regexp");
3104 staticpro (&Qinvalid_regexp
);
/* Give each symbol its error conditions, the list (SYMBOL error),
   and its error message.  */
3106 Fput (Qsearch_failed
, Qerror_conditions
,
3107 Fcons (Qsearch_failed
, Fcons (Qerror
, Qnil
)));
3108 Fput (Qsearch_failed
, Qerror_message
,
3109 build_string ("Search failed"));
3111 Fput (Qinvalid_regexp
, Qerror_conditions
,
3112 Fcons (Qinvalid_regexp
, Fcons (Qerror
, Qnil
)));
3113 Fput (Qinvalid_regexp
, Qerror_message
,
3114 build_string ("Invalid regexp"));
/* Start with no recorded search, current or saved.  */
3116 last_thing_searched
= Qnil
;
3117 staticpro (&last_thing_searched
);
3119 saved_last_thing_searched
= Qnil
;
3120 staticpro (&saved_last_thing_searched
);
3122 DEFVAR_LISP ("search-spaces-regexp", &Vsearch_spaces_regexp
,
3123 doc
: /* Regexp to substitute for bunches of spaces in regexp search.
3124 Some commands use this for user-specified regexps.
3125 Spaces that occur inside character classes or repetition operators
3126 or other such regexp constructs are not replaced with this.
3127 A value of nil (which is the normal value) means treat spaces literally. */);
3128 Vsearch_spaces_regexp
= Qnil
;
/* Register the Lisp primitives of this file.  */
3130 defsubr (&Slooking_at
);
3131 defsubr (&Sposix_looking_at
);
3132 defsubr (&Sstring_match
);
3133 defsubr (&Sposix_string_match
);
3134 defsubr (&Ssearch_forward
);
3135 defsubr (&Ssearch_backward
);
3136 defsubr (&Sword_search_forward
);
3137 defsubr (&Sword_search_backward
);
3138 defsubr (&Sre_search_forward
);
3139 defsubr (&Sre_search_backward
);
3140 defsubr (&Sposix_search_forward
);
3141 defsubr (&Sposix_search_backward
);
3142 defsubr (&Sreplace_match
);
3143 defsubr (&Smatch_beginning
);
3144 defsubr (&Smatch_end
);
3145 defsubr (&Smatch_data
);
3146 defsubr (&Sset_match_data
);
3147 defsubr (&Sregexp_quote
);
3150 /* arch-tag: a6059d79-0552-4f14-a2cb-d379a4e3c78f
3151 (do not change this comment) */