1 /* String search routines for GNU Emacs.
2 Copyright (C) 1985, 1986, 1987, 1993, 1994, 1997, 1998, 1999, 2001, 2002,
3 2003, 2004, 2005, 2006, 2007, 2008
4 Free Software Foundation, Inc.
6 This file is part of GNU Emacs.
8 GNU Emacs is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
13 GNU Emacs is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GNU Emacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
21 Boston, MA 02110-1301, USA. */
30 #include "region-cache.h"
32 #include "blockinput.h"
33 #include "intervals.h"
35 #include <sys/types.h>
38 #define REGEXP_CACHE_SIZE 20
40 /* If the regexp is non-nil, then the buffer contains the compiled form
41 of that regexp, suitable for searching. */
44 struct regexp_cache
*next
;
45 Lisp_Object regexp
, whitespace_regexp
;
46 /* Syntax table for which the regexp applies. We need this because
47 of character classes. If this is t, then the compiled pattern is valid
48 for any syntax-table. */
49 Lisp_Object syntax_table
;
50 struct re_pattern_buffer buf
;
52 /* Nonzero means regexp was compiled to do full POSIX backtracking. */
56 /* The instances of that struct. */
57 struct regexp_cache searchbufs
[REGEXP_CACHE_SIZE
];
59 /* The head of the linked list; points to the most recently used buffer. */
60 struct regexp_cache
*searchbuf_head
;
63 /* Every call to re_match, etc., must pass &search_regs as the regs
64 argument unless you can show it is unnecessary (i.e., if re_match
65 is certainly going to be called again before region-around-match
68 Since the registers are now dynamically allocated, we need to make
69 sure not to refer to the Nth register before checking that it has
70 been allocated by checking search_regs.num_regs.
72 The regex code keeps track of whether it has allocated the search
73 buffer using bits in the re_pattern_buffer. This means that whenever
74 you compile a new pattern, it completely forgets whether it has
75 allocated any registers, and will allocate new registers the next
76 time you call a searching or matching function. Therefore, we need
77 to call re_set_registers after compiling a new pattern or after
78 setting the match registers, so that the regex functions will be
79 able to free or re-allocate it properly. */
80 static struct re_registers search_regs
;
82 /* The buffer in which the last search was performed, or
83 Qt if the last search was done in a string;
84 Qnil if no searching has been done yet. */
85 static Lisp_Object last_thing_searched
;
87 /* error condition signaled when regexp compile_pattern fails */
89 Lisp_Object Qinvalid_regexp
;
91 /* Error condition used for failing searches */
92 Lisp_Object Qsearch_failed
;
94 Lisp_Object Vsearch_spaces_regexp
;
96 static void set_search_regs ();
97 static void save_search_regs ();
98 static int simple_search ();
99 static int boyer_moore ();
100 static int search_buffer ();
101 static void matcher_overflow () NO_RETURN
;
106 error ("Stack overflow in regexp matcher");
109 /* Compile a regexp and signal a Lisp error if anything goes wrong.
110 PATTERN is the pattern to compile.
111 CP is the place to put the result.
112 TRANSLATE is a translation table for ignoring case, or nil for none.
113 REGP is the structure that says where to store the "register"
114 values that will result from matching this pattern.
115 If it is 0, we should compile the pattern not to record any
116 subexpression bounds.
117 POSIX is nonzero if we want full backtracking (POSIX style)
118 for this pattern. 0 means backtrack only enough to get a valid match.
119 MULTIBYTE is nonzero if we want to handle multibyte characters in
120 PATTERN. 0 means all multibyte characters are recognized just as
121 sequences of binary data.
123 The behavior also depends on Vsearch_spaces_regexp. */
126 compile_pattern_1 (cp
, pattern
, translate
, regp
, posix
, multibyte
)
127 struct regexp_cache
*cp
;
129 Lisp_Object translate
;
130 struct re_registers
*regp
;
134 unsigned char *raw_pattern
;
135 int raw_pattern_size
;
139 /* MULTIBYTE says whether the text to be searched is multibyte.
140 We must convert PATTERN to match that, or we will not really
141 find things right. */
143 if (multibyte
== STRING_MULTIBYTE (pattern
))
145 raw_pattern
= (unsigned char *) SDATA (pattern
);
146 raw_pattern_size
= SBYTES (pattern
);
150 raw_pattern_size
= count_size_as_multibyte (SDATA (pattern
),
152 raw_pattern
= (unsigned char *) alloca (raw_pattern_size
+ 1);
153 copy_text (SDATA (pattern
), raw_pattern
,
154 SCHARS (pattern
), 0, 1);
158 /* Converting multibyte to single-byte.
160 ??? Perhaps this conversion should be done in a special way
161 by subtracting nonascii-insert-offset from each non-ASCII char,
162 so that only the multibyte chars which really correspond to
163 the chosen single-byte character set can possibly match. */
164 raw_pattern_size
= SCHARS (pattern
);
165 raw_pattern
= (unsigned char *) alloca (raw_pattern_size
+ 1);
166 copy_text (SDATA (pattern
), raw_pattern
,
167 SBYTES (pattern
), 1, 0);
171 cp
->buf
.translate
= (! NILP (translate
) ? translate
: make_number (0));
173 cp
->buf
.multibyte
= multibyte
;
174 cp
->whitespace_regexp
= Vsearch_spaces_regexp
;
175 /* rms: I think BLOCK_INPUT is not needed here any more,
176 because regex.c defines malloc to call xmalloc.
177 Using BLOCK_INPUT here means the debugger won't run if an error occurs.
178 So let's turn it off. */
180 old
= re_set_syntax (RE_SYNTAX_EMACS
181 | (posix
? 0 : RE_NO_POSIX_BACKTRACKING
));
183 re_set_whitespace_regexp (NILP (Vsearch_spaces_regexp
) ? NULL
184 : SDATA (Vsearch_spaces_regexp
));
186 val
= (char *) re_compile_pattern ((char *)raw_pattern
,
187 raw_pattern_size
, &cp
->buf
);
189 /* If the compiled pattern hard codes some of the contents of the
190 syntax-table, it can only be reused with *this* syntax table. */
191 cp
->syntax_table
= cp
->buf
.used_syntax
? current_buffer
->syntax_table
: Qt
;
193 re_set_whitespace_regexp (NULL
);
198 xsignal1 (Qinvalid_regexp
, build_string (val
));
200 cp
->regexp
= Fcopy_sequence (pattern
);
203 /* Shrink each compiled regexp buffer in the cache
204 to the size actually used right now.
205 This is called from garbage collection. */
208 shrink_regexp_cache ()
210 struct regexp_cache
*cp
;
212 for (cp
= searchbuf_head
; cp
!= 0; cp
= cp
->next
)
214 cp
->buf
.allocated
= cp
->buf
.used
;
216 = (unsigned char *) xrealloc (cp
->buf
.buffer
, cp
->buf
.used
);
220 /* Clear the regexp cache w.r.t. a particular syntax table,
221 because it was changed.
222 There is no danger of memory leak here because re_compile_pattern
223 automagically manages the memory in each re_pattern_buffer struct,
224 based on its `allocated' and `buffer' values. */
226 clear_regexp_cache ()
230 for (i
= 0; i
< REGEXP_CACHE_SIZE
; ++i
)
231 /* It's tempting to compare with the syntax-table we've actually changd,
232 but it's not sufficient because char-table inheritance mewans that
233 modifying one syntax-table can change others at the same time. */
234 if (!EQ (searchbufs
[i
].syntax_table
, Qt
))
235 searchbufs
[i
].regexp
= Qnil
;
238 /* Compile a regexp if necessary, but first check to see if there's one in
240 PATTERN is the pattern to compile.
241 TRANSLATE is a translation table for ignoring case, or nil for none.
242 REGP is the structure that says where to store the "register"
243 values that will result from matching this pattern.
244 If it is 0, we should compile the pattern not to record any
245 subexpression bounds.
246 POSIX is nonzero if we want full backtracking (POSIX style)
247 for this pattern. 0 means backtrack only enough to get a valid match. */
249 struct re_pattern_buffer
*
250 compile_pattern (pattern
, regp
, translate
, posix
, multibyte
)
252 struct re_registers
*regp
;
253 Lisp_Object translate
;
254 int posix
, multibyte
;
256 struct regexp_cache
*cp
, **cpp
;
258 for (cpp
= &searchbuf_head
; ; cpp
= &cp
->next
)
261 /* Entries are initialized to nil, and may be set to nil by
262 compile_pattern_1 if the pattern isn't valid. Don't apply
263 string accessors in those cases. However, compile_pattern_1
264 is only applied to the cache entry we pick here to reuse. So
265 nil should never appear before a non-nil entry. */
266 if (NILP (cp
->regexp
))
268 if (SCHARS (cp
->regexp
) == SCHARS (pattern
)
269 && STRING_MULTIBYTE (cp
->regexp
) == STRING_MULTIBYTE (pattern
)
270 && !NILP (Fstring_equal (cp
->regexp
, pattern
))
271 && EQ (cp
->buf
.translate
, (! NILP (translate
) ? translate
: make_number (0)))
272 && cp
->posix
== posix
273 && cp
->buf
.multibyte
== multibyte
274 && (EQ (cp
->syntax_table
, Qt
)
275 || EQ (cp
->syntax_table
, current_buffer
->syntax_table
))
276 && !NILP (Fequal (cp
->whitespace_regexp
, Vsearch_spaces_regexp
)))
279 /* If we're at the end of the cache, compile into the nil cell
280 we found, or the last (least recently used) cell with a
285 compile_pattern_1 (cp
, pattern
, translate
, regp
, posix
, multibyte
);
290 /* When we get here, cp (aka *cpp) contains the compiled pattern,
291 either because we found it in the cache or because we just compiled it.
292 Move it to the front of the queue to mark it as most recently used. */
294 cp
->next
= searchbuf_head
;
297 /* Advise the searching functions about the space we have allocated
298 for register data. */
300 re_set_registers (&cp
->buf
, regp
, regp
->num_regs
, regp
->start
, regp
->end
);
307 looking_at_1 (string
, posix
)
312 unsigned char *p1
, *p2
;
315 struct re_pattern_buffer
*bufp
;
317 if (running_asynch_code
)
320 /* This is so set_image_of_range_1 in regex.c can find the EQV table. */
321 XCHAR_TABLE (current_buffer
->case_canon_table
)->extras
[2]
322 = current_buffer
->case_eqv_table
;
324 CHECK_STRING (string
);
325 bufp
= compile_pattern (string
, &search_regs
,
326 (!NILP (current_buffer
->case_fold_search
)
327 ? current_buffer
->case_canon_table
: Qnil
),
329 !NILP (current_buffer
->enable_multibyte_characters
));
332 QUIT
; /* Do a pending quit right away, to avoid paradoxical behavior */
334 /* Get pointers and sizes of the two strings
335 that make up the visible portion of the buffer. */
338 s1
= GPT_BYTE
- BEGV_BYTE
;
340 s2
= ZV_BYTE
- GPT_BYTE
;
344 s2
= ZV_BYTE
- BEGV_BYTE
;
349 s1
= ZV_BYTE
- BEGV_BYTE
;
353 re_match_object
= Qnil
;
355 i
= re_match_2 (bufp
, (char *) p1
, s1
, (char *) p2
, s2
,
356 PT_BYTE
- BEGV_BYTE
, &search_regs
,
357 ZV_BYTE
- BEGV_BYTE
);
363 val
= (0 <= i
? Qt
: Qnil
);
365 for (i
= 0; i
< search_regs
.num_regs
; i
++)
366 if (search_regs
.start
[i
] >= 0)
369 = BYTE_TO_CHAR (search_regs
.start
[i
] + BEGV_BYTE
);
371 = BYTE_TO_CHAR (search_regs
.end
[i
] + BEGV_BYTE
);
373 XSETBUFFER (last_thing_searched
, current_buffer
);
377 DEFUN ("looking-at", Flooking_at
, Slooking_at
, 1, 1, 0,
378 doc
: /* Return t if text after point matches regular expression REGEXP.
379 This function modifies the match data that `match-beginning',
380 `match-end' and `match-data' access; save and restore the match
381 data if you want to preserve them. */)
385 return looking_at_1 (regexp
, 0);
388 DEFUN ("posix-looking-at", Fposix_looking_at
, Sposix_looking_at
, 1, 1, 0,
389 doc
: /* Return t if text after point matches regular expression REGEXP.
390 Find the longest match, in accord with Posix regular expression rules.
391 This function modifies the match data that `match-beginning',
392 `match-end' and `match-data' access; save and restore the match
393 data if you want to preserve them. */)
397 return looking_at_1 (regexp
, 1);
401 string_match_1 (regexp
, string
, start
, posix
)
402 Lisp_Object regexp
, string
, start
;
406 struct re_pattern_buffer
*bufp
;
410 if (running_asynch_code
)
413 CHECK_STRING (regexp
);
414 CHECK_STRING (string
);
417 pos
= 0, pos_byte
= 0;
420 int len
= SCHARS (string
);
422 CHECK_NUMBER (start
);
424 if (pos
< 0 && -pos
<= len
)
426 else if (0 > pos
|| pos
> len
)
427 args_out_of_range (string
, start
);
428 pos_byte
= string_char_to_byte (string
, pos
);
431 /* This is so set_image_of_range_1 in regex.c can find the EQV table. */
432 XCHAR_TABLE (current_buffer
->case_canon_table
)->extras
[2]
433 = current_buffer
->case_eqv_table
;
435 bufp
= compile_pattern (regexp
, &search_regs
,
436 (!NILP (current_buffer
->case_fold_search
)
437 ? current_buffer
->case_canon_table
: Qnil
),
439 STRING_MULTIBYTE (string
));
441 re_match_object
= string
;
443 val
= re_search (bufp
, (char *) SDATA (string
),
444 SBYTES (string
), pos_byte
,
445 SBYTES (string
) - pos_byte
,
448 last_thing_searched
= Qt
;
451 if (val
< 0) return Qnil
;
453 for (i
= 0; i
< search_regs
.num_regs
; i
++)
454 if (search_regs
.start
[i
] >= 0)
457 = string_byte_to_char (string
, search_regs
.start
[i
]);
459 = string_byte_to_char (string
, search_regs
.end
[i
]);
462 return make_number (string_byte_to_char (string
, val
));
465 DEFUN ("string-match", Fstring_match
, Sstring_match
, 2, 3, 0,
466 doc
: /* Return index of start of first match for REGEXP in STRING, or nil.
467 Matching ignores case if `case-fold-search' is non-nil.
468 If third arg START is non-nil, start search at that index in STRING.
469 For index of first char beyond the match, do (match-end 0).
470 `match-end' and `match-beginning' also give indices of substrings
471 matched by parenthesis constructs in the pattern.
473 You can use the function `match-string' to extract the substrings
474 matched by the parenthesis constructions in REGEXP. */)
475 (regexp
, string
, start
)
476 Lisp_Object regexp
, string
, start
;
478 return string_match_1 (regexp
, string
, start
, 0);
481 DEFUN ("posix-string-match", Fposix_string_match
, Sposix_string_match
, 2, 3, 0,
482 doc
: /* Return index of start of first match for REGEXP in STRING, or nil.
483 Find the longest match, in accord with Posix regular expression rules.
484 Case is ignored if `case-fold-search' is non-nil in the current buffer.
485 If third arg START is non-nil, start search at that index in STRING.
486 For index of first char beyond the match, do (match-end 0).
487 `match-end' and `match-beginning' also give indices of substrings
488 matched by parenthesis constructs in the pattern. */)
489 (regexp
, string
, start
)
490 Lisp_Object regexp
, string
, start
;
492 return string_match_1 (regexp
, string
, start
, 1);
495 /* Match REGEXP against STRING, searching all of STRING,
496 and return the index of the match, or negative on failure.
497 This does not clobber the match data. */
500 fast_string_match (regexp
, string
)
501 Lisp_Object regexp
, string
;
504 struct re_pattern_buffer
*bufp
;
506 bufp
= compile_pattern (regexp
, 0, Qnil
,
507 0, STRING_MULTIBYTE (string
));
509 re_match_object
= string
;
511 val
= re_search (bufp
, (char *) SDATA (string
),
518 /* Match REGEXP against STRING, searching all of STRING ignoring case,
519 and return the index of the match, or negative on failure.
520 This does not clobber the match data.
521 We assume that STRING contains single-byte characters. */
523 extern Lisp_Object Vascii_downcase_table
;
526 fast_c_string_match_ignore_case (regexp
, string
)
531 struct re_pattern_buffer
*bufp
;
532 int len
= strlen (string
);
534 regexp
= string_make_unibyte (regexp
);
535 re_match_object
= Qt
;
536 bufp
= compile_pattern (regexp
, 0,
537 Vascii_canon_table
, 0,
540 val
= re_search (bufp
, string
, len
, 0, len
, 0);
545 /* Like fast_string_match but ignore case. */
548 fast_string_match_ignore_case (regexp
, string
)
549 Lisp_Object regexp
, string
;
552 struct re_pattern_buffer
*bufp
;
554 bufp
= compile_pattern (regexp
, 0, Vascii_canon_table
,
555 0, STRING_MULTIBYTE (string
));
557 re_match_object
= string
;
559 val
= re_search (bufp
, (char *) SDATA (string
),
566 /* The newline cache: remembering which sections of text have no newlines. */
568 /* If the user has requested newline caching, make sure it's on.
569 Otherwise, make sure it's off.
570 This is our cheezy way of associating an action with the change of
571 state of a buffer-local variable. */
573 newline_cache_on_off (buf
)
576 if (NILP (buf
->cache_long_line_scans
))
578 /* It should be off. */
579 if (buf
->newline_cache
)
581 free_region_cache (buf
->newline_cache
);
582 buf
->newline_cache
= 0;
587 /* It should be on. */
588 if (buf
->newline_cache
== 0)
589 buf
->newline_cache
= new_region_cache ();
594 /* Search for COUNT instances of the character TARGET between START and END.
596 If COUNT is positive, search forwards; END must be >= START.
597 If COUNT is negative, search backwards for the -COUNTth instance;
598 END must be <= START.
599 If COUNT is zero, do anything you please; run rogue, for all I care.
601 If END is zero, use BEGV or ZV instead, as appropriate for the
602 direction indicated by COUNT.
604 If we find COUNT instances, set *SHORTAGE to zero, and return the
605 position past the COUNTth match. Note that for reverse motion
606 this is not the same as the usual convention for Emacs motion commands.
608 If we don't find COUNT instances before reaching END, set *SHORTAGE
609 to the number of TARGETs left unfound, and return END.
611 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
612 except when inside redisplay. */
615 scan_buffer (target
, start
, end
, count
, shortage
, allow_quit
)
622 struct region_cache
*newline_cache
;
633 if (! end
) end
= BEGV
;
636 newline_cache_on_off (current_buffer
);
637 newline_cache
= current_buffer
->newline_cache
;
642 immediate_quit
= allow_quit
;
647 /* Our innermost scanning loop is very simple; it doesn't know
648 about gaps, buffer ends, or the newline cache. ceiling is
649 the position of the last character before the next such
650 obstacle --- the last character the dumb search loop should
652 int ceiling_byte
= CHAR_TO_BYTE (end
) - 1;
653 int start_byte
= CHAR_TO_BYTE (start
);
656 /* If we're looking for a newline, consult the newline cache
657 to see where we can avoid some scanning. */
658 if (target
== '\n' && newline_cache
)
662 while (region_cache_forward
663 (current_buffer
, newline_cache
, start_byte
, &next_change
))
664 start_byte
= next_change
;
665 immediate_quit
= allow_quit
;
667 /* START should never be after END. */
668 if (start_byte
> ceiling_byte
)
669 start_byte
= ceiling_byte
;
671 /* Now the text after start is an unknown region, and
672 next_change is the position of the next known region. */
673 ceiling_byte
= min (next_change
- 1, ceiling_byte
);
676 /* The dumb loop can only scan text stored in contiguous
677 bytes. BUFFER_CEILING_OF returns the last character
678 position that is contiguous, so the ceiling is the
679 position after that. */
680 tem
= BUFFER_CEILING_OF (start_byte
);
681 ceiling_byte
= min (tem
, ceiling_byte
);
684 /* The termination address of the dumb loop. */
685 register unsigned char *ceiling_addr
686 = BYTE_POS_ADDR (ceiling_byte
) + 1;
687 register unsigned char *cursor
688 = BYTE_POS_ADDR (start_byte
);
689 unsigned char *base
= cursor
;
691 while (cursor
< ceiling_addr
)
693 unsigned char *scan_start
= cursor
;
696 while (*cursor
!= target
&& ++cursor
< ceiling_addr
)
699 /* If we're looking for newlines, cache the fact that
700 the region from start to cursor is free of them. */
701 if (target
== '\n' && newline_cache
)
702 know_region_cache (current_buffer
, newline_cache
,
703 start_byte
+ scan_start
- base
,
704 start_byte
+ cursor
- base
);
706 /* Did we find the target character? */
707 if (cursor
< ceiling_addr
)
712 return BYTE_TO_CHAR (start_byte
+ cursor
- base
+ 1);
718 start
= BYTE_TO_CHAR (start_byte
+ cursor
- base
);
724 /* The last character to check before the next obstacle. */
725 int ceiling_byte
= CHAR_TO_BYTE (end
);
726 int start_byte
= CHAR_TO_BYTE (start
);
729 /* Consult the newline cache, if appropriate. */
730 if (target
== '\n' && newline_cache
)
734 while (region_cache_backward
735 (current_buffer
, newline_cache
, start_byte
, &next_change
))
736 start_byte
= next_change
;
737 immediate_quit
= allow_quit
;
739 /* Start should never be at or before end. */
740 if (start_byte
<= ceiling_byte
)
741 start_byte
= ceiling_byte
+ 1;
743 /* Now the text before start is an unknown region, and
744 next_change is the position of the next known region. */
745 ceiling_byte
= max (next_change
, ceiling_byte
);
748 /* Stop scanning before the gap. */
749 tem
= BUFFER_FLOOR_OF (start_byte
- 1);
750 ceiling_byte
= max (tem
, ceiling_byte
);
753 /* The termination address of the dumb loop. */
754 register unsigned char *ceiling_addr
= BYTE_POS_ADDR (ceiling_byte
);
755 register unsigned char *cursor
= BYTE_POS_ADDR (start_byte
- 1);
756 unsigned char *base
= cursor
;
758 while (cursor
>= ceiling_addr
)
760 unsigned char *scan_start
= cursor
;
762 while (*cursor
!= target
&& --cursor
>= ceiling_addr
)
765 /* If we're looking for newlines, cache the fact that
766 the region from after the cursor to start is free of them. */
767 if (target
== '\n' && newline_cache
)
768 know_region_cache (current_buffer
, newline_cache
,
769 start_byte
+ cursor
- base
,
770 start_byte
+ scan_start
- base
);
772 /* Did we find the target character? */
773 if (cursor
>= ceiling_addr
)
778 return BYTE_TO_CHAR (start_byte
+ cursor
- base
);
784 start
= BYTE_TO_CHAR (start_byte
+ cursor
- base
);
790 *shortage
= count
* direction
;
794 /* Search for COUNT instances of a line boundary, which means either a
795 newline or (if selective display enabled) a carriage return.
796 Start at START. If COUNT is negative, search backwards.
798 We report the resulting position by calling TEMP_SET_PT_BOTH.
800 If we find COUNT instances. we position after (always after,
801 even if scanning backwards) the COUNTth match, and return 0.
803 If we don't find COUNT instances before reaching the end of the
804 buffer (or the beginning, if scanning backwards), we return
805 the number of line boundaries left unfound, and position at
806 the limit we bumped up against.
808 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
809 except in special cases. */
812 scan_newline (start
, start_byte
, limit
, limit_byte
, count
, allow_quit
)
813 int start
, start_byte
;
814 int limit
, limit_byte
;
818 int direction
= ((count
> 0) ? 1 : -1);
820 register unsigned char *cursor
;
823 register int ceiling
;
824 register unsigned char *ceiling_addr
;
826 int old_immediate_quit
= immediate_quit
;
828 /* The code that follows is like scan_buffer
829 but checks for either newline or carriage return. */
834 start_byte
= CHAR_TO_BYTE (start
);
838 while (start_byte
< limit_byte
)
840 ceiling
= BUFFER_CEILING_OF (start_byte
);
841 ceiling
= min (limit_byte
- 1, ceiling
);
842 ceiling_addr
= BYTE_POS_ADDR (ceiling
) + 1;
843 base
= (cursor
= BYTE_POS_ADDR (start_byte
));
846 while (*cursor
!= '\n' && ++cursor
!= ceiling_addr
)
849 if (cursor
!= ceiling_addr
)
853 immediate_quit
= old_immediate_quit
;
854 start_byte
= start_byte
+ cursor
- base
+ 1;
855 start
= BYTE_TO_CHAR (start_byte
);
856 TEMP_SET_PT_BOTH (start
, start_byte
);
860 if (++cursor
== ceiling_addr
)
866 start_byte
+= cursor
- base
;
871 while (start_byte
> limit_byte
)
873 ceiling
= BUFFER_FLOOR_OF (start_byte
- 1);
874 ceiling
= max (limit_byte
, ceiling
);
875 ceiling_addr
= BYTE_POS_ADDR (ceiling
) - 1;
876 base
= (cursor
= BYTE_POS_ADDR (start_byte
- 1) + 1);
879 while (--cursor
!= ceiling_addr
&& *cursor
!= '\n')
882 if (cursor
!= ceiling_addr
)
886 immediate_quit
= old_immediate_quit
;
887 /* Return the position AFTER the match we found. */
888 start_byte
= start_byte
+ cursor
- base
+ 1;
889 start
= BYTE_TO_CHAR (start_byte
);
890 TEMP_SET_PT_BOTH (start
, start_byte
);
897 /* Here we add 1 to compensate for the last decrement
898 of CURSOR, which took it past the valid range. */
899 start_byte
+= cursor
- base
+ 1;
903 TEMP_SET_PT_BOTH (limit
, limit_byte
);
904 immediate_quit
= old_immediate_quit
;
906 return count
* direction
;
910 find_next_newline_no_quit (from
, cnt
)
911 register int from
, cnt
;
913 return scan_buffer ('\n', from
, 0, cnt
, (int *) 0, 0);
916 /* Like find_next_newline, but returns position before the newline,
917 not after, and only search up to TO. This isn't just
918 find_next_newline (...)-1, because you might hit TO. */
921 find_before_next_newline (from
, to
, cnt
)
925 int pos
= scan_buffer ('\n', from
, to
, cnt
, &shortage
, 1);
933 /* Subroutines of Lisp buffer search functions. */
936 search_command (string
, bound
, noerror
, count
, direction
, RE
, posix
)
937 Lisp_Object string
, bound
, noerror
, count
;
948 CHECK_NUMBER (count
);
952 CHECK_STRING (string
);
956 lim
= ZV
, lim_byte
= ZV_BYTE
;
958 lim
= BEGV
, lim_byte
= BEGV_BYTE
;
962 CHECK_NUMBER_COERCE_MARKER (bound
);
964 if (n
> 0 ? lim
< PT
: lim
> PT
)
965 error ("Invalid search bound (wrong side of point)");
967 lim
= ZV
, lim_byte
= ZV_BYTE
;
969 lim
= BEGV
, lim_byte
= BEGV_BYTE
;
971 lim_byte
= CHAR_TO_BYTE (lim
);
974 /* This is so set_image_of_range_1 in regex.c can find the EQV table. */
975 XCHAR_TABLE (current_buffer
->case_canon_table
)->extras
[2]
976 = current_buffer
->case_eqv_table
;
978 np
= search_buffer (string
, PT
, PT_BYTE
, lim
, lim_byte
, n
, RE
,
979 (!NILP (current_buffer
->case_fold_search
)
980 ? current_buffer
->case_canon_table
982 (!NILP (current_buffer
->case_fold_search
)
983 ? current_buffer
->case_eqv_table
989 xsignal1 (Qsearch_failed
, string
);
991 if (!EQ (noerror
, Qt
))
993 if (lim
< BEGV
|| lim
> ZV
)
995 SET_PT_BOTH (lim
, lim_byte
);
997 #if 0 /* This would be clean, but maybe programs depend on
998 a value of nil here. */
1006 if (np
< BEGV
|| np
> ZV
)
1011 return make_number (np
);
1014 /* Return 1 if REGEXP it matches just one constant string. */
1017 trivial_regexp_p (regexp
)
1020 int len
= SBYTES (regexp
);
1021 unsigned char *s
= SDATA (regexp
);
1026 case '.': case '*': case '+': case '?': case '[': case '^': case '$':
1033 case '|': case '(': case ')': case '`': case '\'': case 'b':
1034 case 'B': case '<': case '>': case 'w': case 'W': case 's':
1035 case 'S': case '=': case '{': case '}': case '_':
1036 case 'c': case 'C': /* for categoryspec and notcategoryspec */
1037 case '1': case '2': case '3': case '4': case '5':
1038 case '6': case '7': case '8': case '9':
1046 /* Search for the n'th occurrence of STRING in the current buffer,
1047 starting at position POS and stopping at position LIM,
1048 treating STRING as a literal string if RE is false or as
1049 a regular expression if RE is true.
1051 If N is positive, searching is forward and LIM must be greater than POS.
1052 If N is negative, searching is backward and LIM must be less than POS.
1054 Returns -x if x occurrences remain to be found (x > 0),
1055 or else the position at the beginning of the Nth occurrence
1056 (if searching backward) or the end (if searching forward).
1058 POSIX is nonzero if we want full backtracking (POSIX style)
1059 for this pattern. 0 means backtrack only enough to get a valid match. */
1061 #define TRANSLATE(out, trt, d) \
1067 temp = Faref (trt, make_number (d)); \
1068 if (INTEGERP (temp)) \
1069 out = XINT (temp); \
1079 search_buffer (string
, pos
, pos_byte
, lim
, lim_byte
, n
,
1080 RE
, trt
, inverse_trt
, posix
)
1089 Lisp_Object inverse_trt
;
1092 int len
= SCHARS (string
);
1093 int len_byte
= SBYTES (string
);
1096 if (running_asynch_code
)
1097 save_search_regs ();
1099 /* Searching 0 times means don't move. */
1100 /* Null string is found at starting position. */
1101 if (len
== 0 || n
== 0)
1103 set_search_regs (pos_byte
, 0);
1107 if (RE
&& !(trivial_regexp_p (string
) && NILP (Vsearch_spaces_regexp
)))
1109 unsigned char *p1
, *p2
;
1111 struct re_pattern_buffer
*bufp
;
1113 bufp
= compile_pattern (string
, &search_regs
, trt
, posix
,
1114 !NILP (current_buffer
->enable_multibyte_characters
));
1116 immediate_quit
= 1; /* Quit immediately if user types ^G,
1117 because letting this function finish
1118 can take too long. */
1119 QUIT
; /* Do a pending quit right away,
1120 to avoid paradoxical behavior */
1121 /* Get pointers and sizes of the two strings
1122 that make up the visible portion of the buffer. */
1125 s1
= GPT_BYTE
- BEGV_BYTE
;
1127 s2
= ZV_BYTE
- GPT_BYTE
;
1131 s2
= ZV_BYTE
- BEGV_BYTE
;
1136 s1
= ZV_BYTE
- BEGV_BYTE
;
1139 re_match_object
= Qnil
;
1144 val
= re_search_2 (bufp
, (char *) p1
, s1
, (char *) p2
, s2
,
1145 pos_byte
- BEGV_BYTE
, lim_byte
- pos_byte
,
1147 /* Don't allow match past current point */
1148 pos_byte
- BEGV_BYTE
);
1151 matcher_overflow ();
1155 pos_byte
= search_regs
.start
[0] + BEGV_BYTE
;
1156 for (i
= 0; i
< search_regs
.num_regs
; i
++)
1157 if (search_regs
.start
[i
] >= 0)
1159 search_regs
.start
[i
]
1160 = BYTE_TO_CHAR (search_regs
.start
[i
] + BEGV_BYTE
);
1162 = BYTE_TO_CHAR (search_regs
.end
[i
] + BEGV_BYTE
);
1164 XSETBUFFER (last_thing_searched
, current_buffer
);
1165 /* Set pos to the new position. */
1166 pos
= search_regs
.start
[0];
1178 val
= re_search_2 (bufp
, (char *) p1
, s1
, (char *) p2
, s2
,
1179 pos_byte
- BEGV_BYTE
, lim_byte
- pos_byte
,
1181 lim_byte
- BEGV_BYTE
);
1184 matcher_overflow ();
1188 pos_byte
= search_regs
.end
[0] + BEGV_BYTE
;
1189 for (i
= 0; i
< search_regs
.num_regs
; i
++)
1190 if (search_regs
.start
[i
] >= 0)
1192 search_regs
.start
[i
]
1193 = BYTE_TO_CHAR (search_regs
.start
[i
] + BEGV_BYTE
);
1195 = BYTE_TO_CHAR (search_regs
.end
[i
] + BEGV_BYTE
);
1197 XSETBUFFER (last_thing_searched
, current_buffer
);
1198 pos
= search_regs
.end
[0];
1210 else /* non-RE case */
1212 unsigned char *raw_pattern
, *pat
;
1213 int raw_pattern_size
;
1214 int raw_pattern_size_byte
;
1215 unsigned char *patbuf
;
1216 int multibyte
= !NILP (current_buffer
->enable_multibyte_characters
);
1217 unsigned char *base_pat
;
1218 /* Set to positive if we find a non-ASCII char that need
1219 translation. Otherwise set to zero later. */
1220 int charset_base
= -1;
1221 int boyer_moore_ok
= 1;
1223 /* MULTIBYTE says whether the text to be searched is multibyte.
1224 We must convert PATTERN to match that, or we will not really
1225 find things right. */
1227 if (multibyte
== STRING_MULTIBYTE (string
))
1229 raw_pattern
= (unsigned char *) SDATA (string
);
1230 raw_pattern_size
= SCHARS (string
);
1231 raw_pattern_size_byte
= SBYTES (string
);
1235 raw_pattern_size
= SCHARS (string
);
1236 raw_pattern_size_byte
1237 = count_size_as_multibyte (SDATA (string
),
1239 raw_pattern
= (unsigned char *) alloca (raw_pattern_size_byte
+ 1);
1240 copy_text (SDATA (string
), raw_pattern
,
1241 SCHARS (string
), 0, 1);
1245 /* Converting multibyte to single-byte.
1247 ??? Perhaps this conversion should be done in a special way
1248 by subtracting nonascii-insert-offset from each non-ASCII char,
1249 so that only the multibyte chars which really correspond to
1250 the chosen single-byte character set can possibly match. */
1251 raw_pattern_size
= SCHARS (string
);
1252 raw_pattern_size_byte
= SCHARS (string
);
1253 raw_pattern
= (unsigned char *) alloca (raw_pattern_size
+ 1);
1254 copy_text (SDATA (string
), raw_pattern
,
1255 SBYTES (string
), 1, 0);
1258 /* Copy and optionally translate the pattern. */
1259 len
= raw_pattern_size
;
1260 len_byte
= raw_pattern_size_byte
;
1261 patbuf
= (unsigned char *) alloca (len_byte
);
1263 base_pat
= raw_pattern
;
1266 /* Fill patbuf by translated characters in STRING while
1267 checking if we can use boyer-moore search. If TRT is
1268 non-nil, we can use boyer-moore search only if TRT can be
1269 represented by the byte array of 256 elements. For that,
1270 all non-ASCII case-equivalents of all case-senstive
1271 characters in STRING must belong to the same charset and
1276 unsigned char str_base
[MAX_MULTIBYTE_LENGTH
], *str
;
1277 int c
, translated
, inverse
;
1278 int in_charlen
, charlen
;
1280 /* If we got here and the RE flag is set, it's because we're
1281 dealing with a regexp known to be trivial, so the backslash
1282 just quotes the next character. */
1283 if (RE
&& *base_pat
== '\\')
1291 c
= STRING_CHAR_AND_LENGTH (base_pat
, len_byte
, in_charlen
);
1296 charlen
= in_charlen
;
1300 /* Translate the character. */
1301 TRANSLATE (translated
, trt
, c
);
1302 charlen
= CHAR_STRING (translated
, str_base
);
1305 /* Check if C has any other case-equivalents. */
1306 TRANSLATE (inverse
, inverse_trt
, c
);
1307 /* If so, check if we can use boyer-moore. */
1308 if (c
!= inverse
&& boyer_moore_ok
)
1310 /* Check if all equivalents belong to the same
1311 charset & row. Note that the check of C
1312 itself is done by the last iteration. Note
1313 also that we don't have to check ASCII
1314 characters because boyer-moore search can
1315 always handle their translation. */
1318 if (ASCII_BYTE_P (inverse
))
1320 if (charset_base
> 0)
1327 else if (SINGLE_BYTE_CHAR_P (inverse
))
1329 /* Boyer-moore search can't handle a
1330 translation of an eight-bit
1335 else if (charset_base
< 0)
1336 charset_base
= inverse
& ~CHAR_FIELD3_MASK
;
1337 else if ((inverse
& ~CHAR_FIELD3_MASK
)
1345 TRANSLATE (inverse
, inverse_trt
, inverse
);
1349 if (charset_base
< 0)
1352 /* Store this character into the translated pattern. */
1353 bcopy (str
, pat
, charlen
);
1355 base_pat
+= in_charlen
;
1356 len_byte
-= in_charlen
;
1361 /* Unibyte buffer. */
1367 /* If we got here and the RE flag is set, it's because we're
1368 dealing with a regexp known to be trivial, so the backslash
1369 just quotes the next character. */
1370 if (RE
&& *base_pat
== '\\')
1377 TRANSLATE (translated
, trt
, c
);
1378 *pat
++ = translated
;
1382 len_byte
= pat
- patbuf
;
1383 len
= raw_pattern_size
;
1384 pat
= base_pat
= patbuf
;
1387 return boyer_moore (n
, pat
, len
, len_byte
, trt
, inverse_trt
,
1388 pos
, pos_byte
, lim
, lim_byte
,
1391 return simple_search (n
, pat
, len
, len_byte
, trt
,
1392 pos
, pos_byte
, lim
, lim_byte
);
1396 /* Do a simple string search N times for the string PAT,
1397 whose length is LEN/LEN_BYTE,
1398 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1399 TRT is the translation table.
1401 Return the character position where the match is found.
1402 Otherwise, if M matches remained to be found, return -M.
1404 This kind of search works regardless of what is in PAT and
1405 regardless of what is in TRT. It is used in cases where
1406 boyer_moore cannot work. */
1409 simple_search (n
, pat
, len
, len_byte
, trt
, pos
, pos_byte
, lim
, lim_byte
)
1417 int multibyte
= ! NILP (current_buffer
->enable_multibyte_characters
);
1418 int forward
= n
> 0;
1420 if (lim
> pos
&& multibyte
)
1425 /* Try matching at position POS. */
1427 int this_pos_byte
= pos_byte
;
1429 int this_len_byte
= len_byte
;
1430 unsigned char *p
= pat
;
1431 if (pos
+ len
> lim
)
1434 while (this_len
> 0)
1436 int charlen
, buf_charlen
;
1439 pat_ch
= STRING_CHAR_AND_LENGTH (p
, this_len_byte
, charlen
);
1440 buf_ch
= STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte
),
1441 ZV_BYTE
- this_pos_byte
,
1443 TRANSLATE (buf_ch
, trt
, buf_ch
);
1445 if (buf_ch
!= pat_ch
)
1448 this_len_byte
-= charlen
;
1452 this_pos_byte
+= buf_charlen
;
1459 pos_byte
+= len_byte
;
1463 INC_BOTH (pos
, pos_byte
);
1473 /* Try matching at position POS. */
1476 unsigned char *p
= pat
;
1478 if (pos
+ len
> lim
)
1481 while (this_len
> 0)
1484 int buf_ch
= FETCH_BYTE (this_pos
);
1485 TRANSLATE (buf_ch
, trt
, buf_ch
);
1487 if (buf_ch
!= pat_ch
)
1505 /* Backwards search. */
1506 else if (lim
< pos
&& multibyte
)
1511 /* Try matching at position POS. */
1512 int this_pos
= pos
- len
;
1513 int this_pos_byte
= pos_byte
- len_byte
;
1515 int this_len_byte
= len_byte
;
1516 unsigned char *p
= pat
;
1518 if (this_pos
< lim
|| this_pos_byte
< lim_byte
)
1521 while (this_len
> 0)
1523 int charlen
, buf_charlen
;
1526 pat_ch
= STRING_CHAR_AND_LENGTH (p
, this_len_byte
, charlen
);
1527 buf_ch
= STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte
),
1528 ZV_BYTE
- this_pos_byte
,
1530 TRANSLATE (buf_ch
, trt
, buf_ch
);
1532 if (buf_ch
!= pat_ch
)
1535 this_len_byte
-= charlen
;
1538 this_pos_byte
+= buf_charlen
;
1545 pos_byte
-= len_byte
;
1549 DEC_BOTH (pos
, pos_byte
);
1559 /* Try matching at position POS. */
1560 int this_pos
= pos
- len
;
1562 unsigned char *p
= pat
;
1564 if (pos
- len
< lim
)
1567 while (this_len
> 0)
1570 int buf_ch
= FETCH_BYTE (this_pos
);
1571 TRANSLATE (buf_ch
, trt
, buf_ch
);
1573 if (buf_ch
!= pat_ch
)
1595 set_search_regs ((multibyte
? pos_byte
: pos
) - len_byte
, len_byte
);
1597 set_search_regs (multibyte
? pos_byte
: pos
, len_byte
);
1607 /* Do Boyer-Moore search N times for the string BASE_PAT,
1608 whose length is LEN/LEN_BYTE,
1609 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1610 DIRECTION says which direction we search in.
1611 TRT and INVERSE_TRT are translation tables.
1612 Characters in PAT are already translated by TRT.
1614 This kind of search works if all the characters in BASE_PAT that
1615 have nontrivial translation are the same aside from the last byte.
1616 This makes it possible to translate just the last byte of a
1617 character, and do so after just a simple test of the context.
1618 CHARSET_BASE is nonzero if there is such a non-ASCII character.
1620 If that criterion is not satisfied, do not call this function. */
1623 boyer_moore (n
, base_pat
, len
, len_byte
, trt
, inverse_trt
,
1624 pos
, pos_byte
, lim
, lim_byte
, charset_base
)
1626 unsigned char *base_pat
;
1629 Lisp_Object inverse_trt
;
1634 int direction
= ((n
> 0) ? 1 : -1);
1635 register int dirlen
;
1636 int infinity
, limit
, stride_for_teases
= 0;
1637 register int *BM_tab
;
1639 register unsigned char *cursor
, *p_limit
;
1641 unsigned char *pat
, *pat_end
;
1642 int multibyte
= ! NILP (current_buffer
->enable_multibyte_characters
);
1644 unsigned char simple_translate
[0400];
1645 /* These are set to the preceding bytes of a byte to be translated
1646 if charset_base is nonzero. As the maximum byte length of a
1647 multibyte character is 4, we have to check at most three previous
1649 int translate_prev_byte1
= 0;
1650 int translate_prev_byte2
= 0;
1651 int translate_prev_byte3
= 0;
1654 int BM_tab_space
[0400];
1655 BM_tab
= &BM_tab_space
[0];
1657 BM_tab
= (int *) alloca (0400 * sizeof (int));
1659 /* The general approach is that we are going to maintain that we know */
1660 /* the first (closest to the present position, in whatever direction */
1661 /* we're searching) character that could possibly be the last */
1662 /* (furthest from present position) character of a valid match. We */
1663 /* advance the state of our knowledge by looking at that character */
1664 /* and seeing whether it indeed matches the last character of the */
1665 /* pattern. If it does, we take a closer look. If it does not, we */
1666 /* move our pointer (to putative last characters) as far as is */
1667 /* logically possible. This amount of movement, which I call a */
1668 /* stride, will be the length of the pattern if the actual character */
1669 /* appears nowhere in the pattern, otherwise it will be the distance */
1670 /* from the last occurrence of that character to the end of the */
1672 /* As a coding trick, an enormous stride is coded into the table for */
1673 /* characters that match the last character. This allows use of only */
1674 /* a single test, a test for having gone past the end of the */
1675 /* permissible match region, to test for both possible matches (when */
1676 /* the stride goes past the end immediately) and failure to */
1677 /* match (where you get nudged past the end one stride at a time). */
1679 /* Here we make a "mickey mouse" BM table. The stride of the search */
1680 /* is determined only by the last character of the putative match. */
1681 /* If that character does not match, we will stride the proper */
1682 /* distance to propose a match that superimposes it on the last */
1683 /* instance of a character that matches it (per trt), or misses */
1684 /* it entirely if there is none. */
1686 dirlen
= len_byte
* direction
;
1687 infinity
= dirlen
- (lim_byte
+ pos_byte
+ len_byte
+ len_byte
) * direction
;
1689 /* Record position after the end of the pattern. */
1690 pat_end
= base_pat
+ len_byte
;
1691 /* BASE_PAT points to a character that we start scanning from.
1692 It is the first character in a forward search,
1693 the last character in a backward search. */
1695 base_pat
= pat_end
- 1;
1697 BM_tab_base
= BM_tab
;
1699 j
= dirlen
; /* to get it in a register */
1700 /* A character that does not appear in the pattern induces a */
1701 /* stride equal to the pattern length. */
1702 while (BM_tab_base
!= BM_tab
)
1710 /* We use this for translation, instead of TRT itself.
1711 We fill this in to handle the characters that actually
1712 occur in the pattern. Others don't matter anyway! */
1713 bzero (simple_translate
, sizeof simple_translate
);
1714 for (i
= 0; i
< 0400; i
++)
1715 simple_translate
[i
] = i
;
1719 /* Setup translate_prev_byte1/2/3 from CHARSET_BASE. Only a
1720 byte following them are the target of translation. */
1721 int sample_char
= charset_base
| 0x20;
1722 unsigned char str
[MAX_MULTIBYTE_LENGTH
];
1723 int len
= CHAR_STRING (sample_char
, str
);
1725 translate_prev_byte1
= str
[len
- 2];
1728 translate_prev_byte2
= str
[len
- 3];
1730 translate_prev_byte3
= str
[len
- 4];
1735 while (i
!= infinity
)
1737 unsigned char *ptr
= base_pat
+ i
;
1743 /* If the byte currently looking at is the last of a
1744 character to check case-equivalents, set CH to that
1745 character. An ASCII character and a non-ASCII character
1746 matching with CHARSET_BASE are to be checked. */
1749 if (ASCII_BYTE_P (*ptr
) || ! multibyte
)
1751 else if (charset_base
1752 && ((pat_end
- ptr
) == 1 || CHAR_HEAD_P (ptr
[1])))
1754 unsigned char *charstart
= ptr
- 1;
1756 while (! (CHAR_HEAD_P (*charstart
)))
1758 ch
= STRING_CHAR (charstart
, ptr
- charstart
+ 1);
1759 if (charset_base
!= (ch
& ~CHAR_FIELD3_MASK
))
1764 j
= ((unsigned char) ch
) | 0200;
1769 stride_for_teases
= BM_tab
[j
];
1771 BM_tab
[j
] = dirlen
- i
;
1772 /* A translation table is accompanied by its inverse -- see */
1773 /* comment following downcase_table for details */
1776 int starting_ch
= ch
;
1781 TRANSLATE (ch
, inverse_trt
, ch
);
1783 j
= ((unsigned char) ch
) | 0200;
1785 j
= (unsigned char) ch
;
1787 /* For all the characters that map into CH,
1788 set up simple_translate to map the last byte
1790 simple_translate
[j
] = starting_j
;
1791 if (ch
== starting_ch
)
1793 BM_tab
[j
] = dirlen
- i
;
1802 stride_for_teases
= BM_tab
[j
];
1803 BM_tab
[j
] = dirlen
- i
;
1805 /* stride_for_teases tells how much to stride if we get a */
1806 /* match on the far character but are subsequently */
1807 /* disappointed, by recording what the stride would have been */
1808 /* for that character if the last character had been */
1811 infinity
= dirlen
- infinity
;
1812 pos_byte
+= dirlen
- ((direction
> 0) ? direction
: 0);
1813 /* loop invariant - POS_BYTE points at where last char (first
1814 char if reverse) of pattern would align in a possible match. */
1818 unsigned char *tail_end_ptr
;
1820 /* It's been reported that some (broken) compiler thinks that
1821 Boolean expressions in an arithmetic context are unsigned.
1822 Using an explicit ?1:0 prevents this. */
1823 if ((lim_byte
- pos_byte
- ((direction
> 0) ? 1 : 0)) * direction
1825 return (n
* (0 - direction
));
1826 /* First we do the part we can by pointers (maybe nothing) */
1829 limit
= pos_byte
- dirlen
+ direction
;
1832 limit
= BUFFER_CEILING_OF (limit
);
1833 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1834 can take on without hitting edge of buffer or the gap. */
1835 limit
= min (limit
, pos_byte
+ 20000);
1836 limit
= min (limit
, lim_byte
- 1);
1840 limit
= BUFFER_FLOOR_OF (limit
);
1841 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1842 can take on without hitting edge of buffer or the gap. */
1843 limit
= max (limit
, pos_byte
- 20000);
1844 limit
= max (limit
, lim_byte
);
1846 tail_end
= BUFFER_CEILING_OF (pos_byte
) + 1;
1847 tail_end_ptr
= BYTE_POS_ADDR (tail_end
);
1849 if ((limit
- pos_byte
) * direction
> 20)
1853 p_limit
= BYTE_POS_ADDR (limit
);
1854 p2
= (cursor
= BYTE_POS_ADDR (pos_byte
));
1855 /* In this loop, pos + cursor - p2 is the surrogate for pos */
1856 while (1) /* use one cursor setting as long as i can */
1858 if (direction
> 0) /* worth duplicating */
1860 /* Use signed comparison if appropriate
1861 to make cursor+infinity sure to be > p_limit.
1862 Assuming that the buffer lies in a range of addresses
1863 that are all "positive" (as ints) or all "negative",
1864 either kind of comparison will work as long
1865 as we don't step by infinity. So pick the kind
1866 that works when we do step by infinity. */
1867 if ((EMACS_INT
) (p_limit
+ infinity
) > (EMACS_INT
) p_limit
)
1868 while ((EMACS_INT
) cursor
<= (EMACS_INT
) p_limit
)
1869 cursor
+= BM_tab
[*cursor
];
1871 while ((EMACS_UINT
) cursor
<= (EMACS_UINT
) p_limit
)
1872 cursor
+= BM_tab
[*cursor
];
1876 if ((EMACS_INT
) (p_limit
+ infinity
) < (EMACS_INT
) p_limit
)
1877 while ((EMACS_INT
) cursor
>= (EMACS_INT
) p_limit
)
1878 cursor
+= BM_tab
[*cursor
];
1880 while ((EMACS_UINT
) cursor
>= (EMACS_UINT
) p_limit
)
1881 cursor
+= BM_tab
[*cursor
];
1883 /* If you are here, cursor is beyond the end of the searched region. */
1884 /* This can happen if you match on the far character of the pattern, */
1885 /* because the "stride" of that character is infinity, a number able */
1886 /* to throw you well beyond the end of the search. It can also */
1887 /* happen if you fail to match within the permitted region and would */
1888 /* otherwise try a character beyond that region */
1889 if ((cursor
- p_limit
) * direction
<= len_byte
)
1890 break; /* a small overrun is genuine */
1891 cursor
-= infinity
; /* large overrun = hit */
1892 i
= dirlen
- direction
;
1895 while ((i
-= direction
) + direction
!= 0)
1898 cursor
-= direction
;
1899 /* Translate only the last byte of a character. */
1901 || ((cursor
== tail_end_ptr
1902 || CHAR_HEAD_P (cursor
[1]))
1903 && (CHAR_HEAD_P (cursor
[0])
1904 /* Check if this is the last byte of
1905 a translable character. */
1906 || (translate_prev_byte1
== cursor
[-1]
1907 && (CHAR_HEAD_P (translate_prev_byte1
)
1908 || (translate_prev_byte2
== cursor
[-2]
1909 && (CHAR_HEAD_P (translate_prev_byte2
)
1910 || (translate_prev_byte3
== cursor
[-3]))))))))
1911 ch
= simple_translate
[*cursor
];
1920 while ((i
-= direction
) + direction
!= 0)
1922 cursor
-= direction
;
1923 if (pat
[i
] != *cursor
)
1927 cursor
+= dirlen
- i
- direction
; /* fix cursor */
1928 if (i
+ direction
== 0)
1932 cursor
-= direction
;
1934 position
= pos_byte
+ cursor
- p2
+ ((direction
> 0)
1935 ? 1 - len_byte
: 0);
1936 set_search_regs (position
, len_byte
);
1938 if ((n
-= direction
) != 0)
1939 cursor
+= dirlen
; /* to resume search */
1941 return ((direction
> 0)
1942 ? search_regs
.end
[0] : search_regs
.start
[0]);
1945 cursor
+= stride_for_teases
; /* <sigh> we lose - */
1947 pos_byte
+= cursor
- p2
;
1950 /* Now we'll pick up a clump that has to be done the hard */
1951 /* way because it covers a discontinuity */
1953 limit
= ((direction
> 0)
1954 ? BUFFER_CEILING_OF (pos_byte
- dirlen
+ 1)
1955 : BUFFER_FLOOR_OF (pos_byte
- dirlen
- 1));
1956 limit
= ((direction
> 0)
1957 ? min (limit
+ len_byte
, lim_byte
- 1)
1958 : max (limit
- len_byte
, lim_byte
));
1959 /* LIMIT is now the last value POS_BYTE can have
1960 and still be valid for a possible match. */
1963 /* This loop can be coded for space rather than */
1964 /* speed because it will usually run only once. */
1965 /* (the reach is at most len + 21, and typically */
1966 /* does not exceed len) */
1967 while ((limit
- pos_byte
) * direction
>= 0)
1968 pos_byte
+= BM_tab
[FETCH_BYTE (pos_byte
)];
1969 /* now run the same tests to distinguish going off the */
1970 /* end, a match or a phony match. */
1971 if ((pos_byte
- limit
) * direction
<= len_byte
)
1972 break; /* ran off the end */
1973 /* Found what might be a match.
1974 Set POS_BYTE back to last (first if reverse) pos. */
1975 pos_byte
-= infinity
;
1976 i
= dirlen
- direction
;
1977 while ((i
-= direction
) + direction
!= 0)
1981 pos_byte
-= direction
;
1982 ptr
= BYTE_POS_ADDR (pos_byte
);
1983 /* Translate only the last byte of a character. */
1985 || ((ptr
== tail_end_ptr
1986 || CHAR_HEAD_P (ptr
[1]))
1987 && (CHAR_HEAD_P (ptr
[0])
1988 /* Check if this is the last byte of a
1989 translable character. */
1990 || (translate_prev_byte1
== ptr
[-1]
1991 && (CHAR_HEAD_P (translate_prev_byte1
)
1992 || (translate_prev_byte2
== ptr
[-2]
1993 && (CHAR_HEAD_P (translate_prev_byte2
)
1994 || translate_prev_byte3
== ptr
[-3])))))))
1995 ch
= simple_translate
[*ptr
];
2001 /* Above loop has moved POS_BYTE part or all the way
2002 back to the first pos (last pos if reverse).
2003 Set it once again at the last (first if reverse) char. */
2004 pos_byte
+= dirlen
- i
- direction
;
2005 if (i
+ direction
== 0)
2008 pos_byte
-= direction
;
2010 position
= pos_byte
+ ((direction
> 0) ? 1 - len_byte
: 0);
2012 set_search_regs (position
, len_byte
);
2014 if ((n
-= direction
) != 0)
2015 pos_byte
+= dirlen
; /* to resume search */
2017 return ((direction
> 0)
2018 ? search_regs
.end
[0] : search_regs
.start
[0]);
2021 pos_byte
+= stride_for_teases
;
2024 /* We have done one clump. Can we continue? */
2025 if ((lim_byte
- pos_byte
) * direction
< 0)
2026 return ((0 - n
) * direction
);
2028 return BYTE_TO_CHAR (pos_byte
);
2031 /* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
2032 for the overall match just found in the current buffer.
2033 Also clear out the match data for registers 1 and up. */
2036 set_search_regs (beg_byte
, nbytes
)
2037 int beg_byte
, nbytes
;
2041 /* Make sure we have registers in which to store
2042 the match position. */
2043 if (search_regs
.num_regs
== 0)
2045 search_regs
.start
= (regoff_t
*) xmalloc (2 * sizeof (regoff_t
));
2046 search_regs
.end
= (regoff_t
*) xmalloc (2 * sizeof (regoff_t
));
2047 search_regs
.num_regs
= 2;
2050 /* Clear out the other registers. */
2051 for (i
= 1; i
< search_regs
.num_regs
; i
++)
2053 search_regs
.start
[i
] = -1;
2054 search_regs
.end
[i
] = -1;
2057 search_regs
.start
[0] = BYTE_TO_CHAR (beg_byte
);
2058 search_regs
.end
[0] = BYTE_TO_CHAR (beg_byte
+ nbytes
);
2059 XSETBUFFER (last_thing_searched
, current_buffer
);
2062 /* Given a string of words separated by word delimiters,
2063 compute a regexp that matches those exact words
2064 separated by arbitrary punctuation. */
2070 register unsigned char *p
, *o
;
2071 register int i
, i_byte
, len
, punct_count
= 0, word_count
= 0;
2076 CHECK_STRING (string
);
2078 len
= SCHARS (string
);
2080 for (i
= 0, i_byte
= 0; i
< len
; )
2084 FETCH_STRING_CHAR_ADVANCE (c
, string
, i
, i_byte
);
2086 if (SYNTAX (c
) != Sword
)
2089 if (i
> 0 && SYNTAX (prev_c
) == Sword
)
2096 if (SYNTAX (prev_c
) == Sword
)
2099 return empty_string
;
2101 adjust
= - punct_count
+ 5 * (word_count
- 1) + 4;
2102 if (STRING_MULTIBYTE (string
))
2103 val
= make_uninit_multibyte_string (len
+ adjust
,
2107 val
= make_uninit_string (len
+ adjust
);
2114 for (i
= 0, i_byte
= 0; i
< len
; )
2117 int i_byte_orig
= i_byte
;
2119 FETCH_STRING_CHAR_ADVANCE (c
, string
, i
, i_byte
);
2121 if (SYNTAX (c
) == Sword
)
2123 bcopy (SDATA (string
) + i_byte_orig
, o
,
2124 i_byte
- i_byte_orig
);
2125 o
+= i_byte
- i_byte_orig
;
2127 else if (i
> 0 && SYNTAX (prev_c
) == Sword
&& --word_count
)
2145 DEFUN ("search-backward", Fsearch_backward
, Ssearch_backward
, 1, 4,
2146 "MSearch backward: ",
2147 doc
: /* Search backward from point for STRING.
2148 Set point to the beginning of the occurrence found, and return point.
2149 An optional second argument bounds the search; it is a buffer position.
2150 The match found must not extend before that position.
2151 Optional third argument, if t, means if fail just return nil (no error).
2152 If not nil and not t, position at limit of search and return nil.
2153 Optional fourth argument is repeat count--search for successive occurrences.
2155 Search case-sensitivity is determined by the value of the variable
2156 `case-fold-search', which see.
2158 See also the functions `match-beginning', `match-end' and `replace-match'. */)
2159 (string
, bound
, noerror
, count
)
2160 Lisp_Object string
, bound
, noerror
, count
;
2162 return search_command (string
, bound
, noerror
, count
, -1, 0, 0);
2165 DEFUN ("search-forward", Fsearch_forward
, Ssearch_forward
, 1, 4, "MSearch: ",
2166 doc
: /* Search forward from point for STRING.
2167 Set point to the end of the occurrence found, and return point.
2168 An optional second argument bounds the search; it is a buffer position.
2169 The match found must not extend after that position. A value of nil is
2170 equivalent to (point-max).
2171 Optional third argument, if t, means if fail just return nil (no error).
2172 If not nil and not t, move to limit of search and return nil.
2173 Optional fourth argument is repeat count--search for successive occurrences.
2175 Search case-sensitivity is determined by the value of the variable
2176 `case-fold-search', which see.
2178 See also the functions `match-beginning', `match-end' and `replace-match'. */)
2179 (string
, bound
, noerror
, count
)
2180 Lisp_Object string
, bound
, noerror
, count
;
2182 return search_command (string
, bound
, noerror
, count
, 1, 0, 0);
2185 DEFUN ("word-search-backward", Fword_search_backward
, Sword_search_backward
, 1, 4,
2186 "sWord search backward: ",
2187 doc
: /* Search backward from point for STRING, ignoring differences in punctuation.
2188 Set point to the beginning of the occurrence found, and return point.
2189 An optional second argument bounds the search; it is a buffer position.
2190 The match found must not extend before that position.
2191 Optional third argument, if t, means if fail just return nil (no error).
2192 If not nil and not t, move to limit of search and return nil.
2193 Optional fourth argument is repeat count--search for successive occurrences. */)
2194 (string
, bound
, noerror
, count
)
2195 Lisp_Object string
, bound
, noerror
, count
;
2197 return search_command (wordify (string
), bound
, noerror
, count
, -1, 1, 0);
2200 DEFUN ("word-search-forward", Fword_search_forward
, Sword_search_forward
, 1, 4,
2202 doc
: /* Search forward from point for STRING, ignoring differences in punctuation.
2203 Set point to the end of the occurrence found, and return point.
2204 An optional second argument bounds the search; it is a buffer position.
2205 The match found must not extend after that position.
2206 Optional third argument, if t, means if fail just return nil (no error).
2207 If not nil and not t, move to limit of search and return nil.
2208 Optional fourth argument is repeat count--search for successive occurrences. */)
2209 (string
, bound
, noerror
, count
)
2210 Lisp_Object string
, bound
, noerror
, count
;
2212 return search_command (wordify (string
), bound
, noerror
, count
, 1, 1, 0);
2215 DEFUN ("re-search-backward", Fre_search_backward
, Sre_search_backward
, 1, 4,
2216 "sRE search backward: ",
2217 doc
: /* Search backward from point for match for regular expression REGEXP.
2218 Set point to the beginning of the match, and return point.
2219 The match found is the one starting last in the buffer
2220 and yet ending before the origin of the search.
2221 An optional second argument bounds the search; it is a buffer position.
2222 The match found must start at or after that position.
2223 Optional third argument, if t, means if fail just return nil (no error).
2224 If not nil and not t, move to limit of search and return nil.
2225 Optional fourth argument is repeat count--search for successive occurrences.
2226 See also the functions `match-beginning', `match-end', `match-string',
2227 and `replace-match'. */)
2228 (regexp
, bound
, noerror
, count
)
2229 Lisp_Object regexp
, bound
, noerror
, count
;
2231 return search_command (regexp
, bound
, noerror
, count
, -1, 1, 0);
2234 DEFUN ("re-search-forward", Fre_search_forward
, Sre_search_forward
, 1, 4,
2236 doc
: /* Search forward from point for regular expression REGEXP.
2237 Set point to the end of the occurrence found, and return point.
2238 An optional second argument bounds the search; it is a buffer position.
2239 The match found must not extend after that position.
2240 Optional third argument, if t, means if fail just return nil (no error).
2241 If not nil and not t, move to limit of search and return nil.
2242 Optional fourth argument is repeat count--search for successive occurrences.
2243 See also the functions `match-beginning', `match-end', `match-string',
2244 and `replace-match'. */)
2245 (regexp
, bound
, noerror
, count
)
2246 Lisp_Object regexp
, bound
, noerror
, count
;
2248 return search_command (regexp
, bound
, noerror
, count
, 1, 1, 0);
2251 DEFUN ("posix-search-backward", Fposix_search_backward
, Sposix_search_backward
, 1, 4,
2252 "sPosix search backward: ",
2253 doc
: /* Search backward from point for match for regular expression REGEXP.
2254 Find the longest match in accord with Posix regular expression rules.
2255 Set point to the beginning of the match, and return point.
2256 The match found is the one starting last in the buffer
2257 and yet ending before the origin of the search.
2258 An optional second argument bounds the search; it is a buffer position.
2259 The match found must start at or after that position.
2260 Optional third argument, if t, means if fail just return nil (no error).
2261 If not nil and not t, move to limit of search and return nil.
2262 Optional fourth argument is repeat count--search for successive occurrences.
2263 See also the functions `match-beginning', `match-end', `match-string',
2264 and `replace-match'. */)
2265 (regexp
, bound
, noerror
, count
)
2266 Lisp_Object regexp
, bound
, noerror
, count
;
2268 return search_command (regexp
, bound
, noerror
, count
, -1, 1, 1);
2271 DEFUN ("posix-search-forward", Fposix_search_forward
, Sposix_search_forward
, 1, 4,
2273 doc
: /* Search forward from point for regular expression REGEXP.
2274 Find the longest match in accord with Posix regular expression rules.
2275 Set point to the end of the occurrence found, and return point.
2276 An optional second argument bounds the search; it is a buffer position.
2277 The match found must not extend after that position.
2278 Optional third argument, if t, means if fail just return nil (no error).
2279 If not nil and not t, move to limit of search and return nil.
2280 Optional fourth argument is repeat count--search for successive occurrences.
2281 See also the functions `match-beginning', `match-end', `match-string',
2282 and `replace-match'. */)
2283 (regexp
, bound
, noerror
, count
)
2284 Lisp_Object regexp
, bound
, noerror
, count
;
2286 return search_command (regexp
, bound
, noerror
, count
, 1, 1, 1);
2289 DEFUN ("replace-match", Freplace_match
, Sreplace_match
, 1, 5, 0,
2290 doc
: /* Replace text matched by last search with NEWTEXT.
2291 Leave point at the end of the replacement text.
2293 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
2294 Otherwise maybe capitalize the whole text, or maybe just word initials,
2295 based on the replaced text.
2296 If the replaced text has only capital letters
2297 and has at least one multiletter word, convert NEWTEXT to all caps.
2298 Otherwise if all words are capitalized in the replaced text,
2299 capitalize each word in NEWTEXT.
2301 If third arg LITERAL is non-nil, insert NEWTEXT literally.
2302 Otherwise treat `\\' as special:
2303 `\\&' in NEWTEXT means substitute original matched text.
2304 `\\N' means substitute what matched the Nth `\\(...\\)'.
2305 If Nth parens didn't match, substitute nothing.
2306 `\\\\' means insert one `\\'.
2307 Case conversion does not apply to these substitutions.
2309 FIXEDCASE and LITERAL are optional arguments.
2311 The optional fourth argument STRING can be a string to modify.
2312 This is meaningful when the previous match was done against STRING,
2313 using `string-match'. When used this way, `replace-match'
2314 creates and returns a new string made by copying STRING and replacing
2315 the part of STRING that was matched.
2317 The optional fifth argument SUBEXP specifies a subexpression;
2318 it says to replace just that subexpression with NEWTEXT,
2319 rather than replacing the entire matched text.
2320 This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
2321 `\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
2322 NEWTEXT in place of subexp N.
2323 This is useful only after a regular expression search or match,
2324 since only regular expressions have distinguished subexpressions. */)
2325 (newtext
, fixedcase
, literal
, string
, subexp
)
2326 Lisp_Object newtext
, fixedcase
, literal
, string
, subexp
;
2328 enum { nochange
, all_caps
, cap_initial
} case_action
;
2329 register int pos
, pos_byte
;
2330 int some_multiletter_word
;
2333 int some_nonuppercase_initial
;
2334 register int c
, prevc
;
2336 int opoint
, newpoint
;
2338 CHECK_STRING (newtext
);
2340 if (! NILP (string
))
2341 CHECK_STRING (string
);
2343 case_action
= nochange
; /* We tried an initialization */
2344 /* but some C compilers blew it */
2346 if (search_regs
.num_regs
<= 0)
2347 error ("`replace-match' called before any match found");
2353 CHECK_NUMBER (subexp
);
2354 sub
= XINT (subexp
);
2355 if (sub
< 0 || sub
>= search_regs
.num_regs
)
2356 args_out_of_range (subexp
, make_number (search_regs
.num_regs
));
2361 if (search_regs
.start
[sub
] < BEGV
2362 || search_regs
.start
[sub
] > search_regs
.end
[sub
]
2363 || search_regs
.end
[sub
] > ZV
)
2364 args_out_of_range (make_number (search_regs
.start
[sub
]),
2365 make_number (search_regs
.end
[sub
]));
2369 if (search_regs
.start
[sub
] < 0
2370 || search_regs
.start
[sub
] > search_regs
.end
[sub
]
2371 || search_regs
.end
[sub
] > SCHARS (string
))
2372 args_out_of_range (make_number (search_regs
.start
[sub
]),
2373 make_number (search_regs
.end
[sub
]));
2376 if (NILP (fixedcase
))
2378 /* Decide how to casify by examining the matched text. */
2381 pos
= search_regs
.start
[sub
];
2382 last
= search_regs
.end
[sub
];
2385 pos_byte
= CHAR_TO_BYTE (pos
);
2387 pos_byte
= string_char_to_byte (string
, pos
);
2390 case_action
= all_caps
;
2392 /* some_multiletter_word is set nonzero if any original word
2393 is more than one letter long. */
2394 some_multiletter_word
= 0;
2396 some_nonuppercase_initial
= 0;
2403 c
= FETCH_CHAR (pos_byte
);
2404 INC_BOTH (pos
, pos_byte
);
2407 FETCH_STRING_CHAR_ADVANCE (c
, string
, pos
, pos_byte
);
2411 /* Cannot be all caps if any original char is lower case */
2414 if (SYNTAX (prevc
) != Sword
)
2415 some_nonuppercase_initial
= 1;
2417 some_multiletter_word
= 1;
2419 else if (UPPERCASEP (c
))
2422 if (SYNTAX (prevc
) != Sword
)
2425 some_multiletter_word
= 1;
2429 /* If the initial is a caseless word constituent,
2430 treat that like a lowercase initial. */
2431 if (SYNTAX (prevc
) != Sword
)
2432 some_nonuppercase_initial
= 1;
2438 /* Convert to all caps if the old text is all caps
2439 and has at least one multiletter word. */
2440 if (! some_lowercase
&& some_multiletter_word
)
2441 case_action
= all_caps
;
2442 /* Capitalize each word, if the old text has all capitalized words. */
2443 else if (!some_nonuppercase_initial
&& some_multiletter_word
)
2444 case_action
= cap_initial
;
2445 else if (!some_nonuppercase_initial
&& some_uppercase
)
2446 /* Should x -> yz, operating on X, give Yz or YZ?
2447 We'll assume the latter. */
2448 case_action
= all_caps
;
2450 case_action
= nochange
;
2453 /* Do replacement in a string. */
2456 Lisp_Object before
, after
;
2458 before
= Fsubstring (string
, make_number (0),
2459 make_number (search_regs
.start
[sub
]));
2460 after
= Fsubstring (string
, make_number (search_regs
.end
[sub
]), Qnil
);
2462 /* Substitute parts of the match into NEWTEXT
2467 int lastpos_byte
= 0;
2468 /* We build up the substituted string in ACCUM. */
2471 int length
= SBYTES (newtext
);
2475 for (pos_byte
= 0, pos
= 0; pos_byte
< length
;)
2479 int delbackslash
= 0;
2481 FETCH_STRING_CHAR_ADVANCE (c
, newtext
, pos
, pos_byte
);
2485 FETCH_STRING_CHAR_ADVANCE (c
, newtext
, pos
, pos_byte
);
2489 substart
= search_regs
.start
[sub
];
2490 subend
= search_regs
.end
[sub
];
2492 else if (c
>= '1' && c
<= '9')
2494 if (search_regs
.start
[c
- '0'] >= 0
2495 && c
<= search_regs
.num_regs
+ '0')
2497 substart
= search_regs
.start
[c
- '0'];
2498 subend
= search_regs
.end
[c
- '0'];
2502 /* If that subexp did not match,
2503 replace \\N with nothing. */
2511 error ("Invalid use of `\\' in replacement text");
2515 if (pos
- 2 != lastpos
)
2516 middle
= substring_both (newtext
, lastpos
,
2518 pos
- 2, pos_byte
- 2);
2521 accum
= concat3 (accum
, middle
,
2523 make_number (substart
),
2524 make_number (subend
)));
2526 lastpos_byte
= pos_byte
;
2528 else if (delbackslash
)
2530 middle
= substring_both (newtext
, lastpos
,
2532 pos
- 1, pos_byte
- 1);
2534 accum
= concat2 (accum
, middle
);
2536 lastpos_byte
= pos_byte
;
2541 middle
= substring_both (newtext
, lastpos
,
2547 newtext
= concat2 (accum
, middle
);
2550 /* Do case substitution in NEWTEXT if desired. */
2551 if (case_action
== all_caps
)
2552 newtext
= Fupcase (newtext
);
2553 else if (case_action
== cap_initial
)
2554 newtext
= Fupcase_initials (newtext
);
2556 return concat3 (before
, newtext
, after
);
2559 /* Record point, then move (quietly) to the start of the match. */
2560 if (PT
>= search_regs
.end
[sub
])
2562 else if (PT
> search_regs
.start
[sub
])
2563 opoint
= search_regs
.end
[sub
] - ZV
;
2567 /* If we want non-literal replacement,
2568 perform substitution on the replacement string. */
2571 int length
= SBYTES (newtext
);
2572 unsigned char *substed
;
2573 int substed_alloc_size
, substed_len
;
2574 int buf_multibyte
= !NILP (current_buffer
->enable_multibyte_characters
);
2575 int str_multibyte
= STRING_MULTIBYTE (newtext
);
2576 Lisp_Object rev_tbl
;
2577 int really_changed
= 0;
2579 rev_tbl
= (!buf_multibyte
&& CHAR_TABLE_P (Vnonascii_translation_table
)
2580 ? Fchar_table_extra_slot (Vnonascii_translation_table
,
2584 substed_alloc_size
= length
* 2 + 100;
2585 substed
= (unsigned char *) xmalloc (substed_alloc_size
+ 1);
2588 /* Go thru NEWTEXT, producing the actual text to insert in
2589 SUBSTED while adjusting multibyteness to that of the current
2592 for (pos_byte
= 0, pos
= 0; pos_byte
< length
;)
2594 unsigned char str
[MAX_MULTIBYTE_LENGTH
];
2595 unsigned char *add_stuff
= NULL
;
2601 FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c
, newtext
, pos
, pos_byte
);
2603 c
= multibyte_char_to_unibyte (c
, rev_tbl
);
2607 /* Note that we don't have to increment POS. */
2608 c
= SREF (newtext
, pos_byte
++);
2610 c
= unibyte_char_to_multibyte (c
);
2613 /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2614 or set IDX to a match index, which means put that part
2615 of the buffer text into SUBSTED. */
2623 FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c
, newtext
,
2625 if (!buf_multibyte
&& !SINGLE_BYTE_CHAR_P (c
))
2626 c
= multibyte_char_to_unibyte (c
, rev_tbl
);
2630 c
= SREF (newtext
, pos_byte
++);
2632 c
= unibyte_char_to_multibyte (c
);
2637 else if (c
>= '1' && c
<= '9' && c
<= search_regs
.num_regs
+ '0')
2639 if (search_regs
.start
[c
- '0'] >= 1)
2643 add_len
= 1, add_stuff
= "\\";
2647 error ("Invalid use of `\\' in replacement text");
2652 add_len
= CHAR_STRING (c
, str
);
2656 /* If we want to copy part of a previous match,
2657 set up ADD_STUFF and ADD_LEN to point to it. */
2660 int begbyte
= CHAR_TO_BYTE (search_regs
.start
[idx
]);
2661 add_len
= CHAR_TO_BYTE (search_regs
.end
[idx
]) - begbyte
;
2662 if (search_regs
.start
[idx
] < GPT
&& GPT
< search_regs
.end
[idx
])
2663 move_gap (search_regs
.start
[idx
]);
2664 add_stuff
= BYTE_POS_ADDR (begbyte
);
2667 /* Now the stuff we want to add to SUBSTED
2668 is invariably ADD_LEN bytes starting at ADD_STUFF. */
2670 /* Make sure SUBSTED is big enough. */
2671 if (substed_len
+ add_len
>= substed_alloc_size
)
2673 substed_alloc_size
= substed_len
+ add_len
+ 500;
2674 substed
= (unsigned char *) xrealloc (substed
,
2675 substed_alloc_size
+ 1);
2678 /* Now add to the end of SUBSTED. */
2681 bcopy (add_stuff
, substed
+ substed_len
, add_len
);
2682 substed_len
+= add_len
;
2690 int nchars
= multibyte_chars_in_text (substed
, substed_len
);
2692 newtext
= make_multibyte_string (substed
, nchars
, substed_len
);
2695 newtext
= make_unibyte_string (substed
, substed_len
);
2700 /* Replace the old text with the new in the cleanest possible way. */
2701 replace_range (search_regs
.start
[sub
], search_regs
.end
[sub
],
2703 newpoint
= search_regs
.start
[sub
] + SCHARS (newtext
);
2705 if (case_action
== all_caps
)
2706 Fupcase_region (make_number (search_regs
.start
[sub
]),
2707 make_number (newpoint
));
2708 else if (case_action
== cap_initial
)
2709 Fupcase_initials_region (make_number (search_regs
.start
[sub
]),
2710 make_number (newpoint
));
2712 /* Adjust search data for this change. */
2714 int oldend
= search_regs
.end
[sub
];
2715 int oldstart
= search_regs
.start
[sub
];
2716 int change
= newpoint
- search_regs
.end
[sub
];
2719 for (i
= 0; i
< search_regs
.num_regs
; i
++)
2721 if (search_regs
.start
[i
] >= oldend
)
2722 search_regs
.start
[i
] += change
;
2723 else if (search_regs
.start
[i
] > oldstart
)
2724 search_regs
.start
[i
] = oldstart
;
2725 if (search_regs
.end
[i
] >= oldend
)
2726 search_regs
.end
[i
] += change
;
2727 else if (search_regs
.end
[i
] > oldstart
)
2728 search_regs
.end
[i
] = oldstart
;
2732 /* Put point back where it was in the text. */
2734 TEMP_SET_PT (opoint
+ ZV
);
2736 TEMP_SET_PT (opoint
);
2738 /* Now move point "officially" to the start of the inserted replacement. */
2739 move_if_not_intangible (newpoint
);
2745 match_limit (num
, beginningp
)
2754 args_out_of_range (num
, make_number (0));
2755 if (search_regs
.num_regs
<= 0)
2756 error ("No match data, because no search succeeded");
2757 if (n
>= search_regs
.num_regs
2758 || search_regs
.start
[n
] < 0)
2760 return (make_number ((beginningp
) ? search_regs
.start
[n
]
2761 : search_regs
.end
[n
]));
2764 DEFUN ("match-beginning", Fmatch_beginning
, Smatch_beginning
, 1, 1, 0,
2765 doc
: /* Return position of start of text matched by last search.
2766 SUBEXP, a number, specifies which parenthesized expression in the last
2768 Value is nil if SUBEXPth pair didn't match, or there were less than
2770 Zero means the entire text matched by the whole regexp or whole string. */)
2774 return match_limit (subexp
, 1);
2777 DEFUN ("match-end", Fmatch_end
, Smatch_end
, 1, 1, 0,
2778 doc
: /* Return position of end of text matched by last search.
2779 SUBEXP, a number, specifies which parenthesized expression in the last
2781 Value is nil if SUBEXPth pair didn't match, or there were less than
2783 Zero means the entire text matched by the whole regexp or whole string. */)
2787 return match_limit (subexp
, 0);
2790 DEFUN ("match-data", Fmatch_data
, Smatch_data
, 0, 3, 0,
2791 doc
: /* Return a list containing all info on what the last search matched.
2792 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2793 All the elements are markers or nil (nil if the Nth pair didn't match)
2794 if the last match was on a buffer; integers or nil if a string was matched.
2795 Use `store-match-data' to reinstate the data in this list.
2797 If INTEGERS (the optional first argument) is non-nil, always use
2798 integers \(rather than markers) to represent buffer positions. In
2799 this case, and if the last match was in a buffer, the buffer will get
2800 stored as one additional element at the end of the list.
2802 If REUSE is a list, reuse it as part of the value. If REUSE is long
2803 enough to hold all the values, and if INTEGERS is non-nil, no consing
2806 If optional third arg RESEAT is non-nil, any previous markers on the
2807 REUSE list will be modified to point to nowhere.
2809 Return value is undefined if the last search failed. */)
2810 (integers
, reuse
, reseat
)
2811 Lisp_Object integers
, reuse
, reseat
;
2813 Lisp_Object tail
, prev
;
2818 for (tail
= reuse
; CONSP (tail
); tail
= XCDR (tail
))
2819 if (MARKERP (XCAR (tail
)))
2821 unchain_marker (XMARKER (XCAR (tail
)));
2822 XSETCAR (tail
, Qnil
);
2825 if (NILP (last_thing_searched
))
2830 data
= (Lisp_Object
*) alloca ((2 * search_regs
.num_regs
+ 1)
2831 * sizeof (Lisp_Object
));
2834 for (i
= 0; i
< search_regs
.num_regs
; i
++)
2836 int start
= search_regs
.start
[i
];
2839 if (EQ (last_thing_searched
, Qt
)
2840 || ! NILP (integers
))
2842 XSETFASTINT (data
[2 * i
], start
);
2843 XSETFASTINT (data
[2 * i
+ 1], search_regs
.end
[i
]);
2845 else if (BUFFERP (last_thing_searched
))
2847 data
[2 * i
] = Fmake_marker ();
2848 Fset_marker (data
[2 * i
],
2849 make_number (start
),
2850 last_thing_searched
);
2851 data
[2 * i
+ 1] = Fmake_marker ();
2852 Fset_marker (data
[2 * i
+ 1],
2853 make_number (search_regs
.end
[i
]),
2854 last_thing_searched
);
2857 /* last_thing_searched must always be Qt, a buffer, or Qnil. */
2863 data
[2 * i
] = data
[2 * i
+ 1] = Qnil
;
2866 if (BUFFERP (last_thing_searched
) && !NILP (integers
))
2868 data
[len
] = last_thing_searched
;
2872 /* If REUSE is not usable, cons up the values and return them. */
2873 if (! CONSP (reuse
))
2874 return Flist (len
, data
);
2876 /* If REUSE is a list, store as many value elements as will fit
2877 into the elements of REUSE. */
2878 for (i
= 0, tail
= reuse
; CONSP (tail
);
2879 i
++, tail
= XCDR (tail
))
2882 XSETCAR (tail
, data
[i
]);
2884 XSETCAR (tail
, Qnil
);
2888 /* If we couldn't fit all value elements into REUSE,
2889 cons up the rest of them and add them to the end of REUSE. */
2891 XSETCDR (prev
, Flist (len
- i
, data
+ i
));
2896 /* We used to have an internal use variant of `reseat' described as:
2898 If RESEAT is `evaporate', put the markers back on the free list
2899 immediately. No other references to the markers must exist in this
2900 case, so it is used only internally on the unwind stack and
2901 save-match-data from Lisp.
2903 But it was ill-conceived: those supposedly-internal markers get exposed via
2904 the undo-list, so freeing them here is unsafe. */
2906 DEFUN ("set-match-data", Fset_match_data
, Sset_match_data
, 1, 2, 0,
2907 doc
: /* Set internal data on last search match from elements of LIST.
2908 LIST should have been created by calling `match-data' previously.
2910 If optional arg RESEAT is non-nil, make markers on LIST point nowhere. */)
2912 register Lisp_Object list
, reseat
;
2915 register Lisp_Object marker
;
2917 if (running_asynch_code
)
2918 save_search_regs ();
2922 /* Unless we find a marker with a buffer or an explicit buffer
2923 in LIST, assume that this match data came from a string. */
2924 last_thing_searched
= Qt
;
2926 /* Allocate registers if they don't already exist. */
2928 int length
= XFASTINT (Flength (list
)) / 2;
2930 if (length
> search_regs
.num_regs
)
2932 if (search_regs
.num_regs
== 0)
2935 = (regoff_t
*) xmalloc (length
* sizeof (regoff_t
));
2937 = (regoff_t
*) xmalloc (length
* sizeof (regoff_t
));
2942 = (regoff_t
*) xrealloc (search_regs
.start
,
2943 length
* sizeof (regoff_t
));
2945 = (regoff_t
*) xrealloc (search_regs
.end
,
2946 length
* sizeof (regoff_t
));
2949 for (i
= search_regs
.num_regs
; i
< length
; i
++)
2950 search_regs
.start
[i
] = -1;
2952 search_regs
.num_regs
= length
;
2955 for (i
= 0; CONSP (list
); i
++)
2957 marker
= XCAR (list
);
2958 if (BUFFERP (marker
))
2960 last_thing_searched
= marker
;
2967 search_regs
.start
[i
] = -1;
2976 if (MARKERP (marker
))
2978 if (XMARKER (marker
)->buffer
== 0)
2979 XSETFASTINT (marker
, 0);
2981 XSETBUFFER (last_thing_searched
, XMARKER (marker
)->buffer
);
2984 CHECK_NUMBER_COERCE_MARKER (marker
);
2985 from
= XINT (marker
);
2987 if (!NILP (reseat
) && MARKERP (m
))
2989 unchain_marker (XMARKER (m
));
2990 XSETCAR (list
, Qnil
);
2993 if ((list
= XCDR (list
), !CONSP (list
)))
2996 m
= marker
= XCAR (list
);
2998 if (MARKERP (marker
) && XMARKER (marker
)->buffer
== 0)
2999 XSETFASTINT (marker
, 0);
3001 CHECK_NUMBER_COERCE_MARKER (marker
);
3002 search_regs
.start
[i
] = from
;
3003 search_regs
.end
[i
] = XINT (marker
);
3005 if (!NILP (reseat
) && MARKERP (m
))
3007 unchain_marker (XMARKER (m
));
3008 XSETCAR (list
, Qnil
);
3014 for (; i
< search_regs
.num_regs
; i
++)
3015 search_regs
.start
[i
] = -1;
3021 /* If non-zero the match data have been saved in saved_search_regs
3022 during the execution of a sentinel or filter. */
3023 static int search_regs_saved
;
3024 static struct re_registers saved_search_regs
;
3025 static Lisp_Object saved_last_thing_searched
;
3027 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
3028 if asynchronous code (filter or sentinel) is running. */
3032 if (!search_regs_saved
)
3034 saved_search_regs
.num_regs
= search_regs
.num_regs
;
3035 saved_search_regs
.start
= search_regs
.start
;
3036 saved_search_regs
.end
= search_regs
.end
;
3037 saved_last_thing_searched
= last_thing_searched
;
3038 last_thing_searched
= Qnil
;
3039 search_regs
.num_regs
= 0;
3040 search_regs
.start
= 0;
3041 search_regs
.end
= 0;
3043 search_regs_saved
= 1;
3047 /* Called upon exit from filters and sentinels. */
3049 restore_search_regs ()
3051 if (search_regs_saved
)
3053 if (search_regs
.num_regs
> 0)
3055 xfree (search_regs
.start
);
3056 xfree (search_regs
.end
);
3058 search_regs
.num_regs
= saved_search_regs
.num_regs
;
3059 search_regs
.start
= saved_search_regs
.start
;
3060 search_regs
.end
= saved_search_regs
.end
;
3061 last_thing_searched
= saved_last_thing_searched
;
3062 saved_last_thing_searched
= Qnil
;
3063 search_regs_saved
= 0;
3068 unwind_set_match_data (list
)
3071 /* It is NOT ALWAYS safe to free (evaporate) the markers immediately. */
3072 return Fset_match_data (list
, Qt
);
3075 /* Called to unwind protect the match data. */
3077 record_unwind_save_match_data ()
3079 record_unwind_protect (unwind_set_match_data
,
3080 Fmatch_data (Qnil
, Qnil
, Qnil
));
3083 /* Quote a string to inactivate reg-expr chars */
3085 DEFUN ("regexp-quote", Fregexp_quote
, Sregexp_quote
, 1, 1, 0,
3086 doc
: /* Return a regexp string which matches exactly STRING and nothing else. */)
3090 register unsigned char *in
, *out
, *end
;
3091 register unsigned char *temp
;
3092 int backslashes_added
= 0;
3094 CHECK_STRING (string
);
3096 temp
= (unsigned char *) alloca (SBYTES (string
) * 2);
3098 /* Now copy the data into the new string, inserting escapes. */
3100 in
= SDATA (string
);
3101 end
= in
+ SBYTES (string
);
3104 for (; in
!= end
; in
++)
3107 || *in
== '*' || *in
== '.' || *in
== '\\'
3108 || *in
== '?' || *in
== '+'
3109 || *in
== '^' || *in
== '$')
3110 *out
++ = '\\', backslashes_added
++;
3114 return make_specified_string (temp
,
3115 SCHARS (string
) + backslashes_added
,
3117 STRING_MULTIBYTE (string
));
3125 for (i
= 0; i
< REGEXP_CACHE_SIZE
; ++i
)
3127 searchbufs
[i
].buf
.allocated
= 100;
3128 searchbufs
[i
].buf
.buffer
= (unsigned char *) xmalloc (100);
3129 searchbufs
[i
].buf
.fastmap
= searchbufs
[i
].fastmap
;
3130 searchbufs
[i
].regexp
= Qnil
;
3131 searchbufs
[i
].whitespace_regexp
= Qnil
;
3132 searchbufs
[i
].syntax_table
= Qnil
;
3133 staticpro (&searchbufs
[i
].regexp
);
3134 staticpro (&searchbufs
[i
].whitespace_regexp
);
3135 staticpro (&searchbufs
[i
].syntax_table
);
3136 searchbufs
[i
].next
= (i
== REGEXP_CACHE_SIZE
-1 ? 0 : &searchbufs
[i
+1]);
3138 searchbuf_head
= &searchbufs
[0];
3140 Qsearch_failed
= intern ("search-failed");
3141 staticpro (&Qsearch_failed
);
3142 Qinvalid_regexp
= intern ("invalid-regexp");
3143 staticpro (&Qinvalid_regexp
);
3145 Fput (Qsearch_failed
, Qerror_conditions
,
3146 Fcons (Qsearch_failed
, Fcons (Qerror
, Qnil
)));
3147 Fput (Qsearch_failed
, Qerror_message
,
3148 build_string ("Search failed"));
3150 Fput (Qinvalid_regexp
, Qerror_conditions
,
3151 Fcons (Qinvalid_regexp
, Fcons (Qerror
, Qnil
)));
3152 Fput (Qinvalid_regexp
, Qerror_message
,
3153 build_string ("Invalid regexp"));
3155 last_thing_searched
= Qnil
;
3156 staticpro (&last_thing_searched
);
3158 saved_last_thing_searched
= Qnil
;
3159 staticpro (&saved_last_thing_searched
);
3161 DEFVAR_LISP ("search-spaces-regexp", &Vsearch_spaces_regexp
,
3162 doc
: /* Regexp to substitute for bunches of spaces in regexp search.
3163 Some commands use this for user-specified regexps.
3164 Spaces that occur inside character classes or repetition operators
3165 or other such regexp constructs are not replaced with this.
3166 A value of nil (which is the normal value) means treat spaces literally. */);
3167 Vsearch_spaces_regexp
= Qnil
;
3169 defsubr (&Slooking_at
);
3170 defsubr (&Sposix_looking_at
);
3171 defsubr (&Sstring_match
);
3172 defsubr (&Sposix_string_match
);
3173 defsubr (&Ssearch_forward
);
3174 defsubr (&Ssearch_backward
);
3175 defsubr (&Sword_search_forward
);
3176 defsubr (&Sword_search_backward
);
3177 defsubr (&Sre_search_forward
);
3178 defsubr (&Sre_search_backward
);
3179 defsubr (&Sposix_search_forward
);
3180 defsubr (&Sposix_search_backward
);
3181 defsubr (&Sreplace_match
);
3182 defsubr (&Smatch_beginning
);
3183 defsubr (&Smatch_end
);
3184 defsubr (&Smatch_data
);
3185 defsubr (&Sset_match_data
);
3186 defsubr (&Sregexp_quote
);
3189 /* arch-tag: a6059d79-0552-4f14-a2cb-d379a4e3c78f
3190 (do not change this comment) */