1 /* String search routines for GNU Emacs.
2 Copyright (C) 1985, 1986, 1987, 1993, 1994, 1997, 1998, 1999, 2001, 2002,
3 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
5 This file is part of GNU Emacs.
7 GNU Emacs is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GNU Emacs is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU Emacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20 Boston, MA 02110-1301, USA. */
28 #include "character.h"
30 #include "region-cache.h"
32 #include "blockinput.h"
33 #include "intervals.h"
35 #include <sys/types.h>
38 #define REGEXP_CACHE_SIZE 20
40 /* If the regexp is non-nil, then the buffer contains the compiled form
41 of that regexp, suitable for searching. */
44 struct regexp_cache
*next
;
45 Lisp_Object regexp
, whitespace_regexp
;
46 /* Syntax table for which the regexp applies. We need this because
47 of character classes. If this is t, then the compiled pattern is valid
48 for any syntax-table. */
49 Lisp_Object syntax_table
;
50 struct re_pattern_buffer buf
;
52 /* Nonzero means regexp was compiled to do full POSIX backtracking. */
56 /* The instances of that struct. */
57 struct regexp_cache searchbufs
[REGEXP_CACHE_SIZE
];
59 /* The head of the linked list; points to the most recently used buffer. */
60 struct regexp_cache
*searchbuf_head
;
63 /* Every call to re_match, etc., must pass &search_regs as the regs
64 argument unless you can show it is unnecessary (i.e., if re_match
65 is certainly going to be called again before region-around-match
68 Since the registers are now dynamically allocated, we need to make
69 sure not to refer to the Nth register before checking that it has
70 been allocated by checking search_regs.num_regs.
72 The regex code keeps track of whether it has allocated the search
73 buffer using bits in the re_pattern_buffer. This means that whenever
74 you compile a new pattern, it completely forgets whether it has
75 allocated any registers, and will allocate new registers the next
76 time you call a searching or matching function. Therefore, we need
77 to call re_set_registers after compiling a new pattern or after
78 setting the match registers, so that the regex functions will be
79 able to free or re-allocate it properly. */
80 static struct re_registers search_regs
;
82 /* The buffer in which the last search was performed, or
83 Qt if the last search was done in a string;
84 Qnil if no searching has been done yet. */
85 static Lisp_Object last_thing_searched
;
87 /* error condition signaled when regexp compile_pattern fails */
89 Lisp_Object Qinvalid_regexp
;
91 /* Error condition used for failing searches */
92 Lisp_Object Qsearch_failed
;
94 Lisp_Object Vsearch_spaces_regexp
;
96 static void set_search_regs ();
97 static void save_search_regs ();
98 static int simple_search ();
99 static int boyer_moore ();
100 static int search_buffer ();
101 static void matcher_overflow () NO_RETURN
;
106 error ("Stack overflow in regexp matcher");
109 /* Compile a regexp and signal a Lisp error if anything goes wrong.
110 PATTERN is the pattern to compile.
111 CP is the place to put the result.
112 TRANSLATE is a translation table for ignoring case, or nil for none.
113 REGP is the structure that says where to store the "register"
114 values that will result from matching this pattern.
115 If it is 0, we should compile the pattern not to record any
116 subexpression bounds.
117 POSIX is nonzero if we want full backtracking (POSIX style)
118 for this pattern. 0 means backtrack only enough to get a valid match.
120 The behavior also depends on Vsearch_spaces_regexp. */
123 compile_pattern_1 (cp
, pattern
, translate
, regp
, posix
)
124 struct regexp_cache
*cp
;
126 Lisp_Object translate
;
127 struct re_registers
*regp
;
134 cp
->buf
.translate
= (! NILP (translate
) ? translate
: make_number (0));
136 cp
->buf
.multibyte
= STRING_MULTIBYTE (pattern
);
137 cp
->buf
.charset_unibyte
= charset_unibyte
;
138 cp
->whitespace_regexp
= Vsearch_spaces_regexp
;
139 /* rms: I think BLOCK_INPUT is not needed here any more,
140 because regex.c defines malloc to call xmalloc.
141 Using BLOCK_INPUT here means the debugger won't run if an error occurs.
142 So let's turn it off. */
144 old
= re_set_syntax (RE_SYNTAX_EMACS
145 | (posix
? 0 : RE_NO_POSIX_BACKTRACKING
));
146 re_set_whitespace_regexp (NILP (Vsearch_spaces_regexp
) ? NULL
147 : SDATA (Vsearch_spaces_regexp
));
149 val
= (char *) re_compile_pattern ((char *) SDATA (pattern
),
150 SBYTES (pattern
), &cp
->buf
);
152 /* If the compiled pattern hard codes some of the contents of the
153 syntax-table, it can only be reused with *this* syntax table. */
154 cp
->syntax_table
= cp
->buf
.used_syntax
? current_buffer
->syntax_table
: Qt
;
156 re_set_whitespace_regexp (NULL
);
161 xsignal1 (Qinvalid_regexp
, build_string (val
));
163 cp
->regexp
= Fcopy_sequence (pattern
);
166 /* Shrink each compiled regexp buffer in the cache
167 to the size actually used right now.
168 This is called from garbage collection. */
171 shrink_regexp_cache ()
173 struct regexp_cache
*cp
;
175 for (cp
= searchbuf_head
; cp
!= 0; cp
= cp
->next
)
177 cp
->buf
.allocated
= cp
->buf
.used
;
179 = (unsigned char *) xrealloc (cp
->buf
.buffer
, cp
->buf
.used
);
183 /* Clear the regexp cache w.r.t. a particular syntax table,
184 because it was changed.
185 There is no danger of memory leak here because re_compile_pattern
186 automagically manages the memory in each re_pattern_buffer struct,
187 based on its `allocated' and `buffer' values. */
189 clear_regexp_cache ()
193 for (i
= 0; i
< REGEXP_CACHE_SIZE
; ++i
)
194 /* It's tempting to compare with the syntax-table we've actually changd,
195 but it's not sufficient because char-table inheritance mewans that
196 modifying one syntax-table can change others at the same time. */
197 if (!EQ (searchbufs
[i
].syntax_table
, Qt
))
198 searchbufs
[i
].regexp
= Qnil
;
201 /* Compile a regexp if necessary, but first check to see if there's one in
203 PATTERN is the pattern to compile.
204 TRANSLATE is a translation table for ignoring case, or nil for none.
205 REGP is the structure that says where to store the "register"
206 values that will result from matching this pattern.
207 If it is 0, we should compile the pattern not to record any
208 subexpression bounds.
209 POSIX is nonzero if we want full backtracking (POSIX style)
210 for this pattern. 0 means backtrack only enough to get a valid match. */
212 struct re_pattern_buffer
*
213 compile_pattern (pattern
, regp
, translate
, posix
, multibyte
)
215 struct re_registers
*regp
;
216 Lisp_Object translate
;
217 int posix
, multibyte
;
219 struct regexp_cache
*cp
, **cpp
;
221 for (cpp
= &searchbuf_head
; ; cpp
= &cp
->next
)
224 /* Entries are initialized to nil, and may be set to nil by
225 compile_pattern_1 if the pattern isn't valid. Don't apply
226 string accessors in those cases. However, compile_pattern_1
227 is only applied to the cache entry we pick here to reuse. So
228 nil should never appear before a non-nil entry. */
229 if (NILP (cp
->regexp
))
231 if (SCHARS (cp
->regexp
) == SCHARS (pattern
)
232 && STRING_MULTIBYTE (cp
->regexp
) == STRING_MULTIBYTE (pattern
)
233 && !NILP (Fstring_equal (cp
->regexp
, pattern
))
234 && EQ (cp
->buf
.translate
, (! NILP (translate
) ? translate
: make_number (0)))
235 && cp
->posix
== posix
236 && (EQ (cp
->syntax_table
, Qt
)
237 || EQ (cp
->syntax_table
, current_buffer
->syntax_table
))
238 && !NILP (Fequal (cp
->whitespace_regexp
, Vsearch_spaces_regexp
))
239 && cp
->buf
.charset_unibyte
== charset_unibyte
)
242 /* If we're at the end of the cache, compile into the nil cell
243 we found, or the last (least recently used) cell with a
248 compile_pattern_1 (cp
, pattern
, translate
, regp
, posix
);
253 /* When we get here, cp (aka *cpp) contains the compiled pattern,
254 either because we found it in the cache or because we just compiled it.
255 Move it to the front of the queue to mark it as most recently used. */
257 cp
->next
= searchbuf_head
;
260 /* Advise the searching functions about the space we have allocated
261 for register data. */
263 re_set_registers (&cp
->buf
, regp
, regp
->num_regs
, regp
->start
, regp
->end
);
265 /* The compiled pattern can be used both for mulitbyte and unibyte
266 target. But, we have to tell which the pattern is used for. */
267 cp
->buf
.target_multibyte
= multibyte
;
274 looking_at_1 (string
, posix
)
279 unsigned char *p1
, *p2
;
282 struct re_pattern_buffer
*bufp
;
284 if (running_asynch_code
)
287 /* This is so set_image_of_range_1 in regex.c can find the EQV table. */
288 XCHAR_TABLE (current_buffer
->case_canon_table
)->extras
[2]
289 = current_buffer
->case_eqv_table
;
291 CHECK_STRING (string
);
292 bufp
= compile_pattern (string
, &search_regs
,
293 (!NILP (current_buffer
->case_fold_search
)
294 ? current_buffer
->case_canon_table
: Qnil
),
296 !NILP (current_buffer
->enable_multibyte_characters
));
299 QUIT
; /* Do a pending quit right away, to avoid paradoxical behavior */
301 /* Get pointers and sizes of the two strings
302 that make up the visible portion of the buffer. */
305 s1
= GPT_BYTE
- BEGV_BYTE
;
307 s2
= ZV_BYTE
- GPT_BYTE
;
311 s2
= ZV_BYTE
- BEGV_BYTE
;
316 s1
= ZV_BYTE
- BEGV_BYTE
;
320 re_match_object
= Qnil
;
322 i
= re_match_2 (bufp
, (char *) p1
, s1
, (char *) p2
, s2
,
323 PT_BYTE
- BEGV_BYTE
, &search_regs
,
324 ZV_BYTE
- BEGV_BYTE
);
330 val
= (0 <= i
? Qt
: Qnil
);
332 for (i
= 0; i
< search_regs
.num_regs
; i
++)
333 if (search_regs
.start
[i
] >= 0)
336 = BYTE_TO_CHAR (search_regs
.start
[i
] + BEGV_BYTE
);
338 = BYTE_TO_CHAR (search_regs
.end
[i
] + BEGV_BYTE
);
340 XSETBUFFER (last_thing_searched
, current_buffer
);
344 DEFUN ("looking-at", Flooking_at
, Slooking_at
, 1, 1, 0,
345 doc
: /* Return t if text after point matches regular expression REGEXP.
346 This function modifies the match data that `match-beginning',
347 `match-end' and `match-data' access; save and restore the match
348 data if you want to preserve them. */)
352 return looking_at_1 (regexp
, 0);
355 DEFUN ("posix-looking-at", Fposix_looking_at
, Sposix_looking_at
, 1, 1, 0,
356 doc
: /* Return t if text after point matches regular expression REGEXP.
357 Find the longest match, in accord with Posix regular expression rules.
358 This function modifies the match data that `match-beginning',
359 `match-end' and `match-data' access; save and restore the match
360 data if you want to preserve them. */)
364 return looking_at_1 (regexp
, 1);
368 string_match_1 (regexp
, string
, start
, posix
)
369 Lisp_Object regexp
, string
, start
;
373 struct re_pattern_buffer
*bufp
;
377 if (running_asynch_code
)
380 CHECK_STRING (regexp
);
381 CHECK_STRING (string
);
384 pos
= 0, pos_byte
= 0;
387 int len
= SCHARS (string
);
389 CHECK_NUMBER (start
);
391 if (pos
< 0 && -pos
<= len
)
393 else if (0 > pos
|| pos
> len
)
394 args_out_of_range (string
, start
);
395 pos_byte
= string_char_to_byte (string
, pos
);
398 /* This is so set_image_of_range_1 in regex.c can find the EQV table. */
399 XCHAR_TABLE (current_buffer
->case_canon_table
)->extras
[2]
400 = current_buffer
->case_eqv_table
;
402 bufp
= compile_pattern (regexp
, &search_regs
,
403 (!NILP (current_buffer
->case_fold_search
)
404 ? current_buffer
->case_canon_table
: Qnil
),
406 STRING_MULTIBYTE (string
));
408 re_match_object
= string
;
410 val
= re_search (bufp
, (char *) SDATA (string
),
411 SBYTES (string
), pos_byte
,
412 SBYTES (string
) - pos_byte
,
415 last_thing_searched
= Qt
;
418 if (val
< 0) return Qnil
;
420 for (i
= 0; i
< search_regs
.num_regs
; i
++)
421 if (search_regs
.start
[i
] >= 0)
424 = string_byte_to_char (string
, search_regs
.start
[i
]);
426 = string_byte_to_char (string
, search_regs
.end
[i
]);
429 return make_number (string_byte_to_char (string
, val
));
432 DEFUN ("string-match", Fstring_match
, Sstring_match
, 2, 3, 0,
433 doc
: /* Return index of start of first match for REGEXP in STRING, or nil.
434 Matching ignores case if `case-fold-search' is non-nil.
435 If third arg START is non-nil, start search at that index in STRING.
436 For index of first char beyond the match, do (match-end 0).
437 `match-end' and `match-beginning' also give indices of substrings
438 matched by parenthesis constructs in the pattern.
440 You can use the function `match-string' to extract the substrings
441 matched by the parenthesis constructions in REGEXP. */)
442 (regexp
, string
, start
)
443 Lisp_Object regexp
, string
, start
;
445 return string_match_1 (regexp
, string
, start
, 0);
448 DEFUN ("posix-string-match", Fposix_string_match
, Sposix_string_match
, 2, 3, 0,
449 doc
: /* Return index of start of first match for REGEXP in STRING, or nil.
450 Find the longest match, in accord with Posix regular expression rules.
451 Case is ignored if `case-fold-search' is non-nil in the current buffer.
452 If third arg START is non-nil, start search at that index in STRING.
453 For index of first char beyond the match, do (match-end 0).
454 `match-end' and `match-beginning' also give indices of substrings
455 matched by parenthesis constructs in the pattern. */)
456 (regexp
, string
, start
)
457 Lisp_Object regexp
, string
, start
;
459 return string_match_1 (regexp
, string
, start
, 1);
462 /* Match REGEXP against STRING, searching all of STRING,
463 and return the index of the match, or negative on failure.
464 This does not clobber the match data. */
467 fast_string_match (regexp
, string
)
468 Lisp_Object regexp
, string
;
471 struct re_pattern_buffer
*bufp
;
473 bufp
= compile_pattern (regexp
, 0, Qnil
,
474 0, STRING_MULTIBYTE (string
));
476 re_match_object
= string
;
478 val
= re_search (bufp
, (char *) SDATA (string
),
485 /* Match REGEXP against STRING, searching all of STRING ignoring case,
486 and return the index of the match, or negative on failure.
487 This does not clobber the match data.
488 We assume that STRING contains single-byte characters. */
490 extern Lisp_Object Vascii_downcase_table
;
493 fast_c_string_match_ignore_case (regexp
, string
)
498 struct re_pattern_buffer
*bufp
;
499 int len
= strlen (string
);
501 regexp
= string_make_unibyte (regexp
);
502 re_match_object
= Qt
;
503 bufp
= compile_pattern (regexp
, 0,
504 Vascii_canon_table
, 0,
507 val
= re_search (bufp
, string
, len
, 0, len
, 0);
512 /* Like fast_string_match but ignore case. */
515 fast_string_match_ignore_case (regexp
, string
)
516 Lisp_Object regexp
, string
;
519 struct re_pattern_buffer
*bufp
;
521 bufp
= compile_pattern (regexp
, 0, Vascii_canon_table
,
522 0, STRING_MULTIBYTE (string
));
524 re_match_object
= string
;
526 val
= re_search (bufp
, (char *) SDATA (string
),
533 /* The newline cache: remembering which sections of text have no newlines. */
535 /* If the user has requested newline caching, make sure it's on.
536 Otherwise, make sure it's off.
537 This is our cheezy way of associating an action with the change of
538 state of a buffer-local variable. */
540 newline_cache_on_off (buf
)
543 if (NILP (buf
->cache_long_line_scans
))
545 /* It should be off. */
546 if (buf
->newline_cache
)
548 free_region_cache (buf
->newline_cache
);
549 buf
->newline_cache
= 0;
554 /* It should be on. */
555 if (buf
->newline_cache
== 0)
556 buf
->newline_cache
= new_region_cache ();
561 /* Search for COUNT instances of the character TARGET between START and END.
563 If COUNT is positive, search forwards; END must be >= START.
564 If COUNT is negative, search backwards for the -COUNTth instance;
565 END must be <= START.
566 If COUNT is zero, do anything you please; run rogue, for all I care.
568 If END is zero, use BEGV or ZV instead, as appropriate for the
569 direction indicated by COUNT.
571 If we find COUNT instances, set *SHORTAGE to zero, and return the
572 position past the COUNTth match. Note that for reverse motion
573 this is not the same as the usual convention for Emacs motion commands.
575 If we don't find COUNT instances before reaching END, set *SHORTAGE
576 to the number of TARGETs left unfound, and return END.
578 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
579 except when inside redisplay. */
582 scan_buffer (target
, start
, end
, count
, shortage
, allow_quit
)
589 struct region_cache
*newline_cache
;
600 if (! end
) end
= BEGV
;
603 newline_cache_on_off (current_buffer
);
604 newline_cache
= current_buffer
->newline_cache
;
609 immediate_quit
= allow_quit
;
614 /* Our innermost scanning loop is very simple; it doesn't know
615 about gaps, buffer ends, or the newline cache. ceiling is
616 the position of the last character before the next such
617 obstacle --- the last character the dumb search loop should
619 int ceiling_byte
= CHAR_TO_BYTE (end
) - 1;
620 int start_byte
= CHAR_TO_BYTE (start
);
623 /* If we're looking for a newline, consult the newline cache
624 to see where we can avoid some scanning. */
625 if (target
== '\n' && newline_cache
)
629 while (region_cache_forward
630 (current_buffer
, newline_cache
, start_byte
, &next_change
))
631 start_byte
= next_change
;
632 immediate_quit
= allow_quit
;
634 /* START should never be after END. */
635 if (start_byte
> ceiling_byte
)
636 start_byte
= ceiling_byte
;
638 /* Now the text after start is an unknown region, and
639 next_change is the position of the next known region. */
640 ceiling_byte
= min (next_change
- 1, ceiling_byte
);
643 /* The dumb loop can only scan text stored in contiguous
644 bytes. BUFFER_CEILING_OF returns the last character
645 position that is contiguous, so the ceiling is the
646 position after that. */
647 tem
= BUFFER_CEILING_OF (start_byte
);
648 ceiling_byte
= min (tem
, ceiling_byte
);
651 /* The termination address of the dumb loop. */
652 register unsigned char *ceiling_addr
653 = BYTE_POS_ADDR (ceiling_byte
) + 1;
654 register unsigned char *cursor
655 = BYTE_POS_ADDR (start_byte
);
656 unsigned char *base
= cursor
;
658 while (cursor
< ceiling_addr
)
660 unsigned char *scan_start
= cursor
;
663 while (*cursor
!= target
&& ++cursor
< ceiling_addr
)
666 /* If we're looking for newlines, cache the fact that
667 the region from start to cursor is free of them. */
668 if (target
== '\n' && newline_cache
)
669 know_region_cache (current_buffer
, newline_cache
,
670 start_byte
+ scan_start
- base
,
671 start_byte
+ cursor
- base
);
673 /* Did we find the target character? */
674 if (cursor
< ceiling_addr
)
679 return BYTE_TO_CHAR (start_byte
+ cursor
- base
+ 1);
685 start
= BYTE_TO_CHAR (start_byte
+ cursor
- base
);
691 /* The last character to check before the next obstacle. */
692 int ceiling_byte
= CHAR_TO_BYTE (end
);
693 int start_byte
= CHAR_TO_BYTE (start
);
696 /* Consult the newline cache, if appropriate. */
697 if (target
== '\n' && newline_cache
)
701 while (region_cache_backward
702 (current_buffer
, newline_cache
, start_byte
, &next_change
))
703 start_byte
= next_change
;
704 immediate_quit
= allow_quit
;
706 /* Start should never be at or before end. */
707 if (start_byte
<= ceiling_byte
)
708 start_byte
= ceiling_byte
+ 1;
710 /* Now the text before start is an unknown region, and
711 next_change is the position of the next known region. */
712 ceiling_byte
= max (next_change
, ceiling_byte
);
715 /* Stop scanning before the gap. */
716 tem
= BUFFER_FLOOR_OF (start_byte
- 1);
717 ceiling_byte
= max (tem
, ceiling_byte
);
720 /* The termination address of the dumb loop. */
721 register unsigned char *ceiling_addr
= BYTE_POS_ADDR (ceiling_byte
);
722 register unsigned char *cursor
= BYTE_POS_ADDR (start_byte
- 1);
723 unsigned char *base
= cursor
;
725 while (cursor
>= ceiling_addr
)
727 unsigned char *scan_start
= cursor
;
729 while (*cursor
!= target
&& --cursor
>= ceiling_addr
)
732 /* If we're looking for newlines, cache the fact that
733 the region from after the cursor to start is free of them. */
734 if (target
== '\n' && newline_cache
)
735 know_region_cache (current_buffer
, newline_cache
,
736 start_byte
+ cursor
- base
,
737 start_byte
+ scan_start
- base
);
739 /* Did we find the target character? */
740 if (cursor
>= ceiling_addr
)
745 return BYTE_TO_CHAR (start_byte
+ cursor
- base
);
751 start
= BYTE_TO_CHAR (start_byte
+ cursor
- base
);
757 *shortage
= count
* direction
;
761 /* Search for COUNT instances of a line boundary, which means either a
762 newline or (if selective display enabled) a carriage return.
763 Start at START. If COUNT is negative, search backwards.
765 We report the resulting position by calling TEMP_SET_PT_BOTH.
767 If we find COUNT instances. we position after (always after,
768 even if scanning backwards) the COUNTth match, and return 0.
770 If we don't find COUNT instances before reaching the end of the
771 buffer (or the beginning, if scanning backwards), we return
772 the number of line boundaries left unfound, and position at
773 the limit we bumped up against.
775 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
776 except in special cases. */
779 scan_newline (start
, start_byte
, limit
, limit_byte
, count
, allow_quit
)
780 int start
, start_byte
;
781 int limit
, limit_byte
;
785 int direction
= ((count
> 0) ? 1 : -1);
787 register unsigned char *cursor
;
790 register int ceiling
;
791 register unsigned char *ceiling_addr
;
793 int old_immediate_quit
= immediate_quit
;
795 /* The code that follows is like scan_buffer
796 but checks for either newline or carriage return. */
801 start_byte
= CHAR_TO_BYTE (start
);
805 while (start_byte
< limit_byte
)
807 ceiling
= BUFFER_CEILING_OF (start_byte
);
808 ceiling
= min (limit_byte
- 1, ceiling
);
809 ceiling_addr
= BYTE_POS_ADDR (ceiling
) + 1;
810 base
= (cursor
= BYTE_POS_ADDR (start_byte
));
813 while (*cursor
!= '\n' && ++cursor
!= ceiling_addr
)
816 if (cursor
!= ceiling_addr
)
820 immediate_quit
= old_immediate_quit
;
821 start_byte
= start_byte
+ cursor
- base
+ 1;
822 start
= BYTE_TO_CHAR (start_byte
);
823 TEMP_SET_PT_BOTH (start
, start_byte
);
827 if (++cursor
== ceiling_addr
)
833 start_byte
+= cursor
- base
;
838 while (start_byte
> limit_byte
)
840 ceiling
= BUFFER_FLOOR_OF (start_byte
- 1);
841 ceiling
= max (limit_byte
, ceiling
);
842 ceiling_addr
= BYTE_POS_ADDR (ceiling
) - 1;
843 base
= (cursor
= BYTE_POS_ADDR (start_byte
- 1) + 1);
846 while (--cursor
!= ceiling_addr
&& *cursor
!= '\n')
849 if (cursor
!= ceiling_addr
)
853 immediate_quit
= old_immediate_quit
;
854 /* Return the position AFTER the match we found. */
855 start_byte
= start_byte
+ cursor
- base
+ 1;
856 start
= BYTE_TO_CHAR (start_byte
);
857 TEMP_SET_PT_BOTH (start
, start_byte
);
864 /* Here we add 1 to compensate for the last decrement
865 of CURSOR, which took it past the valid range. */
866 start_byte
+= cursor
- base
+ 1;
870 TEMP_SET_PT_BOTH (limit
, limit_byte
);
871 immediate_quit
= old_immediate_quit
;
873 return count
* direction
;
877 find_next_newline_no_quit (from
, cnt
)
878 register int from
, cnt
;
880 return scan_buffer ('\n', from
, 0, cnt
, (int *) 0, 0);
883 /* Like find_next_newline, but returns position before the newline,
884 not after, and only search up to TO. This isn't just
885 find_next_newline (...)-1, because you might hit TO. */
888 find_before_next_newline (from
, to
, cnt
)
892 int pos
= scan_buffer ('\n', from
, to
, cnt
, &shortage
, 1);
900 /* Subroutines of Lisp buffer search functions. */
903 search_command (string
, bound
, noerror
, count
, direction
, RE
, posix
)
904 Lisp_Object string
, bound
, noerror
, count
;
915 CHECK_NUMBER (count
);
919 CHECK_STRING (string
);
923 lim
= ZV
, lim_byte
= ZV_BYTE
;
925 lim
= BEGV
, lim_byte
= BEGV_BYTE
;
929 CHECK_NUMBER_COERCE_MARKER (bound
);
931 if (n
> 0 ? lim
< PT
: lim
> PT
)
932 error ("Invalid search bound (wrong side of point)");
934 lim
= ZV
, lim_byte
= ZV_BYTE
;
936 lim
= BEGV
, lim_byte
= BEGV_BYTE
;
938 lim_byte
= CHAR_TO_BYTE (lim
);
941 /* This is so set_image_of_range_1 in regex.c can find the EQV table. */
942 XCHAR_TABLE (current_buffer
->case_canon_table
)->extras
[2]
943 = current_buffer
->case_eqv_table
;
945 np
= search_buffer (string
, PT
, PT_BYTE
, lim
, lim_byte
, n
, RE
,
946 (!NILP (current_buffer
->case_fold_search
)
947 ? current_buffer
->case_canon_table
949 (!NILP (current_buffer
->case_fold_search
)
950 ? current_buffer
->case_eqv_table
956 xsignal1 (Qsearch_failed
, string
);
958 if (!EQ (noerror
, Qt
))
960 if (lim
< BEGV
|| lim
> ZV
)
962 SET_PT_BOTH (lim
, lim_byte
);
964 #if 0 /* This would be clean, but maybe programs depend on
965 a value of nil here. */
973 if (np
< BEGV
|| np
> ZV
)
978 return make_number (np
);
981 /* Return 1 if REGEXP it matches just one constant string. */
984 trivial_regexp_p (regexp
)
987 int len
= SBYTES (regexp
);
988 unsigned char *s
= SDATA (regexp
);
993 case '.': case '*': case '+': case '?': case '[': case '^': case '$':
1000 case '|': case '(': case ')': case '`': case '\'': case 'b':
1001 case 'B': case '<': case '>': case 'w': case 'W': case 's':
1002 case 'S': case '=': case '{': case '}': case '_':
1003 case 'c': case 'C': /* for categoryspec and notcategoryspec */
1004 case '1': case '2': case '3': case '4': case '5':
1005 case '6': case '7': case '8': case '9':
1013 /* Search for the n'th occurrence of STRING in the current buffer,
1014 starting at position POS and stopping at position LIM,
1015 treating STRING as a literal string if RE is false or as
1016 a regular expression if RE is true.
1018 If N is positive, searching is forward and LIM must be greater than POS.
1019 If N is negative, searching is backward and LIM must be less than POS.
1021 Returns -x if x occurrences remain to be found (x > 0),
1022 or else the position at the beginning of the Nth occurrence
1023 (if searching backward) or the end (if searching forward).
1025 POSIX is nonzero if we want full backtracking (POSIX style)
1026 for this pattern. 0 means backtrack only enough to get a valid match. */
1028 #define TRANSLATE(out, trt, d) \
1034 temp = Faref (trt, make_number (d)); \
1035 if (INTEGERP (temp)) \
1036 out = XINT (temp); \
1046 search_buffer (string
, pos
, pos_byte
, lim
, lim_byte
, n
,
1047 RE
, trt
, inverse_trt
, posix
)
1056 Lisp_Object inverse_trt
;
1059 int len
= SCHARS (string
);
1060 int len_byte
= SBYTES (string
);
1063 if (running_asynch_code
)
1064 save_search_regs ();
1066 /* Searching 0 times means don't move. */
1067 /* Null string is found at starting position. */
1068 if (len
== 0 || n
== 0)
1070 set_search_regs (pos_byte
, 0);
1074 if (RE
&& !(trivial_regexp_p (string
) && NILP (Vsearch_spaces_regexp
)))
1076 unsigned char *p1
, *p2
;
1078 struct re_pattern_buffer
*bufp
;
1080 bufp
= compile_pattern (string
, &search_regs
, trt
, posix
,
1081 !NILP (current_buffer
->enable_multibyte_characters
));
1083 immediate_quit
= 1; /* Quit immediately if user types ^G,
1084 because letting this function finish
1085 can take too long. */
1086 QUIT
; /* Do a pending quit right away,
1087 to avoid paradoxical behavior */
1088 /* Get pointers and sizes of the two strings
1089 that make up the visible portion of the buffer. */
1092 s1
= GPT_BYTE
- BEGV_BYTE
;
1094 s2
= ZV_BYTE
- GPT_BYTE
;
1098 s2
= ZV_BYTE
- BEGV_BYTE
;
1103 s1
= ZV_BYTE
- BEGV_BYTE
;
1106 re_match_object
= Qnil
;
1111 val
= re_search_2 (bufp
, (char *) p1
, s1
, (char *) p2
, s2
,
1112 pos_byte
- BEGV_BYTE
, lim_byte
- pos_byte
,
1114 /* Don't allow match past current point */
1115 pos_byte
- BEGV_BYTE
);
1118 matcher_overflow ();
1122 pos_byte
= search_regs
.start
[0] + BEGV_BYTE
;
1123 for (i
= 0; i
< search_regs
.num_regs
; i
++)
1124 if (search_regs
.start
[i
] >= 0)
1126 search_regs
.start
[i
]
1127 = BYTE_TO_CHAR (search_regs
.start
[i
] + BEGV_BYTE
);
1129 = BYTE_TO_CHAR (search_regs
.end
[i
] + BEGV_BYTE
);
1131 XSETBUFFER (last_thing_searched
, current_buffer
);
1132 /* Set pos to the new position. */
1133 pos
= search_regs
.start
[0];
1145 val
= re_search_2 (bufp
, (char *) p1
, s1
, (char *) p2
, s2
,
1146 pos_byte
- BEGV_BYTE
, lim_byte
- pos_byte
,
1148 lim_byte
- BEGV_BYTE
);
1151 matcher_overflow ();
1155 pos_byte
= search_regs
.end
[0] + BEGV_BYTE
;
1156 for (i
= 0; i
< search_regs
.num_regs
; i
++)
1157 if (search_regs
.start
[i
] >= 0)
1159 search_regs
.start
[i
]
1160 = BYTE_TO_CHAR (search_regs
.start
[i
] + BEGV_BYTE
);
1162 = BYTE_TO_CHAR (search_regs
.end
[i
] + BEGV_BYTE
);
1164 XSETBUFFER (last_thing_searched
, current_buffer
);
1165 pos
= search_regs
.end
[0];
1177 else /* non-RE case */
1179 unsigned char *raw_pattern
, *pat
;
1180 int raw_pattern_size
;
1181 int raw_pattern_size_byte
;
1182 unsigned char *patbuf
;
1183 int multibyte
= !NILP (current_buffer
->enable_multibyte_characters
);
1184 unsigned char *base_pat
;
1185 /* Set to positive if we find a non-ASCII char that need
1186 translation. Otherwise set to zero later. */
1188 int boyer_moore_ok
= 1;
1190 /* MULTIBYTE says whether the text to be searched is multibyte.
1191 We must convert PATTERN to match that, or we will not really
1192 find things right. */
1194 if (multibyte
== STRING_MULTIBYTE (string
))
1196 raw_pattern
= (unsigned char *) SDATA (string
);
1197 raw_pattern_size
= SCHARS (string
);
1198 raw_pattern_size_byte
= SBYTES (string
);
1202 raw_pattern_size
= SCHARS (string
);
1203 raw_pattern_size_byte
1204 = count_size_as_multibyte (SDATA (string
),
1206 raw_pattern
= (unsigned char *) alloca (raw_pattern_size_byte
+ 1);
1207 copy_text (SDATA (string
), raw_pattern
,
1208 SCHARS (string
), 0, 1);
1212 /* Converting multibyte to single-byte.
1214 ??? Perhaps this conversion should be done in a special way
1215 by subtracting nonascii-insert-offset from each non-ASCII char,
1216 so that only the multibyte chars which really correspond to
1217 the chosen single-byte character set can possibly match. */
1218 raw_pattern_size
= SCHARS (string
);
1219 raw_pattern_size_byte
= SCHARS (string
);
1220 raw_pattern
= (unsigned char *) alloca (raw_pattern_size
+ 1);
1221 copy_text (SDATA (string
), raw_pattern
,
1222 SBYTES (string
), 1, 0);
1225 /* Copy and optionally translate the pattern. */
1226 len
= raw_pattern_size
;
1227 len_byte
= raw_pattern_size_byte
;
1228 patbuf
= (unsigned char *) alloca (len
* MAX_MULTIBYTE_LENGTH
);
1230 base_pat
= raw_pattern
;
1233 /* Fill patbuf by translated characters in STRING while
1234 checking if we can use boyer-moore search. If TRT is
1235 non-nil, we can use boyer-moore search only if TRT can be
1236 represented by the byte array of 256 elements. For that,
1237 all non-ASCII case-equivalents of all case-senstive
1238 characters in STRING must belong to the same charset and
1243 unsigned char str_base
[MAX_MULTIBYTE_LENGTH
], *str
;
1244 int c
, translated
, inverse
;
1245 int in_charlen
, charlen
;
1247 /* If we got here and the RE flag is set, it's because we're
1248 dealing with a regexp known to be trivial, so the backslash
1249 just quotes the next character. */
1250 if (RE
&& *base_pat
== '\\')
1258 c
= STRING_CHAR_AND_LENGTH (base_pat
, len_byte
, in_charlen
);
1263 charlen
= in_charlen
;
1267 /* Translate the character. */
1268 TRANSLATE (translated
, trt
, c
);
1269 charlen
= CHAR_STRING (translated
, str_base
);
1272 /* Check if C has any other case-equivalents. */
1273 TRANSLATE (inverse
, inverse_trt
, c
);
1274 /* If so, check if we can use boyer-moore. */
1275 if (c
!= inverse
&& boyer_moore_ok
)
1277 /* Check if all equivalents belong to the same
1278 group of characters. Note that the check of C
1279 itself is done by the last iteration. */
1280 int this_char_base
= -1;
1282 while (boyer_moore_ok
)
1284 if (ASCII_BYTE_P (inverse
))
1286 if (this_char_base
> 0)
1292 char_base
= this_char_base
;
1295 else if (CHAR_BYTE8_P (inverse
))
1296 /* Boyer-moore search can't handle a
1297 translation of an eight-bit
1300 else if (this_char_base
< 0)
1302 this_char_base
= inverse
& ~0x3F;
1304 char_base
= this_char_base
;
1305 else if (char_base
> 0
1306 && this_char_base
!= char_base
)
1309 else if ((inverse
& ~0x3F) != this_char_base
)
1313 TRANSLATE (inverse
, inverse_trt
, inverse
);
1320 /* Store this character into the translated pattern. */
1321 bcopy (str
, pat
, charlen
);
1323 base_pat
+= in_charlen
;
1324 len_byte
-= in_charlen
;
1329 /* Unibyte buffer. */
1335 /* If we got here and the RE flag is set, it's because we're
1336 dealing with a regexp known to be trivial, so the backslash
1337 just quotes the next character. */
1338 if (RE
&& *base_pat
== '\\')
1345 TRANSLATE (translated
, trt
, c
);
1346 *pat
++ = translated
;
1350 len_byte
= pat
- patbuf
;
1351 len
= raw_pattern_size
;
1352 pat
= base_pat
= patbuf
;
1355 return boyer_moore (n
, pat
, len
, len_byte
, trt
, inverse_trt
,
1356 pos
, pos_byte
, lim
, lim_byte
,
1359 return simple_search (n
, pat
, len
, len_byte
, trt
,
1360 pos
, pos_byte
, lim
, lim_byte
);
1364 /* Do a simple string search N times for the string PAT,
1365 whose length is LEN/LEN_BYTE,
1366 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1367 TRT is the translation table.
1369 Return the character position where the match is found.
1370 Otherwise, if M matches remained to be found, return -M.
1372 This kind of search works regardless of what is in PAT and
1373 regardless of what is in TRT. It is used in cases where
1374 boyer_moore cannot work. */
1377 simple_search (n
, pat
, len
, len_byte
, trt
, pos
, pos_byte
, lim
, lim_byte
)
1385 int multibyte
= ! NILP (current_buffer
->enable_multibyte_characters
);
1386 int forward
= n
> 0;
1387 /* Number of buffer bytes matched. Note that this may be different
1388 from len_byte in a multibyte buffer. */
1391 if (lim
> pos
&& multibyte
)
1396 /* Try matching at position POS. */
1398 int this_pos_byte
= pos_byte
;
1400 int this_len_byte
= len_byte
;
1401 unsigned char *p
= pat
;
1402 if (pos
+ len
> lim
|| pos_byte
+ len_byte
> lim_byte
)
1405 while (this_len
> 0)
1407 int charlen
, buf_charlen
;
1410 pat_ch
= STRING_CHAR_AND_LENGTH (p
, this_len_byte
, charlen
);
1411 buf_ch
= STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte
),
1412 ZV_BYTE
- this_pos_byte
,
1414 TRANSLATE (buf_ch
, trt
, buf_ch
);
1416 if (buf_ch
!= pat_ch
)
1419 this_len_byte
-= charlen
;
1423 this_pos_byte
+= buf_charlen
;
1429 match_byte
= this_pos_byte
- pos_byte
;
1431 pos_byte
+= match_byte
;
1435 INC_BOTH (pos
, pos_byte
);
1445 /* Try matching at position POS. */
1448 unsigned char *p
= pat
;
1450 if (pos
+ len
> lim
)
1453 while (this_len
> 0)
1456 int buf_ch
= FETCH_BYTE (this_pos
);
1457 TRANSLATE (buf_ch
, trt
, buf_ch
);
1459 if (buf_ch
!= pat_ch
)
1478 /* Backwards search. */
1479 else if (lim
< pos
&& multibyte
)
1484 /* Try matching at position POS. */
1485 int this_pos
= pos
- len
;
1488 int this_len_byte
= len_byte
;
1489 unsigned char *p
= pat
;
1491 if (this_pos
< lim
|| (pos_byte
- len_byte
) < lim_byte
)
1493 this_pos_byte
= CHAR_TO_BYTE (this_pos
);
1494 match_byte
= pos_byte
- this_pos_byte
;
1496 while (this_len
> 0)
1498 int charlen
, buf_charlen
;
1501 pat_ch
= STRING_CHAR_AND_LENGTH (p
, this_len_byte
, charlen
);
1502 buf_ch
= STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte
),
1503 ZV_BYTE
- this_pos_byte
,
1505 TRANSLATE (buf_ch
, trt
, buf_ch
);
1507 if (buf_ch
!= pat_ch
)
1510 this_len_byte
-= charlen
;
1513 this_pos_byte
+= buf_charlen
;
1520 pos_byte
-= match_byte
;
1524 DEC_BOTH (pos
, pos_byte
);
1534 /* Try matching at position POS. */
1535 int this_pos
= pos
- len
;
1537 unsigned char *p
= pat
;
1542 while (this_len
> 0)
1545 int buf_ch
= FETCH_BYTE (this_pos
);
1546 TRANSLATE (buf_ch
, trt
, buf_ch
);
1548 if (buf_ch
!= pat_ch
)
1571 set_search_regs ((multibyte
? pos_byte
: pos
) - match_byte
, match_byte
);
1573 set_search_regs (multibyte
? pos_byte
: pos
, match_byte
);
1583 /* Do Boyer-Moore search N times for the string BASE_PAT,
1584 whose length is LEN/LEN_BYTE,
1585 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1586 DIRECTION says which direction we search in.
1587 TRT and INVERSE_TRT are translation tables.
1588 Characters in PAT are already translated by TRT.
1590 This kind of search works if all the characters in BASE_PAT that
1591 have nontrivial translation are the same aside from the last byte.
1592 This makes it possible to translate just the last byte of a
1593 character, and do so after just a simple test of the context.
1594 CHAR_BASE is nonzero iff there is such a non-ASCII character.
1596 If that criterion is not satisfied, do not call this function. */
1599 boyer_moore (n
, base_pat
, len
, len_byte
, trt
, inverse_trt
,
1600 pos
, pos_byte
, lim
, lim_byte
, char_base
)
1602 unsigned char *base_pat
;
1605 Lisp_Object inverse_trt
;
1610 int direction
= ((n
> 0) ? 1 : -1);
1611 register int dirlen
;
1612 int infinity
, limit
, stride_for_teases
= 0;
1613 register int *BM_tab
;
1615 register unsigned char *cursor
, *p_limit
;
1617 unsigned char *pat
, *pat_end
;
1618 int multibyte
= ! NILP (current_buffer
->enable_multibyte_characters
);
1620 unsigned char simple_translate
[0400];
1621 /* These are set to the preceding bytes of a byte to be translated
1622 if char_base is nonzero. As the maximum byte length of a
1623 multibyte character is 5, we have to check at most four previous
1625 int translate_prev_byte1
= 0;
1626 int translate_prev_byte2
= 0;
1627 int translate_prev_byte3
= 0;
1628 int translate_prev_byte4
= 0;
1631 int BM_tab_space
[0400];
1632 BM_tab
= &BM_tab_space
[0];
1634 BM_tab
= (int *) alloca (0400 * sizeof (int));
1636 /* The general approach is that we are going to maintain that we know */
1637 /* the first (closest to the present position, in whatever direction */
1638 /* we're searching) character that could possibly be the last */
1639 /* (furthest from present position) character of a valid match. We */
1640 /* advance the state of our knowledge by looking at that character */
1641 /* and seeing whether it indeed matches the last character of the */
1642 /* pattern. If it does, we take a closer look. If it does not, we */
1643 /* move our pointer (to putative last characters) as far as is */
1644 /* logically possible. This amount of movement, which I call a */
1645 /* stride, will be the length of the pattern if the actual character */
1646 /* appears nowhere in the pattern, otherwise it will be the distance */
1647 /* from the last occurrence of that character to the end of the */
1649 /* As a coding trick, an enormous stride is coded into the table for */
1650 /* characters that match the last character. This allows use of only */
1651 /* a single test, a test for having gone past the end of the */
1652 /* permissible match region, to test for both possible matches (when */
1653 /* the stride goes past the end immediately) and failure to */
1654 /* match (where you get nudged past the end one stride at a time). */
1656 /* Here we make a "mickey mouse" BM table. The stride of the search */
1657 /* is determined only by the last character of the putative match. */
1658 /* If that character does not match, we will stride the proper */
1659 /* distance to propose a match that superimposes it on the last */
1660 /* instance of a character that matches it (per trt), or misses */
1661 /* it entirely if there is none. */
1663 dirlen
= len_byte
* direction
;
1664 infinity
= dirlen
- (lim_byte
+ pos_byte
+ len_byte
+ len_byte
) * direction
;
1666 /* Record position after the end of the pattern. */
1667 pat_end
= base_pat
+ len_byte
;
1668 /* BASE_PAT points to a character that we start scanning from.
1669 It is the first character in a forward search,
1670 the last character in a backward search. */
1672 base_pat
= pat_end
- 1;
1674 BM_tab_base
= BM_tab
;
1676 j
= dirlen
; /* to get it in a register */
1677 /* A character that does not appear in the pattern induces a */
1678 /* stride equal to the pattern length. */
1679 while (BM_tab_base
!= BM_tab
)
1687 /* We use this for translation, instead of TRT itself.
1688 We fill this in to handle the characters that actually
1689 occur in the pattern. Others don't matter anyway! */
1690 bzero (simple_translate
, sizeof simple_translate
);
1691 for (i
= 0; i
< 0400; i
++)
1692 simple_translate
[i
] = i
;
1696 /* Setup translate_prev_byte1/2/3/4 from CHAR_BASE. Only a
1697 byte following them are the target of translation. */
1698 unsigned char str
[MAX_MULTIBYTE_LENGTH
];
1699 int len
= CHAR_STRING (char_base
, str
);
1701 translate_prev_byte1
= str
[len
- 2];
1704 translate_prev_byte2
= str
[len
- 3];
1707 translate_prev_byte3
= str
[len
- 4];
1709 translate_prev_byte4
= str
[len
- 5];
1715 while (i
!= infinity
)
1717 unsigned char *ptr
= base_pat
+ i
;
1723 /* If the byte currently looking at is the last of a
1724 character to check case-equivalents, set CH to that
1725 character. An ASCII character and a non-ASCII character
1726 matching with CHAR_BASE are to be checked. */
1729 if (ASCII_BYTE_P (*ptr
) || ! multibyte
)
1732 && ((pat_end
- ptr
) == 1 || CHAR_HEAD_P (ptr
[1])))
1734 unsigned char *charstart
= ptr
- 1;
1736 while (! (CHAR_HEAD_P (*charstart
)))
1738 ch
= STRING_CHAR (charstart
, ptr
- charstart
+ 1);
1739 if (char_base
!= (ch
& ~0x3F))
1744 j
= (ch
& 0x3F) | 0200;
1749 stride_for_teases
= BM_tab
[j
];
1751 BM_tab
[j
] = dirlen
- i
;
1752 /* A translation table is accompanied by its inverse -- see */
1753 /* comment following downcase_table for details */
1756 int starting_ch
= ch
;
1761 TRANSLATE (ch
, inverse_trt
, ch
);
1763 j
= (ch
& 0x3F) | 0200;
1767 /* For all the characters that map into CH,
1768 set up simple_translate to map the last byte
1770 simple_translate
[j
] = starting_j
;
1771 if (ch
== starting_ch
)
1773 BM_tab
[j
] = dirlen
- i
;
1782 stride_for_teases
= BM_tab
[j
];
1783 BM_tab
[j
] = dirlen
- i
;
1785 /* stride_for_teases tells how much to stride if we get a */
1786 /* match on the far character but are subsequently */
1787 /* disappointed, by recording what the stride would have been */
1788 /* for that character if the last character had been */
1791 infinity
= dirlen
- infinity
;
1792 pos_byte
+= dirlen
- ((direction
> 0) ? direction
: 0);
1793 /* loop invariant - POS_BYTE points at where last char (first
1794 char if reverse) of pattern would align in a possible match. */
1798 unsigned char *tail_end_ptr
;
1800 /* It's been reported that some (broken) compiler thinks that
1801 Boolean expressions in an arithmetic context are unsigned.
1802 Using an explicit ?1:0 prevents this. */
1803 if ((lim_byte
- pos_byte
- ((direction
> 0) ? 1 : 0)) * direction
1805 return (n
* (0 - direction
));
1806 /* First we do the part we can by pointers (maybe nothing) */
1809 limit
= pos_byte
- dirlen
+ direction
;
1812 limit
= BUFFER_CEILING_OF (limit
);
1813 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1814 can take on without hitting edge of buffer or the gap. */
1815 limit
= min (limit
, pos_byte
+ 20000);
1816 limit
= min (limit
, lim_byte
- 1);
1820 limit
= BUFFER_FLOOR_OF (limit
);
1821 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1822 can take on without hitting edge of buffer or the gap. */
1823 limit
= max (limit
, pos_byte
- 20000);
1824 limit
= max (limit
, lim_byte
);
1826 tail_end
= BUFFER_CEILING_OF (pos_byte
) + 1;
1827 tail_end_ptr
= BYTE_POS_ADDR (tail_end
);
1829 if ((limit
- pos_byte
) * direction
> 20)
1833 p_limit
= BYTE_POS_ADDR (limit
);
1834 p2
= (cursor
= BYTE_POS_ADDR (pos_byte
));
1835 /* In this loop, pos + cursor - p2 is the surrogate for pos */
1836 while (1) /* use one cursor setting as long as i can */
1838 if (direction
> 0) /* worth duplicating */
1840 /* Use signed comparison if appropriate
1841 to make cursor+infinity sure to be > p_limit.
1842 Assuming that the buffer lies in a range of addresses
1843 that are all "positive" (as ints) or all "negative",
1844 either kind of comparison will work as long
1845 as we don't step by infinity. So pick the kind
1846 that works when we do step by infinity. */
1847 if ((EMACS_INT
) (p_limit
+ infinity
) > (EMACS_INT
) p_limit
)
1848 while ((EMACS_INT
) cursor
<= (EMACS_INT
) p_limit
)
1849 cursor
+= BM_tab
[*cursor
];
1851 while ((EMACS_UINT
) cursor
<= (EMACS_UINT
) p_limit
)
1852 cursor
+= BM_tab
[*cursor
];
1856 if ((EMACS_INT
) (p_limit
+ infinity
) < (EMACS_INT
) p_limit
)
1857 while ((EMACS_INT
) cursor
>= (EMACS_INT
) p_limit
)
1858 cursor
+= BM_tab
[*cursor
];
1860 while ((EMACS_UINT
) cursor
>= (EMACS_UINT
) p_limit
)
1861 cursor
+= BM_tab
[*cursor
];
1863 /* If you are here, cursor is beyond the end of the searched region. */
1864 /* This can happen if you match on the far character of the pattern, */
1865 /* because the "stride" of that character is infinity, a number able */
1866 /* to throw you well beyond the end of the search. It can also */
1867 /* happen if you fail to match within the permitted region and would */
1868 /* otherwise try a character beyond that region */
1869 if ((cursor
- p_limit
) * direction
<= len_byte
)
1870 break; /* a small overrun is genuine */
1871 cursor
-= infinity
; /* large overrun = hit */
1872 i
= dirlen
- direction
;
1875 while ((i
-= direction
) + direction
!= 0)
1878 cursor
-= direction
;
1879 /* Translate only the last byte of a character. */
1881 || ((cursor
== tail_end_ptr
1882 || CHAR_HEAD_P (cursor
[1]))
1883 && (CHAR_HEAD_P (cursor
[0])
1884 /* Check if this is the last byte of
1885 a translable character. */
1886 || (translate_prev_byte1
== cursor
[-1]
1887 && (CHAR_HEAD_P (translate_prev_byte1
)
1888 || (translate_prev_byte2
== cursor
[-2]
1889 && (CHAR_HEAD_P (translate_prev_byte2
)
1890 || (translate_prev_byte3
== cursor
[-3]))))))))
1891 ch
= simple_translate
[*cursor
];
1900 while ((i
-= direction
) + direction
!= 0)
1902 cursor
-= direction
;
1903 if (pat
[i
] != *cursor
)
1907 cursor
+= dirlen
- i
- direction
; /* fix cursor */
1908 if (i
+ direction
== 0)
1912 cursor
-= direction
;
1914 position
= pos_byte
+ cursor
- p2
+ ((direction
> 0)
1915 ? 1 - len_byte
: 0);
1916 set_search_regs (position
, len_byte
);
1918 if ((n
-= direction
) != 0)
1919 cursor
+= dirlen
; /* to resume search */
1921 return ((direction
> 0)
1922 ? search_regs
.end
[0] : search_regs
.start
[0]);
1925 cursor
+= stride_for_teases
; /* <sigh> we lose - */
1927 pos_byte
+= cursor
- p2
;
1930 /* Now we'll pick up a clump that has to be done the hard */
1931 /* way because it covers a discontinuity */
1933 limit
= ((direction
> 0)
1934 ? BUFFER_CEILING_OF (pos_byte
- dirlen
+ 1)
1935 : BUFFER_FLOOR_OF (pos_byte
- dirlen
- 1));
1936 limit
= ((direction
> 0)
1937 ? min (limit
+ len_byte
, lim_byte
- 1)
1938 : max (limit
- len_byte
, lim_byte
));
1939 /* LIMIT is now the last value POS_BYTE can have
1940 and still be valid for a possible match. */
1943 /* This loop can be coded for space rather than */
1944 /* speed because it will usually run only once. */
1945 /* (the reach is at most len + 21, and typically */
1946 /* does not exceed len) */
1947 while ((limit
- pos_byte
) * direction
>= 0)
1948 pos_byte
+= BM_tab
[FETCH_BYTE (pos_byte
)];
1949 /* now run the same tests to distinguish going off the */
1950 /* end, a match or a phony match. */
1951 if ((pos_byte
- limit
) * direction
<= len_byte
)
1952 break; /* ran off the end */
1953 /* Found what might be a match.
1954 Set POS_BYTE back to last (first if reverse) pos. */
1955 pos_byte
-= infinity
;
1956 i
= dirlen
- direction
;
1957 while ((i
-= direction
) + direction
!= 0)
1961 pos_byte
-= direction
;
1962 ptr
= BYTE_POS_ADDR (pos_byte
);
1963 /* Translate only the last byte of a character. */
1965 || ((ptr
== tail_end_ptr
1966 || CHAR_HEAD_P (ptr
[1]))
1967 && (CHAR_HEAD_P (ptr
[0])
1968 /* Check if this is the last byte of a
1969 translable character. */
1970 || (translate_prev_byte1
== ptr
[-1]
1971 && (CHAR_HEAD_P (translate_prev_byte1
)
1972 || (translate_prev_byte2
== ptr
[-2]
1973 && (CHAR_HEAD_P (translate_prev_byte2
)
1974 || translate_prev_byte3
== ptr
[-3])))))))
1975 ch
= simple_translate
[*ptr
];
1981 /* Above loop has moved POS_BYTE part or all the way
1982 back to the first pos (last pos if reverse).
1983 Set it once again at the last (first if reverse) char. */
1984 pos_byte
+= dirlen
- i
- direction
;
1985 if (i
+ direction
== 0)
1988 pos_byte
-= direction
;
1990 position
= pos_byte
+ ((direction
> 0) ? 1 - len_byte
: 0);
1992 set_search_regs (position
, len_byte
);
1994 if ((n
-= direction
) != 0)
1995 pos_byte
+= dirlen
; /* to resume search */
1997 return ((direction
> 0)
1998 ? search_regs
.end
[0] : search_regs
.start
[0]);
2001 pos_byte
+= stride_for_teases
;
2004 /* We have done one clump. Can we continue? */
2005 if ((lim_byte
- pos_byte
) * direction
< 0)
2006 return ((0 - n
) * direction
);
2008 return BYTE_TO_CHAR (pos_byte
);
2011 /* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
2012 for the overall match just found in the current buffer.
2013 Also clear out the match data for registers 1 and up. */
2016 set_search_regs (beg_byte
, nbytes
)
2017 int beg_byte
, nbytes
;
2021 /* Make sure we have registers in which to store
2022 the match position. */
2023 if (search_regs
.num_regs
== 0)
2025 search_regs
.start
= (regoff_t
*) xmalloc (2 * sizeof (regoff_t
));
2026 search_regs
.end
= (regoff_t
*) xmalloc (2 * sizeof (regoff_t
));
2027 search_regs
.num_regs
= 2;
2030 /* Clear out the other registers. */
2031 for (i
= 1; i
< search_regs
.num_regs
; i
++)
2033 search_regs
.start
[i
] = -1;
2034 search_regs
.end
[i
] = -1;
2037 search_regs
.start
[0] = BYTE_TO_CHAR (beg_byte
);
2038 search_regs
.end
[0] = BYTE_TO_CHAR (beg_byte
+ nbytes
);
2039 XSETBUFFER (last_thing_searched
, current_buffer
);
2042 /* Given a string of words separated by word delimiters,
2043 compute a regexp that matches those exact words
2044 separated by arbitrary punctuation. */
2050 register unsigned char *p
, *o
;
2051 register int i
, i_byte
, len
, punct_count
= 0, word_count
= 0;
2056 CHECK_STRING (string
);
2058 len
= SCHARS (string
);
2060 for (i
= 0, i_byte
= 0; i
< len
; )
2064 FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c
, string
, i
, i_byte
);
2066 if (SYNTAX (c
) != Sword
)
2069 if (i
> 0 && SYNTAX (prev_c
) == Sword
)
2076 if (SYNTAX (prev_c
) == Sword
)
2079 return empty_unibyte_string
;
2081 adjust
= - punct_count
+ 5 * (word_count
- 1) + 4;
2082 if (STRING_MULTIBYTE (string
))
2083 val
= make_uninit_multibyte_string (len
+ adjust
,
2087 val
= make_uninit_string (len
+ adjust
);
2094 for (i
= 0, i_byte
= 0; i
< len
; )
2097 int i_byte_orig
= i_byte
;
2099 FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c
, string
, i
, i_byte
);
2101 if (SYNTAX (c
) == Sword
)
2103 bcopy (SDATA (string
) + i_byte_orig
, o
,
2104 i_byte
- i_byte_orig
);
2105 o
+= i_byte
- i_byte_orig
;
2107 else if (i
> 0 && SYNTAX (prev_c
) == Sword
&& --word_count
)
2125 DEFUN ("search-backward", Fsearch_backward
, Ssearch_backward
, 1, 4,
2126 "MSearch backward: ",
2127 doc
: /* Search backward from point for STRING.
2128 Set point to the beginning of the occurrence found, and return point.
2129 An optional second argument bounds the search; it is a buffer position.
2130 The match found must not extend before that position.
2131 Optional third argument, if t, means if fail just return nil (no error).
2132 If not nil and not t, position at limit of search and return nil.
2133 Optional fourth argument is repeat count--search for successive occurrences.
2135 Search case-sensitivity is determined by the value of the variable
2136 `case-fold-search', which see.
2138 See also the functions `match-beginning', `match-end' and `replace-match'. */)
2139 (string
, bound
, noerror
, count
)
2140 Lisp_Object string
, bound
, noerror
, count
;
2142 return search_command (string
, bound
, noerror
, count
, -1, 0, 0);
2145 DEFUN ("search-forward", Fsearch_forward
, Ssearch_forward
, 1, 4, "MSearch: ",
2146 doc
: /* Search forward from point for STRING.
2147 Set point to the end of the occurrence found, and return point.
2148 An optional second argument bounds the search; it is a buffer position.
2149 The match found must not extend after that position. A value of nil is
2150 equivalent to (point-max).
2151 Optional third argument, if t, means if fail just return nil (no error).
2152 If not nil and not t, move to limit of search and return nil.
2153 Optional fourth argument is repeat count--search for successive occurrences.
2155 Search case-sensitivity is determined by the value of the variable
2156 `case-fold-search', which see.
2158 See also the functions `match-beginning', `match-end' and `replace-match'. */)
2159 (string
, bound
, noerror
, count
)
2160 Lisp_Object string
, bound
, noerror
, count
;
2162 return search_command (string
, bound
, noerror
, count
, 1, 0, 0);
2165 DEFUN ("word-search-backward", Fword_search_backward
, Sword_search_backward
, 1, 4,
2166 "sWord search backward: ",
2167 doc
: /* Search backward from point for STRING, ignoring differences in punctuation.
2168 Set point to the beginning of the occurrence found, and return point.
2169 An optional second argument bounds the search; it is a buffer position.
2170 The match found must not extend before that position.
2171 Optional third argument, if t, means if fail just return nil (no error).
2172 If not nil and not t, move to limit of search and return nil.
2173 Optional fourth argument is repeat count--search for successive occurrences. */)
2174 (string
, bound
, noerror
, count
)
2175 Lisp_Object string
, bound
, noerror
, count
;
2177 return search_command (wordify (string
), bound
, noerror
, count
, -1, 1, 0);
2180 DEFUN ("word-search-forward", Fword_search_forward
, Sword_search_forward
, 1, 4,
2182 doc
: /* Search forward from point for STRING, ignoring differences in punctuation.
2183 Set point to the end of the occurrence found, and return point.
2184 An optional second argument bounds the search; it is a buffer position.
2185 The match found must not extend after that position.
2186 Optional third argument, if t, means if fail just return nil (no error).
2187 If not nil and not t, move to limit of search and return nil.
2188 Optional fourth argument is repeat count--search for successive occurrences. */)
2189 (string
, bound
, noerror
, count
)
2190 Lisp_Object string
, bound
, noerror
, count
;
2192 return search_command (wordify (string
), bound
, noerror
, count
, 1, 1, 0);
2195 DEFUN ("re-search-backward", Fre_search_backward
, Sre_search_backward
, 1, 4,
2196 "sRE search backward: ",
2197 doc
: /* Search backward from point for match for regular expression REGEXP.
2198 Set point to the beginning of the match, and return point.
2199 The match found is the one starting last in the buffer
2200 and yet ending before the origin of the search.
2201 An optional second argument bounds the search; it is a buffer position.
2202 The match found must start at or after that position.
2203 Optional third argument, if t, means if fail just return nil (no error).
2204 If not nil and not t, move to limit of search and return nil.
2205 Optional fourth argument is repeat count--search for successive occurrences.
2206 See also the functions `match-beginning', `match-end', `match-string',
2207 and `replace-match'. */)
2208 (regexp
, bound
, noerror
, count
)
2209 Lisp_Object regexp
, bound
, noerror
, count
;
2211 return search_command (regexp
, bound
, noerror
, count
, -1, 1, 0);
2214 DEFUN ("re-search-forward", Fre_search_forward
, Sre_search_forward
, 1, 4,
2216 doc
: /* Search forward from point for regular expression REGEXP.
2217 Set point to the end of the occurrence found, and return point.
2218 An optional second argument bounds the search; it is a buffer position.
2219 The match found must not extend after that position.
2220 Optional third argument, if t, means if fail just return nil (no error).
2221 If not nil and not t, move to limit of search and return nil.
2222 Optional fourth argument is repeat count--search for successive occurrences.
2223 See also the functions `match-beginning', `match-end', `match-string',
2224 and `replace-match'. */)
2225 (regexp
, bound
, noerror
, count
)
2226 Lisp_Object regexp
, bound
, noerror
, count
;
2228 return search_command (regexp
, bound
, noerror
, count
, 1, 1, 0);
2231 DEFUN ("posix-search-backward", Fposix_search_backward
, Sposix_search_backward
, 1, 4,
2232 "sPosix search backward: ",
2233 doc
: /* Search backward from point for match for regular expression REGEXP.
2234 Find the longest match in accord with Posix regular expression rules.
2235 Set point to the beginning of the match, and return point.
2236 The match found is the one starting last in the buffer
2237 and yet ending before the origin of the search.
2238 An optional second argument bounds the search; it is a buffer position.
2239 The match found must start at or after that position.
2240 Optional third argument, if t, means if fail just return nil (no error).
2241 If not nil and not t, move to limit of search and return nil.
2242 Optional fourth argument is repeat count--search for successive occurrences.
2243 See also the functions `match-beginning', `match-end', `match-string',
2244 and `replace-match'. */)
2245 (regexp
, bound
, noerror
, count
)
2246 Lisp_Object regexp
, bound
, noerror
, count
;
2248 return search_command (regexp
, bound
, noerror
, count
, -1, 1, 1);
2251 DEFUN ("posix-search-forward", Fposix_search_forward
, Sposix_search_forward
, 1, 4,
2253 doc
: /* Search forward from point for regular expression REGEXP.
2254 Find the longest match in accord with Posix regular expression rules.
2255 Set point to the end of the occurrence found, and return point.
2256 An optional second argument bounds the search; it is a buffer position.
2257 The match found must not extend after that position.
2258 Optional third argument, if t, means if fail just return nil (no error).
2259 If not nil and not t, move to limit of search and return nil.
2260 Optional fourth argument is repeat count--search for successive occurrences.
2261 See also the functions `match-beginning', `match-end', `match-string',
2262 and `replace-match'. */)
2263 (regexp
, bound
, noerror
, count
)
2264 Lisp_Object regexp
, bound
, noerror
, count
;
2266 return search_command (regexp
, bound
, noerror
, count
, 1, 1, 1);
2269 DEFUN ("replace-match", Freplace_match
, Sreplace_match
, 1, 5, 0,
2270 doc
: /* Replace text matched by last search with NEWTEXT.
2271 Leave point at the end of the replacement text.
2273 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
2274 Otherwise maybe capitalize the whole text, or maybe just word initials,
2275 based on the replaced text.
2276 If the replaced text has only capital letters
2277 and has at least one multiletter word, convert NEWTEXT to all caps.
2278 Otherwise if all words are capitalized in the replaced text,
2279 capitalize each word in NEWTEXT.
2281 If third arg LITERAL is non-nil, insert NEWTEXT literally.
2282 Otherwise treat `\\' as special:
2283 `\\&' in NEWTEXT means substitute original matched text.
2284 `\\N' means substitute what matched the Nth `\\(...\\)'.
2285 If Nth parens didn't match, substitute nothing.
2286 `\\\\' means insert one `\\'.
2287 Case conversion does not apply to these substitutions.
2289 FIXEDCASE and LITERAL are optional arguments.
2291 The optional fourth argument STRING can be a string to modify.
2292 This is meaningful when the previous match was done against STRING,
2293 using `string-match'. When used this way, `replace-match'
2294 creates and returns a new string made by copying STRING and replacing
2295 the part of STRING that was matched.
2297 The optional fifth argument SUBEXP specifies a subexpression;
2298 it says to replace just that subexpression with NEWTEXT,
2299 rather than replacing the entire matched text.
2300 This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
2301 `\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
2302 NEWTEXT in place of subexp N.
2303 This is useful only after a regular expression search or match,
2304 since only regular expressions have distinguished subexpressions. */)
2305 (newtext
, fixedcase
, literal
, string
, subexp
)
2306 Lisp_Object newtext
, fixedcase
, literal
, string
, subexp
;
2308 enum { nochange
, all_caps
, cap_initial
} case_action
;
2309 register int pos
, pos_byte
;
2310 int some_multiletter_word
;
2313 int some_nonuppercase_initial
;
2314 register int c
, prevc
;
2316 int opoint
, newpoint
;
2318 CHECK_STRING (newtext
);
2320 if (! NILP (string
))
2321 CHECK_STRING (string
);
2323 case_action
= nochange
; /* We tried an initialization */
2324 /* but some C compilers blew it */
2326 if (search_regs
.num_regs
<= 0)
2327 error ("`replace-match' called before any match found");
2333 CHECK_NUMBER (subexp
);
2334 sub
= XINT (subexp
);
2335 if (sub
< 0 || sub
>= search_regs
.num_regs
)
2336 args_out_of_range (subexp
, make_number (search_regs
.num_regs
));
2341 if (search_regs
.start
[sub
] < BEGV
2342 || search_regs
.start
[sub
] > search_regs
.end
[sub
]
2343 || search_regs
.end
[sub
] > ZV
)
2344 args_out_of_range (make_number (search_regs
.start
[sub
]),
2345 make_number (search_regs
.end
[sub
]));
2349 if (search_regs
.start
[sub
] < 0
2350 || search_regs
.start
[sub
] > search_regs
.end
[sub
]
2351 || search_regs
.end
[sub
] > SCHARS (string
))
2352 args_out_of_range (make_number (search_regs
.start
[sub
]),
2353 make_number (search_regs
.end
[sub
]));
2356 if (NILP (fixedcase
))
2358 /* Decide how to casify by examining the matched text. */
2361 pos
= search_regs
.start
[sub
];
2362 last
= search_regs
.end
[sub
];
2365 pos_byte
= CHAR_TO_BYTE (pos
);
2367 pos_byte
= string_char_to_byte (string
, pos
);
2370 case_action
= all_caps
;
2372 /* some_multiletter_word is set nonzero if any original word
2373 is more than one letter long. */
2374 some_multiletter_word
= 0;
2376 some_nonuppercase_initial
= 0;
2383 c
= FETCH_CHAR_AS_MULTIBYTE (pos_byte
);
2384 INC_BOTH (pos
, pos_byte
);
2387 FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c
, string
, pos
, pos_byte
);
2391 /* Cannot be all caps if any original char is lower case */
2394 if (SYNTAX (prevc
) != Sword
)
2395 some_nonuppercase_initial
= 1;
2397 some_multiletter_word
= 1;
2399 else if (UPPERCASEP (c
))
2402 if (SYNTAX (prevc
) != Sword
)
2405 some_multiletter_word
= 1;
2409 /* If the initial is a caseless word constituent,
2410 treat that like a lowercase initial. */
2411 if (SYNTAX (prevc
) != Sword
)
2412 some_nonuppercase_initial
= 1;
2418 /* Convert to all caps if the old text is all caps
2419 and has at least one multiletter word. */
2420 if (! some_lowercase
&& some_multiletter_word
)
2421 case_action
= all_caps
;
2422 /* Capitalize each word, if the old text has all capitalized words. */
2423 else if (!some_nonuppercase_initial
&& some_multiletter_word
)
2424 case_action
= cap_initial
;
2425 else if (!some_nonuppercase_initial
&& some_uppercase
)
2426 /* Should x -> yz, operating on X, give Yz or YZ?
2427 We'll assume the latter. */
2428 case_action
= all_caps
;
2430 case_action
= nochange
;
2433 /* Do replacement in a string. */
2436 Lisp_Object before
, after
;
2438 before
= Fsubstring (string
, make_number (0),
2439 make_number (search_regs
.start
[sub
]));
2440 after
= Fsubstring (string
, make_number (search_regs
.end
[sub
]), Qnil
);
2442 /* Substitute parts of the match into NEWTEXT
2447 int lastpos_byte
= 0;
2448 /* We build up the substituted string in ACCUM. */
2451 int length
= SBYTES (newtext
);
2455 for (pos_byte
= 0, pos
= 0; pos_byte
< length
;)
2459 int delbackslash
= 0;
2461 FETCH_STRING_CHAR_ADVANCE (c
, newtext
, pos
, pos_byte
);
2465 FETCH_STRING_CHAR_ADVANCE (c
, newtext
, pos
, pos_byte
);
2469 substart
= search_regs
.start
[sub
];
2470 subend
= search_regs
.end
[sub
];
2472 else if (c
>= '1' && c
<= '9')
2474 if (search_regs
.start
[c
- '0'] >= 0
2475 && c
<= search_regs
.num_regs
+ '0')
2477 substart
= search_regs
.start
[c
- '0'];
2478 subend
= search_regs
.end
[c
- '0'];
2482 /* If that subexp did not match,
2483 replace \\N with nothing. */
2491 error ("Invalid use of `\\' in replacement text");
2495 if (pos
- 2 != lastpos
)
2496 middle
= substring_both (newtext
, lastpos
,
2498 pos
- 2, pos_byte
- 2);
2501 accum
= concat3 (accum
, middle
,
2503 make_number (substart
),
2504 make_number (subend
)));
2506 lastpos_byte
= pos_byte
;
2508 else if (delbackslash
)
2510 middle
= substring_both (newtext
, lastpos
,
2512 pos
- 1, pos_byte
- 1);
2514 accum
= concat2 (accum
, middle
);
2516 lastpos_byte
= pos_byte
;
2521 middle
= substring_both (newtext
, lastpos
,
2527 newtext
= concat2 (accum
, middle
);
2530 /* Do case substitution in NEWTEXT if desired. */
2531 if (case_action
== all_caps
)
2532 newtext
= Fupcase (newtext
);
2533 else if (case_action
== cap_initial
)
2534 newtext
= Fupcase_initials (newtext
);
2536 return concat3 (before
, newtext
, after
);
2539 /* Record point, then move (quietly) to the start of the match. */
2540 if (PT
>= search_regs
.end
[sub
])
2542 else if (PT
> search_regs
.start
[sub
])
2543 opoint
= search_regs
.end
[sub
] - ZV
;
2547 /* If we want non-literal replacement,
2548 perform substitution on the replacement string. */
2551 int length
= SBYTES (newtext
);
2552 unsigned char *substed
;
2553 int substed_alloc_size
, substed_len
;
2554 int buf_multibyte
= !NILP (current_buffer
->enable_multibyte_characters
);
2555 int str_multibyte
= STRING_MULTIBYTE (newtext
);
2556 Lisp_Object rev_tbl
;
2557 int really_changed
= 0;
2561 substed_alloc_size
= length
* 2 + 100;
2562 substed
= (unsigned char *) xmalloc (substed_alloc_size
+ 1);
2565 /* Go thru NEWTEXT, producing the actual text to insert in
2566 SUBSTED while adjusting multibyteness to that of the current
2569 for (pos_byte
= 0, pos
= 0; pos_byte
< length
;)
2571 unsigned char str
[MAX_MULTIBYTE_LENGTH
];
2572 unsigned char *add_stuff
= NULL
;
2578 FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c
, newtext
, pos
, pos_byte
);
2580 c
= multibyte_char_to_unibyte (c
, rev_tbl
);
2584 /* Note that we don't have to increment POS. */
2585 c
= SREF (newtext
, pos_byte
++);
2587 c
= unibyte_char_to_multibyte (c
);
2590 /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2591 or set IDX to a match index, which means put that part
2592 of the buffer text into SUBSTED. */
2600 FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c
, newtext
,
2602 if (!buf_multibyte
&& !ASCII_CHAR_P (c
))
2603 c
= multibyte_char_to_unibyte (c
, rev_tbl
);
2607 c
= SREF (newtext
, pos_byte
++);
2609 c
= unibyte_char_to_multibyte (c
);
2614 else if (c
>= '1' && c
<= '9' && c
<= search_regs
.num_regs
+ '0')
2616 if (search_regs
.start
[c
- '0'] >= 1)
2620 add_len
= 1, add_stuff
= "\\";
2624 error ("Invalid use of `\\' in replacement text");
2629 add_len
= CHAR_STRING (c
, str
);
2633 /* If we want to copy part of a previous match,
2634 set up ADD_STUFF and ADD_LEN to point to it. */
2637 int begbyte
= CHAR_TO_BYTE (search_regs
.start
[idx
]);
2638 add_len
= CHAR_TO_BYTE (search_regs
.end
[idx
]) - begbyte
;
2639 if (search_regs
.start
[idx
] < GPT
&& GPT
< search_regs
.end
[idx
])
2640 move_gap (search_regs
.start
[idx
]);
2641 add_stuff
= BYTE_POS_ADDR (begbyte
);
2644 /* Now the stuff we want to add to SUBSTED
2645 is invariably ADD_LEN bytes starting at ADD_STUFF. */
2647 /* Make sure SUBSTED is big enough. */
2648 if (substed_len
+ add_len
>= substed_alloc_size
)
2650 substed_alloc_size
= substed_len
+ add_len
+ 500;
2651 substed
= (unsigned char *) xrealloc (substed
,
2652 substed_alloc_size
+ 1);
2655 /* Now add to the end of SUBSTED. */
2658 bcopy (add_stuff
, substed
+ substed_len
, add_len
);
2659 substed_len
+= add_len
;
2667 int nchars
= multibyte_chars_in_text (substed
, substed_len
);
2669 newtext
= make_multibyte_string (substed
, nchars
, substed_len
);
2672 newtext
= make_unibyte_string (substed
, substed_len
);
2677 /* Replace the old text with the new in the cleanest possible way. */
2678 replace_range (search_regs
.start
[sub
], search_regs
.end
[sub
],
2680 newpoint
= search_regs
.start
[sub
] + SCHARS (newtext
);
2682 if (case_action
== all_caps
)
2683 Fupcase_region (make_number (search_regs
.start
[sub
]),
2684 make_number (newpoint
));
2685 else if (case_action
== cap_initial
)
2686 Fupcase_initials_region (make_number (search_regs
.start
[sub
]),
2687 make_number (newpoint
));
2689 /* Adjust search data for this change. */
2691 int oldend
= search_regs
.end
[sub
];
2692 int oldstart
= search_regs
.start
[sub
];
2693 int change
= newpoint
- search_regs
.end
[sub
];
2696 for (i
= 0; i
< search_regs
.num_regs
; i
++)
2698 if (search_regs
.start
[i
] >= oldend
)
2699 search_regs
.start
[i
] += change
;
2700 else if (search_regs
.start
[i
] > oldstart
)
2701 search_regs
.start
[i
] = oldstart
;
2702 if (search_regs
.end
[i
] >= oldend
)
2703 search_regs
.end
[i
] += change
;
2704 else if (search_regs
.end
[i
] > oldstart
)
2705 search_regs
.end
[i
] = oldstart
;
2709 /* Put point back where it was in the text. */
2711 TEMP_SET_PT (opoint
+ ZV
);
2713 TEMP_SET_PT (opoint
);
2715 /* Now move point "officially" to the start of the inserted replacement. */
2716 move_if_not_intangible (newpoint
);
2722 match_limit (num
, beginningp
)
2731 args_out_of_range (num
, make_number (0));
2732 if (search_regs
.num_regs
<= 0)
2733 error ("No match data, because no search succeeded");
2734 if (n
>= search_regs
.num_regs
2735 || search_regs
.start
[n
] < 0)
2737 return (make_number ((beginningp
) ? search_regs
.start
[n
]
2738 : search_regs
.end
[n
]));
2741 DEFUN ("match-beginning", Fmatch_beginning
, Smatch_beginning
, 1, 1, 0,
2742 doc
: /* Return position of start of text matched by last search.
2743 SUBEXP, a number, specifies which parenthesized expression in the last
2745 Value is nil if SUBEXPth pair didn't match, or there were less than
2747 Zero means the entire text matched by the whole regexp or whole string. */)
2751 return match_limit (subexp
, 1);
2754 DEFUN ("match-end", Fmatch_end
, Smatch_end
, 1, 1, 0,
2755 doc
: /* Return position of end of text matched by last search.
2756 SUBEXP, a number, specifies which parenthesized expression in the last
2758 Value is nil if SUBEXPth pair didn't match, or there were less than
2760 Zero means the entire text matched by the whole regexp or whole string. */)
2764 return match_limit (subexp
, 0);
2767 DEFUN ("match-data", Fmatch_data
, Smatch_data
, 0, 3, 0,
2768 doc
: /* Return a list containing all info on what the last search matched.
2769 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2770 All the elements are markers or nil (nil if the Nth pair didn't match)
2771 if the last match was on a buffer; integers or nil if a string was matched.
2772 Use `store-match-data' to reinstate the data in this list.
2774 If INTEGERS (the optional first argument) is non-nil, always use
2775 integers \(rather than markers) to represent buffer positions. In
2776 this case, and if the last match was in a buffer, the buffer will get
2777 stored as one additional element at the end of the list.
2779 If REUSE is a list, reuse it as part of the value. If REUSE is long
2780 enough to hold all the values, and if INTEGERS is non-nil, no consing
2783 If optional third arg RESEAT is non-nil, any previous markers on the
2784 REUSE list will be modified to point to nowhere.
2786 Return value is undefined if the last search failed. */)
2787 (integers
, reuse
, reseat
)
2788 Lisp_Object integers
, reuse
, reseat
;
2790 Lisp_Object tail
, prev
;
2795 for (tail
= reuse
; CONSP (tail
); tail
= XCDR (tail
))
2796 if (MARKERP (XCAR (tail
)))
2798 unchain_marker (XMARKER (XCAR (tail
)));
2799 XSETCAR (tail
, Qnil
);
2802 if (NILP (last_thing_searched
))
2807 data
= (Lisp_Object
*) alloca ((2 * search_regs
.num_regs
+ 1)
2808 * sizeof (Lisp_Object
));
2811 for (i
= 0; i
< search_regs
.num_regs
; i
++)
2813 int start
= search_regs
.start
[i
];
2816 if (EQ (last_thing_searched
, Qt
)
2817 || ! NILP (integers
))
2819 XSETFASTINT (data
[2 * i
], start
);
2820 XSETFASTINT (data
[2 * i
+ 1], search_regs
.end
[i
]);
2822 else if (BUFFERP (last_thing_searched
))
2824 data
[2 * i
] = Fmake_marker ();
2825 Fset_marker (data
[2 * i
],
2826 make_number (start
),
2827 last_thing_searched
);
2828 data
[2 * i
+ 1] = Fmake_marker ();
2829 Fset_marker (data
[2 * i
+ 1],
2830 make_number (search_regs
.end
[i
]),
2831 last_thing_searched
);
2834 /* last_thing_searched must always be Qt, a buffer, or Qnil. */
2840 data
[2 * i
] = data
[2 * i
+ 1] = Qnil
;
2843 if (BUFFERP (last_thing_searched
) && !NILP (integers
))
2845 data
[len
] = last_thing_searched
;
2849 /* If REUSE is not usable, cons up the values and return them. */
2850 if (! CONSP (reuse
))
2851 return Flist (len
, data
);
2853 /* If REUSE is a list, store as many value elements as will fit
2854 into the elements of REUSE. */
2855 for (i
= 0, tail
= reuse
; CONSP (tail
);
2856 i
++, tail
= XCDR (tail
))
2859 XSETCAR (tail
, data
[i
]);
2861 XSETCAR (tail
, Qnil
);
2865 /* If we couldn't fit all value elements into REUSE,
2866 cons up the rest of them and add them to the end of REUSE. */
2868 XSETCDR (prev
, Flist (len
- i
, data
+ i
));
2873 /* Internal usage only:
2874 If RESEAT is `evaporate', put the markers back on the free list
2875 immediately. No other references to the markers must exist in this case,
2876 so it is used only internally on the unwind stack and save-match-data from
2879 DEFUN ("set-match-data", Fset_match_data
, Sset_match_data
, 1, 2, 0,
2880 doc
: /* Set internal data on last search match from elements of LIST.
2881 LIST should have been created by calling `match-data' previously.
2883 If optional arg RESEAT is non-nil, make markers on LIST point nowhere. */)
2885 register Lisp_Object list
, reseat
;
2888 register Lisp_Object marker
;
2890 if (running_asynch_code
)
2891 save_search_regs ();
2895 /* Unless we find a marker with a buffer or an explicit buffer
2896 in LIST, assume that this match data came from a string. */
2897 last_thing_searched
= Qt
;
2899 /* Allocate registers if they don't already exist. */
2901 int length
= XFASTINT (Flength (list
)) / 2;
2903 if (length
> search_regs
.num_regs
)
2905 if (search_regs
.num_regs
== 0)
2908 = (regoff_t
*) xmalloc (length
* sizeof (regoff_t
));
2910 = (regoff_t
*) xmalloc (length
* sizeof (regoff_t
));
2915 = (regoff_t
*) xrealloc (search_regs
.start
,
2916 length
* sizeof (regoff_t
));
2918 = (regoff_t
*) xrealloc (search_regs
.end
,
2919 length
* sizeof (regoff_t
));
2922 for (i
= search_regs
.num_regs
; i
< length
; i
++)
2923 search_regs
.start
[i
] = -1;
2925 search_regs
.num_regs
= length
;
2928 for (i
= 0; CONSP (list
); i
++)
2930 marker
= XCAR (list
);
2931 if (BUFFERP (marker
))
2933 last_thing_searched
= marker
;
2940 search_regs
.start
[i
] = -1;
2949 if (MARKERP (marker
))
2951 if (XMARKER (marker
)->buffer
== 0)
2952 XSETFASTINT (marker
, 0);
2954 XSETBUFFER (last_thing_searched
, XMARKER (marker
)->buffer
);
2957 CHECK_NUMBER_COERCE_MARKER (marker
);
2958 from
= XINT (marker
);
2960 if (!NILP (reseat
) && MARKERP (m
))
2962 if (EQ (reseat
, Qevaporate
))
2965 unchain_marker (XMARKER (m
));
2966 XSETCAR (list
, Qnil
);
2969 if ((list
= XCDR (list
), !CONSP (list
)))
2972 m
= marker
= XCAR (list
);
2974 if (MARKERP (marker
) && XMARKER (marker
)->buffer
== 0)
2975 XSETFASTINT (marker
, 0);
2977 CHECK_NUMBER_COERCE_MARKER (marker
);
2978 search_regs
.start
[i
] = from
;
2979 search_regs
.end
[i
] = XINT (marker
);
2981 if (!NILP (reseat
) && MARKERP (m
))
2983 if (EQ (reseat
, Qevaporate
))
2986 unchain_marker (XMARKER (m
));
2987 XSETCAR (list
, Qnil
);
2993 for (; i
< search_regs
.num_regs
; i
++)
2994 search_regs
.start
[i
] = -1;
3000 /* If non-zero the match data have been saved in saved_search_regs
3001 during the execution of a sentinel or filter. */
3002 static int search_regs_saved
;
3003 static struct re_registers saved_search_regs
;
3004 static Lisp_Object saved_last_thing_searched
;
3006 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
3007 if asynchronous code (filter or sentinel) is running. */
3011 if (!search_regs_saved
)
3013 saved_search_regs
.num_regs
= search_regs
.num_regs
;
3014 saved_search_regs
.start
= search_regs
.start
;
3015 saved_search_regs
.end
= search_regs
.end
;
3016 saved_last_thing_searched
= last_thing_searched
;
3017 last_thing_searched
= Qnil
;
3018 search_regs
.num_regs
= 0;
3019 search_regs
.start
= 0;
3020 search_regs
.end
= 0;
3022 search_regs_saved
= 1;
3026 /* Called upon exit from filters and sentinels. */
3028 restore_search_regs ()
3030 if (search_regs_saved
)
3032 if (search_regs
.num_regs
> 0)
3034 xfree (search_regs
.start
);
3035 xfree (search_regs
.end
);
3037 search_regs
.num_regs
= saved_search_regs
.num_regs
;
3038 search_regs
.start
= saved_search_regs
.start
;
3039 search_regs
.end
= saved_search_regs
.end
;
3040 last_thing_searched
= saved_last_thing_searched
;
3041 saved_last_thing_searched
= Qnil
;
3042 search_regs_saved
= 0;
3047 unwind_set_match_data (list
)
3050 /* It is safe to free (evaporate) the markers immediately. */
3051 return Fset_match_data (list
, Qevaporate
);
3054 /* Called to unwind protect the match data. */
3056 record_unwind_save_match_data ()
3058 record_unwind_protect (unwind_set_match_data
,
3059 Fmatch_data (Qnil
, Qnil
, Qnil
));
3062 /* Quote a string to inactivate reg-expr chars */
3064 DEFUN ("regexp-quote", Fregexp_quote
, Sregexp_quote
, 1, 1, 0,
3065 doc
: /* Return a regexp string which matches exactly STRING and nothing else. */)
3069 register unsigned char *in
, *out
, *end
;
3070 register unsigned char *temp
;
3071 int backslashes_added
= 0;
3073 CHECK_STRING (string
);
3075 temp
= (unsigned char *) alloca (SBYTES (string
) * 2);
3077 /* Now copy the data into the new string, inserting escapes. */
3079 in
= SDATA (string
);
3080 end
= in
+ SBYTES (string
);
3083 for (; in
!= end
; in
++)
3086 || *in
== '*' || *in
== '.' || *in
== '\\'
3087 || *in
== '?' || *in
== '+'
3088 || *in
== '^' || *in
== '$')
3089 *out
++ = '\\', backslashes_added
++;
3093 return make_specified_string (temp
,
3094 SCHARS (string
) + backslashes_added
,
3096 STRING_MULTIBYTE (string
));
3104 for (i
= 0; i
< REGEXP_CACHE_SIZE
; ++i
)
3106 searchbufs
[i
].buf
.allocated
= 100;
3107 searchbufs
[i
].buf
.buffer
= (unsigned char *) xmalloc (100);
3108 searchbufs
[i
].buf
.fastmap
= searchbufs
[i
].fastmap
;
3109 searchbufs
[i
].regexp
= Qnil
;
3110 searchbufs
[i
].whitespace_regexp
= Qnil
;
3111 searchbufs
[i
].syntax_table
= Qnil
;
3112 staticpro (&searchbufs
[i
].regexp
);
3113 staticpro (&searchbufs
[i
].whitespace_regexp
);
3114 staticpro (&searchbufs
[i
].syntax_table
);
3115 searchbufs
[i
].next
= (i
== REGEXP_CACHE_SIZE
-1 ? 0 : &searchbufs
[i
+1]);
3117 searchbuf_head
= &searchbufs
[0];
3119 Qsearch_failed
= intern ("search-failed");
3120 staticpro (&Qsearch_failed
);
3121 Qinvalid_regexp
= intern ("invalid-regexp");
3122 staticpro (&Qinvalid_regexp
);
3124 Fput (Qsearch_failed
, Qerror_conditions
,
3125 Fcons (Qsearch_failed
, Fcons (Qerror
, Qnil
)));
3126 Fput (Qsearch_failed
, Qerror_message
,
3127 build_string ("Search failed"));
3129 Fput (Qinvalid_regexp
, Qerror_conditions
,
3130 Fcons (Qinvalid_regexp
, Fcons (Qerror
, Qnil
)));
3131 Fput (Qinvalid_regexp
, Qerror_message
,
3132 build_string ("Invalid regexp"));
3134 last_thing_searched
= Qnil
;
3135 staticpro (&last_thing_searched
);
3137 saved_last_thing_searched
= Qnil
;
3138 staticpro (&saved_last_thing_searched
);
3140 DEFVAR_LISP ("search-spaces-regexp", &Vsearch_spaces_regexp
,
3141 doc
: /* Regexp to substitute for bunches of spaces in regexp search.
3142 Some commands use this for user-specified regexps.
3143 Spaces that occur inside character classes or repetition operators
3144 or other such regexp constructs are not replaced with this.
3145 A value of nil (which is the normal value) means treat spaces literally. */);
3146 Vsearch_spaces_regexp
= Qnil
;
3148 defsubr (&Slooking_at
);
3149 defsubr (&Sposix_looking_at
);
3150 defsubr (&Sstring_match
);
3151 defsubr (&Sposix_string_match
);
3152 defsubr (&Ssearch_forward
);
3153 defsubr (&Ssearch_backward
);
3154 defsubr (&Sword_search_forward
);
3155 defsubr (&Sword_search_backward
);
3156 defsubr (&Sre_search_forward
);
3157 defsubr (&Sre_search_backward
);
3158 defsubr (&Sposix_search_forward
);
3159 defsubr (&Sposix_search_backward
);
3160 defsubr (&Sreplace_match
);
3161 defsubr (&Smatch_beginning
);
3162 defsubr (&Smatch_end
);
3163 defsubr (&Smatch_data
);
3164 defsubr (&Sset_match_data
);
3165 defsubr (&Sregexp_quote
);
3168 /* arch-tag: a6059d79-0552-4f14-a2cb-d379a4e3c78f
3169 (do not change this comment) */