src/search.c

   1 /* String search routines for GNU Emacs.
   2
   3 Copyright (C) 1985-1987, 1993-1994, 1997-1999, 2001-2012
   4   Free Software Foundation, Inc.
   5
   6 This file is part of GNU Emacs.
   7
   8 GNU Emacs is free software: you can redistribute it and/or modify
   9 it under the terms of the GNU General Public License as published by
  10 the Free Software Foundation, either version 3 of the License, or
  11 (at your option) any later version.
  12
  13 GNU Emacs is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
  20
  21
  22 #include <config.h>
  23 #include <setjmp.h>
  24 #include "lisp.h"
  25 #include "syntax.h"
  26 #include "category.h"
  27 #include "character.h"
  28 #include "buffer.h"
  29 #include "charset.h"
  30 #include "region-cache.h"
  31 #include "commands.h"
  32 #include "blockinput.h"
  33 #include "intervals.h"
  34
  35 #include <sys/types.h>
  36 #include "regex.h"
  37
  38 #define REGEXP_CACHE_SIZE 20
  39
  40 /* If the regexp is non-nil, then the buffer contains the compiled form
  41    of that regexp, suitable for searching.  */
  42 struct regexp_cache
  43 {
  44   struct regexp_cache *next;
  45   Lisp_Object regexp, whitespace_regexp;
  46   /* Syntax table for which the regexp applies.  We need this because
  47      of character classes.  If this is t, then the compiled pattern is valid
  48      for any syntax-table.  */
  49   Lisp_Object syntax_table;
  50   struct re_pattern_buffer buf;
  51   char fastmap[0400];
  52   /* Nonzero means regexp was compiled to do full POSIX backtracking.  */
  53   char posix;
  54 };
  55
  56 /* The instances of that struct.  */
  57 static struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
  58
  59 /* The head of the linked list; points to the most recently used buffer.  */
  60 static struct regexp_cache *searchbuf_head;
  61
  62
  63 /* Every call to re_match, etc., must pass &search_regs as the regs
  64    argument unless you can show it is unnecessary (i.e., if re_match
  65    is certainly going to be called again before region-around-match
  66    can be called).
  67
  68    Since the registers are now dynamically allocated, we need to make
  69    sure not to refer to the Nth register before checking that it has
  70    been allocated by checking search_regs.num_regs.
  71
  72    The regex code keeps track of whether it has allocated the search
  73    buffer using bits in the re_pattern_buffer.  This means that whenever
  74    you compile a new pattern, it completely forgets whether it has
  75    allocated any registers, and will allocate new registers the next
  76    time you call a searching or matching function.  Therefore, we need
  77    to call re_set_registers after compiling a new pattern or after
  78    setting the match registers, so that the regex functions will be
  79    able to free or re-allocate it properly.  */
  80 static struct re_registers search_regs;
  81
  82 /* The buffer in which the last search was performed, or
  83    Qt if the last search was done in a string;
  84    Qnil if no searching has been done yet.  */
  85 static Lisp_Object last_thing_searched;
  86
  87 /* Error condition signaled when regexp compile_pattern fails.  */
  88 static Lisp_Object Qinvalid_regexp;
  89
  90 /* Error condition used for failing searches.  */
  91 static Lisp_Object Qsearch_failed;
  92
  93 static void set_search_regs (ptrdiff_t, ptrdiff_t);
  94 static void save_search_regs (void);
  95 static EMACS_INT simple_search (EMACS_INT, unsigned char *, ptrdiff_t,
  96                                 ptrdiff_t, Lisp_Object, ptrdiff_t, ptrdiff_t,
  97                                 ptrdiff_t, ptrdiff_t);
  98 static EMACS_INT boyer_moore (EMACS_INT, unsigned char *, ptrdiff_t,
  99                               Lisp_Object, Lisp_Object, ptrdiff_t,
 100                               ptrdiff_t, int);
 101 static EMACS_INT search_buffer (Lisp_Object, ptrdiff_t, ptrdiff_t,
 102                                 ptrdiff_t, ptrdiff_t, EMACS_INT, int,
 103                                 Lisp_Object, Lisp_Object, int);
 104
 105 static _Noreturn void
 106 matcher_overflow (void)
 107 {
 108   error ("Stack overflow in regexp matcher");
 109 }
 110
 111 /* Compile a regexp and signal a Lisp error if anything goes wrong.
 112    PATTERN is the pattern to compile.
 113    CP is the place to put the result.
 114    TRANSLATE is a translation table for ignoring case, or nil for none.
 115    POSIX is nonzero if we want full backtracking (POSIX style)
 116    for this pattern.  0 means backtrack only enough to get a valid match.
 117
 118    The behavior also depends on Vsearch_spaces_regexp.  */
 119
 120 static void
 121 compile_pattern_1 (struct regexp_cache *cp, Lisp_Object pattern, Lisp_Object translate, int posix)
 122 {
 123   char *val;
 124   reg_syntax_t old;
 125
 126   cp->regexp = Qnil;
 127   cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
 128   cp->posix = posix;
 129   cp->buf.multibyte = STRING_MULTIBYTE (pattern);
 130   cp->buf.charset_unibyte = charset_unibyte;
 131   if (STRINGP (Vsearch_spaces_regexp))
 132     cp->whitespace_regexp = Vsearch_spaces_regexp;
 133   else
 134     cp->whitespace_regexp = Qnil;
 135
 136   /* rms: I think BLOCK_INPUT is not needed here any more,
 137      because regex.c defines malloc to call xmalloc.
 138      Using BLOCK_INPUT here means the debugger won't run if an error occurs.
 139      So let's turn it off.  */
 140   /*  BLOCK_INPUT;  */
 141   old = re_set_syntax (RE_SYNTAX_EMACS
 142                        | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
 143
 144   if (STRINGP (Vsearch_spaces_regexp))
 145     re_set_whitespace_regexp (SSDATA (Vsearch_spaces_regexp));
 146   else
 147     re_set_whitespace_regexp (NULL);
 148
 149   val = (char *) re_compile_pattern (SSDATA (pattern),
 150                                      SBYTES (pattern), &cp->buf);
 151
 152   /* If the compiled pattern hard codes some of the contents of the
 153      syntax-table, it can only be reused with *this* syntax table.  */
 154   cp->syntax_table = cp->buf.used_syntax ? BVAR (current_buffer, syntax_table) : Qt;
 155
 156   re_set_whitespace_regexp (NULL);
 157
 158   re_set_syntax (old);
 159   /* UNBLOCK_INPUT;  */
 160   if (val)
 161     xsignal1 (Qinvalid_regexp, build_string (val));
 162
 163   cp->regexp = Fcopy_sequence (pattern);
 164 }
 165
 166 /* Shrink each compiled regexp buffer in the cache
 167    to the size actually used right now.
 168    This is called from garbage collection.  */
 169
 170 void
 171 shrink_regexp_cache (void)
 172 {
 173   struct regexp_cache *cp;
 174
 175   for (cp = searchbuf_head; cp != 0; cp = cp->next)
 176     {
 177       cp->buf.allocated = cp->buf.used;
 178       cp->buf.buffer = xrealloc (cp->buf.buffer, cp->buf.used);
 179     }
 180 }
 181
 182 /* Clear the regexp cache w.r.t. a particular syntax table,
 183    because it was changed.
 184    There is no danger of memory leak here because re_compile_pattern
 185    automagically manages the memory in each re_pattern_buffer struct,
 186    based on its `allocated' and `buffer' values.  */
 187 void
 188 clear_regexp_cache (void)
 189 {
 190   int i;
 191
 192   for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
 193     /* It's tempting to compare with the syntax-table we've actually changed,
 194        but it's not sufficient because char-table inheritance means that
 195        modifying one syntax-table can change others at the same time.  */
 196     if (!EQ (searchbufs[i].syntax_table, Qt))
 197       searchbufs[i].regexp = Qnil;
 198 }
 199
 200 /* Compile a regexp if necessary, but first check to see if there's one in
 201    the cache.
 202    PATTERN is the pattern to compile.
 203    TRANSLATE is a translation table for ignoring case, or nil for none.
 204    REGP is the structure that says where to store the "register"
 205    values that will result from matching this pattern.
 206    If it is 0, we should compile the pattern not to record any
 207    subexpression bounds.
 208    POSIX is nonzero if we want full backtracking (POSIX style)
 209    for this pattern.  0 means backtrack only enough to get a valid match.  */
 210
 211 struct re_pattern_buffer *
 212 compile_pattern (Lisp_Object pattern, struct re_registers *regp, Lisp_Object translate, int posix, int multibyte)
 213 {
 214   struct regexp_cache *cp, **cpp;
 215
 216   for (cpp = &searchbuf_head; ; cpp = &cp->next)
 217     {
 218       cp = *cpp;
 219       /* Entries are initialized to nil, and may be set to nil by
 220          compile_pattern_1 if the pattern isn't valid.  Don't apply
 221          string accessors in those cases.  However, compile_pattern_1
 222          is only applied to the cache entry we pick here to reuse.  So
 223          nil should never appear before a non-nil entry.  */
 224       if (NILP (cp->regexp))
 225         goto compile_it;
 226       if (SCHARS (cp->regexp) == SCHARS (pattern)
 227           && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
 228           && !NILP (Fstring_equal (cp->regexp, pattern))
 229           && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
 230           && cp->posix == posix
 231           && (EQ (cp->syntax_table, Qt)
 232               || EQ (cp->syntax_table, BVAR (current_buffer, syntax_table)))
 233           && !NILP (Fequal (cp->whitespace_regexp, Vsearch_spaces_regexp))
 234           && cp->buf.charset_unibyte == charset_unibyte)
 235         break;
 236
 237       /* If we're at the end of the cache, compile into the nil cell
 238          we found, or the last (least recently used) cell with a
 239          string value.  */
 240       if (cp->next == 0)
 241         {
 242         compile_it:
 243           compile_pattern_1 (cp, pattern, translate, posix);
 244           break;
 245         }
 246     }
 247
 248   /* When we get here, cp (aka *cpp) contains the compiled pattern,
 249      either because we found it in the cache or because we just compiled it.
 250      Move it to the front of the queue to mark it as most recently used.  */
 251   *cpp = cp->next;
 252   cp->next = searchbuf_head;
 253   searchbuf_head = cp;
 254
 255   /* Advise the searching functions about the space we have allocated
 256      for register data.  */
 257   if (regp)
 258     re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
 259
 260   /* The compiled pattern can be used both for multibyte and unibyte
 261      target.  But, we have to tell which the pattern is used for. */
 262   cp->buf.target_multibyte = multibyte;
 263
 264   return &cp->buf;
 265 }
 266
 267 \f
 268 static Lisp_Object
 269 looking_at_1 (Lisp_Object string, int posix)
 270 {
 271   Lisp_Object val;
 272   unsigned char *p1, *p2;
 273   ptrdiff_t s1, s2;
 274   register ptrdiff_t i;
 275   struct re_pattern_buffer *bufp;
 276
 277   if (running_asynch_code)
 278     save_search_regs ();
 279
 280   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
 281   XCHAR_TABLE (BVAR (current_buffer, case_canon_table))->extras[2]
 282     = BVAR (current_buffer, case_eqv_table);
 283
 284   CHECK_STRING (string);
 285   bufp = compile_pattern (string,
 286                           (NILP (Vinhibit_changing_match_data)
 287                            ? &search_regs : NULL),
 288                           (!NILP (BVAR (current_buffer, case_fold_search))
 289                            ? BVAR (current_buffer, case_canon_table) : Qnil),
 290                           posix,
 291                           !NILP (BVAR (current_buffer, enable_multibyte_characters)));
 292
 293   immediate_quit = 1;
 294   QUIT;                 /* Do a pending quit right away, to avoid paradoxical behavior */
 295
 296   /* Get pointers and sizes of the two strings
 297      that make up the visible portion of the buffer. */
 298
 299   p1 = BEGV_ADDR;
 300   s1 = GPT_BYTE - BEGV_BYTE;
 301   p2 = GAP_END_ADDR;
 302   s2 = ZV_BYTE - GPT_BYTE;
 303   if (s1 < 0)
 304     {
 305       p2 = p1;
 306       s2 = ZV_BYTE - BEGV_BYTE;
 307       s1 = 0;
 308     }
 309   if (s2 < 0)
 310     {
 311       s1 = ZV_BYTE - BEGV_BYTE;
 312       s2 = 0;
 313     }
 314
 315   re_match_object = Qnil;
 316
 317   i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
 318                   PT_BYTE - BEGV_BYTE,
 319                   (NILP (Vinhibit_changing_match_data)
 320                    ? &search_regs : NULL),
 321                   ZV_BYTE - BEGV_BYTE);
 322   immediate_quit = 0;
 323
 324   if (i == -2)
 325     matcher_overflow ();
 326
 327   val = (0 <= i ? Qt : Qnil);
 328   if (NILP (Vinhibit_changing_match_data) && i >= 0)
 329     for (i = 0; i < search_regs.num_regs; i++)
 330       if (search_regs.start[i] >= 0)
 331         {
 332           search_regs.start[i]
 333             = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
 334           search_regs.end[i]
 335             = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
 336         }
 337
 338   /* Set last_thing_searched only when match data is changed.  */
 339   if (NILP (Vinhibit_changing_match_data))
 340     XSETBUFFER (last_thing_searched, current_buffer);
 341
 342   return val;
 343 }
 344
 345 DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
 346        doc: /* Return t if text after point matches regular expression REGEXP.
 347 This function modifies the match data that `match-beginning',
 348 `match-end' and `match-data' access; save and restore the match
 349 data if you want to preserve them.  */)
 350   (Lisp_Object regexp)
 351 {
 352   return looking_at_1 (regexp, 0);
 353 }
 354
 355 DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
 356        doc: /* Return t if text after point matches regular expression REGEXP.
 357 Find the longest match, in accord with Posix regular expression rules.
 358 This function modifies the match data that `match-beginning',
 359 `match-end' and `match-data' access; save and restore the match
 360 data if you want to preserve them.  */)
 361   (Lisp_Object regexp)
 362 {
 363   return looking_at_1 (regexp, 1);
 364 }
 365 \f
 366 static Lisp_Object
 367 string_match_1 (Lisp_Object regexp, Lisp_Object string, Lisp_Object start, int posix)
 368 {
 369   ptrdiff_t val;
 370   struct re_pattern_buffer *bufp;
 371   EMACS_INT pos;
 372   ptrdiff_t pos_byte, i;
 373
 374   if (running_asynch_code)
 375     save_search_regs ();
 376
 377   CHECK_STRING (regexp);
 378   CHECK_STRING (string);
 379
 380   if (NILP (start))
 381     pos = 0, pos_byte = 0;
 382   else
 383     {
 384       ptrdiff_t len = SCHARS (string);
 385
 386       CHECK_NUMBER (start);
 387       pos = XINT (start);
 388       if (pos < 0 && -pos <= len)
 389         pos = len + pos;
 390       else if (0 > pos || pos > len)
 391         args_out_of_range (string, start);
 392       pos_byte = string_char_to_byte (string, pos);
 393     }
 394
 395   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
 396   XCHAR_TABLE (BVAR (current_buffer, case_canon_table))->extras[2]
 397     = BVAR (current_buffer, case_eqv_table);
 398
 399   bufp = compile_pattern (regexp,
 400                           (NILP (Vinhibit_changing_match_data)
 401                            ? &search_regs : NULL),
 402                           (!NILP (BVAR (current_buffer, case_fold_search))
 403                            ? BVAR (current_buffer, case_canon_table) : Qnil),
 404                           posix,
 405                           STRING_MULTIBYTE (string));
 406   immediate_quit = 1;
 407   re_match_object = string;
 408
 409   val = re_search (bufp, SSDATA (string),
 410                    SBYTES (string), pos_byte,
 411                    SBYTES (string) - pos_byte,
 412                    (NILP (Vinhibit_changing_match_data)
 413                     ? &search_regs : NULL));
 414   immediate_quit = 0;
 415
 416   /* Set last_thing_searched only when match data is changed.  */
 417   if (NILP (Vinhibit_changing_match_data))
 418     last_thing_searched = Qt;
 419
 420   if (val == -2)
 421     matcher_overflow ();
 422   if (val < 0) return Qnil;
 423
 424   if (NILP (Vinhibit_changing_match_data))
 425     for (i = 0; i < search_regs.num_regs; i++)
 426       if (search_regs.start[i] >= 0)
 427         {
 428           search_regs.start[i]
 429             = string_byte_to_char (string, search_regs.start[i]);
 430           search_regs.end[i]
 431             = string_byte_to_char (string, search_regs.end[i]);
 432         }
 433
 434   return make_number (string_byte_to_char (string, val));
 435 }
 436
 437 DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
 438        doc: /* Return index of start of first match for REGEXP in STRING, or nil.
 439 Matching ignores case if `case-fold-search' is non-nil.
 440 If third arg START is non-nil, start search at that index in STRING.
 441 For index of first char beyond the match, do (match-end 0).
 442 `match-end' and `match-beginning' also give indices of substrings
 443 matched by parenthesis constructs in the pattern.
 444
 445 You can use the function `match-string' to extract the substrings
 446 matched by the parenthesis constructions in REGEXP. */)
 447   (Lisp_Object regexp, Lisp_Object string, Lisp_Object start)
 448 {
 449   return string_match_1 (regexp, string, start, 0);
 450 }
 451
 452 DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
 453        doc: /* Return index of start of first match for REGEXP in STRING, or nil.
 454 Find the longest match, in accord with Posix regular expression rules.
 455 Case is ignored if `case-fold-search' is non-nil in the current buffer.
 456 If third arg START is non-nil, start search at that index in STRING.
 457 For index of first char beyond the match, do (match-end 0).
 458 `match-end' and `match-beginning' also give indices of substrings
 459 matched by parenthesis constructs in the pattern.  */)
 460   (Lisp_Object regexp, Lisp_Object string, Lisp_Object start)
 461 {
 462   return string_match_1 (regexp, string, start, 1);
 463 }
 464
 465 /* Match REGEXP against STRING, searching all of STRING,
 466    and return the index of the match, or negative on failure.
 467    This does not clobber the match data.  */
 468
 469 ptrdiff_t
 470 fast_string_match (Lisp_Object regexp, Lisp_Object string)
 471 {
 472   ptrdiff_t val;
 473   struct re_pattern_buffer *bufp;
 474
 475   bufp = compile_pattern (regexp, 0, Qnil,
 476                           0, STRING_MULTIBYTE (string));
 477   immediate_quit = 1;
 478   re_match_object = string;
 479
 480   val = re_search (bufp, SSDATA (string),
 481                    SBYTES (string), 0,
 482                    SBYTES (string), 0);
 483   immediate_quit = 0;
 484   return val;
 485 }
 486
 487 /* Match REGEXP against STRING, searching all of STRING ignoring case,
 488    and return the index of the match, or negative on failure.
 489    This does not clobber the match data.
 490    We assume that STRING contains single-byte characters.  */
 491
 492 ptrdiff_t
 493 fast_c_string_match_ignore_case (Lisp_Object regexp,
 494                                  const char *string, ptrdiff_t len)
 495 {
 496   ptrdiff_t val;
 497   struct re_pattern_buffer *bufp;
 498
 499   regexp = string_make_unibyte (regexp);
 500   re_match_object = Qt;
 501   bufp = compile_pattern (regexp, 0,
 502                           Vascii_canon_table, 0,
 503                           0);
 504   immediate_quit = 1;
 505   val = re_search (bufp, string, len, 0, len, 0);
 506   immediate_quit = 0;
 507   return val;
 508 }
 509
 510 /* Like fast_string_match but ignore case.  */
 511
 512 ptrdiff_t
 513 fast_string_match_ignore_case (Lisp_Object regexp, Lisp_Object string)
 514 {
 515   ptrdiff_t val;
 516   struct re_pattern_buffer *bufp;
 517
 518   bufp = compile_pattern (regexp, 0, Vascii_canon_table,
 519                           0, STRING_MULTIBYTE (string));
 520   immediate_quit = 1;
 521   re_match_object = string;
 522
 523   val = re_search (bufp, SSDATA (string),
 524                    SBYTES (string), 0,
 525                    SBYTES (string), 0);
 526   immediate_quit = 0;
 527   return val;
 528 }
 529 \f
 530 /* Match REGEXP against the characters after POS to LIMIT, and return
 531    the number of matched characters.  If STRING is non-nil, match
 532    against the characters in it.  In that case, POS and LIMIT are
 533    indices into the string.  This function doesn't modify the match
 534    data.  */
 535
 536 ptrdiff_t
 537 fast_looking_at (Lisp_Object regexp, ptrdiff_t pos, ptrdiff_t pos_byte, ptrdiff_t limit, ptrdiff_t limit_byte, Lisp_Object string)
 538 {
 539   int multibyte;
 540   struct re_pattern_buffer *buf;
 541   unsigned char *p1, *p2;
 542   ptrdiff_t s1, s2;
 543   ptrdiff_t len;
 544
 545   if (STRINGP (string))
 546     {
 547       if (pos_byte < 0)
 548         pos_byte = string_char_to_byte (string, pos);
 549       if (limit_byte < 0)
 550         limit_byte = string_char_to_byte (string, limit);
 551       p1 = NULL;
 552       s1 = 0;
 553       p2 = SDATA (string);
 554       s2 = SBYTES (string);
 555       re_match_object = string;
 556       multibyte = STRING_MULTIBYTE (string);
 557     }
 558   else
 559     {
 560       if (pos_byte < 0)
 561         pos_byte = CHAR_TO_BYTE (pos);
 562       if (limit_byte < 0)
 563         limit_byte = CHAR_TO_BYTE (limit);
 564       pos_byte -= BEGV_BYTE;
 565       limit_byte -= BEGV_BYTE;
 566       p1 = BEGV_ADDR;
 567       s1 = GPT_BYTE - BEGV_BYTE;
 568       p2 = GAP_END_ADDR;
 569       s2 = ZV_BYTE - GPT_BYTE;
 570       if (s1 < 0)
 571         {
 572           p2 = p1;
 573           s2 = ZV_BYTE - BEGV_BYTE;
 574           s1 = 0;
 575         }
 576       if (s2 < 0)
 577         {
 578           s1 = ZV_BYTE - BEGV_BYTE;
 579           s2 = 0;
 580         }
 581       re_match_object = Qnil;
 582       multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
 583     }
 584
 585   buf = compile_pattern (regexp, 0, Qnil, 0, multibyte);
 586   immediate_quit = 1;
 587   len = re_match_2 (buf, (char *) p1, s1, (char *) p2, s2,
 588                     pos_byte, NULL, limit_byte);
 589   immediate_quit = 0;
 590
 591   return len;
 592 }
 593
 594 \f
 595 /* The newline cache: remembering which sections of text have no newlines.  */
 596
 597 /* If the user has requested newline caching, make sure it's on.
 598    Otherwise, make sure it's off.
 599    This is our cheezy way of associating an action with the change of
 600    state of a buffer-local variable.  */
 601 static void
 602 newline_cache_on_off (struct buffer *buf)
 603 {
 604   if (NILP (BVAR (buf, cache_long_line_scans)))
 605     {
 606       /* It should be off.  */
 607       if (buf->newline_cache)
 608         {
 609           free_region_cache (buf->newline_cache);
 610           buf->newline_cache = 0;
 611         }
 612     }
 613   else
 614     {
 615       /* It should be on.  */
 616       if (buf->newline_cache == 0)
 617         buf->newline_cache = new_region_cache ();
 618     }
 619 }
 620
 621 \f
 622 /* Search for COUNT instances of the character TARGET between START and END.
 623
 624    If COUNT is positive, search forwards; END must be >= START.
 625    If COUNT is negative, search backwards for the -COUNTth instance;
 626       END must be <= START.
 627    If COUNT is zero, do anything you please; run rogue, for all I care.
 628
 629    If END is zero, use BEGV or ZV instead, as appropriate for the
 630    direction indicated by COUNT.
 631
 632    If we find COUNT instances, set *SHORTAGE to zero, and return the
 633    position past the COUNTth match.  Note that for reverse motion
 634    this is not the same as the usual convention for Emacs motion commands.
 635
 636    If we don't find COUNT instances before reaching END, set *SHORTAGE
 637    to the number of TARGETs left unfound, and return END.
 638
 639    If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
 640    except when inside redisplay.  */
 641
 642 ptrdiff_t
 643 scan_buffer (register int target, ptrdiff_t start, ptrdiff_t end,
 644              ptrdiff_t count, ptrdiff_t *shortage, int allow_quit)
 645 {
 646   struct region_cache *newline_cache;
 647   int direction;
 648
 649   if (count > 0)
 650     {
 651       direction = 1;
 652       if (! end) end = ZV;
 653     }
 654   else
 655     {
 656       direction = -1;
 657       if (! end) end = BEGV;
 658     }
 659
 660   newline_cache_on_off (current_buffer);
 661   newline_cache = current_buffer->newline_cache;
 662
 663   if (shortage != 0)
 664     *shortage = 0;
 665
 666   immediate_quit = allow_quit;
 667
 668   if (count > 0)
 669     while (start != end)
 670       {
 671         /* Our innermost scanning loop is very simple; it doesn't know
 672            about gaps, buffer ends, or the newline cache.  ceiling is
 673            the position of the last character before the next such
 674            obstacle --- the last character the dumb search loop should
 675            examine.  */
 676         ptrdiff_t ceiling_byte = CHAR_TO_BYTE (end) - 1;
 677         ptrdiff_t start_byte = CHAR_TO_BYTE (start);
 678         ptrdiff_t tem;
 679
 680         /* If we're looking for a newline, consult the newline cache
 681            to see where we can avoid some scanning.  */
 682         if (target == '\n' && newline_cache)
 683           {
 684             ptrdiff_t next_change;
 685             immediate_quit = 0;
 686             while (region_cache_forward
 687                    (current_buffer, newline_cache, start_byte, &next_change))
 688               start_byte = next_change;
 689             immediate_quit = allow_quit;
 690
 691             /* START should never be after END.  */
 692             if (start_byte > ceiling_byte)
 693               start_byte = ceiling_byte;
 694
 695             /* Now the text after start is an unknown region, and
 696                next_change is the position of the next known region. */
 697             ceiling_byte = min (next_change - 1, ceiling_byte);
 698           }
 699
 700         /* The dumb loop can only scan text stored in contiguous
 701            bytes. BUFFER_CEILING_OF returns the last character
 702            position that is contiguous, so the ceiling is the
 703            position after that.  */
 704         tem = BUFFER_CEILING_OF (start_byte);
 705         ceiling_byte = min (tem, ceiling_byte);
 706
 707         {
 708           /* The termination address of the dumb loop.  */
 709           register unsigned char *ceiling_addr
 710             = BYTE_POS_ADDR (ceiling_byte) + 1;
 711           register unsigned char *cursor
 712             = BYTE_POS_ADDR (start_byte);
 713           unsigned char *base = cursor;
 714
 715           while (cursor < ceiling_addr)
 716             {
 717               unsigned char *scan_start = cursor;
 718
 719               /* The dumb loop.  */
 720               while (*cursor != target && ++cursor < ceiling_addr)
 721                 ;
 722
 723               /* If we're looking for newlines, cache the fact that
 724                  the region from start to cursor is free of them. */
 725               if (target == '\n' && newline_cache)
 726                 know_region_cache (current_buffer, newline_cache,
 727                                    BYTE_TO_CHAR (start_byte + scan_start - base),
 728                                    BYTE_TO_CHAR (start_byte + cursor - base));
 729
 730               /* Did we find the target character?  */
 731               if (cursor < ceiling_addr)
 732                 {
 733                   if (--count == 0)
 734                     {
 735                       immediate_quit = 0;
 736                       return BYTE_TO_CHAR (start_byte + cursor - base + 1);
 737                     }
 738                   cursor++;
 739                 }
 740             }
 741
 742           start = BYTE_TO_CHAR (start_byte + cursor - base);
 743         }
 744       }
 745   else
 746     while (start > end)
 747       {
 748         /* The last character to check before the next obstacle.  */
 749         ptrdiff_t ceiling_byte = CHAR_TO_BYTE (end);
 750         ptrdiff_t start_byte = CHAR_TO_BYTE (start);
 751         ptrdiff_t tem;
 752
 753         /* Consult the newline cache, if appropriate.  */
 754         if (target == '\n' && newline_cache)
 755           {
 756             ptrdiff_t next_change;
 757             immediate_quit = 0;
 758             while (region_cache_backward
 759                    (current_buffer, newline_cache, start_byte, &next_change))
 760               start_byte = next_change;
 761             immediate_quit = allow_quit;
 762
 763             /* Start should never be at or before end.  */
 764             if (start_byte <= ceiling_byte)
 765               start_byte = ceiling_byte + 1;
 766
 767             /* Now the text before start is an unknown region, and
 768                next_change is the position of the next known region. */
 769             ceiling_byte = max (next_change, ceiling_byte);
 770           }
 771
 772         /* Stop scanning before the gap.  */
 773         tem = BUFFER_FLOOR_OF (start_byte - 1);
 774         ceiling_byte = max (tem, ceiling_byte);
 775
 776         {
 777           /* The termination address of the dumb loop.  */
 778           register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
 779           register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
 780           unsigned char *base = cursor;
 781
 782           while (cursor >= ceiling_addr)
 783             {
 784               unsigned char *scan_start = cursor;
 785
 786               while (*cursor != target && --cursor >= ceiling_addr)
 787                 ;
 788
 789               /* If we're looking for newlines, cache the fact that
 790                  the region from after the cursor to start is free of them.  */
 791               if (target == '\n' && newline_cache)
 792                 know_region_cache (current_buffer, newline_cache,
 793                                    BYTE_TO_CHAR (start_byte + cursor - base),
 794                                    BYTE_TO_CHAR (start_byte + scan_start - base));
 795
 796               /* Did we find the target character?  */
 797               if (cursor >= ceiling_addr)
 798                 {
 799                   if (++count >= 0)
 800                     {
 801                       immediate_quit = 0;
 802                       return BYTE_TO_CHAR (start_byte + cursor - base);
 803                     }
 804                   cursor--;
 805                 }
 806             }
 807
 808           start = BYTE_TO_CHAR (start_byte + cursor - base);
 809         }
 810       }
 811
 812   immediate_quit = 0;
 813   if (shortage != 0)
 814     *shortage = count * direction;
 815   return start;
 816 }
 817 \f
 818 /* Search for COUNT instances of a line boundary, which means either a
 819    newline or (if selective display enabled) a carriage return.
 820    Start at START.  If COUNT is negative, search backwards.
 821
 822    We report the resulting position by calling TEMP_SET_PT_BOTH.
 823
  824    If we find COUNT instances, we position after (always after,
 825    even if scanning backwards) the COUNTth match, and return 0.
 826
 827    If we don't find COUNT instances before reaching the end of the
 828    buffer (or the beginning, if scanning backwards), we return
 829    the number of line boundaries left unfound, and position at
 830    the limit we bumped up against.
 831
 832    If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
 833    except in special cases.  */
 834
 835 EMACS_INT
 836 scan_newline (ptrdiff_t start, ptrdiff_t start_byte,
 837               ptrdiff_t limit, ptrdiff_t limit_byte,
 838               register EMACS_INT count, int allow_quit)
 839 {
 840   int direction = ((count > 0) ? 1 : -1);
 841
 842   register unsigned char *cursor;
 843   unsigned char *base;
 844
 845   ptrdiff_t ceiling;
 846   register unsigned char *ceiling_addr;
 847
 848   int old_immediate_quit = immediate_quit;
 849
 850   /* The code that follows is like scan_buffer
 851      but checks for either newline or carriage return.  */
 852
 853   if (allow_quit)
 854     immediate_quit++;
 855
 856   start_byte = CHAR_TO_BYTE (start);
 857
 858   if (count > 0)
 859     {
 860       while (start_byte < limit_byte)
 861         {
 862           ceiling =  BUFFER_CEILING_OF (start_byte);
 863           ceiling = min (limit_byte - 1, ceiling);
 864           ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
 865           base = (cursor = BYTE_POS_ADDR (start_byte));
 866           while (1)
 867             {
 868               while (*cursor != '\n' && ++cursor != ceiling_addr)
 869                 ;
 870
 871               if (cursor != ceiling_addr)
 872                 {
 873                   if (--count == 0)
 874                     {
 875                       immediate_quit = old_immediate_quit;
 876                       start_byte = start_byte + cursor - base + 1;
 877                       start = BYTE_TO_CHAR (start_byte);
 878                       TEMP_SET_PT_BOTH (start, start_byte);
 879                       return 0;
 880                     }
 881                   else
 882                     if (++cursor == ceiling_addr)
 883                       break;
 884                 }
 885               else
 886                 break;
 887             }
 888           start_byte += cursor - base;
 889         }
 890     }
 891   else
 892     {
 893       while (start_byte > limit_byte)
 894         {
 895           ceiling = BUFFER_FLOOR_OF (start_byte - 1);
 896           ceiling = max (limit_byte, ceiling);
 897           ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
 898           base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
 899           while (1)
 900             {
 901               while (--cursor != ceiling_addr && *cursor != '\n')
 902                 ;
 903
 904               if (cursor != ceiling_addr)
 905                 {
 906                   if (++count == 0)
 907                     {
 908                       immediate_quit = old_immediate_quit;
 909                       /* Return the position AFTER the match we found.  */
 910                       start_byte = start_byte + cursor - base + 1;
 911                       start = BYTE_TO_CHAR (start_byte);
 912                       TEMP_SET_PT_BOTH (start, start_byte);
 913                       return 0;
 914                     }
 915                 }
 916               else
 917                 break;
 918             }
 919           /* Here we add 1 to compensate for the last decrement
 920              of CURSOR, which took it past the valid range.  */
 921           start_byte += cursor - base + 1;
 922         }
 923     }
 924
 925   TEMP_SET_PT_BOTH (limit, limit_byte);
 926   immediate_quit = old_immediate_quit;
 927
 928   return count * direction;
 929 }
 930
     /* Return whatever scan_buffer returns when asked for the CNT'th
        newline starting at FROM.  The call passes 0 as the third argument,
        a null pointer in place of a shortage variable (so no shortage is
        reported), and 0 as the last argument.  Judging by this function's
        name, that last argument is presumably scan_buffer's allow-quit
        flag -- NOTE(review): confirm against scan_buffer's parameter
        list.  */
 931 ptrdiff_t
 932 find_next_newline_no_quit (ptrdiff_t from, ptrdiff_t cnt)
 933 {
 934   return scan_buffer ('\n', from, 0, cnt, (ptrdiff_t *) 0, 0);
 935 }
 936
 /* Scan from FROM towards TO for the CNT'th newline and return the
    position in front of it, where find_next_newline would return the
    position behind it.

    The adjustment is applied only when scan_buffer reports a shortage
    of zero.  If the scan came up short, its result is returned as is,
    which is why this cannot simply be written as
    find_next_newline (...) - 1: the scan may have stopped at TO.  */

 ptrdiff_t
 find_before_next_newline (ptrdiff_t from, ptrdiff_t to, ptrdiff_t cnt)
 {
   ptrdiff_t unfound;
   ptrdiff_t where = scan_buffer ('\n', from, to, cnt, &unfound, 1);

   /* Back up over the newline only when every requested one was found.  */
   return unfound == 0 ? where - 1 : where;
 }
 952 \f
 953 /* Subroutines of Lisp buffer search functions. */
 954
 955 static Lisp_Object
 956 search_command (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror,
 957                 Lisp_Object count, int direction, int RE, int posix)
 958 {
 959   register EMACS_INT np;
 960   EMACS_INT lim;
 961   ptrdiff_t lim_byte;
 962   EMACS_INT n = direction;
 963
 964   if (!NILP (count))
 965     {
 966       CHECK_NUMBER (count);
 967       n *= XINT (count);
 968     }
 969
 970   CHECK_STRING (string);
 971   if (NILP (bound))
 972     {
 973       if (n > 0)
 974         lim = ZV, lim_byte = ZV_BYTE;
 975       else
 976         lim = BEGV, lim_byte = BEGV_BYTE;
 977     }
 978   else
 979     {
 980       CHECK_NUMBER_COERCE_MARKER (bound);
 981       lim = XINT (bound);
 982       if (n > 0 ? lim < PT : lim > PT)
 983         error ("Invalid search bound (wrong side of point)");
 984       if (lim > ZV)
 985         lim = ZV, lim_byte = ZV_BYTE;
 986       else if (lim < BEGV)
 987         lim = BEGV, lim_byte = BEGV_BYTE;
 988       else
 989         lim_byte = CHAR_TO_BYTE (lim);
 990     }
 991
 992   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
 993   XCHAR_TABLE (BVAR (current_buffer, case_canon_table))->extras[2]
 994     = BVAR (current_buffer, case_eqv_table);
 995
 996   np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
 997                       (!NILP (BVAR (current_buffer, case_fold_search))
 998                        ? BVAR (current_buffer, case_canon_table)
 999                        : Qnil),
1000                       (!NILP (BVAR (current_buffer, case_fold_search))
1001                        ? BVAR (current_buffer, case_eqv_table)
1002                        : Qnil),
1003                       posix);
1004   if (np <= 0)
1005     {
1006       if (NILP (noerror))
1007         xsignal1 (Qsearch_failed, string);
1008
1009       if (!EQ (noerror, Qt))
1010         {
1011           if (lim < BEGV || lim > ZV)
1012             abort ();
1013           SET_PT_BOTH (lim, lim_byte);
1014           return Qnil;
1015 #if 0 /* This would be clean, but maybe programs depend on
1016          a value of nil here.  */
1017           np = lim;
1018 #endif
1019         }
1020       else
1021         return Qnil;
1022     }
1023
1024   if (np < BEGV || np > ZV)
1025     abort ();
1026
1027   SET_PT (np);
1028
1029   return make_number (np);
1030 }
1031 \f
1032 /* Return 1 if REGEXP it matches just one constant string.  */
1033
1034 static int
1035 trivial_regexp_p (Lisp_Object regexp)
1036 {
1037   ptrdiff_t len = SBYTES (regexp);
1038   unsigned char *s = SDATA (regexp);
1039   while (--len >= 0)
1040     {
1041       switch (*s++)
1042         {
1043         case '.': case '*': case '+': case '?': case '[': case '^': case '$':
1044           return 0;
1045         case '\\':
1046           if (--len < 0)
1047             return 0;
1048           switch (*s++)
1049             {
1050             case '|': case '(': case ')': case '`': case '\'': case 'b':
1051             case 'B': case '<': case '>': case 'w': case 'W': case 's':
1052             case 'S': case '=': case '{': case '}': case '_':
1053             case 'c': case 'C': /* for categoryspec and notcategoryspec */
1054             case '1': case '2': case '3': case '4': case '5':
1055             case '6': case '7': case '8': case '9':
1056               return 0;
1057             }
1058         }
1059     }
1060   return 1;
1061 }
1062
1063 /* (This comment describes search_buffer, which is defined after the
        TRANSLATE macro below.)
        Search for the n'th occurrence of STRING in the current buffer,
1064    starting at position POS and stopping at position LIM,
1065    treating STRING as a literal string if RE is false or as
1066    a regular expression if RE is true.
1067
1068    If N is positive, searching is forward and LIM must be greater than POS.
1069    If N is negative, searching is backward and LIM must be less than POS.
1070
1071    Returns -x if x occurrences remain to be found (x > 0),
1072    or else the position at the beginning of the Nth occurrence
1073    (if searching backward) or the end (if searching forward).
1074
1075    POSIX is nonzero if we want full backtracking (POSIX style)
1076    for this pattern.  0 means backtrack only enough to get a valid match.  */
1077
     /* Set OUT to the translation of character D under the table TRT.
        If TRT is nil, OUT is simply D.  Otherwise TRT is indexed with
        Faref; an integer entry becomes OUT, any other entry leaves OUT
        equal to D.

        This is a statement macro: the arguments are pasted in without
        parentheses and D is expanded more than once, so pass plain
        variables or constants.  The expansion declares a local named
        `temp', so do not pass an expression that refers to a variable of
        that name.  */
1078 #define TRANSLATE(out, trt, d)                  \
1079 do                                              \
1080   {                                             \
1081     if (! NILP (trt))                           \
1082       {                                         \
1083         Lisp_Object temp;                       \
1084         temp = Faref (trt, make_number (d));    \
1085         if (INTEGERP (temp))                    \
1086           out = XINT (temp);                    \
1087         else                                    \
1088           out = d;                              \
1089       }                                         \
1090     else                                        \
1091       out = d;                                  \
1092   }                                             \
1093 while (0)
1094
1095 /* Only used in search_buffer, to record the position of the match
1096    when searching regexps and SEARCH_REGS should not be changed
1097    (i.e. Vinhibit_changing_match_data is non-nil).  In that case
        search_buffer passes this to compile_pattern and re_search_2 in
        place of search_regs, then reads start[0] (backward search) or
        end[0] (forward search) from it to compute the new position.  */
1098 static struct re_registers search_regs_1;
1099
     /* Look for the N'th occurrence of STRING between POS/POS_BYTE and
        LIM/LIM_BYTE.  The calling contract (sign of N, return value,
        POSIX) is spelled out in the comment that precedes the TRANSLATE
        macro above.

        Two strategies are used.  If RE is non-zero and STRING is not a
        trivial regexp, or Vsearch_spaces_regexp is non-nil, the pattern is
        compiled with compile_pattern and matched with re_search_2.
        Otherwise STRING is converted to the multibyteness of the current
        buffer, translated through TRT, and handed to boyer_moore when the
        translation allows it, or to simple_search when it does not.  */
1100 static EMACS_INT
1101 search_buffer (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte,
1102                ptrdiff_t lim, ptrdiff_t lim_byte, EMACS_INT n,
1103                int RE, Lisp_Object trt, Lisp_Object inverse_trt, int posix)
1104 {
1105   ptrdiff_t len = SCHARS (string);
1106   ptrdiff_t len_byte = SBYTES (string);
1107   register ptrdiff_t i;
1108
1109   if (running_asynch_code)
1110     save_search_regs ();
1111
1112   /* Searching 0 times means don't move.  */
1113   /* Null string is found at starting position.  */
1114   if (len == 0 || n == 0)
1115     {
1116       set_search_regs (pos_byte, 0);
1117       return pos;
1118     }
1119
1120   if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp)))
1121     {
           /* Regexp case.  */
1122       unsigned char *p1, *p2;
1123       ptrdiff_t s1, s2;
1124       struct re_pattern_buffer *bufp;
1125
           /* Registers go to search_regs normally, or to the scratch
              search_regs_1 when match data must not be changed.  */
1126       bufp = compile_pattern (string,
1127                               (NILP (Vinhibit_changing_match_data)
1128                                ? &search_regs : &search_regs_1),
1129                               trt, posix,
1130                               !NILP (BVAR (current_buffer, enable_multibyte_characters)));
1131
1132       immediate_quit = 1;       /* Quit immediately if user types ^G,
1133                                    because letting this function finish
1134                                    can take too long. */
1135       QUIT;                     /* Do a pending quit right away,
1136                                    to avoid paradoxical behavior */
1137       /* Get pointers and sizes of the two strings
1138          that make up the visible portion of the buffer. */
1139
1140       p1 = BEGV_ADDR;
1141       s1 = GPT_BYTE - BEGV_BYTE;
1142       p2 = GAP_END_ADDR;
1143       s2 = ZV_BYTE - GPT_BYTE;
           /* S1 < 0 means GPT_BYTE is below BEGV_BYTE: all of the visible
              text is then described by the second string, which is made
              to start at P1.  */
1144       if (s1 < 0)
1145         {
1146           p2 = p1;
1147           s2 = ZV_BYTE - BEGV_BYTE;
1148           s1 = 0;
1149         }
           /* S2 < 0 means GPT_BYTE is above ZV_BYTE: all of the visible
              text is then described by the first string.  */
1150       if (s2 < 0)
1151         {
1152           s1 = ZV_BYTE - BEGV_BYTE;
1153           s2 = 0;
1154         }
1155       re_match_object = Qnil;
1156
           /* Backward search: one re_search_2 call per occurrence.  The
              start and range arguments are byte offsets; the range
              LIM_BYTE - POS_BYTE is negative when LIM_BYTE is below
              POS_BYTE.  */
1157       while (n < 0)
1158         {
1159           ptrdiff_t val;
1160
1161           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1162                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1163                              (NILP (Vinhibit_changing_match_data)
1164                               ? &search_regs : &search_regs_1),
1165                              /* Don't allow match past current point */
1166                              pos_byte - BEGV_BYTE);
               /* NOTE(review): -2 is presumably re_search_2's
                  internal-error value and matcher_overflow presumably does
                  not return -- confirm in regex.c.  */
1167           if (val == -2)
1168             {
1169               matcher_overflow ();
1170             }
1171           if (val >= 0)
1172             {
1173               if (NILP (Vinhibit_changing_match_data))
1174                 {
                       /* Continue the next iteration from the start of this
                          match, and convert every recorded register from a
                          byte offset relative to BEGV_BYTE into a character
                          position.  */
1175                   pos_byte = search_regs.start[0] + BEGV_BYTE;
1176                   for (i = 0; i < search_regs.num_regs; i++)
1177                     if (search_regs.start[i] >= 0)
1178                       {
1179                         search_regs.start[i]
1180                           = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1181                         search_regs.end[i]
1182                           = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1183                       }
1184                   XSETBUFFER (last_thing_searched, current_buffer);
1185                   /* Set pos to the new position. */
1186                   pos = search_regs.start[0];
1187                 }
1188               else
1189                 {
                       /* Match data is frozen: take the position from the
                          scratch registers and leave search_regs alone.  */
1190                   pos_byte = search_regs_1.start[0] + BEGV_BYTE;
1191                   /* Set pos to the new position.  */
1192                   pos = BYTE_TO_CHAR (search_regs_1.start[0] + BEGV_BYTE);
1193                 }
1194             }
1195           else
1196             {
                   /* No match: N is still negative here and is returned as
                      is, i.e. minus the number of occurrences not found.  */
1197               immediate_quit = 0;
1198               return (n);
1199             }
1200           n++;
1201         }
           /* Forward search: same scheme, but matching may extend up to
              LIM_BYTE and the search continues from the end of each
              match.  */
1202       while (n > 0)
1203         {
1204           ptrdiff_t val;
1205
1206           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1207                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1208                              (NILP (Vinhibit_changing_match_data)
1209                               ? &search_regs : &search_regs_1),
1210                              lim_byte - BEGV_BYTE);
1211           if (val == -2)
1212             {
1213               matcher_overflow ();
1214             }
1215           if (val >= 0)
1216             {
1217               if (NILP (Vinhibit_changing_match_data))
1218                 {
1219                   pos_byte = search_regs.end[0] + BEGV_BYTE;
1220                   for (i = 0; i < search_regs.num_regs; i++)
1221                     if (search_regs.start[i] >= 0)
1222                       {
1223                         search_regs.start[i]
1224                           = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1225                         search_regs.end[i]
1226                           = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1227                       }
1228                   XSETBUFFER (last_thing_searched, current_buffer);
1229                   pos = search_regs.end[0];
1230                 }
1231               else
1232                 {
1233                   pos_byte = search_regs_1.end[0] + BEGV_BYTE;
1234                   pos = BYTE_TO_CHAR (search_regs_1.end[0] + BEGV_BYTE);
1235                 }
1236             }
1237           else
1238             {
                   /* No match: return minus the number still to find.  */
1239               immediate_quit = 0;
1240               return (0 - n);
1241             }
1242           n--;
1243         }
1244       immediate_quit = 0;
1245       return (pos);
1246     }
1247   else                          /* non-RE case */
1248     {
1249       unsigned char *raw_pattern, *pat;
1250       ptrdiff_t raw_pattern_size;
1251       ptrdiff_t raw_pattern_size_byte;
1252       unsigned char *patbuf;
1253       int multibyte = !NILP (BVAR (current_buffer, enable_multibyte_characters));
1254       unsigned char *base_pat;
1255       /* Set to positive if we find a non-ASCII char that needs
1256          translation.  Otherwise set to zero later.  */
1257       int char_base = -1;
1258       int boyer_moore_ok = 1;
1259
1260       /* MULTIBYTE says whether the text to be searched is multibyte.
1261          We must convert PATTERN to match that, or we will not really
1262          find things right.  */
1263
1264       if (multibyte == STRING_MULTIBYTE (string))
1265         {
               /* Same representation as the buffer: use the string's own
                  bytes.  */
1266           raw_pattern = SDATA (string);
1267           raw_pattern_size = SCHARS (string);
1268           raw_pattern_size_byte = SBYTES (string);
1269         }
1270       else if (multibyte)
1271         {
               /* Unibyte string, multibyte buffer: build a multibyte copy
                  on the stack.  */
1272           raw_pattern_size = SCHARS (string);
1273           raw_pattern_size_byte
1274             = count_size_as_multibyte (SDATA (string),
1275                                        raw_pattern_size);
1276           raw_pattern = alloca (raw_pattern_size_byte + 1);
1277           copy_text (SDATA (string), raw_pattern,
1278                      SCHARS (string), 0, 1);
1279         }
1280       else
1281         {
1282           /* Converting multibyte to single-byte.
1283
1284              ??? Perhaps this conversion should be done in a special way
1285              by subtracting nonascii-insert-offset from each non-ASCII char,
1286              so that only the multibyte chars which really correspond to
1287              the chosen single-byte character set can possibly match.  */
1288           raw_pattern_size = SCHARS (string);
1289           raw_pattern_size_byte = SCHARS (string);
1290           raw_pattern = alloca (raw_pattern_size + 1);
1291           copy_text (SDATA (string), raw_pattern,
1292                      SBYTES (string), 1, 0);
1293         }
1294
1295       /* Copy and optionally translate the pattern.  */
1296       len = raw_pattern_size;
1297       len_byte = raw_pattern_size_byte;
           /* NOTE(review): the buffer allows MAX_MULTIBYTE_LENGTH bytes per
              pattern character and comes from the stack via alloca, so
              stack use grows with the length of the pattern.  */
1298       patbuf = alloca (len * MAX_MULTIBYTE_LENGTH);
1299       pat = patbuf;
1300       base_pat = raw_pattern;
1301       if (multibyte)
1302         {
1303           /* Fill patbuf by translated characters in STRING while
1304              checking if we can use boyer-moore search.  If TRT is
1305              non-nil, we can use boyer-moore search only if TRT can be
1306              represented by the byte array of 256 elements.  For that,
1307              all non-ASCII case-equivalents of all case-sensitive
1308              characters in STRING must belong to the same charset and
1309              row.  */
1310
1311           while (--len >= 0)
1312             {
1313               unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str;
1314               int c, translated, inverse;
1315               int in_charlen, charlen;
1316
1317               /* If we got here and the RE flag is set, it's because we're
1318                  dealing with a regexp known to be trivial, so the backslash
1319                  just quotes the next character.  */
1320               if (RE && *base_pat == '\\')
1321                 {
                       /* Drop the backslash from every count.
                          RAW_PATTERN_SIZE matters because it is the
                          character count later given to simple_search.  */
1322                   len--;
1323                   raw_pattern_size--;
1324                   len_byte--;
1325                   base_pat++;
1326                 }
1327
1328               c = STRING_CHAR_AND_LENGTH (base_pat, in_charlen);
1329
1330               if (NILP (trt))
1331                 {
                       /* No translation: copy the character's bytes
                          unchanged.  */
1332                   str = base_pat;
1333                   charlen = in_charlen;
1334                 }
1335               else
1336                 {
1337                   /* Translate the character.  */
1338                   TRANSLATE (translated, trt, c);
1339                   charlen = CHAR_STRING (translated, str_base);
1340                   str = str_base;
1341
1342                   /* Check if C has any other case-equivalents.  */
1343                   TRANSLATE (inverse, inverse_trt, c);
1344                   /* If so, check if we can use boyer-moore.  */
1345                   if (c != inverse && boyer_moore_ok)
1346                     {
1347                       /* Check if all equivalents belong to the same
1348                          group of characters.  Note that the check of C
1349                          itself is done by the last iteration.  */
1350                       int this_char_base = -1;
1351
                           /* Follow the chain of equivalents through
                              INVERSE_TRT until it comes back to C.
                              THIS_CHAR_BASE is 0 once an ASCII equivalent
                              has been seen, otherwise the equivalents' code
                              with the low six bits cleared.  */
1352                       while (boyer_moore_ok)
1353                         {
1354                           if (ASCII_BYTE_P (inverse))
1355                             {
1356                               if (this_char_base > 0)
1357                                 boyer_moore_ok = 0;
1358                               else
1359                                 this_char_base = 0;
1360                             }
1361                           else if (CHAR_BYTE8_P (inverse))
1362                             /* Boyer-moore search can't handle a
1363                                translation of an eight-bit
1364                                character.  */
1365                             boyer_moore_ok = 0;
1366                           else if (this_char_base < 0)
1367                             {
1368                               this_char_base = inverse & ~0x3F;
1369                               if (char_base < 0)
1370                                 char_base = this_char_base;
1371                               else if (this_char_base != char_base)
1372                                 boyer_moore_ok = 0;
1373                             }
1374                           else if ((inverse & ~0x3F) != this_char_base)
1375                             boyer_moore_ok = 0;
1376                           if (c == inverse)
1377                             break;
1378                           TRANSLATE (inverse, inverse_trt, inverse);
1379                         }
1380                     }
1381                 }
1382
1383               /* Store this character into the translated pattern.  */
1384               memcpy (pat, str, charlen);
1385               pat += charlen;
1386               base_pat += in_charlen;
                   /* NOTE(review): LEN_BYTE is reassigned from PAT - PATBUF
                      after the loop and is not read in between, so this
                      bookkeeping appears to have no effect.  */
1387               len_byte -= in_charlen;
1388             }
1389
1390           /* If char_base is still negative we didn't find any translated
1391              non-ASCII characters.  */
1392           if (char_base < 0)
1393             char_base = 0;
1394         }
1395       else
1396         {
1397           /* Unibyte buffer.  */
1398           char_base = 0;
1399           while (--len >= 0)
1400             {
1401               int c, translated;
1402
1403               /* If we got here and the RE flag is set, it's because we're
1404                  dealing with a regexp known to be trivial, so the backslash
1405                  just quotes the next character.  */
1406               if (RE && *base_pat == '\\')
1407                 {
1408                   len--;
1409                   raw_pattern_size--;
1410                   base_pat++;
1411                 }
1412               c = *base_pat++;
1413               TRANSLATE (translated, trt, c);
1414               *pat++ = translated;
1415             }
1416         }
1417
           /* From here on LEN_BYTE is the byte length of the translated
              pattern in PATBUF, and PAT/BASE_PAT point at its start.  */
1418       len_byte = pat - patbuf;
1419       pat = base_pat = patbuf;
1420
1421       if (boyer_moore_ok)
1422         return boyer_moore (n, pat, len_byte, trt, inverse_trt,
1423                             pos_byte, lim_byte,
1424                             char_base);
1425       else
1426         return simple_search (n, pat, raw_pattern_size, len_byte, trt,
1427                               pos, pos_byte, lim, lim_byte);
1428     }
1429 }
1430 \f
1431 /* Do a simple string search N times for the string PAT,
1432    whose length is LEN/LEN_BYTE,
1433    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1434    TRT is the translation table.
        Each buffer character is passed through TRT before it is compared
        with the corresponding pattern character; PAT is compared as given.
1435
1436    Return the character position where the match is found.
        That is the end of the last match when searching forward (N > 0)
        and its start when searching backward (N < 0).  On success the
        starting byte position and byte length of the last match are
        passed to set_search_regs.
1437    Otherwise, if M matches remained to be found, return -M.
        If LIM equals POS none of the loops run, so this failure value is
        returned.
1438
1439    This kind of search works regardless of what is in PAT and
1440    regardless of what is in TRT.  It is used in cases where
1441    boyer_moore cannot work.  */
1442
1443 static EMACS_INT
1444 simple_search (EMACS_INT n, unsigned char *pat,
1445                ptrdiff_t len, ptrdiff_t len_byte, Lisp_Object trt,
1446                ptrdiff_t pos, ptrdiff_t pos_byte,
1447                ptrdiff_t lim, ptrdiff_t lim_byte)
1448 {
1449   int multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
       /* Direction is recorded up front because N is counted towards zero
          by the loops below.  */
1450   int forward = n > 0;
1451   /* Number of buffer bytes matched.  Note that this may be different
1452      from len_byte in a multibyte buffer.  */
1453   ptrdiff_t match_byte = PTRDIFF_MIN;
1454
       /* Forward search, multibyte buffer.  */
1455   if (lim > pos && multibyte)
1456     while (n > 0)
1457       {
1458         while (1)
1459           {
1460             /* Try matching at position POS.  */
1461             ptrdiff_t this_pos = pos;
1462             ptrdiff_t this_pos_byte = pos_byte;
1463             ptrdiff_t this_len = len;
1464             unsigned char *p = pat;
                 /* Give up once the pattern no longer fits before the
                    limit, measured in characters or in bytes.  */
1465             if (pos + len > lim || pos_byte + len_byte > lim_byte)
1466               goto stop;
1467
1468             while (this_len > 0)
1469               {
1470                 int charlen, buf_charlen;
1471                 int pat_ch, buf_ch;
1472
1473                 pat_ch = STRING_CHAR_AND_LENGTH (p, charlen);
1474                 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1475                                                  buf_charlen);
1476                 TRANSLATE (buf_ch, trt, buf_ch);
1477
1478                 if (buf_ch != pat_ch)
1479                   break;
1480
1481                 this_len--;
1482                 p += charlen;
1483
1484                 this_pos_byte += buf_charlen;
1485                 this_pos++;
1486               }
1487
1488             if (this_len == 0)
1489               {
                     /* Whole pattern matched: advance POS/POS_BYTE past
                        the match.  */
1490                 match_byte = this_pos_byte - pos_byte;
1491                 pos += len;
1492                 pos_byte += match_byte;
1493                 break;
1494               }
1495
                 /* Mismatch: retry one character further on.  */
1496             INC_BOTH (pos, pos_byte);
1497           }
1498
1499         n--;
1500       }
       /* Forward search, unibyte buffer: POS doubles as the byte position
          and LEN as the byte length.  */
1501   else if (lim > pos)
1502     while (n > 0)
1503       {
1504         while (1)
1505           {
1506             /* Try matching at position POS.  */
1507             ptrdiff_t this_pos = pos;
1508             ptrdiff_t this_len = len;
1509             unsigned char *p = pat;
1510
1511             if (pos + len > lim)
1512               goto stop;
1513
1514             while (this_len > 0)
1515               {
1516                 int pat_ch = *p++;
1517                 int buf_ch = FETCH_BYTE (this_pos);
1518                 TRANSLATE (buf_ch, trt, buf_ch);
1519
1520                 if (buf_ch != pat_ch)
1521                   break;
1522
1523                 this_len--;
1524                 this_pos++;
1525               }
1526
1527             if (this_len == 0)
1528               {
1529                 match_byte = len;
1530                 pos += len;
1531                 break;
1532               }
1533
1534             pos++;
1535           }
1536
1537         n--;
1538       }
1539   /* Backwards search.  */
1540   else if (lim < pos && multibyte)
1541     while (n < 0)
1542       {
1543         while (1)
1544           {
1545             /* Try matching at position POS.  */
1546             ptrdiff_t this_pos = pos;
1547             ptrdiff_t this_pos_byte = pos_byte;
1548             ptrdiff_t this_len = len;
                 /* Start from the end of the pattern and compare right to
                    left, so the candidate match ends at POS.  */
1549             const unsigned char *p = pat + len_byte;
1550
1551             if (this_pos - len < lim || (pos_byte - len_byte) < lim_byte)
1552               goto stop;
1553
1554             while (this_len > 0)
1555               {
1556                 int pat_ch, buf_ch;
1557
                     /* Step back one character in the buffer and in the
                        pattern before comparing (presumably what DEC_BOTH
                        and PREV_CHAR_BOUNDARY do; their definitions are
                        elsewhere).  */
1558                 DEC_BOTH (this_pos, this_pos_byte);
1559                 PREV_CHAR_BOUNDARY (p, pat);
1560                 pat_ch = STRING_CHAR (p);
1561                 buf_ch = STRING_CHAR (BYTE_POS_ADDR (this_pos_byte));
1562                 TRANSLATE (buf_ch, trt, buf_ch);
1563
1564                 if (buf_ch != pat_ch)
1565                   break;
1566
1567                 this_len--;
1568               }
1569
1570             if (this_len == 0)
1571               {
                     /* Matched: POS/POS_BYTE move to the start of the
                        match.  */
1572                 match_byte = pos_byte - this_pos_byte;
1573                 pos = this_pos;
1574                 pos_byte = this_pos_byte;
1575                 break;
1576               }
1577
1578             DEC_BOTH (pos, pos_byte);
1579           }
1580
1581         n++;
1582       }
       /* Backward search, unibyte buffer: the candidate match occupies
          POS - LEN up to POS and is compared left to right.  */
1583   else if (lim < pos)
1584     while (n < 0)
1585       {
1586         while (1)
1587           {
1588             /* Try matching at position POS.  */
1589             ptrdiff_t this_pos = pos - len;
1590             ptrdiff_t this_len = len;
1591             unsigned char *p = pat;
1592
1593             if (this_pos < lim)
1594               goto stop;
1595
1596             while (this_len > 0)
1597               {
1598                 int pat_ch = *p++;
1599                 int buf_ch = FETCH_BYTE (this_pos);
1600                 TRANSLATE (buf_ch, trt, buf_ch);
1601
1602                 if (buf_ch != pat_ch)
1603                   break;
1604                 this_len--;
1605                 this_pos++;
1606               }
1607
1608             if (this_len == 0)
1609               {
1610                 match_byte = len;
1611                 pos -= len;
1612                 break;
1613               }
1614
1615             pos--;
1616           }
1617
1618         n++;
1619       }
1620
1621  stop:
       /* N has reached zero only if every requested occurrence was
          found.  */
1622   if (n == 0)
1623     {
1624       eassert (match_byte != PTRDIFF_MIN);
           /* After a forward search the position is at the end of the
              match, so its start is MATCH_BYTE earlier; after a backward
              search the position is already the start.  */
1625       if (forward)
1626         set_search_regs ((multibyte ? pos_byte : pos) - match_byte, match_byte);
1627       else
1628         set_search_regs (multibyte ? pos_byte : pos, match_byte);
1629
1630       return pos;
1631     }
       /* Failure: return minus the number of occurrences not found,
          whichever direction was being searched.  */
1632   else if (n > 0)
1633     return -n;
1634   else
1635     return n;
1636 }
1637 \f
1638 /* Do Boyer-Moore search N times for the string BASE_PAT,
1639    whose length is LEN_BYTE,
1640    from buffer position POS_BYTE until LIM_BYTE.
1641    The sign of N says which direction we search in.
1642    TRT and INVERSE_TRT are translation tables.
1643    Characters in PAT are already translated by TRT.
1644
1645    This kind of search works if all the characters in BASE_PAT that
1646    have nontrivial translation are the same aside from the last byte.
1647    This makes it possible to translate just the last byte of a
1648    character, and do so after just a simple test of the context.
1649    CHAR_BASE is nonzero if there is such a non-ASCII character.
1650
1651    If that criterion is not satisfied, do not call this function.  */
1652
/* Arguments, as the body below uses them:
     N is the number of matches still wanted; N > 0 searches forward,
       otherwise the search runs backward.
     BASE_PAT and LEN_BYTE give the pattern bytes and their count.
     TRT and INVERSE_TRT are the translation table and its inverse;
       when TRT is nil no translation is set up.
     POS_BYTE is the byte position the search starts from and LIM_BYTE
       the byte position that bounds it.

   Return value:
     - when the last wanted match is found, the character position of
       its end (forward search) or of its start (backward search);
       set_search_regs is called for every match found on the way;
     - when the search runs past LIM_BYTE first, minus the number of
       matches that were still to be found;
     - when N is zero on entry, the main loop is skipped and
       BYTE_TO_CHAR of the adjusted POS_BYTE is returned.  */
static EMACS_INT
boyer_moore (EMACS_INT n, unsigned char *base_pat,
             ptrdiff_t len_byte,
             Lisp_Object trt, Lisp_Object inverse_trt,
             ptrdiff_t pos_byte, ptrdiff_t lim_byte,
             int char_base)
{
  /* +1 for a forward search, -1 for a backward one.  */
  int direction = ((n > 0) ? 1 : -1);
  /* Pattern length with the sign of DIRECTION.  */
  register ptrdiff_t dirlen;
  ptrdiff_t limit;
  /* Stride to use after the far byte matched but the rest of the
     pattern did not; see the comment at the end of the table loop.  */
  int stride_for_teases = 0;
  /* Stride table, indexed by a buffer byte.  */
  int BM_tab[0400];
  register unsigned char *cursor, *p_limit;
  register ptrdiff_t i;
  register int j;
  unsigned char *pat, *pat_end;
  int multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));

  unsigned char simple_translate[0400];
  /* These are set to the preceding bytes of a byte to be translated
     if char_base is nonzero.  As the maximum byte length of a
     multibyte character is 5, we have to check at most four previous
     bytes.
     NOTE(review): only three previous bytes are recorded and tested
     in this function -- confirm that this is sufficient.  */
  int translate_prev_byte1 = 0;
  int translate_prev_byte2 = 0;
  int translate_prev_byte3 = 0;

  /* The general approach is that we are going to maintain that we know
     the first (closest to the present position, in whatever direction
     we're searching) character that could possibly be the last
     (furthest from present position) character of a valid match.  We
     advance the state of our knowledge by looking at that character
     and seeing whether it indeed matches the last character of the
     pattern.  If it does, we take a closer look.  If it does not, we
     move our pointer (to putative last characters) as far as is
     logically possible.  This amount of movement, which I call a
     stride, will be the length of the pattern if the actual character
     appears nowhere in the pattern, otherwise it will be the distance
     from the last occurrence of that character to the end of the
     pattern.  If the amount is zero we have a possible match.  */

  /* Here we make a "mickey mouse" BM table.  The stride of the search
     is determined only by the last character of the putative match.
     If that character does not match, we will stride the proper
     distance to propose a match that superimposes it on the last
     instance of a character that matches it (per trt), or misses
     it entirely if there is none. */

  dirlen = len_byte * direction;

  /* Record position after the end of the pattern.  */
  pat_end = base_pat + len_byte;
  /* BASE_PAT points to a character that we start scanning from.
     It is the first character in a forward search,
     the last character in a backward search.  */
  if (direction < 0)
    base_pat = pat_end - 1;

  /* A character that does not appear in the pattern induces a
     stride equal to the pattern length.  */
  for (i = 0; i < 0400; i++)
    BM_tab[i] = dirlen;

  /* We use this for translation, instead of TRT itself.
     We fill this in to handle the characters that actually
     occur in the pattern.  Others don't matter anyway!  */
  for (i = 0; i < 0400; i++)
    simple_translate[i] = i;

  if (char_base)
    {
      /* Set up translate_prev_byte1/2/3 from the multibyte form of
         CHAR_BASE.  Only a byte that follows these bytes is a target
         of translation.  */
      unsigned char str[MAX_MULTIBYTE_LENGTH];
      int cblen = CHAR_STRING (char_base, str);

      translate_prev_byte1 = str[cblen - 2];
      if (cblen > 2)
        {
          translate_prev_byte2 = str[cblen - 3];
          if (cblen > 3)
            translate_prev_byte3 = str[cblen - 4];
        }
    }

  /* Walk the pattern from the near end to the far end, one byte at a
     time, filling in BM_tab and simple_translate.  Inside the loop,
     after I has been advanced, DIRLEN - I is the distance from the
     current byte to the far end of the pattern, i.e. its stride.  */
  i = 0;
  while (i != dirlen)
    {
      unsigned char *ptr = base_pat + i;
      i += direction;
      if (! NILP (trt))
        {
          /* If the byte currently looking at is the last of a
             character to check case-equivalents, set CH to that
             character.  An ASCII character and a non-ASCII character
             matching with CHAR_BASE are to be checked.  */
          int ch = -1;

          if (ASCII_BYTE_P (*ptr) || ! multibyte)
            ch = *ptr;
          else if (char_base
                   && ((pat_end - ptr) == 1 || CHAR_HEAD_P (ptr[1])))
            {
              /* PTR is the last byte of a multibyte character; back up
                 to that character's head byte and decode it.  */
              unsigned char *charstart = ptr - 1;

              while (! (CHAR_HEAD_P (*charstart)))
                charstart--;
              ch = STRING_CHAR (charstart);
              /* Ignore it unless it agrees with CHAR_BASE in all but
                 its low 6 bits.  */
              if (char_base != (ch & ~0x3F))
                ch = -1;
            }

          /* J is the table index for this pattern byte: for a
             non-ASCII CH in a multibyte buffer it is the low 6 bits of
             CH with the 0200 bit set, otherwise the byte itself.  */
          if (ch >= 0200 && multibyte)
            j = (ch & 0x3F) | 0200;
          else
            j = *ptr;

          /* On the far-end byte, remember the stride the table held
             for it before it is overwritten with 0 just below.  */
          if (i == dirlen)
            stride_for_teases = BM_tab[j];

          BM_tab[j] = dirlen - i;
          /* A translation table is accompanied by its inverse -- see
             comment following downcase_table for details.  */
          if (ch >= 0)
            {
              int starting_ch = ch;
              int starting_j = j;

              /* Keep applying INVERSE_TRT until we are back at
                 STARTING_CH, giving every character met on the way
                 the same stride and the same translation.  */
              while (1)
                {
                  TRANSLATE (ch, inverse_trt, ch);
                  if (ch >= 0200 && multibyte)
                    j = (ch & 0x3F) | 0200;
                  else
                    j = ch;

                  /* For all the characters that map into CH,
                     set up simple_translate to map the last byte
                     into STARTING_J.  */
                  simple_translate[j] = starting_j;
                  if (ch == starting_ch)
                    break;
                  BM_tab[j] = dirlen - i;
                }
            }
        }
      else
        {
          /* No translation: the byte indexes the table directly.  */
          j = *ptr;

          if (i == dirlen)
            stride_for_teases = BM_tab[j];
          BM_tab[j] = dirlen - i;
        }
      /* stride_for_teases tells how much to stride if we get a
         match on the far character but are subsequently
         disappointed, by recording what the stride would have been
         for that character if the last character had been
         different.  */
    }
  pos_byte += dirlen - ((direction > 0) ? direction : 0);
  /* loop invariant - POS_BYTE points at where last char (first
     char if reverse) of pattern would align in a possible match.  */
  while (n != 0)
    {
      ptrdiff_t tail_end;
      unsigned char *tail_end_ptr;

      /* It's been reported that some (broken) compiler thinks that
         Boolean expressions in an arithmetic context are unsigned.
         Using an explicit ?1:0 prevents this.  */
      /* POS_BYTE is already beyond LIM_BYTE: fail, returning minus
         the number of matches still wanted.  */
      if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
          < 0)
        return (n * (0 - direction));
      /* First we do the part we can by pointers (maybe nothing) */
      QUIT;
      pat = base_pat;
      limit = pos_byte - dirlen + direction;
      if (direction > 0)
        {
          limit = BUFFER_CEILING_OF (limit);
          /* LIMIT is now the last (not beyond-last!) value POS_BYTE
             can take on without hitting edge of buffer or the gap.  */
          /* Cap each pass at 20000 bytes -- presumably so that QUIT
             above is reached regularly; confirm.  */
          limit = min (limit, pos_byte + 20000);
          limit = min (limit, lim_byte - 1);
        }
      else
        {
          limit = BUFFER_FLOOR_OF (limit);
          /* LIMIT is now the last (not beyond-last!) value POS_BYTE
             can take on without hitting edge of buffer or the gap.  */
          limit = max (limit, pos_byte - 20000);
          limit = max (limit, lim_byte);
        }
      /* TAIL_END_PTR is compared against the scan pointers below
         before the byte following them is examined.  */
      tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
      tail_end_ptr = BYTE_POS_ADDR (tail_end);

      /* Take the raw-pointer loop only when more than 20 bytes can be
         scanned before reaching LIMIT.  */
      if ((limit - pos_byte) * direction > 20)
        {
          unsigned char *p2;

          p_limit = BYTE_POS_ADDR (limit);
          p2 = (cursor = BYTE_POS_ADDR (pos_byte));
          /* In this loop, pos + cursor - p2 is the surrogate for pos.  */
          while (1)             /* use one cursor setting as long as i can */
            {
              if (direction > 0) /* worth duplicating */
                {
                  while (cursor <= p_limit)
                    {
                      if (BM_tab[*cursor] == 0)
                        goto hit;
                      cursor += BM_tab[*cursor];
                    }
                }
              else
                {
                  while (cursor >= p_limit)
                    {
                      if (BM_tab[*cursor] == 0)
                        goto hit;
                      cursor += BM_tab[*cursor];
                    }
                }
              /* If you are here, cursor is beyond the end of the
                 searched region.  You fail to match within the
                 permitted region and would otherwise try a character
                 beyond that region.  */
              break;

            hit:
              /* The byte under CURSOR has stride 0, so it matches the
                 far-end byte of the pattern.  Compare the remaining
                 pattern bytes, walking back toward the near end; on
                 full success the loop leaves I + DIRECTION == 0.  */
              i = dirlen - direction;
              if (! NILP (trt))
                {
                  while ((i -= direction) + direction != 0)
                    {
                      int ch;
                      cursor -= direction;
                      /* Translate only the last byte of a character.  */
                      if (! multibyte
                          || ((cursor == tail_end_ptr
                               || CHAR_HEAD_P (cursor[1]))
                              && (CHAR_HEAD_P (cursor[0])
                                  /* Check if this is the last byte of
                                     a translatable character.  */
                                  || (translate_prev_byte1 == cursor[-1]
                                      && (CHAR_HEAD_P (translate_prev_byte1)
                                          || (translate_prev_byte2 == cursor[-2]
                                              && (CHAR_HEAD_P (translate_prev_byte2)
                                                  || (translate_prev_byte3 == cursor[-3]))))))))
                        ch = simple_translate[*cursor];
                      else
                        ch = *cursor;
                      if (pat[i] != ch)
                        break;
                    }
                }
              else
                {
                  while ((i -= direction) + direction != 0)
                    {
                      cursor -= direction;
                      if (pat[i] != *cursor)
                        break;
                    }
                }
              cursor += dirlen - i - direction; /* fix cursor */
              if (i + direction == 0)
                {
                  ptrdiff_t position, start, end;

                  cursor -= direction;

                  /* POSITION is the byte position at which the match
                     begins.  */
                  position = pos_byte + cursor - p2 + ((direction > 0)
                                                       ? 1 - len_byte : 0);
                  set_search_regs (position, len_byte);

                  if (NILP (Vinhibit_changing_match_data))
                    {
                      start = search_regs.start[0];
                      end = search_regs.end[0];
                    }
                  else
                    /* If Vinhibit_changing_match_data is non-nil,
                       search_regs will not be changed.  So let's
                       compute start and end here.  */
                    {
                      start = BYTE_TO_CHAR (position);
                      end = BYTE_TO_CHAR (position + len_byte);
                    }

                  /* One match fewer to find; stop when none remain.  */
                  if ((n -= direction) != 0)
                    cursor += dirlen; /* to resume search */
                  else
                    return direction > 0 ? end : start;
                }
              else
                cursor += stride_for_teases; /* <sigh> we lose -  */
            }
          /* Fold the distance covered by CURSOR back into POS_BYTE.  */
          pos_byte += cursor - p2;
        }
      else
        /* Now we'll pick up a clump that has to be done the hard
           way because it covers a discontinuity.  */
        {
          limit = ((direction > 0)
                   ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
                   : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
          limit = ((direction > 0)
                   ? min (limit + len_byte, lim_byte - 1)
                   : max (limit - len_byte, lim_byte));
          /* LIMIT is now the last value POS_BYTE can have
             and still be valid for a possible match.  */
          while (1)
            {
              /* This loop can be coded for space rather than
                 speed because it will usually run only once.
                 (the reach is at most len + 21, and typically
                 does not exceed len).  */
              while ((limit - pos_byte) * direction >= 0)
                {
                  int ch = FETCH_BYTE (pos_byte);
                  if (BM_tab[ch] == 0)
                    goto hit2;
                  pos_byte += BM_tab[ch];
                }
              break;    /* ran off the end */

            hit2:
              /* Found what might be a match.  */
              /* Same verification as at `hit' above, but addressing
                 the buffer through POS_BYTE.  simple_translate is the
                 identity when TRT is nil, so no separate untranslated
                 loop is needed here.  */
              i = dirlen - direction;
              while ((i -= direction) + direction != 0)
                {
                  int ch;
                  unsigned char *ptr;
                  pos_byte -= direction;
                  ptr = BYTE_POS_ADDR (pos_byte);
                  /* Translate only the last byte of a character.  */
                  if (! multibyte
                      || ((ptr == tail_end_ptr
                           || CHAR_HEAD_P (ptr[1]))
                          && (CHAR_HEAD_P (ptr[0])
                              /* Check if this is the last byte of a
                                 translatable character.  */
                              || (translate_prev_byte1 == ptr[-1]
                                  && (CHAR_HEAD_P (translate_prev_byte1)
                                      || (translate_prev_byte2 == ptr[-2]
                                          && (CHAR_HEAD_P (translate_prev_byte2)
                                              || translate_prev_byte3 == ptr[-3])))))))
                    ch = simple_translate[*ptr];
                  else
                    ch = *ptr;
                  if (pat[i] != ch)
                    break;
                }
              /* Above loop has moved POS_BYTE part or all the way
                 back to the first pos (last pos if reverse).
                 Set it once again at the last (first if reverse) char.  */
              pos_byte += dirlen - i - direction;
              if (i + direction == 0)
                {
                  ptrdiff_t position, start, end;
                  pos_byte -= direction;

                  /* POSITION is the byte position at which the match
                     begins.  */
                  position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
                  set_search_regs (position, len_byte);

                  if (NILP (Vinhibit_changing_match_data))
                    {
                      start = search_regs.start[0];
                      end = search_regs.end[0];
                    }
                  else
                    /* If Vinhibit_changing_match_data is non-nil,
                       search_regs will not be changed.  So let's
                       compute start and end here.  */
                    {
                      start = BYTE_TO_CHAR (position);
                      end = BYTE_TO_CHAR (position + len_byte);
                    }

                  if ((n -= direction) != 0)
                    pos_byte += dirlen; /* to resume search */
                  else
                    return direction > 0 ? end : start;
                }
              else
                pos_byte += stride_for_teases;
            }
          }
      /* We have done one clump.  Can we continue? */
      if ((lim_byte - pos_byte) * direction < 0)
        return ((0 - n) * direction);
    }
  /* Reached only when N was zero on entry: every path that brings N
     to zero inside the loop returns from there.  */
  return BYTE_TO_CHAR (pos_byte);
}
2049
2050 /* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
2051    for the overall match just found in the current buffer.
2052    Also clear out the match data for registers 1 and up.  */
2053
2054 static void
2055 set_search_regs (ptrdiff_t beg_byte, ptrdiff_t nbytes)
2056 {
2057   ptrdiff_t i;
2058
2059   if (!NILP (Vinhibit_changing_match_data))
2060     return;
2061
2062   /* Make sure we have registers in which to store
2063      the match position.  */
2064   if (search_regs.num_regs == 0)
2065     {
2066       search_regs.start = xmalloc (2 * sizeof (regoff_t));
2067       search_regs.end = xmalloc (2 * sizeof (regoff_t));
2068       search_regs.num_regs = 2;
2069     }
2070
2071   /* Clear out the other registers.  */
2072   for (i = 1; i < search_regs.num_regs; i++)
2073     {
2074       search_regs.start[i] = -1;
2075       search_regs.end[i] = -1;
2076     }
2077
2078   search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
2079   search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
2080   XSETBUFFER (last_thing_searched, current_buffer);
2081 }
2082 \f
2083 DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
2084        "MSearch backward: ",
2085        doc: /* Search backward from point for STRING.
2086 Set point to the beginning of the occurrence found, and return point.
2087 An optional second argument bounds the search; it is a buffer position.
2088 The match found must not extend before that position.
2089 Optional third argument, if t, means if fail just return nil (no error).
2090  If not nil and not t, position at limit of search and return nil.
2091 Optional fourth argument COUNT, if non-nil, means to search for COUNT
2092  successive occurrences.  If COUNT is negative, search forward,
2093  instead of backward, for -COUNT occurrences.
2094
2095 Search case-sensitivity is determined by the value of the variable
2096 `case-fold-search', which see.
2097
2098 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2099   (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2100 {
2101   return search_command (string, bound, noerror, count, -1, 0, 0);
2102 }
2103
2104 DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
2105        doc: /* Search forward from point for STRING.
2106 Set point to the end of the occurrence found, and return point.
2107 An optional second argument bounds the search; it is a buffer position.
2108 The match found must not extend after that position.  A value of nil is
2109   equivalent to (point-max).
2110 Optional third argument, if t, means if fail just return nil (no error).
2111   If not nil and not t, move to limit of search and return nil.
2112 Optional fourth argument COUNT, if non-nil, means to search for COUNT
2113  successive occurrences.  If COUNT is negative, search backward,
2114  instead of forward, for -COUNT occurrences.
2115
2116 Search case-sensitivity is determined by the value of the variable
2117 `case-fold-search', which see.
2118
2119 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2120   (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2121 {
2122   return search_command (string, bound, noerror, count, 1, 0, 0);
2123 }
2124
2125 DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
2126        "sRE search backward: ",
2127        doc: /* Search backward from point for match for regular expression REGEXP.
2128 Set point to the beginning of the match, and return point.
2129 The match found is the one starting last in the buffer
2130 and yet ending before the origin of the search.
2131 An optional second argument bounds the search; it is a buffer position.
2132 The match found must start at or after that position.
2133 Optional third argument, if t, means if fail just return nil (no error).
2134   If not nil and not t, move to limit of search and return nil.
2135 Optional fourth argument is repeat count--search for successive occurrences.
2136
2137 Search case-sensitivity is determined by the value of the variable
2138 `case-fold-search', which see.
2139
2140 See also the functions `match-beginning', `match-end', `match-string',
2141 and `replace-match'.  */)
2142   (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2143 {
2144   return search_command (regexp, bound, noerror, count, -1, 1, 0);
2145 }
2146
2147 DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
2148        "sRE search: ",
2149        doc: /* Search forward from point for regular expression REGEXP.
2150 Set point to the end of the occurrence found, and return point.
2151 An optional second argument bounds the search; it is a buffer position.
2152 The match found must not extend after that position.
2153 Optional third argument, if t, means if fail just return nil (no error).
2154   If not nil and not t, move to limit of search and return nil.
2155 Optional fourth argument is repeat count--search for successive occurrences.
2156
2157 Search case-sensitivity is determined by the value of the variable
2158 `case-fold-search', which see.
2159
2160 See also the functions `match-beginning', `match-end', `match-string',
2161 and `replace-match'.  */)
2162   (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2163 {
2164   return search_command (regexp, bound, noerror, count, 1, 1, 0);
2165 }
2166
2167 DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
2168        "sPosix search backward: ",
2169        doc: /* Search backward from point for match for regular expression REGEXP.
2170 Find the longest match in accord with Posix regular expression rules.
2171 Set point to the beginning of the match, and return point.
2172 The match found is the one starting last in the buffer
2173 and yet ending before the origin of the search.
2174 An optional second argument bounds the search; it is a buffer position.
2175 The match found must start at or after that position.
2176 Optional third argument, if t, means if fail just return nil (no error).
2177   If not nil and not t, move to limit of search and return nil.
2178 Optional fourth argument is repeat count--search for successive occurrences.
2179
2180 Search case-sensitivity is determined by the value of the variable
2181 `case-fold-search', which see.
2182
2183 See also the functions `match-beginning', `match-end', `match-string',
2184 and `replace-match'.  */)
2185   (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2186 {
2187   return search_command (regexp, bound, noerror, count, -1, 1, 1);
2188 }
2189
2190 DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
2191        "sPosix search: ",
2192        doc: /* Search forward from point for regular expression REGEXP.
2193 Find the longest match in accord with Posix regular expression rules.
2194 Set point to the end of the occurrence found, and return point.
2195 An optional second argument bounds the search; it is a buffer position.
2196 The match found must not extend after that position.
2197 Optional third argument, if t, means if fail just return nil (no error).
2198   If not nil and not t, move to limit of search and return nil.
2199 Optional fourth argument is repeat count--search for successive occurrences.
2200
2201 Search case-sensitivity is determined by the value of the variable
2202 `case-fold-search', which see.
2203
2204 See also the functions `match-beginning', `match-end', `match-string',
2205 and `replace-match'.  */)
2206   (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2207 {
2208   return search_command (regexp, bound, noerror, count, 1, 1, 1);
2209 }
2210 \f
2211 DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
2212        doc: /* Replace text matched by last search with NEWTEXT.
2213 Leave point at the end of the replacement text.
2214
2215 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
2216 Otherwise maybe capitalize the whole text, or maybe just word initials,
2217 based on the replaced text.
2218 If the replaced text has only capital letters
2219 and has at least one multiletter word, convert NEWTEXT to all caps.
2220 Otherwise if all words are capitalized in the replaced text,
2221 capitalize each word in NEWTEXT.
2222
2223 If third arg LITERAL is non-nil, insert NEWTEXT literally.
2224 Otherwise treat `\\' as special:
2225   `\\&' in NEWTEXT means substitute original matched text.
2226   `\\N' means substitute what matched the Nth `\\(...\\)'.
2227        If Nth parens didn't match, substitute nothing.
2228   `\\\\' means insert one `\\'.
2229   `\\?' is treated literally
2230        (for compatibility with `query-replace-regexp').
2231   Any other character following `\\' signals an error.
2232 Case conversion does not apply to these substitutions.
2233
2234 FIXEDCASE and LITERAL are optional arguments.
2235
2236 The optional fourth argument STRING can be a string to modify.
2237 This is meaningful when the previous match was done against STRING,
2238 using `string-match'.  When used this way, `replace-match'
2239 creates and returns a new string made by copying STRING and replacing
2240 the part of STRING that was matched.
2241
2242 The optional fifth argument SUBEXP specifies a subexpression;
2243 it says to replace just that subexpression with NEWTEXT,
2244 rather than replacing the entire matched text.
2245 This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
2246 `\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
2247 NEWTEXT in place of subexp N.
2248 This is useful only after a regular expression search or match,
2249 since only regular expressions have distinguished subexpressions.  */)
2250   (Lisp_Object newtext, Lisp_Object fixedcase, Lisp_Object literal, Lisp_Object string, Lisp_Object subexp)
2251 {
2252   enum { nochange, all_caps, cap_initial } case_action;
2253   register ptrdiff_t pos, pos_byte;
2254   int some_multiletter_word;
2255   int some_lowercase;
2256   int some_uppercase;
2257   int some_nonuppercase_initial;
2258   register int c, prevc;
2259   ptrdiff_t sub;
2260   ptrdiff_t opoint, newpoint;
2261
2262   CHECK_STRING (newtext);
2263
2264   if (! NILP (string))
2265     CHECK_STRING (string);
2266
2267   case_action = nochange;       /* We tried an initialization */
2268                                 /* but some C compilers blew it */
2269
2270   if (search_regs.num_regs <= 0)
2271     error ("`replace-match' called before any match found");
2272
2273   if (NILP (subexp))
2274     sub = 0;
2275   else
2276     {
2277       CHECK_NUMBER (subexp);
2278       if (! (0 <= XINT (subexp) && XINT (subexp) < search_regs.num_regs))
2279         args_out_of_range (subexp, make_number (search_regs.num_regs));
2280       sub = XINT (subexp);
2281     }
2282
2283   if (NILP (string))
2284     {
2285       if (search_regs.start[sub] < BEGV
2286           || search_regs.start[sub] > search_regs.end[sub]
2287           || search_regs.end[sub] > ZV)
2288         args_out_of_range (make_number (search_regs.start[sub]),
2289                            make_number (search_regs.end[sub]));
2290     }
2291   else
2292     {
2293       if (search_regs.start[sub] < 0
2294           || search_regs.start[sub] > search_regs.end[sub]
2295           || search_regs.end[sub] > SCHARS (string))
2296         args_out_of_range (make_number (search_regs.start[sub]),
2297                            make_number (search_regs.end[sub]));
2298     }
2299
2300   if (NILP (fixedcase))
2301     {
2302       /* Decide how to casify by examining the matched text. */
2303       ptrdiff_t last;
2304
2305       pos = search_regs.start[sub];
2306       last = search_regs.end[sub];
2307
2308       if (NILP (string))
2309         pos_byte = CHAR_TO_BYTE (pos);
2310       else
2311         pos_byte = string_char_to_byte (string, pos);
2312
2313       prevc = '\n';
2314       case_action = all_caps;
2315
2316       /* some_multiletter_word is set nonzero if any original word
2317          is more than one letter long. */
2318       some_multiletter_word = 0;
2319       some_lowercase = 0;
2320       some_nonuppercase_initial = 0;
2321       some_uppercase = 0;
2322
2323       while (pos < last)
2324         {
2325           if (NILP (string))
2326             {
2327               c = FETCH_CHAR_AS_MULTIBYTE (pos_byte);
2328               INC_BOTH (pos, pos_byte);
2329             }
2330           else
2331             FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, pos, pos_byte);
2332
2333           if (lowercasep (c))
2334             {
2335               /* Cannot be all caps if any original char is lower case */
2336
2337               some_lowercase = 1;
2338               if (SYNTAX (prevc) != Sword)
2339                 some_nonuppercase_initial = 1;
2340               else
2341                 some_multiletter_word = 1;
2342             }
2343           else if (uppercasep (c))
2344             {
2345               some_uppercase = 1;
2346               if (SYNTAX (prevc) != Sword)
2347                 ;
2348               else
2349                 some_multiletter_word = 1;
2350             }
2351           else
2352             {
2353               /* If the initial is a caseless word constituent,
2354                  treat that like a lowercase initial.  */
2355               if (SYNTAX (prevc) != Sword)
2356                 some_nonuppercase_initial = 1;
2357             }
2358
2359           prevc = c;
2360         }
2361
2362       /* Convert to all caps if the old text is all caps
2363          and has at least one multiletter word.  */
2364       if (! some_lowercase && some_multiletter_word)
2365         case_action = all_caps;
2366       /* Capitalize each word, if the old text has all capitalized words.  */
2367       else if (!some_nonuppercase_initial && some_multiletter_word)
2368         case_action = cap_initial;
2369       else if (!some_nonuppercase_initial && some_uppercase)
2370         /* Should x -> yz, operating on X, give Yz or YZ?
2371            We'll assume the latter.  */
2372         case_action = all_caps;
2373       else
2374         case_action = nochange;
2375     }
2376
2377   /* Do replacement in a string.  */
2378   if (!NILP (string))
2379     {
2380       Lisp_Object before, after;
2381
2382       before = Fsubstring (string, make_number (0),
2383                            make_number (search_regs.start[sub]));
2384       after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
2385
2386       /* Substitute parts of the match into NEWTEXT
2387          if desired.  */
2388       if (NILP (literal))
2389         {
2390           ptrdiff_t lastpos = 0;
2391           ptrdiff_t lastpos_byte = 0;
2392           /* We build up the substituted string in ACCUM.  */
2393           Lisp_Object accum;
2394           Lisp_Object middle;
2395           ptrdiff_t length = SBYTES (newtext);
2396
2397           accum = Qnil;
2398
2399           for (pos_byte = 0, pos = 0; pos_byte < length;)
2400             {
2401               ptrdiff_t substart = -1;
2402               ptrdiff_t subend = 0;
2403               int delbackslash = 0;
2404
2405               FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2406
2407               if (c == '\\')
2408                 {
2409                   FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2410
2411                   if (c == '&')
2412                     {
2413                       substart = search_regs.start[sub];
2414                       subend = search_regs.end[sub];
2415                     }
2416                   else if (c >= '1' && c <= '9')
2417                     {
2418                       if (c - '0' < search_regs.num_regs
2419                           && 0 <= search_regs.start[c - '0'])
2420                         {
2421                           substart = search_regs.start[c - '0'];
2422                           subend = search_regs.end[c - '0'];
2423                         }
2424                       else
2425                         {
2426                           /* If that subexp did not match,
2427                              replace \\N with nothing.  */
2428                           substart = 0;
2429                           subend = 0;
2430                         }
2431                     }
2432                   else if (c == '\\')
2433                     delbackslash = 1;
2434                   else if (c != '?')
2435                     error ("Invalid use of `\\' in replacement text");
2436                 }
2437               if (substart >= 0)
2438                 {
2439                   if (pos - 2 != lastpos)
2440                     middle = substring_both (newtext, lastpos,
2441                                              lastpos_byte,
2442                                              pos - 2, pos_byte - 2);
2443                   else
2444                     middle = Qnil;
2445                   accum = concat3 (accum, middle,
2446                                    Fsubstring (string,
2447                                                make_number (substart),
2448                                                make_number (subend)));
2449                   lastpos = pos;
2450                   lastpos_byte = pos_byte;
2451                 }
2452               else if (delbackslash)
2453                 {
2454                   middle = substring_both (newtext, lastpos,
2455                                            lastpos_byte,
2456                                            pos - 1, pos_byte - 1);
2457
2458                   accum = concat2 (accum, middle);
2459                   lastpos = pos;
2460                   lastpos_byte = pos_byte;
2461                 }
2462             }
2463
2464           if (pos != lastpos)
2465             middle = substring_both (newtext, lastpos,
2466                                      lastpos_byte,
2467                                      pos, pos_byte);
2468           else
2469             middle = Qnil;
2470
2471           newtext = concat2 (accum, middle);
2472         }
2473
2474       /* Do case substitution in NEWTEXT if desired.  */
2475       if (case_action == all_caps)
2476         newtext = Fupcase (newtext);
2477       else if (case_action == cap_initial)
2478         newtext = Fupcase_initials (newtext);
2479
2480       return concat3 (before, newtext, after);
2481     }
2482
2483   /* Record point, then move (quietly) to the start of the match.  */
2484   if (PT >= search_regs.end[sub])
2485     opoint = PT - ZV;
2486   else if (PT > search_regs.start[sub])
2487     opoint = search_regs.end[sub] - ZV;
2488   else
2489     opoint = PT;
2490
2491   /* If we want non-literal replacement,
2492      perform substitution on the replacement string.  */
2493   if (NILP (literal))
2494     {
2495       ptrdiff_t length = SBYTES (newtext);
2496       unsigned char *substed;
2497       ptrdiff_t substed_alloc_size, substed_len;
2498       int buf_multibyte = !NILP (BVAR (current_buffer, enable_multibyte_characters));
2499       int str_multibyte = STRING_MULTIBYTE (newtext);
2500       int really_changed = 0;
2501
2502       substed_alloc_size = ((STRING_BYTES_BOUND - 100) / 2 < length
2503                             ? STRING_BYTES_BOUND
2504                             : length * 2 + 100);
2505       substed = xmalloc (substed_alloc_size);
2506       substed_len = 0;
2507
2508       /* Go thru NEWTEXT, producing the actual text to insert in
2509          SUBSTED while adjusting multibyteness to that of the current
2510          buffer.  */
2511
2512       for (pos_byte = 0, pos = 0; pos_byte < length;)
2513         {
2514           unsigned char str[MAX_MULTIBYTE_LENGTH];
2515           const unsigned char *add_stuff = NULL;
2516           ptrdiff_t add_len = 0;
2517           ptrdiff_t idx = -1;
2518
2519           if (str_multibyte)
2520             {
2521               FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte);
2522               if (!buf_multibyte)
2523                 c = multibyte_char_to_unibyte (c);
2524             }
2525           else
2526             {
2527               /* Note that we don't have to increment POS.  */
2528               c = SREF (newtext, pos_byte++);
2529               if (buf_multibyte)
2530                 MAKE_CHAR_MULTIBYTE (c);
2531             }
2532
2533           /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2534              or set IDX to a match index, which means put that part
2535              of the buffer text into SUBSTED.  */
2536
2537           if (c == '\\')
2538             {
2539               really_changed = 1;
2540
2541               if (str_multibyte)
2542                 {
2543                   FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext,
2544                                                       pos, pos_byte);
2545                   if (!buf_multibyte && !ASCII_CHAR_P (c))
2546                     c = multibyte_char_to_unibyte (c);
2547                 }
2548               else
2549                 {
2550                   c = SREF (newtext, pos_byte++);
2551                   if (buf_multibyte)
2552                     MAKE_CHAR_MULTIBYTE (c);
2553                 }
2554
2555               if (c == '&')
2556                 idx = sub;
2557               else if (c >= '1' && c <= '9' && c - '0' < search_regs.num_regs)
2558                 {
2559                   if (search_regs.start[c - '0'] >= 1)
2560                     idx = c - '0';
2561                 }
2562               else if (c == '\\')
2563                 add_len = 1, add_stuff = (unsigned char *) "\\";
2564               else
2565                 {
2566                   xfree (substed);
2567                   error ("Invalid use of `\\' in replacement text");
2568                 }
2569             }
2570           else
2571             {
2572               add_len = CHAR_STRING (c, str);
2573               add_stuff = str;
2574             }
2575
2576           /* If we want to copy part of a previous match,
2577              set up ADD_STUFF and ADD_LEN to point to it.  */
2578           if (idx >= 0)
2579             {
2580               ptrdiff_t begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
2581               add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
2582               if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
2583                 move_gap (search_regs.start[idx]);
2584               add_stuff = BYTE_POS_ADDR (begbyte);
2585             }
2586
2587           /* Now the stuff we want to add to SUBSTED
2588              is invariably ADD_LEN bytes starting at ADD_STUFF.  */
2589
2590           /* Make sure SUBSTED is big enough.  */
2591           if (substed_alloc_size - substed_len < add_len)
2592             substed =
2593               xpalloc (substed, &substed_alloc_size,
2594                        add_len - (substed_alloc_size - substed_len),
2595                        STRING_BYTES_BOUND, 1);
2596
2597           /* Now add to the end of SUBSTED.  */
2598           if (add_stuff)
2599             {
2600               memcpy (substed + substed_len, add_stuff, add_len);
2601               substed_len += add_len;
2602             }
2603         }
2604
2605       if (really_changed)
2606         {
2607           if (buf_multibyte)
2608             {
2609               ptrdiff_t nchars =
2610                 multibyte_chars_in_text (substed, substed_len);
2611
2612               newtext = make_multibyte_string ((char *) substed, nchars,
2613                                                substed_len);
2614             }
2615           else
2616             newtext = make_unibyte_string ((char *) substed, substed_len);
2617         }
2618       xfree (substed);
2619     }
2620
2621   /* Replace the old text with the new in the cleanest possible way.  */
2622   replace_range (search_regs.start[sub], search_regs.end[sub],
2623                  newtext, 1, 0, 1);
2624   newpoint = search_regs.start[sub] + SCHARS (newtext);
2625
2626   if (case_action == all_caps)
2627     Fupcase_region (make_number (search_regs.start[sub]),
2628                     make_number (newpoint));
2629   else if (case_action == cap_initial)
2630     Fupcase_initials_region (make_number (search_regs.start[sub]),
2631                              make_number (newpoint));
2632
2633   /* Adjust search data for this change.  */
2634   {
2635     ptrdiff_t oldend = search_regs.end[sub];
2636     ptrdiff_t oldstart = search_regs.start[sub];
2637     ptrdiff_t change = newpoint - search_regs.end[sub];
2638     ptrdiff_t i;
2639
2640     for (i = 0; i < search_regs.num_regs; i++)
2641       {
2642         if (search_regs.start[i] >= oldend)
2643           search_regs.start[i] += change;
2644         else if (search_regs.start[i] > oldstart)
2645           search_regs.start[i] = oldstart;
2646         if (search_regs.end[i] >= oldend)
2647           search_regs.end[i] += change;
2648         else if (search_regs.end[i] > oldstart)
2649           search_regs.end[i] = oldstart;
2650       }
2651   }
2652
2653   /* Put point back where it was in the text.  */
2654   if (opoint <= 0)
2655     TEMP_SET_PT (opoint + ZV);
2656   else
2657     TEMP_SET_PT (opoint);
2658
2659   /* Now move point "officially" to the start of the inserted replacement.  */
2660   move_if_not_intangible (newpoint);
2661
2662   return Qnil;
2663 }
2664 \f
2665 static Lisp_Object
2666 match_limit (Lisp_Object num, int beginningp)
2667 {
2668   EMACS_INT n;
2669
2670   CHECK_NUMBER (num);
2671   n = XINT (num);
2672   if (n < 0)
2673     args_out_of_range (num, make_number (0));
2674   if (search_regs.num_regs <= 0)
2675     error ("No match data, because no search succeeded");
2676   if (n >= search_regs.num_regs
2677       || search_regs.start[n] < 0)
2678     return Qnil;
2679   return (make_number ((beginningp) ? search_regs.start[n]
2680                                     : search_regs.end[n]));
2681 }
2682
2683 DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
2684        doc: /* Return position of start of text matched by last search.
2685 SUBEXP, a number, specifies which parenthesized expression in the last
2686   regexp.
2687 Value is nil if SUBEXPth pair didn't match, or there were less than
2688   SUBEXP pairs.
2689 Zero means the entire text matched by the whole regexp or whole string.  */)
2690   (Lisp_Object subexp)
2691 {
2692   return match_limit (subexp, 1);
2693 }
2694
2695 DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
2696        doc: /* Return position of end of text matched by last search.
2697 SUBEXP, a number, specifies which parenthesized expression in the last
2698   regexp.
2699 Value is nil if SUBEXPth pair didn't match, or there were less than
2700   SUBEXP pairs.
2701 Zero means the entire text matched by the whole regexp or whole string.  */)
2702   (Lisp_Object subexp)
2703 {
2704   return match_limit (subexp, 0);
2705 }
2706
2707 DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 3, 0,
2708        doc: /* Return a list containing all info on what the last search matched.
2709 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2710 All the elements are markers or nil (nil if the Nth pair didn't match)
2711 if the last match was on a buffer; integers or nil if a string was matched.
2712 Use `set-match-data' to reinstate the data in this list.
2713
2714 If INTEGERS (the optional first argument) is non-nil, always use
2715 integers \(rather than markers) to represent buffer positions.  In
2716 this case, and if the last match was in a buffer, the buffer will get
2717 stored as one additional element at the end of the list.
2718
2719 If REUSE is a list, reuse it as part of the value.  If REUSE is long
2720 enough to hold all the values, and if INTEGERS is non-nil, no consing
2721 is done.
2722
2723 If optional third arg RESEAT is non-nil, any previous markers on the
2724 REUSE list will be modified to point to nowhere.
2725
2726 Return value is undefined if the last search failed.  */)
2727   (Lisp_Object integers, Lisp_Object reuse, Lisp_Object reseat)
2728 {
2729   Lisp_Object tail, prev;
2730   Lisp_Object *data;
2731   ptrdiff_t i, len;
2732
2733   if (!NILP (reseat))
2734     for (tail = reuse; CONSP (tail); tail = XCDR (tail))
2735       if (MARKERP (XCAR (tail)))
2736         {
2737           unchain_marker (XMARKER (XCAR (tail)));
2738           XSETCAR (tail, Qnil);
2739         }
2740
2741   if (NILP (last_thing_searched))
2742     return Qnil;
2743
2744   prev = Qnil;
2745
2746   data = alloca ((2 * search_regs.num_regs + 1) * sizeof *data);
2747
2748   len = 0;
2749   for (i = 0; i < search_regs.num_regs; i++)
2750     {
2751       ptrdiff_t start = search_regs.start[i];
2752       if (start >= 0)
2753         {
2754           if (EQ (last_thing_searched, Qt)
2755               || ! NILP (integers))
2756             {
2757               XSETFASTINT (data[2 * i], start);
2758               XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
2759             }
2760           else if (BUFFERP (last_thing_searched))
2761             {
2762               data[2 * i] = Fmake_marker ();
2763               Fset_marker (data[2 * i],
2764                            make_number (start),
2765                            last_thing_searched);
2766               data[2 * i + 1] = Fmake_marker ();
2767               Fset_marker (data[2 * i + 1],
2768                            make_number (search_regs.end[i]),
2769                            last_thing_searched);
2770             }
2771           else
2772             /* last_thing_searched must always be Qt, a buffer, or Qnil.  */
2773             abort ();
2774
2775           len = 2 * i + 2;
2776         }
2777       else
2778         data[2 * i] = data[2 * i + 1] = Qnil;
2779     }
2780
2781   if (BUFFERP (last_thing_searched) && !NILP (integers))
2782     {
2783       data[len] = last_thing_searched;
2784       len++;
2785     }
2786
2787   /* If REUSE is not usable, cons up the values and return them.  */
2788   if (! CONSP (reuse))
2789     return Flist (len, data);
2790
2791   /* If REUSE is a list, store as many value elements as will fit
2792      into the elements of REUSE.  */
2793   for (i = 0, tail = reuse; CONSP (tail);
2794        i++, tail = XCDR (tail))
2795     {
2796       if (i < len)
2797         XSETCAR (tail, data[i]);
2798       else
2799         XSETCAR (tail, Qnil);
2800       prev = tail;
2801     }
2802
2803   /* If we couldn't fit all value elements into REUSE,
2804      cons up the rest of them and add them to the end of REUSE.  */
2805   if (i < len)
2806     XSETCDR (prev, Flist (len - i, data + i));
2807
2808   return reuse;
2809 }
2810
2811 /* We used to have an internal use variant of `reseat' described as:
2812
2813       If RESEAT is `evaporate', put the markers back on the free list
2814       immediately.  No other references to the markers must exist in this
2815       case, so it is used only internally on the unwind stack and
2816       save-match-data from Lisp.
2817
2818    But it was ill-conceived: those supposedly-internal markers get exposed via
2819    the undo-list, so freeing them here is unsafe.  */
2820
2821 DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 2, 0,
2822        doc: /* Set internal data on last search match from elements of LIST.
2823 LIST should have been created by calling `match-data' previously.
2824
2825 If optional arg RESEAT is non-nil, make markers on LIST point nowhere.  */)
2826   (register Lisp_Object list, Lisp_Object reseat)
2827 {
2828   ptrdiff_t i;
2829   register Lisp_Object marker;
2830
2831   if (running_asynch_code)
2832     save_search_regs ();
2833
2834   CHECK_LIST (list);
2835
2836   /* Unless we find a marker with a buffer or an explicit buffer
2837      in LIST, assume that this match data came from a string.  */
2838   last_thing_searched = Qt;
2839
2840   /* Allocate registers if they don't already exist.  */
2841   {
2842     EMACS_INT length = XFASTINT (Flength (list)) / 2;
2843
2844     if (length > search_regs.num_regs)
2845       {
2846         ptrdiff_t num_regs = search_regs.num_regs;
2847         if (PTRDIFF_MAX < length)
2848           memory_full (SIZE_MAX);
2849         search_regs.start =
2850           xpalloc (search_regs.start, &num_regs, length - num_regs,
2851                    min (PTRDIFF_MAX, UINT_MAX), sizeof (regoff_t));
2852         search_regs.end =
2853           xrealloc (search_regs.end, num_regs * sizeof (regoff_t));
2854
2855         for (i = search_regs.num_regs; i < num_regs; i++)
2856           search_regs.start[i] = -1;
2857
2858         search_regs.num_regs = num_regs;
2859       }
2860
2861     for (i = 0; CONSP (list); i++)
2862       {
2863         marker = XCAR (list);
2864         if (BUFFERP (marker))
2865           {
2866             last_thing_searched = marker;
2867             break;
2868           }
2869         if (i >= length)
2870           break;
2871         if (NILP (marker))
2872           {
2873             search_regs.start[i] = -1;
2874             list = XCDR (list);
2875           }
2876         else
2877           {
2878             Lisp_Object from;
2879             Lisp_Object m;
2880
2881             m = marker;
2882             if (MARKERP (marker))
2883               {
2884                 if (XMARKER (marker)->buffer == 0)
2885                   XSETFASTINT (marker, 0);
2886                 else
2887                   XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
2888               }
2889
2890             CHECK_NUMBER_COERCE_MARKER (marker);
2891             from = marker;
2892
2893             if (!NILP (reseat) && MARKERP (m))
2894               {
2895                 unchain_marker (XMARKER (m));
2896                 XSETCAR (list, Qnil);
2897               }
2898
2899             if ((list = XCDR (list), !CONSP (list)))
2900               break;
2901
2902             m = marker = XCAR (list);
2903
2904             if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
2905               XSETFASTINT (marker, 0);
2906
2907             CHECK_NUMBER_COERCE_MARKER (marker);
2908             if ((XINT (from) < 0
2909                  ? TYPE_MINIMUM (regoff_t) <= XINT (from)
2910                  : XINT (from) <= TYPE_MAXIMUM (regoff_t))
2911                 && (XINT (marker) < 0
2912                     ? TYPE_MINIMUM (regoff_t) <= XINT (marker)
2913                     : XINT (marker) <= TYPE_MAXIMUM (regoff_t)))
2914               {
2915                 search_regs.start[i] = XINT (from);
2916                 search_regs.end[i] = XINT (marker);
2917               }
2918             else
2919               {
2920                 search_regs.start[i] = -1;
2921               }
2922
2923             if (!NILP (reseat) && MARKERP (m))
2924               {
2925                 unchain_marker (XMARKER (m));
2926                 XSETCAR (list, Qnil);
2927               }
2928           }
2929         list = XCDR (list);
2930       }
2931
2932     for (; i < search_regs.num_regs; i++)
2933       search_regs.start[i] = -1;
2934   }
2935
2936   return Qnil;
2937 }
2938
2939 /* If non-zero the match data have been saved in saved_search_regs
2940    during the execution of a sentinel or filter. */
2941 static int search_regs_saved;
2942 static struct re_registers saved_search_regs;
2943 static Lisp_Object saved_last_thing_searched;
2944
2945 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
2946    if asynchronous code (filter or sentinel) is running. */
2947 static void
2948 save_search_regs (void)
2949 {
2950   if (!search_regs_saved)
2951     {
2952       saved_search_regs.num_regs = search_regs.num_regs;
2953       saved_search_regs.start = search_regs.start;
2954       saved_search_regs.end = search_regs.end;
2955       saved_last_thing_searched = last_thing_searched;
2956       last_thing_searched = Qnil;
2957       search_regs.num_regs = 0;
2958       search_regs.start = 0;
2959       search_regs.end = 0;
2960
2961       search_regs_saved = 1;
2962     }
2963 }
2964
2965 /* Called upon exit from filters and sentinels. */
2966 void
2967 restore_search_regs (void)
2968 {
2969   if (search_regs_saved)
2970     {
2971       if (search_regs.num_regs > 0)
2972         {
2973           xfree (search_regs.start);
2974           xfree (search_regs.end);
2975         }
2976       search_regs.num_regs = saved_search_regs.num_regs;
2977       search_regs.start = saved_search_regs.start;
2978       search_regs.end = saved_search_regs.end;
2979       last_thing_searched = saved_last_thing_searched;
2980       saved_last_thing_searched = Qnil;
2981       search_regs_saved = 0;
2982     }
2983 }
2984
2985 static Lisp_Object
2986 unwind_set_match_data (Lisp_Object list)
2987 {
2988   /* It is NOT ALWAYS safe to free (evaporate) the markers immediately.  */
2989   return Fset_match_data (list, Qt);
2990 }
2991
2992 /* Called to unwind protect the match data.  */
2993 void
2994 record_unwind_save_match_data (void)
2995 {
2996   record_unwind_protect (unwind_set_match_data,
2997                          Fmatch_data (Qnil, Qnil, Qnil));
2998 }
2999
3000 /* Quote a string to deactivate reg-expr chars */
3001
3002 DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
3003        doc: /* Return a regexp string which matches exactly STRING and nothing else.  */)
3004   (Lisp_Object string)
3005 {
3006   register char *in, *out, *end;
3007   register char *temp;
3008   int backslashes_added = 0;
3009
3010   CHECK_STRING (string);
3011
3012   temp = alloca (SBYTES (string) * 2);
3013
3014   /* Now copy the data into the new string, inserting escapes. */
3015
3016   in = SSDATA (string);
3017   end = in + SBYTES (string);
3018   out = temp;
3019
3020   for (; in != end; in++)
3021     {
3022       if (*in == '['
3023           || *in == '*' || *in == '.' || *in == '\\'
3024           || *in == '?' || *in == '+'
3025           || *in == '^' || *in == '$')
3026         *out++ = '\\', backslashes_added++;
3027       *out++ = *in;
3028     }
3029
3030   return make_specified_string (temp,
3031                                 SCHARS (string) + backslashes_added,
3032                                 out - temp,
3033                                 STRING_MULTIBYTE (string));
3034 }
3035 \f
3036 void
3037 syms_of_search (void)
3038 {
3039   register int i;
3040
3041   for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
3042     {
3043       searchbufs[i].buf.allocated = 100;
3044       searchbufs[i].buf.buffer = xmalloc (100);
3045       searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
3046       searchbufs[i].regexp = Qnil;
3047       searchbufs[i].whitespace_regexp = Qnil;
3048       searchbufs[i].syntax_table = Qnil;
3049       staticpro (&searchbufs[i].regexp);
3050       staticpro (&searchbufs[i].whitespace_regexp);
3051       staticpro (&searchbufs[i].syntax_table);
3052       searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
3053     }
3054   searchbuf_head = &searchbufs[0];
3055
3056   DEFSYM (Qsearch_failed, "search-failed");
3057   DEFSYM (Qinvalid_regexp, "invalid-regexp");
3058
3059   Fput (Qsearch_failed, Qerror_conditions,
3060         listn (CONSTYPE_PURE, 2, Qsearch_failed, Qerror));
3061   Fput (Qsearch_failed, Qerror_message,
3062         build_pure_c_string ("Search failed"));
3063
3064   Fput (Qinvalid_regexp, Qerror_conditions,
3065         listn (CONSTYPE_PURE, 2, Qinvalid_regexp, Qerror));
3066   Fput (Qinvalid_regexp, Qerror_message,
3067         build_pure_c_string ("Invalid regexp"));
3068
3069   last_thing_searched = Qnil;
3070   staticpro (&last_thing_searched);
3071
3072   saved_last_thing_searched = Qnil;
3073   staticpro (&saved_last_thing_searched);
3074
3075   DEFVAR_LISP ("search-spaces-regexp", Vsearch_spaces_regexp,
3076       doc: /* Regexp to substitute for bunches of spaces in regexp search.
3077 Some commands use this for user-specified regexps.
3078 Spaces that occur inside character classes or repetition operators
3079 or other such regexp constructs are not replaced with this.
3080 A value of nil (which is the normal value) means treat spaces literally.  */);
3081   Vsearch_spaces_regexp = Qnil;
3082
3083   DEFVAR_LISP ("inhibit-changing-match-data", Vinhibit_changing_match_data,
3084       doc: /* Internal use only.
3085 If non-nil, the primitive searching and matching functions
3086 such as `looking-at', `string-match', `re-search-forward', etc.,
3087 do not set the match data.  The proper way to use this variable
3088 is to bind it with `let' around a small expression.  */);
3089   Vinhibit_changing_match_data = Qnil;
3090
3091   defsubr (&Slooking_at);
3092   defsubr (&Sposix_looking_at);
3093   defsubr (&Sstring_match);
3094   defsubr (&Sposix_string_match);
3095   defsubr (&Ssearch_forward);
3096   defsubr (&Ssearch_backward);
3097   defsubr (&Sre_search_forward);
3098   defsubr (&Sre_search_backward);
3099   defsubr (&Sposix_search_forward);
3100   defsubr (&Sposix_search_backward);
3101   defsubr (&Sreplace_match);
3102   defsubr (&Smatch_beginning);
3103   defsubr (&Smatch_end);
3104   defsubr (&Smatch_data);
3105   defsubr (&Sset_match_data);
3106   defsubr (&Sregexp_quote);
3107 }