src/search.c

   1 /* String search routines for GNU Emacs.
   2
   3 Copyright (C) 1985-1987, 1993-1994, 1997-1999, 2001-2013 Free Software
   4 Foundation, Inc.
   5
   6 This file is part of GNU Emacs.
   7
   8 GNU Emacs is free software: you can redistribute it and/or modify
   9 it under the terms of the GNU General Public License as published by
  10 the Free Software Foundation, either version 3 of the License, or
  11 (at your option) any later version.
  12
  13 GNU Emacs is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
  20
  21
  22 #include <config.h>
  23
  24 #include "lisp.h"
  25 #include "syntax.h"
  26 #include "category.h"
  27 #include "character.h"
  28 #include "buffer.h"
  29 #include "charset.h"
  30 #include "region-cache.h"
  31 #include "commands.h"
  32 #include "blockinput.h"
  33 #include "intervals.h"
  34
  35 #include <sys/types.h>
  36 #include "regex.h"
  37
  38 #define REGEXP_CACHE_SIZE 20
  39
  40 /* If the regexp is non-nil, then the buffer contains the compiled form
  41    of that regexp, suitable for searching.  */
  42 struct regexp_cache
  43 {
  44   struct regexp_cache *next;
  45   Lisp_Object regexp, whitespace_regexp;
  46   /* Syntax table for which the regexp applies.  We need this because
  47      of character classes.  If this is t, then the compiled pattern is valid
  48      for any syntax-table.  */
  49   Lisp_Object syntax_table;
  50   struct re_pattern_buffer buf;
  51   char fastmap[0400];
  52   /* Nonzero means regexp was compiled to do full POSIX backtracking.  */
  53   char posix;
  54 };
  55
/* The cache slots themselves: a fixed pool of REGEXP_CACHE_SIZE
   entries of struct regexp_cache.  */
static struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];

/* The head of the linked list of cache slots; points to the most
   recently used one.  compile_pattern moves the slot it returns to
   the front of this list.  */
static struct regexp_cache *searchbuf_head;
  61
  62
/* The match registers shared by the searching and matching functions
   in this file.

   Every call to re_match, etc., must pass &search_regs as the regs
   argument unless you can show it is unnecessary (i.e., if re_match
   is certainly going to be called again before region-around-match
   can be called).

   Since the registers are now dynamically allocated, we need to make
   sure not to refer to the Nth register before checking that it has
   been allocated by checking search_regs.num_regs.

   The regex code keeps track of whether it has allocated the search
   buffer using bits in the re_pattern_buffer.  This means that whenever
   you compile a new pattern, it completely forgets whether it has
   allocated any registers, and will allocate new registers the next
   time you call a searching or matching function.  Therefore, we need
   to call re_set_registers after compiling a new pattern or after
   setting the match registers, so that the regex functions will be
   able to free or re-allocate it properly.  compile_pattern makes
   that call whenever it is given a non-null register structure.  */
static struct re_registers search_regs;

/* The buffer in which the last search was performed, or
   Qt if the last search was done in a string;
   Qnil if no searching has been done yet.
   looking_at_1 and string_match_1 update this only when
   Vinhibit_changing_match_data is nil, i.e. only when the match data
   itself may have been changed.  */
static Lisp_Object last_thing_searched;

/* Error condition signaled when regexp compile_pattern fails.
   compile_pattern_1 signals it with the message returned by
   re_compile_pattern.  */
static Lisp_Object Qinvalid_regexp;

/* Error condition used for failing searches.  */
static Lisp_Object Qsearch_failed;
  92
/* Forward declarations of file-local (static) functions.  For
   example, save_search_regs is called by the matching functions below
   before its definition appears.  */
static void set_search_regs (ptrdiff_t, ptrdiff_t);
static void save_search_regs (void);
static EMACS_INT simple_search (EMACS_INT, unsigned char *, ptrdiff_t,
                                ptrdiff_t, Lisp_Object, ptrdiff_t, ptrdiff_t,
                                ptrdiff_t, ptrdiff_t);
static EMACS_INT boyer_moore (EMACS_INT, unsigned char *, ptrdiff_t,
                              Lisp_Object, Lisp_Object, ptrdiff_t,
                              ptrdiff_t, int);
static EMACS_INT search_buffer (Lisp_Object, ptrdiff_t, ptrdiff_t,
                                ptrdiff_t, ptrdiff_t, EMACS_INT, int,
                                Lisp_Object, Lisp_Object, int);
 104
/* Signal a Lisp error reporting that the regexp matcher overflowed
   its stack.  The matching functions in this file call it when the
   regex routine they used returns -2.  Declared _Noreturn: control
   does not come back to the caller.  */
static _Noreturn void
matcher_overflow (void)
{
  error ("Stack overflow in regexp matcher");
}
 110
 111 /* Compile a regexp and signal a Lisp error if anything goes wrong.
 112    PATTERN is the pattern to compile.
 113    CP is the place to put the result.
 114    TRANSLATE is a translation table for ignoring case, or nil for none.
 115    POSIX is nonzero if we want full backtracking (POSIX style)
 116    for this pattern.  0 means backtrack only enough to get a valid match.
 117
 118    The behavior also depends on Vsearch_spaces_regexp.  */
 119
 120 static void
 121 compile_pattern_1 (struct regexp_cache *cp, Lisp_Object pattern, Lisp_Object translate, int posix)
 122 {
 123   char *val;
 124   reg_syntax_t old;
 125
 126   cp->regexp = Qnil;
 127   cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
 128   cp->posix = posix;
 129   cp->buf.multibyte = STRING_MULTIBYTE (pattern);
 130   cp->buf.charset_unibyte = charset_unibyte;
 131   if (STRINGP (Vsearch_spaces_regexp))
 132     cp->whitespace_regexp = Vsearch_spaces_regexp;
 133   else
 134     cp->whitespace_regexp = Qnil;
 135
 136   /* rms: I think BLOCK_INPUT is not needed here any more,
 137      because regex.c defines malloc to call xmalloc.
 138      Using BLOCK_INPUT here means the debugger won't run if an error occurs.
 139      So let's turn it off.  */
 140   /*  BLOCK_INPUT;  */
 141   old = re_set_syntax (RE_SYNTAX_EMACS
 142                        | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
 143
 144   if (STRINGP (Vsearch_spaces_regexp))
 145     re_set_whitespace_regexp (SSDATA (Vsearch_spaces_regexp));
 146   else
 147     re_set_whitespace_regexp (NULL);
 148
 149   val = (char *) re_compile_pattern (SSDATA (pattern),
 150                                      SBYTES (pattern), &cp->buf);
 151
 152   /* If the compiled pattern hard codes some of the contents of the
 153      syntax-table, it can only be reused with *this* syntax table.  */
 154   cp->syntax_table = cp->buf.used_syntax ? BVAR (current_buffer, syntax_table) : Qt;
 155
 156   re_set_whitespace_regexp (NULL);
 157
 158   re_set_syntax (old);
 159   /* unblock_input ();  */
 160   if (val)
 161     xsignal1 (Qinvalid_regexp, build_string (val));
 162
 163   cp->regexp = Fcopy_sequence (pattern);
 164 }
 165
 166 /* Shrink each compiled regexp buffer in the cache
 167    to the size actually used right now.
 168    This is called from garbage collection.  */
 169
 170 void
 171 shrink_regexp_cache (void)
 172 {
 173   struct regexp_cache *cp;
 174
 175   for (cp = searchbuf_head; cp != 0; cp = cp->next)
 176     {
 177       cp->buf.allocated = cp->buf.used;
 178       cp->buf.buffer = xrealloc (cp->buf.buffer, cp->buf.used);
 179     }
 180 }
 181
 182 /* Clear the regexp cache w.r.t. a particular syntax table,
 183    because it was changed.
 184    There is no danger of memory leak here because re_compile_pattern
 185    automagically manages the memory in each re_pattern_buffer struct,
 186    based on its `allocated' and `buffer' values.  */
 187 void
 188 clear_regexp_cache (void)
 189 {
 190   int i;
 191
 192   for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
 193     /* It's tempting to compare with the syntax-table we've actually changed,
 194        but it's not sufficient because char-table inheritance means that
 195        modifying one syntax-table can change others at the same time.  */
 196     if (!EQ (searchbufs[i].syntax_table, Qt))
 197       searchbufs[i].regexp = Qnil;
 198 }
 199
 200 /* Compile a regexp if necessary, but first check to see if there's one in
 201    the cache.
 202    PATTERN is the pattern to compile.
 203    TRANSLATE is a translation table for ignoring case, or nil for none.
 204    REGP is the structure that says where to store the "register"
 205    values that will result from matching this pattern.
 206    If it is 0, we should compile the pattern not to record any
 207    subexpression bounds.
 208    POSIX is nonzero if we want full backtracking (POSIX style)
 209    for this pattern.  0 means backtrack only enough to get a valid match.  */
 210
 211 struct re_pattern_buffer *
 212 compile_pattern (Lisp_Object pattern, struct re_registers *regp, Lisp_Object translate, int posix, int multibyte)
 213 {
 214   struct regexp_cache *cp, **cpp;
 215
 216   for (cpp = &searchbuf_head; ; cpp = &cp->next)
 217     {
 218       cp = *cpp;
 219       /* Entries are initialized to nil, and may be set to nil by
 220          compile_pattern_1 if the pattern isn't valid.  Don't apply
 221          string accessors in those cases.  However, compile_pattern_1
 222          is only applied to the cache entry we pick here to reuse.  So
 223          nil should never appear before a non-nil entry.  */
 224       if (NILP (cp->regexp))
 225         goto compile_it;
 226       if (SCHARS (cp->regexp) == SCHARS (pattern)
 227           && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
 228           && !NILP (Fstring_equal (cp->regexp, pattern))
 229           && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
 230           && cp->posix == posix
 231           && (EQ (cp->syntax_table, Qt)
 232               || EQ (cp->syntax_table, BVAR (current_buffer, syntax_table)))
 233           && !NILP (Fequal (cp->whitespace_regexp, Vsearch_spaces_regexp))
 234           && cp->buf.charset_unibyte == charset_unibyte)
 235         break;
 236
 237       /* If we're at the end of the cache, compile into the nil cell
 238          we found, or the last (least recently used) cell with a
 239          string value.  */
 240       if (cp->next == 0)
 241         {
 242         compile_it:
 243           compile_pattern_1 (cp, pattern, translate, posix);
 244           break;
 245         }
 246     }
 247
 248   /* When we get here, cp (aka *cpp) contains the compiled pattern,
 249      either because we found it in the cache or because we just compiled it.
 250      Move it to the front of the queue to mark it as most recently used.  */
 251   *cpp = cp->next;
 252   cp->next = searchbuf_head;
 253   searchbuf_head = cp;
 254
 255   /* Advise the searching functions about the space we have allocated
 256      for register data.  */
 257   if (regp)
 258     re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
 259
 260   /* The compiled pattern can be used both for multibyte and unibyte
 261      target.  But, we have to tell which the pattern is used for. */
 262   cp->buf.target_multibyte = multibyte;
 263
 264   return &cp->buf;
 265 }
 266
 267 \f
 268 static Lisp_Object
 269 looking_at_1 (Lisp_Object string, int posix)
 270 {
 271   Lisp_Object val;
 272   unsigned char *p1, *p2;
 273   ptrdiff_t s1, s2;
 274   register ptrdiff_t i;
 275   struct re_pattern_buffer *bufp;
 276
 277   if (running_asynch_code)
 278     save_search_regs ();
 279
 280   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
 281   set_char_table_extras (BVAR (current_buffer, case_canon_table), 2,
 282                          BVAR (current_buffer, case_eqv_table));
 283
 284   CHECK_STRING (string);
 285   bufp = compile_pattern (string,
 286                           (NILP (Vinhibit_changing_match_data)
 287                            ? &search_regs : NULL),
 288                           (!NILP (BVAR (current_buffer, case_fold_search))
 289                            ? BVAR (current_buffer, case_canon_table) : Qnil),
 290                           posix,
 291                           !NILP (BVAR (current_buffer, enable_multibyte_characters)));
 292
 293   immediate_quit = 1;
 294   QUIT;                 /* Do a pending quit right away, to avoid paradoxical behavior */
 295
 296   /* Get pointers and sizes of the two strings
 297      that make up the visible portion of the buffer. */
 298
 299   p1 = BEGV_ADDR;
 300   s1 = GPT_BYTE - BEGV_BYTE;
 301   p2 = GAP_END_ADDR;
 302   s2 = ZV_BYTE - GPT_BYTE;
 303   if (s1 < 0)
 304     {
 305       p2 = p1;
 306       s2 = ZV_BYTE - BEGV_BYTE;
 307       s1 = 0;
 308     }
 309   if (s2 < 0)
 310     {
 311       s1 = ZV_BYTE - BEGV_BYTE;
 312       s2 = 0;
 313     }
 314
 315   re_match_object = Qnil;
 316
 317   i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
 318                   PT_BYTE - BEGV_BYTE,
 319                   (NILP (Vinhibit_changing_match_data)
 320                    ? &search_regs : NULL),
 321                   ZV_BYTE - BEGV_BYTE);
 322   immediate_quit = 0;
 323
 324   if (i == -2)
 325     matcher_overflow ();
 326
 327   val = (0 <= i ? Qt : Qnil);
 328   if (NILP (Vinhibit_changing_match_data) && i >= 0)
 329     for (i = 0; i < search_regs.num_regs; i++)
 330       if (search_regs.start[i] >= 0)
 331         {
 332           search_regs.start[i]
 333             = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
 334           search_regs.end[i]
 335             = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
 336         }
 337
 338   /* Set last_thing_searched only when match data is changed.  */
 339   if (NILP (Vinhibit_changing_match_data))
 340     XSETBUFFER (last_thing_searched, current_buffer);
 341
 342   return val;
 343 }
 344
DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
       doc: /* Return t if text after point matches regular expression REGEXP.
This function modifies the match data that `match-beginning',
`match-end' and `match-data' access; save and restore the match
data if you want to preserve them.  */)
  (Lisp_Object regexp)
{
  /* The second argument of looking_at_1 is its POSIX flag; 0 means
     backtrack only enough to get a valid match.  */
  return looking_at_1 (regexp, 0);
}
 354
DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
       doc: /* Return t if text after point matches regular expression REGEXP.
Find the longest match, in accord with Posix regular expression rules.
This function modifies the match data that `match-beginning',
`match-end' and `match-data' access; save and restore the match
data if you want to preserve them.  */)
  (Lisp_Object regexp)
{
  /* The second argument of looking_at_1 is its POSIX flag; 1 requests
     full POSIX backtracking.  */
  return looking_at_1 (regexp, 1);
}
 365 \f
 366 static Lisp_Object
 367 string_match_1 (Lisp_Object regexp, Lisp_Object string, Lisp_Object start, int posix)
 368 {
 369   ptrdiff_t val;
 370   struct re_pattern_buffer *bufp;
 371   EMACS_INT pos;
 372   ptrdiff_t pos_byte, i;
 373
 374   if (running_asynch_code)
 375     save_search_regs ();
 376
 377   CHECK_STRING (regexp);
 378   CHECK_STRING (string);
 379
 380   if (NILP (start))
 381     pos = 0, pos_byte = 0;
 382   else
 383     {
 384       ptrdiff_t len = SCHARS (string);
 385
 386       CHECK_NUMBER (start);
 387       pos = XINT (start);
 388       if (pos < 0 && -pos <= len)
 389         pos = len + pos;
 390       else if (0 > pos || pos > len)
 391         args_out_of_range (string, start);
 392       pos_byte = string_char_to_byte (string, pos);
 393     }
 394
 395   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
 396   set_char_table_extras (BVAR (current_buffer, case_canon_table), 2,
 397                          BVAR (current_buffer, case_eqv_table));
 398
 399   bufp = compile_pattern (regexp,
 400                           (NILP (Vinhibit_changing_match_data)
 401                            ? &search_regs : NULL),
 402                           (!NILP (BVAR (current_buffer, case_fold_search))
 403                            ? BVAR (current_buffer, case_canon_table) : Qnil),
 404                           posix,
 405                           STRING_MULTIBYTE (string));
 406   immediate_quit = 1;
 407   re_match_object = string;
 408
 409   val = re_search (bufp, SSDATA (string),
 410                    SBYTES (string), pos_byte,
 411                    SBYTES (string) - pos_byte,
 412                    (NILP (Vinhibit_changing_match_data)
 413                     ? &search_regs : NULL));
 414   immediate_quit = 0;
 415
 416   /* Set last_thing_searched only when match data is changed.  */
 417   if (NILP (Vinhibit_changing_match_data))
 418     last_thing_searched = Qt;
 419
 420   if (val == -2)
 421     matcher_overflow ();
 422   if (val < 0) return Qnil;
 423
 424   if (NILP (Vinhibit_changing_match_data))
 425     for (i = 0; i < search_regs.num_regs; i++)
 426       if (search_regs.start[i] >= 0)
 427         {
 428           search_regs.start[i]
 429             = string_byte_to_char (string, search_regs.start[i]);
 430           search_regs.end[i]
 431             = string_byte_to_char (string, search_regs.end[i]);
 432         }
 433
 434   return make_number (string_byte_to_char (string, val));
 435 }
 436
DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
       doc: /* Return index of start of first match for REGEXP in STRING, or nil.
Matching ignores case if `case-fold-search' is non-nil.
If third arg START is non-nil, start search at that index in STRING.
For index of first char beyond the match, do (match-end 0).
`match-end' and `match-beginning' also give indices of substrings
matched by parenthesis constructs in the pattern.

You can use the function `match-string' to extract the substrings
matched by the parenthesis constructions in REGEXP. */)
  (Lisp_Object regexp, Lisp_Object string, Lisp_Object start)
{
  /* The last argument of string_match_1 is its POSIX flag; 0 means
     backtrack only enough to get a valid match.  */
  return string_match_1 (regexp, string, start, 0);
}
 451
DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
       doc: /* Return index of start of first match for REGEXP in STRING, or nil.
Find the longest match, in accord with Posix regular expression rules.
Case is ignored if `case-fold-search' is non-nil in the current buffer.
If third arg START is non-nil, start search at that index in STRING.
For index of first char beyond the match, do (match-end 0).
`match-end' and `match-beginning' also give indices of substrings
matched by parenthesis constructs in the pattern.  */)
  (Lisp_Object regexp, Lisp_Object string, Lisp_Object start)
{
  /* The last argument of string_match_1 is its POSIX flag; 1 requests
     full POSIX backtracking.  */
  return string_match_1 (regexp, string, start, 1);
}
 464
 465 /* Match REGEXP against STRING, searching all of STRING,
 466    and return the index of the match, or negative on failure.
 467    This does not clobber the match data.  */
 468
 469 ptrdiff_t
 470 fast_string_match (Lisp_Object regexp, Lisp_Object string)
 471 {
 472   ptrdiff_t val;
 473   struct re_pattern_buffer *bufp;
 474
 475   bufp = compile_pattern (regexp, 0, Qnil,
 476                           0, STRING_MULTIBYTE (string));
 477   immediate_quit = 1;
 478   re_match_object = string;
 479
 480   val = re_search (bufp, SSDATA (string),
 481                    SBYTES (string), 0,
 482                    SBYTES (string), 0);
 483   immediate_quit = 0;
 484   return val;
 485 }
 486
 487 /* Match REGEXP against STRING, searching all of STRING ignoring case,
 488    and return the index of the match, or negative on failure.
 489    This does not clobber the match data.
 490    We assume that STRING contains single-byte characters.  */
 491
 492 ptrdiff_t
 493 fast_c_string_match_ignore_case (Lisp_Object regexp,
 494                                  const char *string, ptrdiff_t len)
 495 {
 496   ptrdiff_t val;
 497   struct re_pattern_buffer *bufp;
 498
 499   regexp = string_make_unibyte (regexp);
 500   re_match_object = Qt;
 501   bufp = compile_pattern (regexp, 0,
 502                           Vascii_canon_table, 0,
 503                           0);
 504   immediate_quit = 1;
 505   val = re_search (bufp, string, len, 0, len, 0);
 506   immediate_quit = 0;
 507   return val;
 508 }
 509
 510 /* Like fast_string_match but ignore case.  */
 511
 512 ptrdiff_t
 513 fast_string_match_ignore_case (Lisp_Object regexp, Lisp_Object string)
 514 {
 515   ptrdiff_t val;
 516   struct re_pattern_buffer *bufp;
 517
 518   bufp = compile_pattern (regexp, 0, Vascii_canon_table,
 519                           0, STRING_MULTIBYTE (string));
 520   immediate_quit = 1;
 521   re_match_object = string;
 522
 523   val = re_search (bufp, SSDATA (string),
 524                    SBYTES (string), 0,
 525                    SBYTES (string), 0);
 526   immediate_quit = 0;
 527   return val;
 528 }
 529 \f
 530 /* Match REGEXP against the characters after POS to LIMIT, and return
 531    the number of matched characters.  If STRING is non-nil, match
 532    against the characters in it.  In that case, POS and LIMIT are
 533    indices into the string.  This function doesn't modify the match
 534    data.  */
 535
 536 ptrdiff_t
 537 fast_looking_at (Lisp_Object regexp, ptrdiff_t pos, ptrdiff_t pos_byte, ptrdiff_t limit, ptrdiff_t limit_byte, Lisp_Object string)
 538 {
 539   int multibyte;
 540   struct re_pattern_buffer *buf;
 541   unsigned char *p1, *p2;
 542   ptrdiff_t s1, s2;
 543   ptrdiff_t len;
 544
 545   if (STRINGP (string))
 546     {
 547       if (pos_byte < 0)
 548         pos_byte = string_char_to_byte (string, pos);
 549       if (limit_byte < 0)
 550         limit_byte = string_char_to_byte (string, limit);
 551       p1 = NULL;
 552       s1 = 0;
 553       p2 = SDATA (string);
 554       s2 = SBYTES (string);
 555       re_match_object = string;
 556       multibyte = STRING_MULTIBYTE (string);
 557     }
 558   else
 559     {
 560       if (pos_byte < 0)
 561         pos_byte = CHAR_TO_BYTE (pos);
 562       if (limit_byte < 0)
 563         limit_byte = CHAR_TO_BYTE (limit);
 564       pos_byte -= BEGV_BYTE;
 565       limit_byte -= BEGV_BYTE;
 566       p1 = BEGV_ADDR;
 567       s1 = GPT_BYTE - BEGV_BYTE;
 568       p2 = GAP_END_ADDR;
 569       s2 = ZV_BYTE - GPT_BYTE;
 570       if (s1 < 0)
 571         {
 572           p2 = p1;
 573           s2 = ZV_BYTE - BEGV_BYTE;
 574           s1 = 0;
 575         }
 576       if (s2 < 0)
 577         {
 578           s1 = ZV_BYTE - BEGV_BYTE;
 579           s2 = 0;
 580         }
 581       re_match_object = Qnil;
 582       multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
 583     }
 584
 585   buf = compile_pattern (regexp, 0, Qnil, 0, multibyte);
 586   immediate_quit = 1;
 587   len = re_match_2 (buf, (char *) p1, s1, (char *) p2, s2,
 588                     pos_byte, NULL, limit_byte);
 589   immediate_quit = 0;
 590
 591   return len;
 592 }
 593
 594 \f
 595 /* The newline cache: remembering which sections of text have no newlines.  */
 596
 597 /* If the user has requested newline caching, make sure it's on.
 598    Otherwise, make sure it's off.
 599    This is our cheezy way of associating an action with the change of
 600    state of a buffer-local variable.  */
 601 static void
 602 newline_cache_on_off (struct buffer *buf)
 603 {
 604   if (NILP (BVAR (buf, cache_long_line_scans)))
 605     {
 606       /* It should be off.  */
 607       if (buf->newline_cache)
 608         {
 609           free_region_cache (buf->newline_cache);
 610           buf->newline_cache = 0;
 611         }
 612     }
 613   else
 614     {
 615       /* It should be on.  */
 616       if (buf->newline_cache == 0)
 617         buf->newline_cache = new_region_cache ();
 618     }
 619 }
 620
 621 \f
 622 /* Search for COUNT instances of the character TARGET between START and END.
 623
 624    If COUNT is positive, search forwards; END must be >= START.
 625    If COUNT is negative, search backwards for the -COUNTth instance;
 626       END must be <= START.
 627    If COUNT is zero, do anything you please; run rogue, for all I care.
 628
 629    If END is zero, use BEGV or ZV instead, as appropriate for the
 630    direction indicated by COUNT.
 631
 632    If we find COUNT instances, set *SHORTAGE to zero, and return the
 633    position past the COUNTth match.  Note that for reverse motion
 634    this is not the same as the usual convention for Emacs motion commands.
 635
 636    If we don't find COUNT instances before reaching END, set *SHORTAGE
 637    to the number of TARGETs left unfound, and return END.
 638
 639    If ALLOW_QUIT, set immediate_quit.  That's good to do
 640    except when inside redisplay.  */
 641
 642 ptrdiff_t
 643 scan_buffer (int target, ptrdiff_t start, ptrdiff_t end,
 644              ptrdiff_t count, ptrdiff_t *shortage, bool allow_quit)
 645 {
 646   struct region_cache *newline_cache;
 647   int direction;
 648
 649   if (count > 0)
 650     {
 651       direction = 1;
 652       if (! end) end = ZV;
 653     }
 654   else
 655     {
 656       direction = -1;
 657       if (! end) end = BEGV;
 658     }
 659
 660   newline_cache_on_off (current_buffer);
 661   newline_cache = current_buffer->newline_cache;
 662
 663   if (shortage != 0)
 664     *shortage = 0;
 665
 666   immediate_quit = allow_quit;
 667
 668   if (count > 0)
 669     while (start != end)
 670       {
 671         /* Our innermost scanning loop is very simple; it doesn't know
 672            about gaps, buffer ends, or the newline cache.  ceiling is
 673            the position of the last character before the next such
 674            obstacle --- the last character the dumb search loop should
 675            examine.  */
 676         ptrdiff_t ceiling_byte = CHAR_TO_BYTE (end) - 1;
 677         ptrdiff_t start_byte;
 678         ptrdiff_t tem;
 679
 680         /* If we're looking for a newline, consult the newline cache
 681            to see where we can avoid some scanning.  */
 682         if (target == '\n' && newline_cache)
 683           {
 684             ptrdiff_t next_change;
 685             immediate_quit = 0;
 686             while (region_cache_forward
 687                    (current_buffer, newline_cache, start, &next_change))
 688               start = next_change;
 689             immediate_quit = allow_quit;
 690
 691             start_byte = CHAR_TO_BYTE (start);
 692
 693             /* START should never be after END.  */
 694             if (start_byte > ceiling_byte)
 695               start_byte = ceiling_byte;
 696
 697             /* Now the text after start is an unknown region, and
 698                next_change is the position of the next known region. */
 699             ceiling_byte = min (CHAR_TO_BYTE (next_change) - 1, ceiling_byte);
 700           }
 701         else
 702           start_byte = CHAR_TO_BYTE (start);
 703
 704         /* The dumb loop can only scan text stored in contiguous
 705            bytes. BUFFER_CEILING_OF returns the last character
 706            position that is contiguous, so the ceiling is the
 707            position after that.  */
 708         tem = BUFFER_CEILING_OF (start_byte);
 709         ceiling_byte = min (tem, ceiling_byte);
 710
 711         {
 712           /* The termination address of the dumb loop.  */
 713           register unsigned char *ceiling_addr
 714             = BYTE_POS_ADDR (ceiling_byte) + 1;
 715           register unsigned char *cursor
 716             = BYTE_POS_ADDR (start_byte);
 717           unsigned char *base = cursor;
 718
 719           while (cursor < ceiling_addr)
 720             {
 721               unsigned char *scan_start = cursor;
 722
 723               /* The dumb loop.  */
 724               while (*cursor != target && ++cursor < ceiling_addr)
 725                 ;
 726
 727               /* If we're looking for newlines, cache the fact that
 728                  the region from start to cursor is free of them. */
 729               if (target == '\n' && newline_cache)
 730                 know_region_cache (current_buffer, newline_cache,
 731                                    BYTE_TO_CHAR (start_byte + scan_start - base),
 732                                    BYTE_TO_CHAR (start_byte + cursor - base));
 733
 734               /* Did we find the target character?  */
 735               if (cursor < ceiling_addr)
 736                 {
 737                   if (--count == 0)
 738                     {
 739                       immediate_quit = 0;
 740                       return BYTE_TO_CHAR (start_byte + cursor - base + 1);
 741                     }
 742                   cursor++;
 743                 }
 744             }
 745
 746           start = BYTE_TO_CHAR (start_byte + cursor - base);
 747         }
 748       }
 749   else
 750     while (start > end)
 751       {
 752         /* The last character to check before the next obstacle.  */
 753         ptrdiff_t ceiling_byte = CHAR_TO_BYTE (end);
 754         ptrdiff_t start_byte;
 755         ptrdiff_t tem;
 756
 757         /* Consult the newline cache, if appropriate.  */
 758         if (target == '\n' && newline_cache)
 759           {
 760             ptrdiff_t next_change;
 761             immediate_quit = 0;
 762             while (region_cache_backward
 763                    (current_buffer, newline_cache, start, &next_change))
 764               start = next_change;
 765             immediate_quit = allow_quit;
 766
 767             start_byte = CHAR_TO_BYTE (start);
 768
 769             /* Start should never be at or before end.  */
 770             if (start_byte <= ceiling_byte)
 771               start_byte = ceiling_byte + 1;
 772
 773             /* Now the text before start is an unknown region, and
 774                next_change is the position of the next known region. */
 775             ceiling_byte = max (CHAR_TO_BYTE (next_change), ceiling_byte);
 776           }
 777         else
 778           start_byte = CHAR_TO_BYTE (start);
 779
 780         /* Stop scanning before the gap.  */
 781         tem = BUFFER_FLOOR_OF (start_byte - 1);
 782         ceiling_byte = max (tem, ceiling_byte);
 783
 784         {
 785           /* The termination address of the dumb loop.  */
 786           register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
 787           register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
 788           unsigned char *base = cursor;
 789
 790           while (cursor >= ceiling_addr)
 791             {
 792               unsigned char *scan_start = cursor;
 793
 794               while (*cursor != target && --cursor >= ceiling_addr)
 795                 ;
 796
 797               /* If we're looking for newlines, cache the fact that
 798                  the region from after the cursor to start is free of them.  */
 799               if (target == '\n' && newline_cache)
 800                 know_region_cache (current_buffer, newline_cache,
 801                                    BYTE_TO_CHAR (start_byte + cursor - base),
 802                                    BYTE_TO_CHAR (start_byte + scan_start - base));
 803
 804               /* Did we find the target character?  */
 805               if (cursor >= ceiling_addr)
 806                 {
 807                   if (++count >= 0)
 808                     {
 809                       immediate_quit = 0;
 810                       return BYTE_TO_CHAR (start_byte + cursor - base);
 811                     }
 812                   cursor--;
 813                 }
 814             }
 815
 816           start = BYTE_TO_CHAR (start_byte + cursor - base);
 817         }
 818       }
 819
 820   immediate_quit = 0;
 821   if (shortage != 0)
 822     *shortage = count * direction;
 823   return start;
 824 }
 825 \f
 826 /* Search for COUNT instances of a line boundary, starting at START.
 827    The loops below test only for '\n'; they do not look for
 828    carriage returns.  If COUNT is negative, search backwards.
 829
 830    We report the resulting position by calling TEMP_SET_PT_BOTH.
 831
 832    If we find COUNT instances, we position after (always after,
 833    even if scanning backwards) the COUNTth match, and return 0.
 834
 835    If we don't find COUNT instances before reaching LIMIT_BYTE
 836    (in whichever direction we are scanning), we return
 837    the number of line boundaries left unfound, and position at
 838    LIMIT / LIMIT_BYTE, the limit we bumped up against.
 839
 840    If ALLOW_QUIT, increment immediate_quit for the duration of the
 841    scan.  The value saved on entry is restored before every return.

        The START_BYTE argument is recomputed from START before the scan,
        so the value passed by the caller is not used.  */
 842
 843 EMACS_INT
 844 scan_newline (ptrdiff_t start, ptrdiff_t start_byte,
 845               ptrdiff_t limit, ptrdiff_t limit_byte,
 846               EMACS_INT count, bool allow_quit)
 847 {
       /* Sign applied to the leftover COUNT so that the failure return
          value is the (non-negative) number of boundaries not found.  */
 848   int direction = ((count > 0) ? 1 : -1);
 849
       /* CURSOR walks the buffer text; BASE is where CURSOR started in the
          current stretch, so CURSOR - BASE is the byte distance covered.  */
 850   unsigned char *cursor;
 851   unsigned char *base;
 852
       /* Byte position bounding the current stretch, and the address at
          which the inner loops stop (one byte beyond that bound).  */
 853   ptrdiff_t ceiling;
 854   unsigned char *ceiling_addr;
 855
 856   bool old_immediate_quit = immediate_quit;
 857
 858   /* The code that follows is like scan_buffer with '\n' as the
 859      target character; it does not use the newline cache.  */
 860
 861   if (allow_quit)
 862     immediate_quit++;
 863
       /* Derive the byte position from START, ignoring the argument.  */
 864   start_byte = CHAR_TO_BYTE (start);
 865
 866   if (count > 0)
 867     {
           /* Forward scan, one stretch of buffer text at a time.  */
 868       while (start_byte < limit_byte)
 869         {
               /* The stretch ends at BUFFER_CEILING_OF (start_byte), but
                  never beyond the last byte before LIMIT_BYTE.  */
 870           ceiling =  BUFFER_CEILING_OF (start_byte);
 871           ceiling = min (limit_byte - 1, ceiling);
               /* One past the last byte of the stretch.  */
 872           ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
 873           base = (cursor = BYTE_POS_ADDR (start_byte));
 874           while (1)
 875             {
                   /* Advance to the next newline or to the end of the
                      stretch, whichever comes first.  */
 876               while (*cursor != '\n' && ++cursor != ceiling_addr)
 877                 ;
 878
 879               if (cursor != ceiling_addr)
 880                 {
                       /* CURSOR is on a newline.  */
 881                   if (--count == 0)
 882                     {
 883                       immediate_quit = old_immediate_quit;
                           /* The + 1 puts us just after the newline.  */
 884                       start_byte = start_byte + cursor - base + 1;
 885                       start = BYTE_TO_CHAR (start_byte);
 886                       TEMP_SET_PT_BOTH (start, start_byte);
 887                       return 0;
 888                     }
 889                   else
                         /* Step over this newline; if that exhausts the
                            stretch, go set up the next one.  */
 890                     if (++cursor == ceiling_addr)
 891                       break;
 892                 }
 893               else
 894                 break;
 895             }
               /* Account for the bytes covered in this stretch.  */
 896           start_byte += cursor - base;
 897         }
 898     }
 899   else
 900     {
           /* Backward scan (COUNT <= 0), one stretch at a time.  */
 901       while (start_byte > limit_byte)
 902         {
               /* The stretch begins at BUFFER_FLOOR_OF (start_byte - 1),
                  but never before LIMIT_BYTE.  */
 903           ceiling = BUFFER_FLOOR_OF (start_byte - 1);
 904           ceiling = max (limit_byte, ceiling);
               /* One before the first byte of the stretch.  */
 905           ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
               /* Start one past the byte before START_BYTE, because the
                  inner loop decrements CURSOR before testing it.  */
 906           base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
 907           while (1)
 908             {
 909               while (--cursor != ceiling_addr && *cursor != '\n')
 910                 ;
 911
 912               if (cursor != ceiling_addr)
 913                 {
                       /* CURSOR is on a newline.  */
 914                   if (++count == 0)
 915                     {
 916                       immediate_quit = old_immediate_quit;
 917                       /* Return the position AFTER the match we found.  */
 918                       start_byte = start_byte + cursor - base + 1;
 919                       start = BYTE_TO_CHAR (start_byte);
 920                       TEMP_SET_PT_BOTH (start, start_byte);
 921                       return 0;
 922                     }
 923                 }
 924               else
 925                 break;
 926             }
 927           /* Here we add 1 to compensate for the last decrement
 928              of CURSOR, which took it past the valid range.  */
 929           start_byte += cursor - base + 1;
 930         }
 931     }
 932
       /* Not enough newlines before the limit: leave point at the limit.  */
 933   TEMP_SET_PT_BOTH (limit, limit_byte);
 934   immediate_quit = old_immediate_quit;
 935
 936   return count * direction;
 937 }
 938
     /* Scan for CNT newlines starting at FROM by calling scan_buffer with
        '\n' as the target, and return whatever position scan_buffer
        returns.  The remaining arguments are constant: 0 for the end
        position, a null pointer instead of a place to store the shortage,
        and 0 for the final flag (presumably ALLOW_QUIT, as this function's
        name suggests -- confirm against scan_buffer's parameter list).  */
 939 ptrdiff_t
 940 find_next_newline_no_quit (ptrdiff_t from, ptrdiff_t cnt)
 941 {
 942   return scan_buffer ('\n', from, 0, cnt, (ptrdiff_t *) 0, 0);
 943 }
 944
 945 /* Like find_next_newline, but returns position before the newline,
 946    not after, and only search up to TO.  This isn't just
 947    find_next_newline (...)-1, because you might hit TO.

        FROM is the starting position and CNT the number of newlines to
        scan for; both are handed to scan_buffer together with TO.  The
        position scan_buffer returns is decremented by one only when the
        shortage it reports is zero; otherwise it is returned unchanged.  */
 948
 949 ptrdiff_t
 950 find_before_next_newline (ptrdiff_t from, ptrdiff_t to, ptrdiff_t cnt)
 951 {
 952   ptrdiff_t shortage;
 953   ptrdiff_t pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);
 954
       /* A zero shortage is taken to mean that all CNT newlines were found,
          so POS is just after the last one: step back in front of it.
          With a nonzero shortage the scan stopped at TO instead, and there
          is no newline to step over.  */
 955   if (shortage == 0)
 956     pos--;
 957
 958   return pos;
 959 }
 960 \f
 961 /* Subroutines of Lisp buffer search functions. */
 962
     /* Common implementation behind the Lisp buffer search functions.

        STRING is the text or regexp to look for; it must be a string.
        BOUND, if non-nil, must be a number (markers are coerced) and limits
        the search.  An error is signaled if it lies on the wrong side of
        point for the direction of the search; otherwise it is clamped to
        the range BEGV..ZV.  With a nil BOUND the limit is ZV when the
        repeat count is positive and BEGV otherwise.
        COUNT, if non-nil, must be a number; it is multiplied by DIRECTION
        to give the signed repeat count handed to search_buffer.
        RE and POSIX are passed through to search_buffer unchanged.

        On success, move point to the position search_buffer returned and
        return that position as a Lisp integer.  On failure (search_buffer
        returned a value <= 0): if NOERROR is nil, signal Qsearch_failed
        with STRING; if NOERROR is t, return nil without moving point; for
        any other NOERROR, move point to the limit and return nil.  */
 963 static Lisp_Object
 964 search_command (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror,
 965                 Lisp_Object count, int direction, int RE, int posix)
 966 {
 967   register EMACS_INT np;
 968   EMACS_INT lim;
 969   ptrdiff_t lim_byte;
       /* Signed repeat count: DIRECTION, times COUNT when COUNT is given.  */
 970   EMACS_INT n = direction;
 971
 972   if (!NILP (count))
 973     {
 974       CHECK_NUMBER (count);
 975       n *= XINT (count);
 976     }
 977
 978   CHECK_STRING (string);
 979   if (NILP (bound))
 980     {
           /* No explicit bound: search to the edge of the accessible
              portion in the direction of travel.  */
 981       if (n > 0)
 982         lim = ZV, lim_byte = ZV_BYTE;
 983       else
 984         lim = BEGV, lim_byte = BEGV_BYTE;
 985     }
 986   else
 987     {
 988       CHECK_NUMBER_COERCE_MARKER (bound);
 989       lim = XINT (bound);
 990       if (n > 0 ? lim < PT : lim > PT)
 991         error ("Invalid search bound (wrong side of point)");
           /* Clamp the bound to the accessible portion of the buffer.  */
 992       if (lim > ZV)
 993         lim = ZV, lim_byte = ZV_BYTE;
 994       else if (lim < BEGV)
 995         lim = BEGV, lim_byte = BEGV_BYTE;
 996       else
 997         lim_byte = CHAR_TO_BYTE (lim);
 998     }
 999
1000   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
1001   set_char_table_extras (BVAR (current_buffer, case_canon_table), 2,
1002                          BVAR (current_buffer, case_eqv_table));
1003
       /* The translation tables are passed only when the buffer's
          case_fold_search is non-nil; otherwise both are Qnil.  */
1004   np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
1005                       (!NILP (BVAR (current_buffer, case_fold_search))
1006                        ? BVAR (current_buffer, case_canon_table)
1007                        : Qnil),
1008                       (!NILP (BVAR (current_buffer, case_fold_search))
1009                        ? BVAR (current_buffer, case_eqv_table)
1010                        : Qnil),
1011                       posix);
       /* A value <= 0 is treated as "not found".  */
1012   if (np <= 0)
1013     {
1014       if (NILP (noerror))
1015         xsignal1 (Qsearch_failed, string);
1016
1017       if (!EQ (noerror, Qt))
1018         {
               /* NOERROR is neither nil nor t: move to the limit.  LIM was
                  clamped above, so being outside BEGV..ZV here would be an
                  internal error.  */
1019           if (lim < BEGV || lim > ZV)
1020             emacs_abort ();
1021           SET_PT_BOTH (lim, lim_byte);
1022           return Qnil;
               /* The disabled code below sits after the return and was
                  never reached even when enabled on its own.  */
1023 #if 0 /* This would be clean, but maybe programs depend on
1024          a value of nil here.  */
1025           np = lim;
1026 #endif
1027         }
1028       else
1029         return Qnil;
1030     }
1031
       /* Sanity check on the position reported by search_buffer.  */
1032   if (np < BEGV || np > ZV)
1033     emacs_abort ();
1034
1035   SET_PT (np);
1036
1037   return make_number (np);
1038 }
1039 \f
1040 /* Return 1 if REGEXP matches just one constant string, 0 otherwise.

        The bytes of REGEXP are scanned in order.  The result is 0 as soon
        as one of these is seen: an unquoted `.', `*', `+', `?', `[', `^'
        or `$'; a backslash that is the last byte of REGEXP; or a backslash
        followed by one of the characters listed in the inner switch.
        A backslash followed by any other byte is accepted, and that byte
        is skipped without being examined further.  */
1041
1042 static int
1043 trivial_regexp_p (Lisp_Object regexp)
1044 {
1045   ptrdiff_t len = SBYTES (regexp);
1046   unsigned char *s = SDATA (regexp);
1047   while (--len >= 0)
1048     {
1049       switch (*s++)
1050         {
1051         case '.': case '*': case '+': case '?': case '[': case '^': case '$':
1052           return 0;
1053         case '\\':
               /* A trailing backslash has nothing to quote.  */
1054           if (--len < 0)
1055             return 0;
               /* Consume the byte after the backslash; the sequences listed
                  here make the regexp non-trivial.  */
1056           switch (*s++)
1057             {
1058             case '|': case '(': case ')': case '`': case '\'': case 'b':
1059             case 'B': case '<': case '>': case 'w': case 'W': case 's':
1060             case 'S': case '=': case '{': case '}': case '_':
1061             case 'c': case 'C': /* for categoryspec and notcategoryspec */
1062             case '1': case '2': case '3': case '4': case '5':
1063             case '6': case '7': case '8': case '9':
1064               return 0;
1065             }
1066         }
1067     }
1068   return 1;
1069 }
1070
1071 /* Search for the n'th occurrence of STRING in the current buffer,
1072    starting at position POS and stopping at position LIM,
1073    treating STRING as a literal string if RE is false or as
1074    a regular expression if RE is true.
1075
1076    If N is positive, searching is forward and LIM must be greater than POS.
1077    If N is negative, searching is backward and LIM must be less than POS.
1078
1079    Returns -x if x occurrences remain to be found (x > 0),
1080    or else the position at the beginning of the Nth occurrence
1081    (if searching backward) or the end (if searching forward).
1082
1083    POSIX is nonzero if we want full backtracking (POSIX style)
1084    for this pattern.  0 means backtrack only enough to get a valid match.  */
1085
     /* Store in OUT the translation of character D through table TRT.
        If TRT is nil, or if the element of TRT at index D is not an
        integer, OUT is set to D itself.  OUT must be an lvalue.  D appears
        more than once in the expansion and the arguments are not
        parenthesized, so pass simple, side-effect-free expressions.
        OUT and D may name the same variable.  */
1086 #define TRANSLATE(out, trt, d)                  \
1087 do                                              \
1088   {                                             \
1089     if (! NILP (trt))                           \
1090       {                                         \
1091         Lisp_Object temp;                       \
1092         temp = Faref (trt, make_number (d));    \
1093         if (INTEGERP (temp))                    \
1094           out = XINT (temp);                    \
1095         else                                    \
1096           out = d;                              \
1097       }                                         \
1098     else                                        \
1099       out = d;                                  \
1100   }                                             \
1101 while (0)
1102
1103 /* Only used in search_buffer, in place of SEARCH_REGS when searching
1104    regexps and SEARCH_REGS should not be changed
1105    (i.e. Vinhibit_changing_match_data is non-nil).  search_buffer reads
        start[0] from it after a backward match and end[0] after a forward
        match to find the new position.  */
1106 static struct re_registers search_regs_1;
1107
     /* Worker for search_command; the arguments and the return value are
        described in the comment that precedes the TRANSLATE macro above.

        A regexp that is not trivial (or any regexp while
        Vsearch_spaces_regexp is non-nil) is compiled with compile_pattern
        and run through re_search_2 once per requested occurrence.
        Anything else is treated as a literal: the pattern is converted to
        the multibyteness of the current buffer, translated through TRT,
        and handed to boyer_moore, or to simple_search when the case
        equivalents found through INVERSE_TRT rule boyer_moore out.  */
1108 static EMACS_INT
1109 search_buffer (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte,
1110                ptrdiff_t lim, ptrdiff_t lim_byte, EMACS_INT n,
1111                int RE, Lisp_Object trt, Lisp_Object inverse_trt, int posix)
1112 {
1113   ptrdiff_t len = SCHARS (string);
1114   ptrdiff_t len_byte = SBYTES (string);
1115   register ptrdiff_t i;
1116
1117   if (running_asynch_code)
1118     save_search_regs ();
1119
1120   /* Searching 0 times means don't move.  */
1121   /* Null string is found at starting position.  */
1122   if (len == 0 || n == 0)
1123     {
1124       set_search_regs (pos_byte, 0);
1125       return pos;
1126     }
1127
1128   if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp)))
1129     {
1130       unsigned char *p1, *p2;
1131       ptrdiff_t s1, s2;
1132       struct re_pattern_buffer *bufp;
1133
           /* Registers go to search_regs_1 instead of search_regs when the
              match data must not be changed.  */
1134       bufp = compile_pattern (string,
1135                               (NILP (Vinhibit_changing_match_data)
1136                                ? &search_regs : &search_regs_1),
1137                               trt, posix,
1138                               !NILP (BVAR (current_buffer, enable_multibyte_characters)));
1139
1140       immediate_quit = 1;       /* Quit immediately if user types ^G,
1141                                    because letting this function finish
1142                                    can take too long. */
1143       QUIT;                     /* Do a pending quit right away,
1144                                    to avoid paradoxical behavior */
1145       /* Get pointers and sizes of the two strings
1146          that make up the visible portion of the buffer. */
1147
1148       p1 = BEGV_ADDR;
1149       s1 = GPT_BYTE - BEGV_BYTE;
1150       p2 = GAP_END_ADDR;
1151       s2 = ZV_BYTE - GPT_BYTE;
           /* GPT_BYTE is before BEGV_BYTE: the whole visible text is one
              piece, passed as the second string.  */
1152       if (s1 < 0)
1153         {
1154           p2 = p1;
1155           s2 = ZV_BYTE - BEGV_BYTE;
1156           s1 = 0;
1157         }
           /* GPT_BYTE is after ZV_BYTE: the whole visible text is one
              piece, passed as the first string.  */
1158       if (s2 < 0)
1159         {
1160           s1 = ZV_BYTE - BEGV_BYTE;
1161           s2 = 0;
1162         }
1163       re_match_object = Qnil;
1164
           /* Backward search: one re_search_2 call per occurrence.  Byte
              positions are passed relative to BEGV_BYTE.  */
1165       while (n < 0)
1166         {
1167           ptrdiff_t val;
1168
1169           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1170                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1171                              (NILP (Vinhibit_changing_match_data)
1172                               ? &search_regs : &search_regs_1),
1173                              /* Don't allow match past current point */
1174                              pos_byte - BEGV_BYTE);
1175           if (val == -2)
1176             {
1177               matcher_overflow ();
1178             }
1179           if (val >= 0)
1180             {
1181               if (NILP (Vinhibit_changing_match_data))
1182                 {
                       /* Continue from the start of this match, and turn
                          every set register from a byte offset relative to
                          BEGV_BYTE into a character position.  */
1183                   pos_byte = search_regs.start[0] + BEGV_BYTE;
1184                   for (i = 0; i < search_regs.num_regs; i++)
1185                     if (search_regs.start[i] >= 0)
1186                       {
1187                         search_regs.start[i]
1188                           = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1189                         search_regs.end[i]
1190                           = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1191                       }
1192                   XSETBUFFER (last_thing_searched, current_buffer);
1193                   /* Set pos to the new position. */
1194                   pos = search_regs.start[0];
1195                 }
1196               else
1197                 {
                       /* Match data is frozen: take the position from the
                          private registers and leave search_regs alone.  */
1198                   pos_byte = search_regs_1.start[0] + BEGV_BYTE;
1199                   /* Set pos to the new position.  */
1200                   pos = BYTE_TO_CHAR (search_regs_1.start[0] + BEGV_BYTE);
1201                 }
1202             }
1203           else
1204             {
                   /* No match: N is still negative and its magnitude is
                      the number of occurrences not found.  */
1205               immediate_quit = 0;
1206               return (n);
1207             }
1208           n++;
1209         }
           /* Forward search: same scheme, continuing from the end of each
              match and not matching past LIM_BYTE.  */
1210       while (n > 0)
1211         {
1212           ptrdiff_t val;
1213
1214           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1215                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1216                              (NILP (Vinhibit_changing_match_data)
1217                               ? &search_regs : &search_regs_1),
1218                              lim_byte - BEGV_BYTE);
1219           if (val == -2)
1220             {
1221               matcher_overflow ();
1222             }
1223           if (val >= 0)
1224             {
1225               if (NILP (Vinhibit_changing_match_data))
1226                 {
1227                   pos_byte = search_regs.end[0] + BEGV_BYTE;
1228                   for (i = 0; i < search_regs.num_regs; i++)
1229                     if (search_regs.start[i] >= 0)
1230                       {
1231                         search_regs.start[i]
1232                           = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1233                         search_regs.end[i]
1234                           = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1235                       }
1236                   XSETBUFFER (last_thing_searched, current_buffer);
1237                   pos = search_regs.end[0];
1238                 }
1239               else
1240                 {
1241                   pos_byte = search_regs_1.end[0] + BEGV_BYTE;
1242                   pos = BYTE_TO_CHAR (search_regs_1.end[0] + BEGV_BYTE);
1243                 }
1244             }
1245           else
1246             {
                   /* No match: report the remaining count as a negative
                      number.  */
1247               immediate_quit = 0;
1248               return (0 - n);
1249             }
1250           n--;
1251         }
1252       immediate_quit = 0;
1253       return (pos);
1254     }
1255   else                          /* non-RE case */
1256     {
           /* RAW_PATTERN is STRING in the buffer's multibyteness; PATBUF
              receives the translated copy and PAT is the write cursor
              into it.  BASE_PAT is the read cursor into RAW_PATTERN.  */
1257       unsigned char *raw_pattern, *pat;
1258       ptrdiff_t raw_pattern_size;
1259       ptrdiff_t raw_pattern_size_byte;
1260       unsigned char *patbuf;
1261       int multibyte = !NILP (BVAR (current_buffer, enable_multibyte_characters));
1262       unsigned char *base_pat;
1263       /* Set to positive if we find a non-ASCII char that needs
1264          translation.  Otherwise set to zero later.  */
1265       int char_base = -1;
1266       int boyer_moore_ok = 1;
1267
1268       /* MULTIBYTE says whether the text to be searched is multibyte.
1269          We must convert PATTERN to match that, or we will not really
1270          find things right.  */
1271
1272       if (multibyte == STRING_MULTIBYTE (string))
1273         {
               /* Same representation: use STRING's data directly.  */
1274           raw_pattern = SDATA (string);
1275           raw_pattern_size = SCHARS (string);
1276           raw_pattern_size_byte = SBYTES (string);
1277         }
1278       else if (multibyte)
1279         {
               /* Unibyte STRING, multibyte buffer: convert into a
                  stack-allocated copy.  */
1280           raw_pattern_size = SCHARS (string);
1281           raw_pattern_size_byte
1282             = count_size_as_multibyte (SDATA (string),
1283                                        raw_pattern_size);
1284           raw_pattern = alloca (raw_pattern_size_byte + 1);
1285           copy_text (SDATA (string), raw_pattern,
1286                      SCHARS (string), 0, 1);
1287         }
1288       else
1289         {
1290           /* Converting multibyte to single-byte.
1291
1292              ??? Perhaps this conversion should be done in a special way
1293              by subtracting nonascii-insert-offset from each non-ASCII char,
1294              so that only the multibyte chars which really correspond to
1295              the chosen single-byte character set can possibly match.  */
1296           raw_pattern_size = SCHARS (string);
1297           raw_pattern_size_byte = SCHARS (string);
1298           raw_pattern = alloca (raw_pattern_size + 1);
1299           copy_text (SDATA (string), raw_pattern,
1300                      SBYTES (string), 1, 0);
1301         }
1302
1303       /* Copy and optionally translate the pattern.  */
1304       len = raw_pattern_size;
1305       len_byte = raw_pattern_size_byte;
           /* Room for the longest possible multibyte form of each
              character; note that this is allocated on the stack.  */
1306       patbuf = alloca (len * MAX_MULTIBYTE_LENGTH);
1307       pat = patbuf;
1308       base_pat = raw_pattern;
1309       if (multibyte)
1310         {
1311           /* Fill patbuf by translated characters in STRING while
1312              checking if we can use boyer-moore search.  If TRT is
1313              non-nil, we can use boyer-moore search only if TRT can be
1314              represented by the byte array of 256 elements.  For that,
1315              all non-ASCII case-equivalents of all case-sensitive
1316              characters in STRING must belong to the same character
1317              group (two characters belong to the same group iff their
1318              multibyte forms are the same except for the last byte;
1319              i.e. every 64 characters form a group; U+0000..U+003F,
1320              U+0040..U+007F, U+0080..U+00BF, ...).  */
1321
1322           while (--len >= 0)
1323             {
1324               unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str;
1325               int c, translated, inverse;
1326               int in_charlen, charlen;
1327
1328               /* If we got here and the RE flag is set, it's because we're
1329                  dealing with a regexp known to be trivial, so the backslash
1330                  just quotes the next character.  */
1331               if (RE && *base_pat == '\\')
1332                 {
                       /* Drop the backslash from every count: it is not
                          part of the text to be matched.  */
1333                   len--;
1334                   raw_pattern_size--;
1335                   len_byte--;
1336                   base_pat++;
1337                 }
1338
1339               c = STRING_CHAR_AND_LENGTH (base_pat, in_charlen);
1340
1341               if (NILP (trt))
1342                 {
                       /* No translation: copy the character's bytes as
                          they are.  */
1343                   str = base_pat;
1344                   charlen = in_charlen;
1345                 }
1346               else
1347                 {
1348                   /* Translate the character.  */
1349                   TRANSLATE (translated, trt, c);
1350                   charlen = CHAR_STRING (translated, str_base);
1351                   str = str_base;
1352
1353                   /* Check if C has any other case-equivalents.  */
1354                   TRANSLATE (inverse, inverse_trt, c);
1355                   /* If so, check if we can use boyer-moore.  */
1356                   if (c != inverse && boyer_moore_ok)
1357                     {
1358                       /* Check if all equivalents belong to the same
1359                          group of characters.  Note that the check of C
1360                          itself is done by the last iteration.  */
1361                       int this_char_base = -1;
1362
                           /* Follow the chain of equivalents through
                              INVERSE_TRT until it comes back to C.  */
1363                       while (boyer_moore_ok)
1364                         {
1365                           if (ASCII_BYTE_P (inverse))
1366                             {
1367                               if (this_char_base > 0)
1368                                 boyer_moore_ok = 0;
1369                               else
1370                                 this_char_base = 0;
1371                             }
1372                           else if (CHAR_BYTE8_P (inverse))
1373                             /* Boyer-moore search can't handle a
1374                                translation of an eight-bit
1375                                character.  */
1376                             boyer_moore_ok = 0;
1377                           else if (this_char_base < 0)
1378                             {
                                   /* First non-ASCII equivalent: its group
                                      must agree with the group recorded
                                      for earlier pattern characters.  */
1379                               this_char_base = inverse & ~0x3F;
1380                               if (char_base < 0)
1381                                 char_base = this_char_base;
1382                               else if (this_char_base != char_base)
1383                                 boyer_moore_ok = 0;
1384                             }
1385                           else if ((inverse & ~0x3F) != this_char_base)
1386                             boyer_moore_ok = 0;
1387                           if (c == inverse)
1388                             break;
1389                           TRANSLATE (inverse, inverse_trt, inverse);
1390                         }
1391                     }
1392                 }
1393
1394               /* Store this character into the translated pattern.  */
1395               memcpy (pat, str, charlen);
1396               pat += charlen;
1397               base_pat += in_charlen;
1398               len_byte -= in_charlen;
1399             }
1400
1401           /* If char_base is still negative we didn't find any translated
1402              non-ASCII characters.  */
1403           if (char_base < 0)
1404             char_base = 0;
1405         }
1406       else
1407         {
1408           /* Unibyte buffer.  */
1409           char_base = 0;
1410           while (--len >= 0)
1411             {
1412               int c, translated, inverse;
1413
1414               /* If we got here and the RE flag is set, it's because we're
1415                  dealing with a regexp known to be trivial, so the backslash
1416                  just quotes the next character.  */
1417               if (RE && *base_pat == '\\')
1418                 {
1419                   len--;
1420                   raw_pattern_size--;
1421                   base_pat++;
1422                 }
1423               c = *base_pat++;
1424               TRANSLATE (translated, trt, c);
1425               *pat++ = translated;
1426               /* Check that none of C's equivalents violates the
1427                  assumptions of boyer_moore.  */
1428               TRANSLATE (inverse, inverse_trt, c);
1429               while (1)
1430                 {
                       /* An equivalent at or above 0200 rules out
                          boyer_moore.  */
1431                   if (inverse >= 0200)
1432                     {
1433                       boyer_moore_ok = 0;
1434                       break;
1435                     }
1436                   if (c == inverse)
1437                     break;
1438                   TRANSLATE (inverse, inverse_trt, inverse);
1439                 }
1440             }
1441         }
1442
           /* From here on LEN_BYTE is the byte length of the translated
              pattern, and RAW_PATTERN_SIZE its length in characters with
              quoting backslashes already discounted.  */
1443       len_byte = pat - patbuf;
1444       pat = base_pat = patbuf;
1445
1446       if (boyer_moore_ok)
1447         return boyer_moore (n, pat, len_byte, trt, inverse_trt,
1448                             pos_byte, lim_byte,
1449                             char_base);
1450       else
1451         return simple_search (n, pat, raw_pattern_size, len_byte, trt,
1452                               pos, pos_byte, lim, lim_byte);
1453     }
1454 }
1455 \f
1456 /* Do a simple string search N times for the string PAT,
1457    whose length is LEN/LEN_BYTE,
1458    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1459    TRT is the translation table.  It is applied to each buffer
        character before the comparison; PAT is compared as given.
1460
1461    Return the character position where the match is found: the end
        of the last match when searching forward (N > 0), its beginning
        when searching backward (N < 0).  In that case set_search_regs is
        called with the start and length of the last match.
1462    Otherwise, if M matches remained to be found, return -M.
1463
1464    This kind of search works regardless of what is in PAT and
1465    regardless of what is in TRT.  It is used in cases where
1466    boyer_moore cannot work.  */
1467
1468 static EMACS_INT
1469 simple_search (EMACS_INT n, unsigned char *pat,
1470                ptrdiff_t len, ptrdiff_t len_byte, Lisp_Object trt,
1471                ptrdiff_t pos, ptrdiff_t pos_byte,
1472                ptrdiff_t lim, ptrdiff_t lim_byte)
1473 {
1474   int multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
       /* Remember the direction now; N is counted toward zero below.  */
1475   int forward = n > 0;
1476   /* Number of buffer bytes matched.  Note that this may be different
1477      from len_byte in a multibyte buffer.  */
1478   ptrdiff_t match_byte = PTRDIFF_MIN;
1479
       /* Forward search, multibyte buffer.  */
1480   if (lim > pos && multibyte)
1481     while (n > 0)
1482       {
1483         while (1)
1484           {
1485             /* Try matching at position POS.  */
1486             ptrdiff_t this_pos = pos;
1487             ptrdiff_t this_pos_byte = pos_byte;
1488             ptrdiff_t this_len = len;
1489             unsigned char *p = pat;
                 /* Give up once the pattern no longer fits before the
                    limit.  */
1490             if (pos + len > lim || pos_byte + len_byte > lim_byte)
1491               goto stop;
1492
1493             while (this_len > 0)
1494               {
1495                 int charlen, buf_charlen;
1496                 int pat_ch, buf_ch;
1497
1498                 pat_ch = STRING_CHAR_AND_LENGTH (p, charlen);
1499                 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1500                                                  buf_charlen);
1501                 TRANSLATE (buf_ch, trt, buf_ch);
1502
1503                 if (buf_ch != pat_ch)
1504                   break;
1505
1506                 this_len--;
1507                 p += charlen;
1508
1509                 this_pos_byte += buf_charlen;
1510                 this_pos++;
1511               }
1512
1513             if (this_len == 0)
1514               {
                     /* Whole pattern matched: move to the end of the
                        match.  */
1515                 match_byte = this_pos_byte - pos_byte;
1516                 pos += len;
1517                 pos_byte += match_byte;
1518                 break;
1519               }
1520
                 /* Mismatch: retry one character further on.  */
1521             INC_BOTH (pos, pos_byte);
1522           }
1523
1524         n--;
1525       }
       /* Forward search, unibyte buffer.  */
1526   else if (lim > pos)
1527     while (n > 0)
1528       {
1529         while (1)
1530           {
1531             /* Try matching at position POS.  */
1532             ptrdiff_t this_pos = pos;
1533             ptrdiff_t this_len = len;
1534             unsigned char *p = pat;
1535
1536             if (pos + len > lim)
1537               goto stop;
1538
1539             while (this_len > 0)
1540               {
1541                 int pat_ch = *p++;
1542                 int buf_ch = FETCH_BYTE (this_pos);
1543                 TRANSLATE (buf_ch, trt, buf_ch);
1544
1545                 if (buf_ch != pat_ch)
1546                   break;
1547
1548                 this_len--;
1549                 this_pos++;
1550               }
1551
1552             if (this_len == 0)
1553               {
1554                 match_byte = len;
1555                 pos += len;
1556                 break;
1557               }
1558
1559             pos++;
1560           }
1561
1562         n--;
1563       }
1564   /* Backwards search.  */
1565   else if (lim < pos && multibyte)
1566     while (n < 0)
1567       {
1568         while (1)
1569           {
1570             /* Try matching the text that ends at position POS,
                    comparing characters from the last one backwards.  */
1571             ptrdiff_t this_pos = pos;
1572             ptrdiff_t this_pos_byte = pos_byte;
1573             ptrdiff_t this_len = len;
1574             const unsigned char *p = pat + len_byte;
1575
1576             if (this_pos - len < lim || (pos_byte - len_byte) < lim_byte)
1577               goto stop;
1578
1579             while (this_len > 0)
1580               {
1581                 int pat_ch, buf_ch;
1582
1583                 DEC_BOTH (this_pos, this_pos_byte);
1584                 PREV_CHAR_BOUNDARY (p, pat);
1585                 pat_ch = STRING_CHAR (p);
1586                 buf_ch = STRING_CHAR (BYTE_POS_ADDR (this_pos_byte));
1587                 TRANSLATE (buf_ch, trt, buf_ch);
1588
1589                 if (buf_ch != pat_ch)
1590                   break;
1591
1592                 this_len--;
1593               }
1594
1595             if (this_len == 0)
1596               {
                     /* Whole pattern matched: move to the beginning of the
                        match.  */
1597                 match_byte = pos_byte - this_pos_byte;
1598                 pos = this_pos;
1599                 pos_byte = this_pos_byte;
1600                 break;
1601               }
1602
                 /* Mismatch: retry one character further back.  */
1603             DEC_BOTH (pos, pos_byte);
1604           }
1605
1606         n++;
1607       }
       /* Backward search, unibyte buffer.  */
1608   else if (lim < pos)
1609     while (n < 0)
1610       {
1611         while (1)
1612           {
1613             /* Try matching the LEN bytes that end at position POS,
                    comparing them left to right.  */
1614             ptrdiff_t this_pos = pos - len;
1615             ptrdiff_t this_len = len;
1616             unsigned char *p = pat;
1617
1618             if (this_pos < lim)
1619               goto stop;
1620
1621             while (this_len > 0)
1622               {
1623                 int pat_ch = *p++;
1624                 int buf_ch = FETCH_BYTE (this_pos);
1625                 TRANSLATE (buf_ch, trt, buf_ch);
1626
1627                 if (buf_ch != pat_ch)
1628                   break;
1629                 this_len--;
1630                 this_pos++;
1631               }
1632
1633             if (this_len == 0)
1634               {
1635                 match_byte = len;
1636                 pos -= len;
1637                 break;
1638               }
1639
1640             pos--;
1641           }
1642
1643         n++;
1644       }
1645
1646  stop:
       /* N == 0 means every requested occurrence was found.  */
1647   if (n == 0)
1648     {
1649       eassert (match_byte != PTRDIFF_MIN);
           /* set_search_regs is given the start of the match.  After a
              forward search POS is at the end of the match, so step back
              by its length; after a backward search POS already is the
              start.  The byte position is used in a multibyte buffer and
              POS itself otherwise.  */
1650       if (forward)
1651         set_search_regs ((multibyte ? pos_byte : pos) - match_byte, match_byte);
1652       else
1653         set_search_regs (multibyte ? pos_byte : pos, match_byte);
1654
1655       return pos;
1656     }
       /* Otherwise report the number of occurrences still missing as a
          negative value, whichever direction we were going.  */
1657   else if (n > 0)
1658     return -n;
1659   else
1660     return n;
1661 }
1662 \f
1663 /* Do Boyer-Moore search for the string BASE_PAT, whose length is
1664    LEN_BYTE bytes, from byte position POS_BYTE until LIM_BYTE.
1665    Search |N| times: forward if N > 0, otherwise backward.
1666    TRT and INVERSE_TRT are translation tables.
1667    Characters in BASE_PAT are already translated by TRT.
1668    Each match found is passed to set_search_regs.  On success, return
1669    the character position of the last match's end (forward) or start
1670    (backward); on failure, minus the number of matches still wanted.
1671
1672    This kind of search works if all the characters in BASE_PAT that
1673    have nontrivial translation are the same aside from the last byte,
1674    so that only the last byte of a character needs translating, after
1675    a simple test of the context.  CHAR_BASE is nonzero if there is such
1676    a non-ASCII character.  If not satisfied, do not call this function.  */
1677
1678 static EMACS_INT
1679 boyer_moore (EMACS_INT n, unsigned char *base_pat,
1680              ptrdiff_t len_byte,
1681              Lisp_Object trt, Lisp_Object inverse_trt,
1682              ptrdiff_t pos_byte, ptrdiff_t lim_byte,
1683              int char_base)
1684 {
1685   int direction = ((n > 0) ? 1 : -1);
1686   register ptrdiff_t dirlen;
1687   ptrdiff_t limit;
1688   int stride_for_teases = 0;
1689   int BM_tab[0400];  /* stride per byte value; 0 = candidate hit */
1690   register unsigned char *cursor, *p_limit;
1691   register ptrdiff_t i;
1692   register int j;
1693   unsigned char *pat, *pat_end;
1694   int multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
1695
1696   unsigned char simple_translate[0400];
1697   /* These are set to the preceding bytes of a byte to be translated
1698      if char_base is nonzero.  The maximum byte length of a multibyte
1699      character is 5, yet only three previous bytes are kept here
1700      (NOTE(review): presumably 5-byte characters never translate).  */
1701   int translate_prev_byte1 = 0;
1702   int translate_prev_byte2 = 0;
1703   int translate_prev_byte3 = 0;
1704
1705   /* The general approach is that we are going to maintain that we know
1706      the first (closest to the present position, in whatever direction
1707      we're searching) character that could possibly be the last
1708      (furthest from present position) character of a valid match.  We
1709      advance the state of our knowledge by looking at that character
1710      and seeing whether it indeed matches the last character of the
1711      pattern.  If it does, we take a closer look.  If it does not, we
1712      move our pointer (to putative last characters) as far as is
1713      logically possible.  This amount of movement, which I call a
1714      stride, will be the length of the pattern if the actual character
1715      appears nowhere in the pattern, otherwise it will be the distance
1716      from the last occurrence of that character to the end of the
1717      pattern.  If the amount is zero we have a possible match.  */
1718
1719   /* Here we make a "mickey mouse" BM table.  The stride of the search
1720      is determined only by the last character of the putative match.
1721      If that character does not match, we will stride the proper
1722      distance to propose a match that superimposes it on the last
1723      instance of a character that matches it (per trt), or misses
1724      it entirely if there is none. */
1725
1726   dirlen = len_byte * direction;
1727
1728   /* Record position after the end of the pattern.  */
1729   pat_end = base_pat + len_byte;
1730   /* BASE_PAT points to a character that we start scanning from.
1731      It is the first character in a forward search,
1732      the last character in a backward search.  */
1733   if (direction < 0)
1734     base_pat = pat_end - 1;
1735
1736   /* A character that does not appear in the pattern induces a
1737      stride equal to the pattern length.  */
1738   for (i = 0; i < 0400; i++)
1739     BM_tab[i] = dirlen;
1740
1741   /* We use this for translation, instead of TRT itself.
1742      We fill this in to handle the characters that actually
1743      occur in the pattern.  Others don't matter anyway!  */
1744   for (i = 0; i < 0400; i++)
1745     simple_translate[i] = i;
1746
1747   if (char_base)
1748     {
1749       /* Setup translate_prev_byte1/2/3 from CHAR_BASE.  Only a
1750          byte following them is the target of translation.  */
1751       unsigned char str[MAX_MULTIBYTE_LENGTH];
1752       int cblen = CHAR_STRING (char_base, str);
1753
1754       translate_prev_byte1 = str[cblen - 2];
1755       if (cblen > 2)
1756         {
1757           translate_prev_byte2 = str[cblen - 3];
1758           if (cblen > 3)
1759             translate_prev_byte3 = str[cblen - 4];
1760         }
1761     }
1762
1763   i = 0;
1764   while (i != dirlen)
1765     {
1766       unsigned char *ptr = base_pat + i;
1767       i += direction;
1768       if (! NILP (trt))
1769         {
1770           /* If the byte currently looking at is the last of a
1771              character to check case-equivalents, set CH to that
1772              character.  An ASCII character and a non-ASCII character
1773              matching with CHAR_BASE are to be checked.  */
1774           int ch = -1;
1775
1776           if (ASCII_BYTE_P (*ptr) || ! multibyte)
1777             ch = *ptr;
1778           else if (char_base
1779                    && ((pat_end - ptr) == 1 || CHAR_HEAD_P (ptr[1])))
1780             {
1781               unsigned char *charstart = ptr - 1;
1782
1783               while (! (CHAR_HEAD_P (*charstart)))
1784                 charstart--;
1785               ch = STRING_CHAR (charstart);
1786               if (char_base != (ch & ~0x3F))
1787                 ch = -1;
1788             }
1789
1790           if (ch >= 0200 && multibyte)
1791             j = (ch & 0x3F) | 0200;
1792           else
1793             j = *ptr;
1794
1795           if (i == dirlen)
1796             stride_for_teases = BM_tab[j];
1797
1798           BM_tab[j] = dirlen - i;
1799           /* A translation table is accompanied by its inverse -- see
1800              comment following downcase_table for details.  */
1801           if (ch >= 0)
1802             {
1803               int starting_ch = ch;
1804               int starting_j = j;
1805
1806               while (1)
1807                 {
1808                   TRANSLATE (ch, inverse_trt, ch);
1809                   if (ch >= 0200 && multibyte)
1810                     j = (ch & 0x3F) | 0200;
1811                   else
1812                     j = ch;
1813
1814                   /* For all the characters that map into CH,
1815                      set up simple_translate to map the last byte
1816                      into STARTING_J.  */
1817                   simple_translate[j] = starting_j;
1818                   if (ch == starting_ch)
1819                     break;
1820                   BM_tab[j] = dirlen - i;
1821                 }
1822             }
1823         }
1824       else
1825         {
1826           j = *ptr;
1827
1828           if (i == dirlen)
1829             stride_for_teases = BM_tab[j];
1830           BM_tab[j] = dirlen - i;
1831         }
1832       /* stride_for_teases tells how much to stride if we get a
1833          match on the far character but are subsequently
1834          disappointed, by recording what the stride would have been
1835          for that character if the last character had been
1836          different.  */
1837     }
1838   pos_byte += dirlen - ((direction > 0) ? direction : 0);
1839   /* loop invariant - POS_BYTE points at where last char (first
1840      char if reverse) of pattern would align in a possible match.  */
1841   while (n != 0)
1842     {
1843       ptrdiff_t tail_end;
1844       unsigned char *tail_end_ptr;
1845
1846       /* It's been reported that some (broken) compiler thinks that
1847          Boolean expressions in an arithmetic context are unsigned.
1848          Using an explicit ?1:0 prevents this.  */
1849       if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1850           < 0)
1851         return (n * (0 - direction));  /* -(matches still wanted) */
1852       /* First we do the part we can by pointers (maybe nothing) */
1853       QUIT;
1854       pat = base_pat;
1855       limit = pos_byte - dirlen + direction;
1856       if (direction > 0)
1857         {
1858           limit = BUFFER_CEILING_OF (limit);
1859           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1860              can take on without hitting edge of buffer or the gap.  */
1861           limit = min (limit, pos_byte + 20000);
1862           limit = min (limit, lim_byte - 1);
1863         }
1864       else
1865         {
1866           limit = BUFFER_FLOOR_OF (limit);
1867           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1868              can take on without hitting edge of buffer or the gap.  */
1869           limit = max (limit, pos_byte - 20000);
1870           limit = max (limit, lim_byte);
1871         }
1872       tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1873       tail_end_ptr = BYTE_POS_ADDR (tail_end);
1874
1875       if ((limit - pos_byte) * direction > 20)
1876         {
1877           unsigned char *p2;
1878
1879           p_limit = BYTE_POS_ADDR (limit);
1880           p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1881           /* In this loop, pos_byte + cursor - p2 is the surrogate for pos_byte.  */
1882           while (1)             /* use one cursor setting as long as i can */
1883             {
1884               if (direction > 0) /* worth duplicating */
1885                 {
1886                   while (cursor <= p_limit)
1887                     {
1888                       if (BM_tab[*cursor] == 0)
1889                         goto hit;
1890                       cursor += BM_tab[*cursor];
1891                     }
1892                 }
1893               else
1894                 {
1895                   while (cursor >= p_limit)
1896                     {
1897                       if (BM_tab[*cursor] == 0)
1898                         goto hit;
1899                       cursor += BM_tab[*cursor];
1900                     }
1901                 }
1902               /* If you are here, cursor is beyond the end of the
1903                  searched region.  You fail to match within the
1904                  permitted region and would otherwise try a character
1905                  beyond that region.  */
1906               break;
1907
1908             hit:
1909               i = dirlen - direction;  /* offset of the far-end pattern byte */
1910               if (! NILP (trt))
1911                 {
1912                   while ((i -= direction) + direction != 0)
1913                     {
1914                       int ch;
1915                       cursor -= direction;
1916                       /* Translate only the last byte of a character.  */
1917                       if (! multibyte
1918                           || ((cursor == tail_end_ptr
1919                                || CHAR_HEAD_P (cursor[1]))
1920                               && (CHAR_HEAD_P (cursor[0])
1921                                   /* Check if this is the last byte of
1922                                      a translatable character.  */
1923                                   || (translate_prev_byte1 == cursor[-1]
1924                                       && (CHAR_HEAD_P (translate_prev_byte1)
1925                                           || (translate_prev_byte2 == cursor[-2]
1926                                               && (CHAR_HEAD_P (translate_prev_byte2)
1927                                                   || (translate_prev_byte3 == cursor[-3]))))))))
1928                         ch = simple_translate[*cursor];
1929                       else
1930                         ch = *cursor;
1931                       if (pat[i] != ch)
1932                         break;
1933                     }
1934                 }
1935               else
1936                 {
1937                   while ((i -= direction) + direction != 0)
1938                     {
1939                       cursor -= direction;
1940                       if (pat[i] != *cursor)
1941                         break;
1942                     }
1943                 }
1944               cursor += dirlen - i - direction; /* fix cursor */
1945               if (i + direction == 0)  /* every pattern byte matched */
1946                 {
1947                   ptrdiff_t position, start, end;
1948
1949                   cursor -= direction;
1950
1951                   position = pos_byte + cursor - p2 + ((direction > 0)
1952                                                        ? 1 - len_byte : 0);
1953                   set_search_regs (position, len_byte);
1954
1955                   if (NILP (Vinhibit_changing_match_data))
1956                     {
1957                       start = search_regs.start[0];
1958                       end = search_regs.end[0];
1959                     }
1960                   else
1961                     /* If Vinhibit_changing_match_data is non-nil,
1962                        search_regs will not be changed.  So let's
1963                        compute start and end here.  */
1964                     {
1965                       start = BYTE_TO_CHAR (position);
1966                       end = BYTE_TO_CHAR (position + len_byte);
1967                     }
1968
1969                   if ((n -= direction) != 0)
1970                     cursor += dirlen; /* to resume search */
1971                   else
1972                     return direction > 0 ? end : start;
1973                 }
1974               else
1975                 cursor += stride_for_teases; /* <sigh> we lose -  */
1976             }
1977           pos_byte += cursor - p2;
1978         }
1979       else
1980         /* Now we'll pick up a clump that has to be done the hard
1981            way because it covers a discontinuity.  */
1982         {
1983           limit = ((direction > 0)
1984                    ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
1985                    : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
1986           limit = ((direction > 0)
1987                    ? min (limit + len_byte, lim_byte - 1)
1988                    : max (limit - len_byte, lim_byte));
1989           /* LIMIT is now the last value POS_BYTE can have
1990              and still be valid for a possible match.  */
1991           while (1)
1992             {
1993               /* This loop can be coded for space rather than
1994                  speed because it will usually run only once.
1995                  (the reach is at most len + 21, and typically
1996                  does not exceed len).  */
1997               while ((limit - pos_byte) * direction >= 0)
1998                 {
1999                   int ch = FETCH_BYTE (pos_byte);
2000                   if (BM_tab[ch] == 0)
2001                     goto hit2;
2002                   pos_byte += BM_tab[ch];
2003                 }
2004               break;    /* ran off the end */
2005
2006             hit2:
2007               /* Found what might be a match.  */
2008               i = dirlen - direction;
2009               while ((i -= direction) + direction != 0)
2010                 {
2011                   int ch;
2012                   unsigned char *ptr;
2013                   pos_byte -= direction;
2014                   ptr = BYTE_POS_ADDR (pos_byte);
2015                   /* Translate only the last byte of a character.  */
2016                   if (! multibyte
2017                       || ((ptr == tail_end_ptr
2018                            || CHAR_HEAD_P (ptr[1]))
2019                           && (CHAR_HEAD_P (ptr[0])
2020                               /* Check if this is the last byte of a
2021                                  translatable character.  */
2022                               || (translate_prev_byte1 == ptr[-1]
2023                                   && (CHAR_HEAD_P (translate_prev_byte1)
2024                                       || (translate_prev_byte2 == ptr[-2]
2025                                           && (CHAR_HEAD_P (translate_prev_byte2)
2026                                               || translate_prev_byte3 == ptr[-3])))))))
2027                     ch = simple_translate[*ptr];
2028                   else
2029                     ch = *ptr;
2030                   if (pat[i] != ch)
2031                     break;
2032                 }
2033               /* Above loop has moved POS_BYTE part or all the way
2034                  back to the first pos (last pos if reverse).
2035                  Set it once again at the last (first if reverse) char.  */
2036               pos_byte += dirlen - i - direction;
2037               if (i + direction == 0)  /* every pattern byte matched */
2038                 {
2039                   ptrdiff_t position, start, end;
2040                   pos_byte -= direction;
2041
2042                   position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
2043                   set_search_regs (position, len_byte);
2044
2045                   if (NILP (Vinhibit_changing_match_data))
2046                     {
2047                       start = search_regs.start[0];
2048                       end = search_regs.end[0];
2049                     }
2050                   else
2051                     /* If Vinhibit_changing_match_data is non-nil,
2052                        search_regs will not be changed.  So let's
2053                        compute start and end here.  */
2054                     {
2055                       start = BYTE_TO_CHAR (position);
2056                       end = BYTE_TO_CHAR (position + len_byte);
2057                     }
2058
2059                   if ((n -= direction) != 0)
2060                     pos_byte += dirlen; /* to resume search */
2061                   else
2062                     return direction > 0 ? end : start;
2063                 }
2064               else
2065                 pos_byte += stride_for_teases;
2066             }
2067           }
2068       /* We have done one clump.  Can we continue? */
2069       if ((lim_byte - pos_byte) * direction < 0)
2070         return ((0 - n) * direction);  /* -(matches still wanted) */
2071     }
2072   return BYTE_TO_CHAR (pos_byte);  /* reached only if N was 0 on entry */
2073 }
2074
2075 /* Store the overall match just found in the current buffer, which
2076    starts at byte position BEG_BYTE and is NBYTES bytes long, as
2077    register 0 of search_regs, and reset registers 1 and up to -1.  */
2078
2079 static void
2080 set_search_regs (ptrdiff_t beg_byte, ptrdiff_t nbytes)
2081 {
2082   ptrdiff_t reg = 1;
2083
2084   /* Leave search_regs untouched when match data must not change.  */
2085   if (!NILP (Vinhibit_changing_match_data))
2086     return;
2087
2088   /* On first use, allocate two registers so that register 0 exists.  */
2089   if (search_regs.num_regs == 0)
2090     {
2091       search_regs.start = xmalloc (2 * sizeof (regoff_t));
2092       search_regs.end = xmalloc (2 * sizeof (regoff_t));
2093       search_regs.num_regs = 2;
2094     }
2095
2096   /* Only register 0 is meaningful for this match; blank the rest.  */
2097   while (reg < search_regs.num_regs)
2098     {
2099       search_regs.start[reg] = search_regs.end[reg] = -1;
2100       reg++;
2101     }
2102   /* Register 0 holds the converted start and end of the match.  */
2103   search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
2104   search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
2105   XSETBUFFER (last_thing_searched, current_buffer);
2106 }
2107 \f
2108 DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
2109        "MSearch backward: ",
2110        doc: /* Search backward from point for STRING.
2111 Set point to the beginning of the occurrence found, and return point.
2112 An optional second argument bounds the search; it is a buffer position.
2113 The match found must not extend before that position.
2114 Optional third argument, if t, means if fail just return nil (no error).
2115  If not nil and not t, position at limit of search and return nil.
2116 Optional fourth argument COUNT, if non-nil, means to search for COUNT
2117  successive occurrences.  If COUNT is negative, search forward,
2118  instead of backward, for -COUNT occurrences.
2119
2120 Search case-sensitivity is determined by the value of the variable
2121 `case-fold-search', which see.
2122
2123 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2124   (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2125 { /* -1: backward.  0, 0: presumably plain string, non-POSIX (confirm).  */
2126   return search_command (string, bound, noerror, count, -1, 0, 0);
2127 }
2128
2129 DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
2130        doc: /* Search forward from point for STRING.
2131 Set point to the end of the occurrence found, and return point.
2132 An optional second argument bounds the search; it is a buffer position.
2133 The match found must not extend after that position.  A value of nil is
2134   equivalent to (point-max).
2135 Optional third argument, if t, means if fail just return nil (no error).
2136   If not nil and not t, move to limit of search and return nil.
2137 Optional fourth argument COUNT, if non-nil, means to search for COUNT
2138  successive occurrences.  If COUNT is negative, search backward,
2139  instead of forward, for -COUNT occurrences.
2140
2141 Search case-sensitivity is determined by the value of the variable
2142 `case-fold-search', which see.
2143
2144 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2145   (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2146 { /* 1: forward.  0, 0: presumably plain string, non-POSIX (confirm).  */
2147   return search_command (string, bound, noerror, count, 1, 0, 0);
2148 }
2149
2150 DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
2151        "sRE search backward: ",
2152        doc: /* Search backward from point for match for regular expression REGEXP.
2153 Set point to the beginning of the match, and return point.
2154 The match found is the one starting last in the buffer
2155 and yet ending before the origin of the search.
2156 An optional second argument bounds the search; it is a buffer position.
2157 The match found must start at or after that position.
2158 Optional third argument, if t, means if fail just return nil (no error).
2159   If not nil and not t, move to limit of search and return nil.
2160 Optional fourth argument is repeat count--search for successive occurrences.
2161
2162 Search case-sensitivity is determined by the value of the variable
2163 `case-fold-search', which see.
2164
2165 See also the functions `match-beginning', `match-end', `match-string',
2166 and `replace-match'.  */)
2167   (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2168 { /* -1: backward.  1, 0: presumably regexp, non-POSIX (confirm).  */
2169   return search_command (regexp, bound, noerror, count, -1, 1, 0);
2170 }
2171
2172 DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
2173        "sRE search: ",
2174        doc: /* Search forward from point for regular expression REGEXP.
2175 Set point to the end of the occurrence found, and return point.
2176 An optional second argument bounds the search; it is a buffer position.
2177 The match found must not extend after that position.
2178 Optional third argument, if t, means if fail just return nil (no error).
2179   If not nil and not t, move to limit of search and return nil.
2180 Optional fourth argument is repeat count--search for successive occurrences.
2181
2182 Search case-sensitivity is determined by the value of the variable
2183 `case-fold-search', which see.
2184
2185 See also the functions `match-beginning', `match-end', `match-string',
2186 and `replace-match'.  */)
2187   (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2188 { /* 1: forward.  1, 0: presumably regexp, non-POSIX (confirm).  */
2189   return search_command (regexp, bound, noerror, count, 1, 1, 0);
2190 }
2191
2192 DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
2193        "sPosix search backward: ",
2194        doc: /* Search backward from point for match for regular expression REGEXP.
2195 Find the longest match in accord with Posix regular expression rules.
2196 Set point to the beginning of the match, and return point.
2197 The match found is the one starting last in the buffer
2198 and yet ending before the origin of the search.
2199 An optional second argument bounds the search; it is a buffer position.
2200 The match found must start at or after that position.
2201 Optional third argument, if t, means if fail just return nil (no error).
2202   If not nil and not t, move to limit of search and return nil.
2203 Optional fourth argument is repeat count--search for successive occurrences.
2204
2205 Search case-sensitivity is determined by the value of the variable
2206 `case-fold-search', which see.
2207
2208 See also the functions `match-beginning', `match-end', `match-string',
2209 and `replace-match'.  */)
2210   (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2211 { /* -1: backward.  1, 1: presumably regexp with POSIX matching (confirm).  */
2212   return search_command (regexp, bound, noerror, count, -1, 1, 1);
2213 }
2214
2215 DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
2216        "sPosix search: ",
2217        doc: /* Search forward from point for regular expression REGEXP.
2218 Find the longest match in accord with Posix regular expression rules.
2219 Set point to the end of the occurrence found, and return point.
2220 An optional second argument bounds the search; it is a buffer position.
2221 The match found must not extend after that position.
2222 Optional third argument, if t, means if fail just return nil (no error).
2223   If not nil and not t, move to limit of search and return nil.
2224 Optional fourth argument is repeat count--search for successive occurrences.
2225
2226 Search case-sensitivity is determined by the value of the variable
2227 `case-fold-search', which see.
2228
2229 See also the functions `match-beginning', `match-end', `match-string',
2230 and `replace-match'.  */)
2231   (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2232 { /* 1: forward.  1, 1: presumably regexp with POSIX matching (confirm).  */
2233   return search_command (regexp, bound, noerror, count, 1, 1, 1);
2234 }
2235 \f
2236 DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
2237        doc: /* Replace text matched by last search with NEWTEXT.
2238 Leave point at the end of the replacement text.
2239
2240 If optional second arg FIXEDCASE is non-nil, do not alter the case of
2241 the replacement text.  Otherwise, maybe capitalize the whole text, or
2242 maybe just word initials, based on the replaced text.  If the replaced
2243 text has only capital letters and has at least one multiletter word,
2244 convert NEWTEXT to all caps.  Otherwise if all words are capitalized
2245 in the replaced text, capitalize each word in NEWTEXT.
2246
2247 If optional third arg LITERAL is non-nil, insert NEWTEXT literally.
2248 Otherwise treat `\\' as special:
2249   `\\&' in NEWTEXT means substitute original matched text.
2250   `\\N' means substitute what matched the Nth `\\(...\\)'.
2251        If Nth parens didn't match, substitute nothing.
2252   `\\\\' means insert one `\\'.
2253   `\\?' is treated literally
2254        (for compatibility with `query-replace-regexp').
2255   Any other character following `\\' signals an error.
2256 Case conversion does not apply to these substitutions.
2257
2258 If optional fourth argument STRING is non-nil, it should be a string
2259 to act on; this should be the string on which the previous match was
2260 done via `string-match'.  In this case, `replace-match' creates and
2261 returns a new string, made by copying STRING and replacing the part of
2262 STRING that was matched (the original STRING itself is not altered).
2263
2264 The optional fifth argument SUBEXP specifies a subexpression;
2265 it says to replace just that subexpression with NEWTEXT,
2266 rather than replacing the entire matched text.
2267 This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
2268 `\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
2269 NEWTEXT in place of subexp N.
2270 This is useful only after a regular expression search or match,
2271 since only regular expressions have distinguished subexpressions.  */)
2272   (Lisp_Object newtext, Lisp_Object fixedcase, Lisp_Object literal, Lisp_Object string, Lisp_Object subexp)
2273 {
2274   enum { nochange, all_caps, cap_initial } case_action;
2275   register ptrdiff_t pos, pos_byte;
2276   int some_multiletter_word;
2277   int some_lowercase;
2278   int some_uppercase;
2279   int some_nonuppercase_initial;
2280   register int c, prevc;
2281   ptrdiff_t sub;
2282   ptrdiff_t opoint, newpoint;
2283
2284   CHECK_STRING (newtext);
2285
2286   if (! NILP (string))
2287     CHECK_STRING (string);
2288
2289   case_action = nochange;       /* We tried an initialization */
2290                                 /* but some C compilers blew it */
2291
2292   if (search_regs.num_regs <= 0)
2293     error ("`replace-match' called before any match found");
2294
2295   if (NILP (subexp))
2296     sub = 0;
2297   else
2298     {
2299       CHECK_NUMBER (subexp);
2300       if (! (0 <= XINT (subexp) && XINT (subexp) < search_regs.num_regs))
2301         args_out_of_range (subexp, make_number (search_regs.num_regs));
2302       sub = XINT (subexp);
2303     }
2304
2305   if (NILP (string))
2306     {
2307       if (search_regs.start[sub] < BEGV
2308           || search_regs.start[sub] > search_regs.end[sub]
2309           || search_regs.end[sub] > ZV)
2310         args_out_of_range (make_number (search_regs.start[sub]),
2311                            make_number (search_regs.end[sub]));
2312     }
2313   else
2314     {
2315       if (search_regs.start[sub] < 0
2316           || search_regs.start[sub] > search_regs.end[sub]
2317           || search_regs.end[sub] > SCHARS (string))
2318         args_out_of_range (make_number (search_regs.start[sub]),
2319                            make_number (search_regs.end[sub]));
2320     }
2321
2322   if (NILP (fixedcase))
2323     {
2324       /* Decide how to casify by examining the matched text. */
2325       ptrdiff_t last;
2326
2327       pos = search_regs.start[sub];
2328       last = search_regs.end[sub];
2329
2330       if (NILP (string))
2331         pos_byte = CHAR_TO_BYTE (pos);
2332       else
2333         pos_byte = string_char_to_byte (string, pos);
2334
2335       prevc = '\n';
2336       case_action = all_caps;
2337
2338       /* some_multiletter_word is set nonzero if any original word
2339          is more than one letter long. */
2340       some_multiletter_word = 0;
2341       some_lowercase = 0;
2342       some_nonuppercase_initial = 0;
2343       some_uppercase = 0;
2344
2345       while (pos < last)
2346         {
2347           if (NILP (string))
2348             {
2349               c = FETCH_CHAR_AS_MULTIBYTE (pos_byte);
2350               INC_BOTH (pos, pos_byte);
2351             }
2352           else
2353             FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, pos, pos_byte);
2354
2355           if (lowercasep (c))
2356             {
2357               /* Cannot be all caps if any original char is lower case */
2358
2359               some_lowercase = 1;
2360               if (SYNTAX (prevc) != Sword)
2361                 some_nonuppercase_initial = 1;
2362               else
2363                 some_multiletter_word = 1;
2364             }
2365           else if (uppercasep (c))
2366             {
2367               some_uppercase = 1;
2368               if (SYNTAX (prevc) != Sword)
2369                 ;
2370               else
2371                 some_multiletter_word = 1;
2372             }
2373           else
2374             {
2375               /* If the initial is a caseless word constituent,
2376                  treat that like a lowercase initial.  */
2377               if (SYNTAX (prevc) != Sword)
2378                 some_nonuppercase_initial = 1;
2379             }
2380
2381           prevc = c;
2382         }
2383
2384       /* Convert to all caps if the old text is all caps
2385          and has at least one multiletter word.  */
2386       if (! some_lowercase && some_multiletter_word)
2387         case_action = all_caps;
2388       /* Capitalize each word, if the old text has all capitalized words.  */
2389       else if (!some_nonuppercase_initial && some_multiletter_word)
2390         case_action = cap_initial;
2391       else if (!some_nonuppercase_initial && some_uppercase)
2392         /* Should x -> yz, operating on X, give Yz or YZ?
2393            We'll assume the latter.  */
2394         case_action = all_caps;
2395       else
2396         case_action = nochange;
2397     }
2398
2399   /* Do replacement in a string.  */
2400   if (!NILP (string))
2401     {
2402       Lisp_Object before, after;
2403
2404       before = Fsubstring (string, make_number (0),
2405                            make_number (search_regs.start[sub]));
2406       after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
2407
2408       /* Substitute parts of the match into NEWTEXT
2409          if desired.  */
2410       if (NILP (literal))
2411         {
2412           ptrdiff_t lastpos = 0;
2413           ptrdiff_t lastpos_byte = 0;
2414           /* We build up the substituted string in ACCUM.  */
2415           Lisp_Object accum;
2416           Lisp_Object middle;
2417           ptrdiff_t length = SBYTES (newtext);
2418
2419           accum = Qnil;
2420
2421           for (pos_byte = 0, pos = 0; pos_byte < length;)
2422             {
2423               ptrdiff_t substart = -1;
2424               ptrdiff_t subend = 0;
2425               int delbackslash = 0;
2426
2427               FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2428
2429               if (c == '\\')
2430                 {
2431                   FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2432
2433                   if (c == '&')
2434                     {
2435                       substart = search_regs.start[sub];
2436                       subend = search_regs.end[sub];
2437                     }
2438                   else if (c >= '1' && c <= '9')
2439                     {
2440                       if (c - '0' < search_regs.num_regs
2441                           && 0 <= search_regs.start[c - '0'])
2442                         {
2443                           substart = search_regs.start[c - '0'];
2444                           subend = search_regs.end[c - '0'];
2445                         }
2446                       else
2447                         {
2448                           /* If that subexp did not match,
2449                              replace \\N with nothing.  */
2450                           substart = 0;
2451                           subend = 0;
2452                         }
2453                     }
2454                   else if (c == '\\')
2455                     delbackslash = 1;
2456                   else if (c != '?')
2457                     error ("Invalid use of `\\' in replacement text");
2458                 }
2459               if (substart >= 0)
2460                 {
2461                   if (pos - 2 != lastpos)
2462                     middle = substring_both (newtext, lastpos,
2463                                              lastpos_byte,
2464                                              pos - 2, pos_byte - 2);
2465                   else
2466                     middle = Qnil;
2467                   accum = concat3 (accum, middle,
2468                                    Fsubstring (string,
2469                                                make_number (substart),
2470                                                make_number (subend)));
2471                   lastpos = pos;
2472                   lastpos_byte = pos_byte;
2473                 }
2474               else if (delbackslash)
2475                 {
2476                   middle = substring_both (newtext, lastpos,
2477                                            lastpos_byte,
2478                                            pos - 1, pos_byte - 1);
2479
2480                   accum = concat2 (accum, middle);
2481                   lastpos = pos;
2482                   lastpos_byte = pos_byte;
2483                 }
2484             }
2485
2486           if (pos != lastpos)
2487             middle = substring_both (newtext, lastpos,
2488                                      lastpos_byte,
2489                                      pos, pos_byte);
2490           else
2491             middle = Qnil;
2492
2493           newtext = concat2 (accum, middle);
2494         }
2495
2496       /* Do case substitution in NEWTEXT if desired.  */
2497       if (case_action == all_caps)
2498         newtext = Fupcase (newtext);
2499       else if (case_action == cap_initial)
2500         newtext = Fupcase_initials (newtext);
2501
2502       return concat3 (before, newtext, after);
2503     }
2504
2505   /* Record point, then move (quietly) to the start of the match.  */
2506   if (PT >= search_regs.end[sub])
2507     opoint = PT - ZV;
2508   else if (PT > search_regs.start[sub])
2509     opoint = search_regs.end[sub] - ZV;
2510   else
2511     opoint = PT;
2512
2513   /* If we want non-literal replacement,
2514      perform substitution on the replacement string.  */
2515   if (NILP (literal))
2516     {
2517       ptrdiff_t length = SBYTES (newtext);
2518       unsigned char *substed;
2519       ptrdiff_t substed_alloc_size, substed_len;
2520       int buf_multibyte = !NILP (BVAR (current_buffer, enable_multibyte_characters));
2521       int str_multibyte = STRING_MULTIBYTE (newtext);
2522       int really_changed = 0;
2523
2524       substed_alloc_size = ((STRING_BYTES_BOUND - 100) / 2 < length
2525                             ? STRING_BYTES_BOUND
2526                             : length * 2 + 100);
2527       substed = xmalloc (substed_alloc_size);
2528       substed_len = 0;
2529
2530       /* Go thru NEWTEXT, producing the actual text to insert in
2531          SUBSTED while adjusting multibyteness to that of the current
2532          buffer.  */
2533
2534       for (pos_byte = 0, pos = 0; pos_byte < length;)
2535         {
2536           unsigned char str[MAX_MULTIBYTE_LENGTH];
2537           const unsigned char *add_stuff = NULL;
2538           ptrdiff_t add_len = 0;
2539           ptrdiff_t idx = -1;
2540
2541           if (str_multibyte)
2542             {
2543               FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte);
2544               if (!buf_multibyte)
2545                 c = multibyte_char_to_unibyte (c);
2546             }
2547           else
2548             {
2549               /* Note that we don't have to increment POS.  */
2550               c = SREF (newtext, pos_byte++);
2551               if (buf_multibyte)
2552                 MAKE_CHAR_MULTIBYTE (c);
2553             }
2554
2555           /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2556              or set IDX to a match index, which means put that part
2557              of the buffer text into SUBSTED.  */
2558
2559           if (c == '\\')
2560             {
2561               really_changed = 1;
2562
2563               if (str_multibyte)
2564                 {
2565                   FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext,
2566                                                       pos, pos_byte);
2567                   if (!buf_multibyte && !ASCII_CHAR_P (c))
2568                     c = multibyte_char_to_unibyte (c);
2569                 }
2570               else
2571                 {
2572                   c = SREF (newtext, pos_byte++);
2573                   if (buf_multibyte)
2574                     MAKE_CHAR_MULTIBYTE (c);
2575                 }
2576
2577               if (c == '&')
2578                 idx = sub;
2579               else if (c >= '1' && c <= '9' && c - '0' < search_regs.num_regs)
2580                 {
2581                   if (search_regs.start[c - '0'] >= 1)
2582                     idx = c - '0';
2583                 }
2584               else if (c == '\\')
2585                 add_len = 1, add_stuff = (unsigned char *) "\\";
2586               else
2587                 {
2588                   xfree (substed);
2589                   error ("Invalid use of `\\' in replacement text");
2590                 }
2591             }
2592           else
2593             {
2594               add_len = CHAR_STRING (c, str);
2595               add_stuff = str;
2596             }
2597
2598           /* If we want to copy part of a previous match,
2599              set up ADD_STUFF and ADD_LEN to point to it.  */
2600           if (idx >= 0)
2601             {
2602               ptrdiff_t begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
2603               add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
2604               if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
2605                 move_gap_both (search_regs.start[idx], begbyte);
2606               add_stuff = BYTE_POS_ADDR (begbyte);
2607             }
2608
2609           /* Now the stuff we want to add to SUBSTED
2610              is invariably ADD_LEN bytes starting at ADD_STUFF.  */
2611
2612           /* Make sure SUBSTED is big enough.  */
2613           if (substed_alloc_size - substed_len < add_len)
2614             substed =
2615               xpalloc (substed, &substed_alloc_size,
2616                        add_len - (substed_alloc_size - substed_len),
2617                        STRING_BYTES_BOUND, 1);
2618
2619           /* Now add to the end of SUBSTED.  */
2620           if (add_stuff)
2621             {
2622               memcpy (substed + substed_len, add_stuff, add_len);
2623               substed_len += add_len;
2624             }
2625         }
2626
2627       if (really_changed)
2628         {
2629           if (buf_multibyte)
2630             {
2631               ptrdiff_t nchars =
2632                 multibyte_chars_in_text (substed, substed_len);
2633
2634               newtext = make_multibyte_string ((char *) substed, nchars,
2635                                                substed_len);
2636             }
2637           else
2638             newtext = make_unibyte_string ((char *) substed, substed_len);
2639         }
2640       xfree (substed);
2641     }
2642
2643   /* Replace the old text with the new in the cleanest possible way.  */
2644   replace_range (search_regs.start[sub], search_regs.end[sub],
2645                  newtext, 1, 0, 1);
2646   newpoint = search_regs.start[sub] + SCHARS (newtext);
2647
2648   if (case_action == all_caps)
2649     Fupcase_region (make_number (search_regs.start[sub]),
2650                     make_number (newpoint));
2651   else if (case_action == cap_initial)
2652     Fupcase_initials_region (make_number (search_regs.start[sub]),
2653                              make_number (newpoint));
2654
2655   /* Adjust search data for this change.  */
2656   {
2657     ptrdiff_t oldend = search_regs.end[sub];
2658     ptrdiff_t oldstart = search_regs.start[sub];
2659     ptrdiff_t change = newpoint - search_regs.end[sub];
2660     ptrdiff_t i;
2661
2662     for (i = 0; i < search_regs.num_regs; i++)
2663       {
2664         if (search_regs.start[i] >= oldend)
2665           search_regs.start[i] += change;
2666         else if (search_regs.start[i] > oldstart)
2667           search_regs.start[i] = oldstart;
2668         if (search_regs.end[i] >= oldend)
2669           search_regs.end[i] += change;
2670         else if (search_regs.end[i] > oldstart)
2671           search_regs.end[i] = oldstart;
2672       }
2673   }
2674
2675   /* Put point back where it was in the text.  */
2676   if (opoint <= 0)
2677     TEMP_SET_PT (opoint + ZV);
2678   else
2679     TEMP_SET_PT (opoint);
2680
2681   /* Now move point "officially" to the start of the inserted replacement.  */
2682   move_if_not_intangible (newpoint);
2683
2684   return Qnil;
2685 }
2686 \f
2687 static Lisp_Object
2688 match_limit (Lisp_Object num, int beginningp)
2689 {
2690   EMACS_INT n;
2691
2692   CHECK_NUMBER (num);
2693   n = XINT (num);
2694   if (n < 0)
2695     args_out_of_range (num, make_number (0));
2696   if (search_regs.num_regs <= 0)
2697     error ("No match data, because no search succeeded");
2698   if (n >= search_regs.num_regs
2699       || search_regs.start[n] < 0)
2700     return Qnil;
2701   return (make_number ((beginningp) ? search_regs.start[n]
2702                                     : search_regs.end[n]));
2703 }
2704
2705 DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
2706        doc: /* Return position of start of text matched by last search.
2707 SUBEXP, a number, specifies which parenthesized expression in the last
2708   regexp.
2709 Value is nil if SUBEXPth pair didn't match, or there were less than
2710   SUBEXP pairs.
2711 Zero means the entire text matched by the whole regexp or whole string.  */)
2712   (Lisp_Object subexp)
2713 {
2714   return match_limit (subexp, 1);
2715 }
2716
2717 DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
2718        doc: /* Return position of end of text matched by last search.
2719 SUBEXP, a number, specifies which parenthesized expression in the last
2720   regexp.
2721 Value is nil if SUBEXPth pair didn't match, or there were less than
2722   SUBEXP pairs.
2723 Zero means the entire text matched by the whole regexp or whole string.  */)
2724   (Lisp_Object subexp)
2725 {
2726   return match_limit (subexp, 0);
2727 }
2728
2729 DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 3, 0,
2730        doc: /* Return a list containing all info on what the last search matched.
2731 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2732 All the elements are markers or nil (nil if the Nth pair didn't match)
2733 if the last match was on a buffer; integers or nil if a string was matched.
2734 Use `set-match-data' to reinstate the data in this list.
2735
2736 If INTEGERS (the optional first argument) is non-nil, always use
2737 integers \(rather than markers) to represent buffer positions.  In
2738 this case, and if the last match was in a buffer, the buffer will get
2739 stored as one additional element at the end of the list.
2740
2741 If REUSE is a list, reuse it as part of the value.  If REUSE is long
2742 enough to hold all the values, and if INTEGERS is non-nil, no consing
2743 is done.
2744
2745 If optional third arg RESEAT is non-nil, any previous markers on the
2746 REUSE list will be modified to point to nowhere.
2747
2748 Return value is undefined if the last search failed.  */)
2749   (Lisp_Object integers, Lisp_Object reuse, Lisp_Object reseat)
2750 {
2751   Lisp_Object tail, prev;
2752   Lisp_Object *data;
2753   ptrdiff_t i, len;
2754
2755   if (!NILP (reseat))
2756     for (tail = reuse; CONSP (tail); tail = XCDR (tail))
2757       if (MARKERP (XCAR (tail)))
2758         {
2759           unchain_marker (XMARKER (XCAR (tail)));
2760           XSETCAR (tail, Qnil);
2761         }
2762
2763   if (NILP (last_thing_searched))
2764     return Qnil;
2765
2766   prev = Qnil;
2767
2768   data = alloca ((2 * search_regs.num_regs + 1) * sizeof *data);
2769
2770   len = 0;
2771   for (i = 0; i < search_regs.num_regs; i++)
2772     {
2773       ptrdiff_t start = search_regs.start[i];
2774       if (start >= 0)
2775         {
2776           if (EQ (last_thing_searched, Qt)
2777               || ! NILP (integers))
2778             {
2779               XSETFASTINT (data[2 * i], start);
2780               XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
2781             }
2782           else if (BUFFERP (last_thing_searched))
2783             {
2784               data[2 * i] = Fmake_marker ();
2785               Fset_marker (data[2 * i],
2786                            make_number (start),
2787                            last_thing_searched);
2788               data[2 * i + 1] = Fmake_marker ();
2789               Fset_marker (data[2 * i + 1],
2790                            make_number (search_regs.end[i]),
2791                            last_thing_searched);
2792             }
2793           else
2794             /* last_thing_searched must always be Qt, a buffer, or Qnil.  */
2795             emacs_abort ();
2796
2797           len = 2 * i + 2;
2798         }
2799       else
2800         data[2 * i] = data[2 * i + 1] = Qnil;
2801     }
2802
2803   if (BUFFERP (last_thing_searched) && !NILP (integers))
2804     {
2805       data[len] = last_thing_searched;
2806       len++;
2807     }
2808
2809   /* If REUSE is not usable, cons up the values and return them.  */
2810   if (! CONSP (reuse))
2811     return Flist (len, data);
2812
2813   /* If REUSE is a list, store as many value elements as will fit
2814      into the elements of REUSE.  */
2815   for (i = 0, tail = reuse; CONSP (tail);
2816        i++, tail = XCDR (tail))
2817     {
2818       if (i < len)
2819         XSETCAR (tail, data[i]);
2820       else
2821         XSETCAR (tail, Qnil);
2822       prev = tail;
2823     }
2824
2825   /* If we couldn't fit all value elements into REUSE,
2826      cons up the rest of them and add them to the end of REUSE.  */
2827   if (i < len)
2828     XSETCDR (prev, Flist (len - i, data + i));
2829
2830   return reuse;
2831 }
2832
2833 /* We used to have an internal use variant of `reseat' described as:
2834
2835       If RESEAT is `evaporate', put the markers back on the free list
2836       immediately.  No other references to the markers must exist in this
2837       case, so it is used only internally on the unwind stack and
2838       save-match-data from Lisp.
2839
2840    But it was ill-conceived: those supposedly-internal markers get exposed via
2841    the undo-list, so freeing them here is unsafe.  */
2842
2843 DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 2, 0,
2844        doc: /* Set internal data on last search match from elements of LIST.
2845 LIST should have been created by calling `match-data' previously.
2846
2847 If optional arg RESEAT is non-nil, make markers on LIST point nowhere.  */)
2848   (register Lisp_Object list, Lisp_Object reseat)
2849 {
2850   ptrdiff_t i;
2851   register Lisp_Object marker;
2852
2853   if (running_asynch_code)
2854     save_search_regs ();
2855
2856   CHECK_LIST (list);
2857
2858   /* Unless we find a marker with a buffer or an explicit buffer
2859      in LIST, assume that this match data came from a string.  */
2860   last_thing_searched = Qt;
2861
2862   /* Allocate registers if they don't already exist.  */
2863   {
2864     EMACS_INT length = XFASTINT (Flength (list)) / 2;
2865
2866     if (length > search_regs.num_regs)
2867       {
2868         ptrdiff_t num_regs = search_regs.num_regs;
2869         if (PTRDIFF_MAX < length)
2870           memory_full (SIZE_MAX);
2871         search_regs.start =
2872           xpalloc (search_regs.start, &num_regs, length - num_regs,
2873                    min (PTRDIFF_MAX, UINT_MAX), sizeof (regoff_t));
2874         search_regs.end =
2875           xrealloc (search_regs.end, num_regs * sizeof (regoff_t));
2876
2877         for (i = search_regs.num_regs; i < num_regs; i++)
2878           search_regs.start[i] = -1;
2879
2880         search_regs.num_regs = num_regs;
2881       }
2882
2883     for (i = 0; CONSP (list); i++)
2884       {
2885         marker = XCAR (list);
2886         if (BUFFERP (marker))
2887           {
2888             last_thing_searched = marker;
2889             break;
2890           }
2891         if (i >= length)
2892           break;
2893         if (NILP (marker))
2894           {
2895             search_regs.start[i] = -1;
2896             list = XCDR (list);
2897           }
2898         else
2899           {
2900             Lisp_Object from;
2901             Lisp_Object m;
2902
2903             m = marker;
2904             if (MARKERP (marker))
2905               {
2906                 if (XMARKER (marker)->buffer == 0)
2907                   XSETFASTINT (marker, 0);
2908                 else
2909                   XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
2910               }
2911
2912             CHECK_NUMBER_COERCE_MARKER (marker);
2913             from = marker;
2914
2915             if (!NILP (reseat) && MARKERP (m))
2916               {
2917                 unchain_marker (XMARKER (m));
2918                 XSETCAR (list, Qnil);
2919               }
2920
2921             if ((list = XCDR (list), !CONSP (list)))
2922               break;
2923
2924             m = marker = XCAR (list);
2925
2926             if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
2927               XSETFASTINT (marker, 0);
2928
2929             CHECK_NUMBER_COERCE_MARKER (marker);
2930             if ((XINT (from) < 0
2931                  ? TYPE_MINIMUM (regoff_t) <= XINT (from)
2932                  : XINT (from) <= TYPE_MAXIMUM (regoff_t))
2933                 && (XINT (marker) < 0
2934                     ? TYPE_MINIMUM (regoff_t) <= XINT (marker)
2935                     : XINT (marker) <= TYPE_MAXIMUM (regoff_t)))
2936               {
2937                 search_regs.start[i] = XINT (from);
2938                 search_regs.end[i] = XINT (marker);
2939               }
2940             else
2941               {
2942                 search_regs.start[i] = -1;
2943               }
2944
2945             if (!NILP (reseat) && MARKERP (m))
2946               {
2947                 unchain_marker (XMARKER (m));
2948                 XSETCAR (list, Qnil);
2949               }
2950           }
2951         list = XCDR (list);
2952       }
2953
2954     for (; i < search_regs.num_regs; i++)
2955       search_regs.start[i] = -1;
2956   }
2957
2958   return Qnil;
2959 }
2960
/* If non-zero, the match data have been saved in saved_search_regs
   (and last_thing_searched in saved_last_thing_searched) during the
   execution of a sentinel or filter.  */
2963 static int search_regs_saved;
2964 static struct re_registers saved_search_regs;
2965 static Lisp_Object saved_last_thing_searched;
2966
2967 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
2968    if asynchronous code (filter or sentinel) is running. */
2969 static void
2970 save_search_regs (void)
2971 {
2972   if (!search_regs_saved)
2973     {
2974       saved_search_regs.num_regs = search_regs.num_regs;
2975       saved_search_regs.start = search_regs.start;
2976       saved_search_regs.end = search_regs.end;
2977       saved_last_thing_searched = last_thing_searched;
2978       last_thing_searched = Qnil;
2979       search_regs.num_regs = 0;
2980       search_regs.start = 0;
2981       search_regs.end = 0;
2982
2983       search_regs_saved = 1;
2984     }
2985 }
2986
2987 /* Called upon exit from filters and sentinels. */
2988 void
2989 restore_search_regs (void)
2990 {
2991   if (search_regs_saved)
2992     {
2993       if (search_regs.num_regs > 0)
2994         {
2995           xfree (search_regs.start);
2996           xfree (search_regs.end);
2997         }
2998       search_regs.num_regs = saved_search_regs.num_regs;
2999       search_regs.start = saved_search_regs.start;
3000       search_regs.end = saved_search_regs.end;
3001       last_thing_searched = saved_last_thing_searched;
3002       saved_last_thing_searched = Qnil;
3003       search_regs_saved = 0;
3004     }
3005 }
3006
3007 static Lisp_Object
3008 unwind_set_match_data (Lisp_Object list)
3009 {
3010   /* It is NOT ALWAYS safe to free (evaporate) the markers immediately.  */
3011   return Fset_match_data (list, Qt);
3012 }
3013
3014 /* Called to unwind protect the match data.  */
3015 void
3016 record_unwind_save_match_data (void)
3017 {
3018   record_unwind_protect (unwind_set_match_data,
3019                          Fmatch_data (Qnil, Qnil, Qnil));
3020 }
3021
3022 /* Quote a string to deactivate reg-expr chars */
3023
3024 DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
3025        doc: /* Return a regexp string which matches exactly STRING and nothing else.  */)
3026   (Lisp_Object string)
3027 {
3028   register char *in, *out, *end;
3029   register char *temp;
3030   int backslashes_added = 0;
3031
3032   CHECK_STRING (string);
3033
3034   temp = alloca (SBYTES (string) * 2);
3035
3036   /* Now copy the data into the new string, inserting escapes. */
3037
3038   in = SSDATA (string);
3039   end = in + SBYTES (string);
3040   out = temp;
3041
3042   for (; in != end; in++)
3043     {
3044       if (*in == '['
3045           || *in == '*' || *in == '.' || *in == '\\'
3046           || *in == '?' || *in == '+'
3047           || *in == '^' || *in == '$')
3048         *out++ = '\\', backslashes_added++;
3049       *out++ = *in;
3050     }
3051
3052   return make_specified_string (temp,
3053                                 SCHARS (string) + backslashes_added,
3054                                 out - temp,
3055                                 STRING_MULTIBYTE (string));
3056 }
3057 \f
3058 void
3059 syms_of_search (void)
3060 {
3061   register int i;
3062
3063   for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
3064     {
3065       searchbufs[i].buf.allocated = 100;
3066       searchbufs[i].buf.buffer = xmalloc (100);
3067       searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
3068       searchbufs[i].regexp = Qnil;
3069       searchbufs[i].whitespace_regexp = Qnil;
3070       searchbufs[i].syntax_table = Qnil;
3071       staticpro (&searchbufs[i].regexp);
3072       staticpro (&searchbufs[i].whitespace_regexp);
3073       staticpro (&searchbufs[i].syntax_table);
3074       searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
3075     }
3076   searchbuf_head = &searchbufs[0];
3077
3078   DEFSYM (Qsearch_failed, "search-failed");
3079   DEFSYM (Qinvalid_regexp, "invalid-regexp");
3080
3081   Fput (Qsearch_failed, Qerror_conditions,
3082         listn (CONSTYPE_PURE, 2, Qsearch_failed, Qerror));
3083   Fput (Qsearch_failed, Qerror_message,
3084         build_pure_c_string ("Search failed"));
3085
3086   Fput (Qinvalid_regexp, Qerror_conditions,
3087         listn (CONSTYPE_PURE, 2, Qinvalid_regexp, Qerror));
3088   Fput (Qinvalid_regexp, Qerror_message,
3089         build_pure_c_string ("Invalid regexp"));
3090
3091   last_thing_searched = Qnil;
3092   staticpro (&last_thing_searched);
3093
3094   saved_last_thing_searched = Qnil;
3095   staticpro (&saved_last_thing_searched);
3096
3097   DEFVAR_LISP ("search-spaces-regexp", Vsearch_spaces_regexp,
3098       doc: /* Regexp to substitute for bunches of spaces in regexp search.
3099 Some commands use this for user-specified regexps.
3100 Spaces that occur inside character classes or repetition operators
3101 or other such regexp constructs are not replaced with this.
3102 A value of nil (which is the normal value) means treat spaces literally.  */);
3103   Vsearch_spaces_regexp = Qnil;
3104
3105   DEFVAR_LISP ("inhibit-changing-match-data", Vinhibit_changing_match_data,
3106       doc: /* Internal use only.
3107 If non-nil, the primitive searching and matching functions
3108 such as `looking-at', `string-match', `re-search-forward', etc.,
3109 do not set the match data.  The proper way to use this variable
3110 is to bind it with `let' around a small expression.  */);
3111   Vinhibit_changing_match_data = Qnil;
3112
3113   defsubr (&Slooking_at);
3114   defsubr (&Sposix_looking_at);
3115   defsubr (&Sstring_match);
3116   defsubr (&Sposix_string_match);
3117   defsubr (&Ssearch_forward);
3118   defsubr (&Ssearch_backward);
3119   defsubr (&Sre_search_forward);
3120   defsubr (&Sre_search_backward);
3121   defsubr (&Sposix_search_forward);
3122   defsubr (&Sposix_search_backward);
3123   defsubr (&Sreplace_match);
3124   defsubr (&Smatch_beginning);
3125   defsubr (&Smatch_end);
3126   defsubr (&Smatch_data);
3127   defsubr (&Sset_match_data);
3128   defsubr (&Sregexp_quote);
3129 }