src/search.c

   1 /* String search routines for GNU Emacs.
   2
   3 Copyright (C) 1985-1987, 1993-1994, 1997-1999, 2001-2015 Free Software
   4 Foundation, Inc.
   5
   6 This file is part of GNU Emacs.
   7
   8 GNU Emacs is free software: you can redistribute it and/or modify
   9 it under the terms of the GNU General Public License as published by
  10 the Free Software Foundation, either version 3 of the License, or
  11 (at your option) any later version.
  12
  13 GNU Emacs is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
  20
  21
  22 #include <config.h>
  23
  24 #include "lisp.h"
  25 #include "category.h"
  26 #include "character.h"
  27 #include "buffer.h"
  28 #include "syntax.h"
  29 #include "charset.h"
  30 #include "region-cache.h"
  31 #include "commands.h"
  32 #include "blockinput.h"
  33 #include "intervals.h"
  34
  35 #include <sys/types.h>
  36 #include "regex.h"
  37
  38 #define REGEXP_CACHE_SIZE 20
  39
  40 /* If the regexp is non-nil, then the buffer contains the compiled form
  41    of that regexp, suitable for searching.  */
  42 struct regexp_cache
  43 {
  44   struct regexp_cache *next;
  45   Lisp_Object regexp, whitespace_regexp;
  46   /* Syntax table for which the regexp applies.  We need this because
  47      of character classes.  If this is t, then the compiled pattern is valid
  48      for any syntax-table.  */
  49   Lisp_Object syntax_table;
  50   struct re_pattern_buffer buf;
  51   char fastmap[0400];
  52   /* True means regexp was compiled to do full POSIX backtracking.  */
  53   bool posix;
  54 };
  55
  56 /* The instances of that struct.  */
  57 static struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
  58
  59 /* The head of the linked list; points to the most recently used buffer.  */
  60 static struct regexp_cache *searchbuf_head;
  61
  62
  63 /* Every call to re_match, etc., must pass &search_regs as the regs
  64    argument unless you can show it is unnecessary (i.e., if re_match
  65    is certainly going to be called again before region-around-match
  66    can be called).
  67
  68    Since the registers are now dynamically allocated, we need to make
  69    sure not to refer to the Nth register before checking that it has
  70    been allocated by checking search_regs.num_regs.
  71
  72    The regex code keeps track of whether it has allocated the search
  73    buffer using bits in the re_pattern_buffer.  This means that whenever
  74    you compile a new pattern, it completely forgets whether it has
  75    allocated any registers, and will allocate new registers the next
  76    time you call a searching or matching function.  Therefore, we need
  77    to call re_set_registers after compiling a new pattern or after
  78    setting the match registers, so that the regex functions will be
  79    able to free or re-allocate it properly.  */
  80 static struct re_registers search_regs;
  81
  82 /* The buffer in which the last search was performed, or
  83    Qt if the last search was done in a string;
  84    Qnil if no searching has been done yet.  */
  85 static Lisp_Object last_thing_searched;
  86
  87 static void set_search_regs (ptrdiff_t, ptrdiff_t);
  88 static void save_search_regs (void);
  89 static EMACS_INT simple_search (EMACS_INT, unsigned char *, ptrdiff_t,
  90                                 ptrdiff_t, Lisp_Object, ptrdiff_t, ptrdiff_t,
  91                                 ptrdiff_t, ptrdiff_t);
  92 static EMACS_INT boyer_moore (EMACS_INT, unsigned char *, ptrdiff_t,
  93                               Lisp_Object, Lisp_Object, ptrdiff_t,
  94                               ptrdiff_t, int);
  95 static EMACS_INT search_buffer (Lisp_Object, ptrdiff_t, ptrdiff_t,
  96                                 ptrdiff_t, ptrdiff_t, EMACS_INT, int,
  97                                 Lisp_Object, Lisp_Object, bool);
  98
  99 static _Noreturn void
 100 matcher_overflow (void)
 101 {
 102   error ("Stack overflow in regexp matcher");
 103 }
 104
 105 /* Compile a regexp and signal a Lisp error if anything goes wrong.
 106    PATTERN is the pattern to compile.
 107    CP is the place to put the result.
 108    TRANSLATE is a translation table for ignoring case, or nil for none.
 109    POSIX is true if we want full backtracking (POSIX style) for this pattern.
 110    False means backtrack only enough to get a valid match.
 111
 112    The behavior also depends on Vsearch_spaces_regexp.  */
 113
 114 static void
 115 compile_pattern_1 (struct regexp_cache *cp, Lisp_Object pattern,
 116                    Lisp_Object translate, bool posix)
 117 {
 118   char *val;
 119   reg_syntax_t old;
 120
 121   cp->regexp = Qnil;
 122   cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
 123   cp->posix = posix;
 124   cp->buf.multibyte = STRING_MULTIBYTE (pattern);
 125   cp->buf.charset_unibyte = charset_unibyte;
 126   if (STRINGP (Vsearch_spaces_regexp))
 127     cp->whitespace_regexp = Vsearch_spaces_regexp;
 128   else
 129     cp->whitespace_regexp = Qnil;
 130
 131   /* rms: I think BLOCK_INPUT is not needed here any more,
 132      because regex.c defines malloc to call xmalloc.
 133      Using BLOCK_INPUT here means the debugger won't run if an error occurs.
 134      So let's turn it off.  */
 135   /*  BLOCK_INPUT;  */
 136   old = re_set_syntax (RE_SYNTAX_EMACS
 137                        | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
 138
 139   if (STRINGP (Vsearch_spaces_regexp))
 140     re_set_whitespace_regexp (SSDATA (Vsearch_spaces_regexp));
 141   else
 142     re_set_whitespace_regexp (NULL);
 143
 144   val = (char *) re_compile_pattern (SSDATA (pattern),
 145                                      SBYTES (pattern), &cp->buf);
 146
 147   /* If the compiled pattern hard codes some of the contents of the
 148      syntax-table, it can only be reused with *this* syntax table.  */
 149   cp->syntax_table = cp->buf.used_syntax ? BVAR (current_buffer, syntax_table) : Qt;
 150
 151   re_set_whitespace_regexp (NULL);
 152
 153   re_set_syntax (old);
 154   /* unblock_input ();  */
 155   if (val)
 156     xsignal1 (Qinvalid_regexp, build_string (val));
 157
 158   cp->regexp = Fcopy_sequence (pattern);
 159 }
 160
 161 /* Shrink each compiled regexp buffer in the cache
 162    to the size actually used right now.
 163    This is called from garbage collection.  */
 164
 165 void
 166 shrink_regexp_cache (void)
 167 {
 168   struct regexp_cache *cp;
 169
 170   for (cp = searchbuf_head; cp != 0; cp = cp->next)
 171     {
 172       cp->buf.allocated = cp->buf.used;
 173       cp->buf.buffer = xrealloc (cp->buf.buffer, cp->buf.used);
 174     }
 175 }
 176
 177 /* Clear the regexp cache w.r.t. a particular syntax table,
 178    because it was changed.
 179    There is no danger of memory leak here because re_compile_pattern
 180    automagically manages the memory in each re_pattern_buffer struct,
 181    based on its `allocated' and `buffer' values.  */
 182 void
 183 clear_regexp_cache (void)
 184 {
 185   int i;
 186
 187   for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
 188     /* It's tempting to compare with the syntax-table we've actually changed,
 189        but it's not sufficient because char-table inheritance means that
 190        modifying one syntax-table can change others at the same time.  */
 191     if (!EQ (searchbufs[i].syntax_table, Qt))
 192       searchbufs[i].regexp = Qnil;
 193 }
 194
 195 /* Compile a regexp if necessary, but first check to see if there's one in
 196    the cache.
 197    PATTERN is the pattern to compile.
 198    TRANSLATE is a translation table for ignoring case, or nil for none.
 199    REGP is the structure that says where to store the "register"
 200    values that will result from matching this pattern.
 201    If it is 0, we should compile the pattern not to record any
 202    subexpression bounds.
 203    POSIX is true if we want full backtracking (POSIX style) for this pattern.
 204    False means backtrack only enough to get a valid match.  */
 205
 206 struct re_pattern_buffer *
 207 compile_pattern (Lisp_Object pattern, struct re_registers *regp,
 208                  Lisp_Object translate, bool posix, bool multibyte)
 209 {
 210   struct regexp_cache *cp, **cpp;
 211
 212   for (cpp = &searchbuf_head; ; cpp = &cp->next)
 213     {
 214       cp = *cpp;
 215       /* Entries are initialized to nil, and may be set to nil by
 216          compile_pattern_1 if the pattern isn't valid.  Don't apply
 217          string accessors in those cases.  However, compile_pattern_1
 218          is only applied to the cache entry we pick here to reuse.  So
 219          nil should never appear before a non-nil entry.  */
 220       if (NILP (cp->regexp))
 221         goto compile_it;
 222       if (SCHARS (cp->regexp) == SCHARS (pattern)
 223           && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
 224           && !NILP (Fstring_equal (cp->regexp, pattern))
 225           && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
 226           && cp->posix == posix
 227           && (EQ (cp->syntax_table, Qt)
 228               || EQ (cp->syntax_table, BVAR (current_buffer, syntax_table)))
 229           && !NILP (Fequal (cp->whitespace_regexp, Vsearch_spaces_regexp))
 230           && cp->buf.charset_unibyte == charset_unibyte)
 231         break;
 232
 233       /* If we're at the end of the cache, compile into the nil cell
 234          we found, or the last (least recently used) cell with a
 235          string value.  */
 236       if (cp->next == 0)
 237         {
 238         compile_it:
 239           compile_pattern_1 (cp, pattern, translate, posix);
 240           break;
 241         }
 242     }
 243
 244   /* When we get here, cp (aka *cpp) contains the compiled pattern,
 245      either because we found it in the cache or because we just compiled it.
 246      Move it to the front of the queue to mark it as most recently used.  */
 247   *cpp = cp->next;
 248   cp->next = searchbuf_head;
 249   searchbuf_head = cp;
 250
 251   /* Advise the searching functions about the space we have allocated
 252      for register data.  */
 253   if (regp)
 254     re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
 255
 256   /* The compiled pattern can be used both for multibyte and unibyte
 257      target.  But, we have to tell which the pattern is used for. */
 258   cp->buf.target_multibyte = multibyte;
 259
 260   return &cp->buf;
 261 }
 262
 263 \f
 264 static Lisp_Object
 265 looking_at_1 (Lisp_Object string, bool posix)
 266 {
 267   Lisp_Object val;
 268   unsigned char *p1, *p2;
 269   ptrdiff_t s1, s2;
 270   register ptrdiff_t i;
 271   struct re_pattern_buffer *bufp;
 272
 273   if (running_asynch_code)
 274     save_search_regs ();
 275
 276   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
 277   set_char_table_extras (BVAR (current_buffer, case_canon_table), 2,
 278                          BVAR (current_buffer, case_eqv_table));
 279
 280   CHECK_STRING (string);
 281   bufp = compile_pattern (string,
 282                           (NILP (Vinhibit_changing_match_data)
 283                            ? &search_regs : NULL),
 284                           (!NILP (BVAR (current_buffer, case_fold_search))
 285                            ? BVAR (current_buffer, case_canon_table) : Qnil),
 286                           posix,
 287                           !NILP (BVAR (current_buffer, enable_multibyte_characters)));
 288
 289   immediate_quit = 1;
 290   QUIT;                 /* Do a pending quit right away, to avoid paradoxical behavior */
 291
 292   /* Get pointers and sizes of the two strings
 293      that make up the visible portion of the buffer. */
 294
 295   p1 = BEGV_ADDR;
 296   s1 = GPT_BYTE - BEGV_BYTE;
 297   p2 = GAP_END_ADDR;
 298   s2 = ZV_BYTE - GPT_BYTE;
 299   if (s1 < 0)
 300     {
 301       p2 = p1;
 302       s2 = ZV_BYTE - BEGV_BYTE;
 303       s1 = 0;
 304     }
 305   if (s2 < 0)
 306     {
 307       s1 = ZV_BYTE - BEGV_BYTE;
 308       s2 = 0;
 309     }
 310
 311   re_match_object = Qnil;
 312
 313   i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
 314                   PT_BYTE - BEGV_BYTE,
 315                   (NILP (Vinhibit_changing_match_data)
 316                    ? &search_regs : NULL),
 317                   ZV_BYTE - BEGV_BYTE);
 318   immediate_quit = 0;
 319
 320   if (i == -2)
 321     matcher_overflow ();
 322
 323   val = (i >= 0 ? Qt : Qnil);
 324   if (NILP (Vinhibit_changing_match_data) && i >= 0)
 325   {
 326     for (i = 0; i < search_regs.num_regs; i++)
 327       if (search_regs.start[i] >= 0)
 328         {
 329           search_regs.start[i]
 330             = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
 331          search_regs.end[i]
 332            = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
 333        }
 334     /* Set last_thing_searched only when match data is changed.  */
 335     XSETBUFFER (last_thing_searched, current_buffer);
 336   }
 337
 338   return val;
 339 }
 340
 341 DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
 342        doc: /* Return t if text after point matches regular expression REGEXP.
 343 This function modifies the match data that `match-beginning',
 344 `match-end' and `match-data' access; save and restore the match
 345 data if you want to preserve them.  */)
 346   (Lisp_Object regexp)
 347 {
 348   return looking_at_1 (regexp, 0);
 349 }
 350
 351 DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
 352        doc: /* Return t if text after point matches regular expression REGEXP.
 353 Find the longest match, in accord with Posix regular expression rules.
 354 This function modifies the match data that `match-beginning',
 355 `match-end' and `match-data' access; save and restore the match
 356 data if you want to preserve them.  */)
 357   (Lisp_Object regexp)
 358 {
 359   return looking_at_1 (regexp, 1);
 360 }
 361 \f
 362 static Lisp_Object
 363 string_match_1 (Lisp_Object regexp, Lisp_Object string, Lisp_Object start,
 364                 bool posix)
 365 {
 366   ptrdiff_t val;
 367   struct re_pattern_buffer *bufp;
 368   EMACS_INT pos;
 369   ptrdiff_t pos_byte, i;
 370
 371   if (running_asynch_code)
 372     save_search_regs ();
 373
 374   CHECK_STRING (regexp);
 375   CHECK_STRING (string);
 376
 377   if (NILP (start))
 378     pos = 0, pos_byte = 0;
 379   else
 380     {
 381       ptrdiff_t len = SCHARS (string);
 382
 383       CHECK_NUMBER (start);
 384       pos = XINT (start);
 385       if (pos < 0 && -pos <= len)
 386         pos = len + pos;
 387       else if (0 > pos || pos > len)
 388         args_out_of_range (string, start);
 389       pos_byte = string_char_to_byte (string, pos);
 390     }
 391
 392   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
 393   set_char_table_extras (BVAR (current_buffer, case_canon_table), 2,
 394                          BVAR (current_buffer, case_eqv_table));
 395
 396   bufp = compile_pattern (regexp,
 397                           (NILP (Vinhibit_changing_match_data)
 398                            ? &search_regs : NULL),
 399                           (!NILP (BVAR (current_buffer, case_fold_search))
 400                            ? BVAR (current_buffer, case_canon_table) : Qnil),
 401                           posix,
 402                           STRING_MULTIBYTE (string));
 403   immediate_quit = 1;
 404   re_match_object = string;
 405
 406   val = re_search (bufp, SSDATA (string),
 407                    SBYTES (string), pos_byte,
 408                    SBYTES (string) - pos_byte,
 409                    (NILP (Vinhibit_changing_match_data)
 410                     ? &search_regs : NULL));
 411   immediate_quit = 0;
 412
 413   /* Set last_thing_searched only when match data is changed.  */
 414   if (NILP (Vinhibit_changing_match_data))
 415     last_thing_searched = Qt;
 416
 417   if (val == -2)
 418     matcher_overflow ();
 419   if (val < 0) return Qnil;
 420
 421   if (NILP (Vinhibit_changing_match_data))
 422     for (i = 0; i < search_regs.num_regs; i++)
 423       if (search_regs.start[i] >= 0)
 424         {
 425           search_regs.start[i]
 426             = string_byte_to_char (string, search_regs.start[i]);
 427           search_regs.end[i]
 428             = string_byte_to_char (string, search_regs.end[i]);
 429         }
 430
 431   return make_number (string_byte_to_char (string, val));
 432 }
 433
 434 DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
 435        doc: /* Return index of start of first match for REGEXP in STRING, or nil.
 436 Matching ignores case if `case-fold-search' is non-nil.
 437 If third arg START is non-nil, start search at that index in STRING.
 438 For index of first char beyond the match, do (match-end 0).
 439 `match-end' and `match-beginning' also give indices of substrings
 440 matched by parenthesis constructs in the pattern.
 441
 442 You can use the function `match-string' to extract the substrings
 443 matched by the parenthesis constructions in REGEXP. */)
 444   (Lisp_Object regexp, Lisp_Object string, Lisp_Object start)
 445 {
 446   return string_match_1 (regexp, string, start, 0);
 447 }
 448
 449 DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
 450        doc: /* Return index of start of first match for REGEXP in STRING, or nil.
 451 Find the longest match, in accord with Posix regular expression rules.
 452 Case is ignored if `case-fold-search' is non-nil in the current buffer.
 453 If third arg START is non-nil, start search at that index in STRING.
 454 For index of first char beyond the match, do (match-end 0).
 455 `match-end' and `match-beginning' also give indices of substrings
 456 matched by parenthesis constructs in the pattern.  */)
 457   (Lisp_Object regexp, Lisp_Object string, Lisp_Object start)
 458 {
 459   return string_match_1 (regexp, string, start, 1);
 460 }
 461
 462 /* Match REGEXP against STRING using translation table TABLE,
 463    searching all of STRING, and return the index of the match,
 464    or negative on failure.  This does not clobber the match data.  */
 465
 466 ptrdiff_t
 467 fast_string_match_internal (Lisp_Object regexp, Lisp_Object string,
 468                             Lisp_Object table)
 469 {
 470   ptrdiff_t val;
 471   struct re_pattern_buffer *bufp;
 472
 473   bufp = compile_pattern (regexp, 0, table,
 474                           0, STRING_MULTIBYTE (string));
 475   immediate_quit = 1;
 476   re_match_object = string;
 477
 478   val = re_search (bufp, SSDATA (string),
 479                    SBYTES (string), 0,
 480                    SBYTES (string), 0);
 481   immediate_quit = 0;
 482   return val;
 483 }
 484
 485 /* Match REGEXP against STRING, searching all of STRING ignoring case,
 486    and return the index of the match, or negative on failure.
 487    This does not clobber the match data.
 488    We assume that STRING contains single-byte characters.  */
 489
 490 ptrdiff_t
 491 fast_c_string_match_ignore_case (Lisp_Object regexp,
 492                                  const char *string, ptrdiff_t len)
 493 {
 494   ptrdiff_t val;
 495   struct re_pattern_buffer *bufp;
 496
 497   regexp = string_make_unibyte (regexp);
 498   re_match_object = Qt;
 499   bufp = compile_pattern (regexp, 0,
 500                           Vascii_canon_table, 0,
 501                           0);
 502   immediate_quit = 1;
 503   val = re_search (bufp, string, len, 0, len, 0);
 504   immediate_quit = 0;
 505   return val;
 506 }
 507
 508 /* Match REGEXP against the characters after POS to LIMIT, and return
 509    the number of matched characters.  If STRING is non-nil, match
 510    against the characters in it.  In that case, POS and LIMIT are
 511    indices into the string.  This function doesn't modify the match
 512    data.  */
 513
 514 ptrdiff_t
 515 fast_looking_at (Lisp_Object regexp, ptrdiff_t pos, ptrdiff_t pos_byte,
 516                  ptrdiff_t limit, ptrdiff_t limit_byte, Lisp_Object string)
 517 {
 518   bool multibyte;
 519   struct re_pattern_buffer *buf;
 520   unsigned char *p1, *p2;
 521   ptrdiff_t s1, s2;
 522   ptrdiff_t len;
 523
 524   if (STRINGP (string))
 525     {
 526       if (pos_byte < 0)
 527         pos_byte = string_char_to_byte (string, pos);
 528       if (limit_byte < 0)
 529         limit_byte = string_char_to_byte (string, limit);
 530       p1 = NULL;
 531       s1 = 0;
 532       p2 = SDATA (string);
 533       s2 = SBYTES (string);
 534       re_match_object = string;
 535       multibyte = STRING_MULTIBYTE (string);
 536     }
 537   else
 538     {
 539       if (pos_byte < 0)
 540         pos_byte = CHAR_TO_BYTE (pos);
 541       if (limit_byte < 0)
 542         limit_byte = CHAR_TO_BYTE (limit);
 543       pos_byte -= BEGV_BYTE;
 544       limit_byte -= BEGV_BYTE;
 545       p1 = BEGV_ADDR;
 546       s1 = GPT_BYTE - BEGV_BYTE;
 547       p2 = GAP_END_ADDR;
 548       s2 = ZV_BYTE - GPT_BYTE;
 549       if (s1 < 0)
 550         {
 551           p2 = p1;
 552           s2 = ZV_BYTE - BEGV_BYTE;
 553           s1 = 0;
 554         }
 555       if (s2 < 0)
 556         {
 557           s1 = ZV_BYTE - BEGV_BYTE;
 558           s2 = 0;
 559         }
 560       re_match_object = Qnil;
 561       multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
 562     }
 563
 564   buf = compile_pattern (regexp, 0, Qnil, 0, multibyte);
 565   immediate_quit = 1;
 566   len = re_match_2 (buf, (char *) p1, s1, (char *) p2, s2,
 567                     pos_byte, NULL, limit_byte);
 568   immediate_quit = 0;
 569
 570   return len;
 571 }
 572
 573 \f
 574 /* The newline cache: remembering which sections of text have no newlines.  */
 575
 576 /* If the user has requested the long scans caching, make sure it's on.
 577    Otherwise, make sure it's off.
 578    This is our cheezy way of associating an action with the change of
 579    state of a buffer-local variable.  */
 580 static struct region_cache *
 581 newline_cache_on_off (struct buffer *buf)
 582 {
 583   struct buffer *base_buf = buf;
 584   bool indirect_p = false;
 585
 586   if (buf->base_buffer)
 587     {
 588       base_buf = buf->base_buffer;
 589       indirect_p = true;
 590     }
 591
 592   /* Don't turn on or off the cache in the base buffer, if the value
 593      of cache-long-scans of the base buffer is inconsistent with that.
 594      This is because doing so will just make the cache pure overhead,
 595      since if we turn it on via indirect buffer, it will be
 596      immediately turned off by its base buffer.  */
 597   if (NILP (BVAR (buf, cache_long_scans)))
 598     {
 599       if (!indirect_p
 600           || NILP (BVAR (base_buf, cache_long_scans)))
 601         {
 602           /* It should be off.  */
 603           if (base_buf->newline_cache)
 604             {
 605               free_region_cache (base_buf->newline_cache);
 606               base_buf->newline_cache = 0;
 607             }
 608         }
 609       return NULL;
 610     }
 611   else
 612     {
 613       if (!indirect_p
 614           || !NILP (BVAR (base_buf, cache_long_scans)))
 615         {
 616           /* It should be on.  */
 617           if (base_buf->newline_cache == 0)
 618             base_buf->newline_cache = new_region_cache ();
 619         }
 620       return base_buf->newline_cache;
 621     }
 622 }
 623
 624 \f
 625 /* Search for COUNT newlines between START/START_BYTE and END/END_BYTE.
 626
 627    If COUNT is positive, search forwards; END must be >= START.
 628    If COUNT is negative, search backwards for the -COUNTth instance;
 629       END must be <= START.
 630    If COUNT is zero, do anything you please; run rogue, for all I care.
 631
 632    If END is zero, use BEGV or ZV instead, as appropriate for the
 633    direction indicated by COUNT.
 634
 635    If we find COUNT instances, set *SHORTAGE to zero, and return the
 636    position past the COUNTth match.  Note that for reverse motion
 637    this is not the same as the usual convention for Emacs motion commands.
 638
 639    If we don't find COUNT instances before reaching END, set *SHORTAGE
 640    to the number of newlines left unfound, and return END.
 641
 642    If BYTEPOS is not NULL, set *BYTEPOS to the byte position corresponding
 643    to the returned character position.
 644
 645    If ALLOW_QUIT, set immediate_quit.  That's good to do
 646    except when inside redisplay.  */
 647
 648 ptrdiff_t
 649 find_newline (ptrdiff_t start, ptrdiff_t start_byte, ptrdiff_t end,
 650               ptrdiff_t end_byte, ptrdiff_t count, ptrdiff_t *shortage,
 651               ptrdiff_t *bytepos, bool allow_quit)
 652 {
 653   struct region_cache *newline_cache;
 654   int direction;
 655   struct buffer *cache_buffer;
 656
 657   if (count > 0)
 658     {
 659       direction = 1;
 660       if (!end)
 661         end = ZV, end_byte = ZV_BYTE;
 662     }
 663   else
 664     {
 665       direction = -1;
 666       if (!end)
 667         end = BEGV, end_byte = BEGV_BYTE;
 668     }
 669   if (end_byte == -1)
 670     end_byte = CHAR_TO_BYTE (end);
 671
 672   newline_cache = newline_cache_on_off (current_buffer);
 673   if (current_buffer->base_buffer)
 674     cache_buffer = current_buffer->base_buffer;
 675   else
 676     cache_buffer = current_buffer;
 677
 678   if (shortage != 0)
 679     *shortage = 0;
 680
 681   immediate_quit = allow_quit;
 682
 683   if (count > 0)
 684     while (start != end)
 685       {
 686         /* Our innermost scanning loop is very simple; it doesn't know
 687            about gaps, buffer ends, or the newline cache.  ceiling is
 688            the position of the last character before the next such
 689            obstacle --- the last character the dumb search loop should
 690            examine.  */
 691         ptrdiff_t tem, ceiling_byte = end_byte - 1;
 692
 693         /* If we're using the newline cache, consult it to see whether
 694            we can avoid some scanning.  */
 695         if (newline_cache)
 696           {
 697             ptrdiff_t next_change;
 698             int result = 1;
 699
 700             immediate_quit = 0;
 701             while (start < end && result)
 702               {
 703                 ptrdiff_t lim1;
 704
 705                 result = region_cache_forward (cache_buffer, newline_cache,
 706                                                start, &next_change);
 707                 if (result)
 708                   {
 709                     start = next_change;
 710                     lim1 = next_change = end;
 711                   }
 712                 else
 713                   lim1 = min (next_change, end);
 714
 715                 /* The cache returned zero for this region; see if
 716                    this is because the region is known and includes
 717                    only newlines.  While at that, count any newlines
 718                    we bump into, and exit if we found enough off them.  */
 719                 start_byte = CHAR_TO_BYTE (start);
 720                 while (start < lim1
 721                        && FETCH_BYTE (start_byte) == '\n')
 722                   {
 723                     start_byte++;
 724                     start++;
 725                     if (--count == 0)
 726                       {
 727                         if (bytepos)
 728                           *bytepos = start_byte;
 729                         return start;
 730                       }
 731                   }
 732                 /* If we found a non-newline character before hitting
 733                    position where the cache will again return non-zero
 734                    (i.e. no newlines beyond that position), it means
 735                    this region is not yet known to the cache, and we
 736                    must resort to the "dumb loop" method.  */
 737                 if (start < next_change && !result)
 738                   break;
 739                 result = 1;
 740               }
 741             if (start >= end)
 742               {
 743                 start = end;
 744                 start_byte = end_byte;
 745                 break;
 746               }
 747             immediate_quit = allow_quit;
 748
 749             /* START should never be after END.  */
 750             if (start_byte > ceiling_byte)
 751               start_byte = ceiling_byte;
 752
 753             /* Now the text after start is an unknown region, and
 754                next_change is the position of the next known region. */
 755             ceiling_byte = min (CHAR_TO_BYTE (next_change) - 1, ceiling_byte);
 756           }
 757         else if (start_byte == -1)
 758           start_byte = CHAR_TO_BYTE (start);
 759
 760         /* The dumb loop can only scan text stored in contiguous
 761            bytes. BUFFER_CEILING_OF returns the last character
 762            position that is contiguous, so the ceiling is the
 763            position after that.  */
 764         tem = BUFFER_CEILING_OF (start_byte);
 765         ceiling_byte = min (tem, ceiling_byte);
 766
 767         {
 768           /* The termination address of the dumb loop.  */
 769           unsigned char *lim_addr = BYTE_POS_ADDR (ceiling_byte) + 1;
 770           ptrdiff_t lim_byte = ceiling_byte + 1;
 771
 772           /* Nonpositive offsets (relative to LIM_ADDR and LIM_BYTE)
 773              of the base, the cursor, and the next line.  */
 774           ptrdiff_t base = start_byte - lim_byte;
 775           ptrdiff_t cursor, next;
 776
 777           for (cursor = base; cursor < 0; cursor = next)
 778             {
 779               /* The dumb loop.  */
 780               unsigned char *nl = memchr (lim_addr + cursor, '\n', - cursor);
 781               next = nl ? nl - lim_addr : 0;
 782
 783               /* If we're using the newline cache, cache the fact that
 784                  the region we just traversed is free of newlines. */
 785               if (newline_cache && cursor != next)
 786                 {
 787                   know_region_cache (cache_buffer, newline_cache,
 788                                      BYTE_TO_CHAR (lim_byte + cursor),
 789                                      BYTE_TO_CHAR (lim_byte + next));
 790                   /* know_region_cache can relocate buffer text.  */
 791                   lim_addr = BYTE_POS_ADDR (ceiling_byte) + 1;
 792                 }
 793
 794               if (! nl)
 795                 break;
 796               next++;
 797
 798               if (--count == 0)
 799                 {
 800                   immediate_quit = 0;
 801                   if (bytepos)
 802                     *bytepos = lim_byte + next;
 803                   return BYTE_TO_CHAR (lim_byte + next);
 804                 }
 805             }
 806
 807           start_byte = lim_byte;
 808           start = BYTE_TO_CHAR (start_byte);
 809         }
 810       }
 811   else
 812     while (start > end)
 813       {
 814         /* The last character to check before the next obstacle.  */
 815         ptrdiff_t tem, ceiling_byte = end_byte;
 816
 817         /* Consult the newline cache, if appropriate.  */
 818         if (newline_cache)
 819           {
 820             ptrdiff_t next_change;
 821             int result = 1;
 822
 823             immediate_quit = 0;
 824             while (start > end && result)
 825               {
 826                 ptrdiff_t lim1;
 827
 828                 result = region_cache_backward (cache_buffer, newline_cache,
 829                                                 start, &next_change);
 830                 if (result)
 831                   {
 832                     start = next_change;
 833                     lim1 = next_change = end;
 834                   }
 835                 else
 836                   lim1 = max (next_change, end);
 837                 start_byte = CHAR_TO_BYTE (start);
 838                 while (start > lim1
 839                        && FETCH_BYTE (start_byte - 1) == '\n')
 840                   {
 841                     if (++count == 0)
 842                       {
 843                         if (bytepos)
 844                           *bytepos = start_byte;
 845                         return start;
 846                       }
 847                     start_byte--;
 848                     start--;
 849                   }
 850                 if (start > next_change && !result)
 851                   break;
 852                 result = 1;
 853               }
 854             if (start <= end)
 855               {
 856                 start = end;
 857                 start_byte = end_byte;
 858                 break;
 859               }
 860             immediate_quit = allow_quit;
 861
 862             /* Start should never be at or before end.  */
 863             if (start_byte <= ceiling_byte)
 864               start_byte = ceiling_byte + 1;
 865
 866             /* Now the text before start is an unknown region, and
 867                next_change is the position of the next known region. */
 868             ceiling_byte = max (CHAR_TO_BYTE (next_change), ceiling_byte);
 869           }
 870         else if (start_byte == -1)
 871           start_byte = CHAR_TO_BYTE (start);
 872
 873         /* Stop scanning before the gap.  */
 874         tem = BUFFER_FLOOR_OF (start_byte - 1);
 875         ceiling_byte = max (tem, ceiling_byte);
 876
 877         {
 878           /* The termination address of the dumb loop.  */
 879           unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
 880
 881           /* Offsets (relative to CEILING_ADDR and CEILING_BYTE) of
 882              the base, the cursor, and the previous line.  These
 883              offsets are at least -1.  */
 884           ptrdiff_t base = start_byte - ceiling_byte;
 885           ptrdiff_t cursor, prev;
 886
 887           for (cursor = base; 0 < cursor; cursor = prev)
 888             {
 889               unsigned char *nl = memrchr (ceiling_addr, '\n', cursor);
 890               prev = nl ? nl - ceiling_addr : -1;
 891
 892               /* If we're looking for newlines, cache the fact that
 893                  this line's region is free of them. */
 894               if (newline_cache && cursor != prev + 1)
 895                 {
 896                   know_region_cache (cache_buffer, newline_cache,
 897                                      BYTE_TO_CHAR (ceiling_byte + prev + 1),
 898                                      BYTE_TO_CHAR (ceiling_byte + cursor));
 899                   /* know_region_cache can relocate buffer text.  */
 900                   ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
 901                 }
 902
 903               if (! nl)
 904                 break;
 905
 906               if (++count >= 0)
 907                 {
 908                   immediate_quit = 0;
 909                   if (bytepos)
 910                     *bytepos = ceiling_byte + prev + 1;
 911                   return BYTE_TO_CHAR (ceiling_byte + prev + 1);
 912                 }
 913             }
 914
 915           start_byte = ceiling_byte;
 916           start = BYTE_TO_CHAR (start_byte);
 917         }
 918       }
 919
 920   immediate_quit = 0;
 921   if (shortage)
 922     *shortage = count * direction;
 923   if (bytepos)
 924     {
 925       *bytepos = start_byte == -1 ? CHAR_TO_BYTE (start) : start_byte;
 926       eassert (*bytepos == CHAR_TO_BYTE (start));
 927     }
 928   return start;
 929 }
 930 \f
 931 /* Search for COUNT instances of a line boundary.
 932    Start at START.  If COUNT is negative, search backwards.
 933
 934    We report the resulting position by calling TEMP_SET_PT_BOTH.
 935
 936    If we find COUNT instances. we position after (always after,
 937    even if scanning backwards) the COUNTth match, and return 0.
 938
 939    If we don't find COUNT instances before reaching the end of the
 940    buffer (or the beginning, if scanning backwards), we return
 941    the number of line boundaries left unfound, and position at
 942    the limit we bumped up against.
 943
 944    If ALLOW_QUIT, set immediate_quit.  That's good to do
 945    except in special cases.  */
 946
 947 ptrdiff_t
 948 scan_newline (ptrdiff_t start, ptrdiff_t start_byte,
 949               ptrdiff_t limit, ptrdiff_t limit_byte,
 950               ptrdiff_t count, bool allow_quit)
 951 {
 952   ptrdiff_t charpos, bytepos, shortage;
 953
 954   charpos = find_newline (start, start_byte, limit, limit_byte,
 955                           count, &shortage, &bytepos, allow_quit);
 956   if (shortage)
 957     TEMP_SET_PT_BOTH (limit, limit_byte);
 958   else
 959     TEMP_SET_PT_BOTH (charpos, bytepos);
 960   return shortage;
 961 }
 962
 963 /* Like above, but always scan from point and report the
 964    resulting position in *CHARPOS and *BYTEPOS.  */
 965
 966 ptrdiff_t
 967 scan_newline_from_point (ptrdiff_t count, ptrdiff_t *charpos,
 968                          ptrdiff_t *bytepos)
 969 {
 970   ptrdiff_t shortage;
 971
 972   if (count <= 0)
 973     *charpos = find_newline (PT, PT_BYTE, BEGV, BEGV_BYTE, count - 1,
 974                              &shortage, bytepos, 1);
 975   else
 976     *charpos = find_newline (PT, PT_BYTE, ZV, ZV_BYTE, count,
 977                              &shortage, bytepos, 1);
 978   return shortage;
 979 }
 980
 981 /* Like find_newline, but doesn't allow QUITting and doesn't return
 982    SHORTAGE.  */
 983 ptrdiff_t
 984 find_newline_no_quit (ptrdiff_t from, ptrdiff_t frombyte,
 985                       ptrdiff_t cnt, ptrdiff_t *bytepos)
 986 {
 987   return find_newline (from, frombyte, 0, -1, cnt, NULL, bytepos, 0);
 988 }
 989
 990 /* Like find_newline, but returns position before the newline, not
 991    after, and only search up to TO.
 992    This isn't just find_newline_no_quit (...)-1, because you might hit TO.  */
 993
 994 ptrdiff_t
 995 find_before_next_newline (ptrdiff_t from, ptrdiff_t to,
 996                           ptrdiff_t cnt, ptrdiff_t *bytepos)
 997 {
 998   ptrdiff_t shortage;
 999   ptrdiff_t pos = find_newline (from, -1, to, -1, cnt, &shortage, bytepos, 1);
1000
1001   if (shortage == 0)
1002     {
1003       if (bytepos)
1004         DEC_BOTH (pos, *bytepos);
1005       else
1006         pos--;
1007     }
1008   return pos;
1009 }
1010 \f
1011 /* Subroutines of Lisp buffer search functions. */
1012
1013 static Lisp_Object
1014 search_command (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror,
1015                 Lisp_Object count, int direction, int RE, bool posix)
1016 {
1017   EMACS_INT np;
1018   EMACS_INT lim;
1019   ptrdiff_t lim_byte;
1020   EMACS_INT n = direction;
1021
1022   if (!NILP (count))
1023     {
1024       CHECK_NUMBER (count);
1025       n *= XINT (count);
1026     }
1027
1028   CHECK_STRING (string);
1029   if (NILP (bound))
1030     {
1031       if (n > 0)
1032         lim = ZV, lim_byte = ZV_BYTE;
1033       else
1034         lim = BEGV, lim_byte = BEGV_BYTE;
1035     }
1036   else
1037     {
1038       CHECK_NUMBER_COERCE_MARKER (bound);
1039       lim = XINT (bound);
1040       if (n > 0 ? lim < PT : lim > PT)
1041         error ("Invalid search bound (wrong side of point)");
1042       if (lim > ZV)
1043         lim = ZV, lim_byte = ZV_BYTE;
1044       else if (lim < BEGV)
1045         lim = BEGV, lim_byte = BEGV_BYTE;
1046       else
1047         lim_byte = CHAR_TO_BYTE (lim);
1048     }
1049
1050   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
1051   set_char_table_extras (BVAR (current_buffer, case_canon_table), 2,
1052                          BVAR (current_buffer, case_eqv_table));
1053
1054   np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
1055                       (!NILP (BVAR (current_buffer, case_fold_search))
1056                        ? BVAR (current_buffer, case_canon_table)
1057                        : Qnil),
1058                       (!NILP (BVAR (current_buffer, case_fold_search))
1059                        ? BVAR (current_buffer, case_eqv_table)
1060                        : Qnil),
1061                       posix);
1062   if (np <= 0)
1063     {
1064       if (NILP (noerror))
1065         xsignal1 (Qsearch_failed, string);
1066
1067       if (!EQ (noerror, Qt))
1068         {
1069           eassert (BEGV <= lim && lim <= ZV);
1070           SET_PT_BOTH (lim, lim_byte);
1071           return Qnil;
1072 #if 0 /* This would be clean, but maybe programs depend on
1073          a value of nil here.  */
1074           np = lim;
1075 #endif
1076         }
1077       else
1078         return Qnil;
1079     }
1080
1081   eassert (BEGV <= np && np <= ZV);
1082   SET_PT (np);
1083
1084   return make_number (np);
1085 }
1086 \f
1087 /* Return true if REGEXP it matches just one constant string.  */
1088
1089 static bool
1090 trivial_regexp_p (Lisp_Object regexp)
1091 {
1092   ptrdiff_t len = SBYTES (regexp);
1093   unsigned char *s = SDATA (regexp);
1094   while (--len >= 0)
1095     {
1096       switch (*s++)
1097         {
1098         case '.': case '*': case '+': case '?': case '[': case '^': case '$':
1099           return 0;
1100         case '\\':
1101           if (--len < 0)
1102             return 0;
1103           switch (*s++)
1104             {
1105             case '|': case '(': case ')': case '`': case '\'': case 'b':
1106             case 'B': case '<': case '>': case 'w': case 'W': case 's':
1107             case 'S': case '=': case '{': case '}': case '_':
1108             case 'c': case 'C': /* for categoryspec and notcategoryspec */
1109             case '1': case '2': case '3': case '4': case '5':
1110             case '6': case '7': case '8': case '9':
1111               return 0;
1112             }
1113         }
1114     }
1115   return 1;
1116 }
1117
1118 /* Search for the n'th occurrence of STRING in the current buffer,
1119    starting at position POS and stopping at position LIM,
1120    treating STRING as a literal string if RE is false or as
1121    a regular expression if RE is true.
1122
1123    If N is positive, searching is forward and LIM must be greater than POS.
1124    If N is negative, searching is backward and LIM must be less than POS.
1125
1126    Returns -x if x occurrences remain to be found (x > 0),
1127    or else the position at the beginning of the Nth occurrence
1128    (if searching backward) or the end (if searching forward).
1129
1130    POSIX is nonzero if we want full backtracking (POSIX style)
1131    for this pattern.  0 means backtrack only enough to get a valid match.  */
1132
/* Store in OUT the translation of character D according to TRT.
   If TRT is nil, or the entry Faref yields for D is not an integer,
   OUT receives D itself.  OUT and D may be the same variable.
   TRT and D are each evaluated more than once, so they must not have
   side effects.  */
#define TRANSLATE(out, trt, d)                                  \
do                                                              \
  {                                                             \
    Lisp_Object temp                                            \
      = NILP (trt) ? Qnil : Faref (trt, make_number (d));       \
    out = INTEGERP (temp) ? XINT (temp) : d;                    \
  }                                                             \
while (0)
1149
1150 /* Only used in search_buffer, to record the end position of the match
1151    when searching regexps and SEARCH_REGS should not be changed
1152    (i.e. Vinhibit_changing_match_data is non-nil).  */
1153 static struct re_registers search_regs_1;
1154
1155 static EMACS_INT
1156 search_buffer (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte,
1157                ptrdiff_t lim, ptrdiff_t lim_byte, EMACS_INT n,
1158                int RE, Lisp_Object trt, Lisp_Object inverse_trt, bool posix)
1159 {
1160   ptrdiff_t len = SCHARS (string);
1161   ptrdiff_t len_byte = SBYTES (string);
1162   register ptrdiff_t i;
1163
1164   if (running_asynch_code)
1165     save_search_regs ();
1166
1167   /* Searching 0 times means don't move.  */
1168   /* Null string is found at starting position.  */
1169   if (len == 0 || n == 0)
1170     {
1171       set_search_regs (pos_byte, 0);
1172       return pos;
1173     }
1174
1175   if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp)))
1176     {
1177       unsigned char *p1, *p2;
1178       ptrdiff_t s1, s2;
1179       struct re_pattern_buffer *bufp;
1180
1181       bufp = compile_pattern (string,
1182                               (NILP (Vinhibit_changing_match_data)
1183                                ? &search_regs : &search_regs_1),
1184                               trt, posix,
1185                               !NILP (BVAR (current_buffer, enable_multibyte_characters)));
1186
1187       immediate_quit = 1;       /* Quit immediately if user types ^G,
1188                                    because letting this function finish
1189                                    can take too long. */
1190       QUIT;                     /* Do a pending quit right away,
1191                                    to avoid paradoxical behavior */
1192       /* Get pointers and sizes of the two strings
1193          that make up the visible portion of the buffer. */
1194
1195       p1 = BEGV_ADDR;
1196       s1 = GPT_BYTE - BEGV_BYTE;
1197       p2 = GAP_END_ADDR;
1198       s2 = ZV_BYTE - GPT_BYTE;
1199       if (s1 < 0)
1200         {
1201           p2 = p1;
1202           s2 = ZV_BYTE - BEGV_BYTE;
1203           s1 = 0;
1204         }
1205       if (s2 < 0)
1206         {
1207           s1 = ZV_BYTE - BEGV_BYTE;
1208           s2 = 0;
1209         }
1210       re_match_object = Qnil;
1211
1212       while (n < 0)
1213         {
1214           ptrdiff_t val;
1215
1216           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1217                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1218                              (NILP (Vinhibit_changing_match_data)
1219                               ? &search_regs : &search_regs_1),
1220                              /* Don't allow match past current point */
1221                              pos_byte - BEGV_BYTE);
1222           if (val == -2)
1223             {
1224               matcher_overflow ();
1225             }
1226           if (val >= 0)
1227             {
1228               if (NILP (Vinhibit_changing_match_data))
1229                 {
1230                   pos_byte = search_regs.start[0] + BEGV_BYTE;
1231                   for (i = 0; i < search_regs.num_regs; i++)
1232                     if (search_regs.start[i] >= 0)
1233                       {
1234                         search_regs.start[i]
1235                           = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1236                         search_regs.end[i]
1237                           = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1238                       }
1239                   XSETBUFFER (last_thing_searched, current_buffer);
1240                   /* Set pos to the new position. */
1241                   pos = search_regs.start[0];
1242                 }
1243               else
1244                 {
1245                   pos_byte = search_regs_1.start[0] + BEGV_BYTE;
1246                   /* Set pos to the new position.  */
1247                   pos = BYTE_TO_CHAR (search_regs_1.start[0] + BEGV_BYTE);
1248                 }
1249             }
1250           else
1251             {
1252               immediate_quit = 0;
1253               return (n);
1254             }
1255           n++;
1256         }
1257       while (n > 0)
1258         {
1259           ptrdiff_t val;
1260
1261           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1262                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1263                              (NILP (Vinhibit_changing_match_data)
1264                               ? &search_regs : &search_regs_1),
1265                              lim_byte - BEGV_BYTE);
1266           if (val == -2)
1267             {
1268               matcher_overflow ();
1269             }
1270           if (val >= 0)
1271             {
1272               if (NILP (Vinhibit_changing_match_data))
1273                 {
1274                   pos_byte = search_regs.end[0] + BEGV_BYTE;
1275                   for (i = 0; i < search_regs.num_regs; i++)
1276                     if (search_regs.start[i] >= 0)
1277                       {
1278                         search_regs.start[i]
1279                           = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1280                         search_regs.end[i]
1281                           = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1282                       }
1283                   XSETBUFFER (last_thing_searched, current_buffer);
1284                   pos = search_regs.end[0];
1285                 }
1286               else
1287                 {
1288                   pos_byte = search_regs_1.end[0] + BEGV_BYTE;
1289                   pos = BYTE_TO_CHAR (search_regs_1.end[0] + BEGV_BYTE);
1290                 }
1291             }
1292           else
1293             {
1294               immediate_quit = 0;
1295               return (0 - n);
1296             }
1297           n--;
1298         }
1299       immediate_quit = 0;
1300       return (pos);
1301     }
1302   else                          /* non-RE case */
1303     {
1304       unsigned char *raw_pattern, *pat;
1305       ptrdiff_t raw_pattern_size;
1306       ptrdiff_t raw_pattern_size_byte;
1307       unsigned char *patbuf;
1308       bool multibyte = !NILP (BVAR (current_buffer, enable_multibyte_characters));
1309       unsigned char *base_pat;
1310       /* Set to positive if we find a non-ASCII char that need
1311          translation.  Otherwise set to zero later.  */
1312       int char_base = -1;
1313       bool boyer_moore_ok = 1;
1314       USE_SAFE_ALLOCA;
1315
1316       /* MULTIBYTE says whether the text to be searched is multibyte.
1317          We must convert PATTERN to match that, or we will not really
1318          find things right.  */
1319
1320       if (multibyte == STRING_MULTIBYTE (string))
1321         {
1322           raw_pattern = SDATA (string);
1323           raw_pattern_size = SCHARS (string);
1324           raw_pattern_size_byte = SBYTES (string);
1325         }
1326       else if (multibyte)
1327         {
1328           raw_pattern_size = SCHARS (string);
1329           raw_pattern_size_byte
1330             = count_size_as_multibyte (SDATA (string),
1331                                        raw_pattern_size);
1332           raw_pattern = SAFE_ALLOCA (raw_pattern_size_byte + 1);
1333           copy_text (SDATA (string), raw_pattern,
1334                      SCHARS (string), 0, 1);
1335         }
1336       else
1337         {
1338           /* Converting multibyte to single-byte.
1339
1340              ??? Perhaps this conversion should be done in a special way
1341              by subtracting nonascii-insert-offset from each non-ASCII char,
1342              so that only the multibyte chars which really correspond to
1343              the chosen single-byte character set can possibly match.  */
1344           raw_pattern_size = SCHARS (string);
1345           raw_pattern_size_byte = SCHARS (string);
1346           raw_pattern = SAFE_ALLOCA (raw_pattern_size + 1);
1347           copy_text (SDATA (string), raw_pattern,
1348                      SBYTES (string), 1, 0);
1349         }
1350
1351       /* Copy and optionally translate the pattern.  */
1352       len = raw_pattern_size;
1353       len_byte = raw_pattern_size_byte;
1354       SAFE_NALLOCA (patbuf, MAX_MULTIBYTE_LENGTH, len);
1355       pat = patbuf;
1356       base_pat = raw_pattern;
1357       if (multibyte)
1358         {
1359           /* Fill patbuf by translated characters in STRING while
1360              checking if we can use boyer-moore search.  If TRT is
1361              non-nil, we can use boyer-moore search only if TRT can be
1362              represented by the byte array of 256 elements.  For that,
1363              all non-ASCII case-equivalents of all case-sensitive
1364              characters in STRING must belong to the same character
1365              group (two characters belong to the same group iff their
1366              multibyte forms are the same except for the last byte;
1367              i.e. every 64 characters form a group; U+0000..U+003F,
1368              U+0040..U+007F, U+0080..U+00BF, ...).  */
1369
1370           while (--len >= 0)
1371             {
1372               unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str;
1373               int c, translated, inverse;
1374               int in_charlen, charlen;
1375
1376               /* If we got here and the RE flag is set, it's because we're
1377                  dealing with a regexp known to be trivial, so the backslash
1378                  just quotes the next character.  */
1379               if (RE && *base_pat == '\\')
1380                 {
1381                   len--;
1382                   raw_pattern_size--;
1383                   len_byte--;
1384                   base_pat++;
1385                 }
1386
1387               c = STRING_CHAR_AND_LENGTH (base_pat, in_charlen);
1388
1389               if (NILP (trt))
1390                 {
1391                   str = base_pat;
1392                   charlen = in_charlen;
1393                 }
1394               else
1395                 {
1396                   /* Translate the character.  */
1397                   TRANSLATE (translated, trt, c);
1398                   charlen = CHAR_STRING (translated, str_base);
1399                   str = str_base;
1400
1401                   /* Check if C has any other case-equivalents.  */
1402                   TRANSLATE (inverse, inverse_trt, c);
1403                   /* If so, check if we can use boyer-moore.  */
1404                   if (c != inverse && boyer_moore_ok)
1405                     {
1406                       /* Check if all equivalents belong to the same
1407                          group of characters.  Note that the check of C
1408                          itself is done by the last iteration.  */
1409                       int this_char_base = -1;
1410
1411                       while (boyer_moore_ok)
1412                         {
1413                           if (ASCII_CHAR_P (inverse))
1414                             {
1415                               if (this_char_base > 0)
1416                                 boyer_moore_ok = 0;
1417                               else
1418                                 this_char_base = 0;
1419                             }
1420                           else if (CHAR_BYTE8_P (inverse))
1421                             /* Boyer-moore search can't handle a
1422                                translation of an eight-bit
1423                                character.  */
1424                             boyer_moore_ok = 0;
1425                           else if (this_char_base < 0)
1426                             {
1427                               this_char_base = inverse & ~0x3F;
1428                               if (char_base < 0)
1429                                 char_base = this_char_base;
1430                               else if (this_char_base != char_base)
1431                                 boyer_moore_ok = 0;
1432                             }
1433                           else if ((inverse & ~0x3F) != this_char_base)
1434                             boyer_moore_ok = 0;
1435                           if (c == inverse)
1436                             break;
1437                           TRANSLATE (inverse, inverse_trt, inverse);
1438                         }
1439                     }
1440                 }
1441
1442               /* Store this character into the translated pattern.  */
1443               memcpy (pat, str, charlen);
1444               pat += charlen;
1445               base_pat += in_charlen;
1446               len_byte -= in_charlen;
1447             }
1448
1449           /* If char_base is still negative we didn't find any translated
1450              non-ASCII characters.  */
1451           if (char_base < 0)
1452             char_base = 0;
1453         }
1454       else
1455         {
1456           /* Unibyte buffer.  */
1457           char_base = 0;
1458           while (--len >= 0)
1459             {
1460               int c, translated, inverse;
1461
1462               /* If we got here and the RE flag is set, it's because we're
1463                  dealing with a regexp known to be trivial, so the backslash
1464                  just quotes the next character.  */
1465               if (RE && *base_pat == '\\')
1466                 {
1467                   len--;
1468                   raw_pattern_size--;
1469                   base_pat++;
1470                 }
1471               c = *base_pat++;
1472               TRANSLATE (translated, trt, c);
1473               *pat++ = translated;
1474               /* Check that none of C's equivalents violates the
1475                  assumptions of boyer_moore.  */
1476               TRANSLATE (inverse, inverse_trt, c);
1477               while (1)
1478                 {
1479                   if (inverse >= 0200)
1480                     {
1481                       boyer_moore_ok = 0;
1482                       break;
1483                     }
1484                   if (c == inverse)
1485                     break;
1486                   TRANSLATE (inverse, inverse_trt, inverse);
1487                 }
1488             }
1489         }
1490
1491       len_byte = pat - patbuf;
1492       pat = base_pat = patbuf;
1493
1494       EMACS_INT result
1495         = (boyer_moore_ok
1496            ? boyer_moore (n, pat, len_byte, trt, inverse_trt,
1497                           pos_byte, lim_byte,
1498                           char_base)
1499            : simple_search (n, pat, raw_pattern_size, len_byte, trt,
1500                             pos, pos_byte, lim, lim_byte));
1501       SAFE_FREE ();
1502       return result;
1503     }
1504 }
1505 \f
1506 /* Do a simple string search N times for the string PAT,
1507    whose length is LEN/LEN_BYTE,
1508    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1509    TRT is the translation table.
1510
1511    Return the character position where the match is found.
1512    Otherwise, if M matches remained to be found, return -M.
1513
1514    This kind of search works regardless of what is in PAT and
1515    regardless of what is in TRT.  It is used in cases where
1516    boyer_moore cannot work.  */
1517
1518 static EMACS_INT
1519 simple_search (EMACS_INT n, unsigned char *pat,
1520                ptrdiff_t len, ptrdiff_t len_byte, Lisp_Object trt,
1521                ptrdiff_t pos, ptrdiff_t pos_byte,
1522                ptrdiff_t lim, ptrdiff_t lim_byte)
1523 {
1524   bool multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
1525   bool forward = n > 0;
1526   /* Number of buffer bytes matched.  Note that this may be different
1527      from len_byte in a multibyte buffer.  */
1528   ptrdiff_t match_byte = PTRDIFF_MIN;
1529
1530   if (lim > pos && multibyte)
1531     while (n > 0)
1532       {
1533         while (1)
1534           {
1535             /* Try matching at position POS.  */
1536             ptrdiff_t this_pos = pos;
1537             ptrdiff_t this_pos_byte = pos_byte;
1538             ptrdiff_t this_len = len;
1539             unsigned char *p = pat;
1540             if (pos + len > lim || pos_byte + len_byte > lim_byte)
1541               goto stop;
1542
1543             while (this_len > 0)
1544               {
1545                 int charlen, buf_charlen;
1546                 int pat_ch, buf_ch;
1547
1548                 pat_ch = STRING_CHAR_AND_LENGTH (p, charlen);
1549                 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1550                                                  buf_charlen);
1551                 TRANSLATE (buf_ch, trt, buf_ch);
1552
1553                 if (buf_ch != pat_ch)
1554                   break;
1555
1556                 this_len--;
1557                 p += charlen;
1558
1559                 this_pos_byte += buf_charlen;
1560                 this_pos++;
1561               }
1562
1563             if (this_len == 0)
1564               {
1565                 match_byte = this_pos_byte - pos_byte;
1566                 pos += len;
1567                 pos_byte += match_byte;
1568                 break;
1569               }
1570
1571             INC_BOTH (pos, pos_byte);
1572           }
1573
1574         n--;
1575       }
1576   else if (lim > pos)
1577     while (n > 0)
1578       {
1579         while (1)
1580           {
1581             /* Try matching at position POS.  */
1582             ptrdiff_t this_pos = pos;
1583             ptrdiff_t this_len = len;
1584             unsigned char *p = pat;
1585
1586             if (pos + len > lim)
1587               goto stop;
1588
1589             while (this_len > 0)
1590               {
1591                 int pat_ch = *p++;
1592                 int buf_ch = FETCH_BYTE (this_pos);
1593                 TRANSLATE (buf_ch, trt, buf_ch);
1594
1595                 if (buf_ch != pat_ch)
1596                   break;
1597
1598                 this_len--;
1599                 this_pos++;
1600               }
1601
1602             if (this_len == 0)
1603               {
1604                 match_byte = len;
1605                 pos += len;
1606                 break;
1607               }
1608
1609             pos++;
1610           }
1611
1612         n--;
1613       }
1614   /* Backwards search.  */
1615   else if (lim < pos && multibyte)
1616     while (n < 0)
1617       {
1618         while (1)
1619           {
1620             /* Try matching at position POS.  */
1621             ptrdiff_t this_pos = pos;
1622             ptrdiff_t this_pos_byte = pos_byte;
1623             ptrdiff_t this_len = len;
1624             const unsigned char *p = pat + len_byte;
1625
1626             if (this_pos - len < lim || (pos_byte - len_byte) < lim_byte)
1627               goto stop;
1628
1629             while (this_len > 0)
1630               {
1631                 int pat_ch, buf_ch;
1632
1633                 DEC_BOTH (this_pos, this_pos_byte);
1634                 PREV_CHAR_BOUNDARY (p, pat);
1635                 pat_ch = STRING_CHAR (p);
1636                 buf_ch = STRING_CHAR (BYTE_POS_ADDR (this_pos_byte));
1637                 TRANSLATE (buf_ch, trt, buf_ch);
1638
1639                 if (buf_ch != pat_ch)
1640                   break;
1641
1642                 this_len--;
1643               }
1644
1645             if (this_len == 0)
1646               {
1647                 match_byte = pos_byte - this_pos_byte;
1648                 pos = this_pos;
1649                 pos_byte = this_pos_byte;
1650                 break;
1651               }
1652
1653             DEC_BOTH (pos, pos_byte);
1654           }
1655
1656         n++;
1657       }
1658   else if (lim < pos)
1659     while (n < 0)
1660       {
1661         while (1)
1662           {
1663             /* Try matching at position POS.  */
1664             ptrdiff_t this_pos = pos - len;
1665             ptrdiff_t this_len = len;
1666             unsigned char *p = pat;
1667
1668             if (this_pos < lim)
1669               goto stop;
1670
1671             while (this_len > 0)
1672               {
1673                 int pat_ch = *p++;
1674                 int buf_ch = FETCH_BYTE (this_pos);
1675                 TRANSLATE (buf_ch, trt, buf_ch);
1676
1677                 if (buf_ch != pat_ch)
1678                   break;
1679                 this_len--;
1680                 this_pos++;
1681               }
1682
1683             if (this_len == 0)
1684               {
1685                 match_byte = len;
1686                 pos -= len;
1687                 break;
1688               }
1689
1690             pos--;
1691           }
1692
1693         n++;
1694       }
1695
1696  stop:
1697   if (n == 0)
1698     {
1699       eassert (match_byte != PTRDIFF_MIN);
1700       if (forward)
1701         set_search_regs ((multibyte ? pos_byte : pos) - match_byte, match_byte);
1702       else
1703         set_search_regs (multibyte ? pos_byte : pos, match_byte);
1704
1705       return pos;
1706     }
1707   else if (n > 0)
1708     return -n;
1709   else
1710     return n;
1711 }
1712 \f
1713 /* Do Boyer-Moore search for the string BASE_PAT, LEN_BYTE bytes long,
1714    from buffer byte position POS_BYTE until LIM_BYTE.
1715    Search for |N| occurrences, forward if N > 0, backward otherwise.
1716    TRT and INVERSE_TRT are translation tables.
1717    Characters in BASE_PAT are already translated by TRT.
1718    On success, record the match with set_search_regs and return the
1719    character position of its end (forward) or its start (backward).
1720    On failure, return minus the number of occurrences still wanted.
1721
1722    This kind of search works if all the characters in BASE_PAT that
1723    have nontrivial translation are the same aside from the last byte,
1724    so that only the last byte of a character ever needs translating.
1725    CHAR_BASE is nonzero if there is such a non-ASCII character.
1726    If that criterion is not satisfied, do not call this function.  */
1727
1728 static EMACS_INT
1729 boyer_moore (EMACS_INT n, unsigned char *base_pat,
1730              ptrdiff_t len_byte,
1731              Lisp_Object trt, Lisp_Object inverse_trt,
1732              ptrdiff_t pos_byte, ptrdiff_t lim_byte,
1733              int char_base)
1734 {
1735   int direction = ((n > 0) ? 1 : -1);  /* +1 forward, -1 backward.  */
1736   register ptrdiff_t dirlen;  /* LEN_BYTE carrying the sign of DIRECTION.  */
1737   ptrdiff_t limit;
1738   int stride_for_teases = 0;
1739   int BM_tab[0400];  /* Stride for each byte value; 0 marks a candidate.  */
1740   register unsigned char *cursor, *p_limit;
1741   register ptrdiff_t i;
1742   register int j;
1743   unsigned char *pat, *pat_end;
1744   bool multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
1745
1746   unsigned char simple_translate[0400];
1747   /* These are set to the preceding bytes of a byte to be translated
1748      if char_base is nonzero.  As the maximum byte length of a
1749      multibyte character is 5, there are at most four previous
1750      bytes.  NOTE(review): only three are recorded here -- confirm.  */
1751   int translate_prev_byte1 = 0;
1752   int translate_prev_byte2 = 0;
1753   int translate_prev_byte3 = 0;
1754
1755   /* The general approach is that we are going to maintain that we know
1756      the first (closest to the present position, in whatever direction
1757      we're searching) character that could possibly be the last
1758      (furthest from present position) character of a valid match.  We
1759      advance the state of our knowledge by looking at that character
1760      and seeing whether it indeed matches the last character of the
1761      pattern.  If it does, we take a closer look.  If it does not, we
1762      move our pointer (to putative last characters) as far as is
1763      logically possible.  This amount of movement, which I call a
1764      stride, will be the length of the pattern if the actual character
1765      appears nowhere in the pattern, otherwise it will be the distance
1766      from the last occurrence of that character to the end of the
1767      pattern.  If the amount is zero we have a possible match.  */
1768
1769   /* Here we make a "mickey mouse" BM table.  The stride of the search
1770      is determined only by the last character of the putative match.
1771      If that character does not match, we will stride the proper
1772      distance to propose a match that superimposes it on the last
1773      instance of a character that matches it (per trt), or misses
1774      it entirely if there is none. */
1775
1776   dirlen = len_byte * direction;
1777
1778   /* Record position after the end of the pattern.  */
1779   pat_end = base_pat + len_byte;
1780   /* BASE_PAT points to a character that we start scanning from.
1781      It is the first character in a forward search,
1782      the last character in a backward search.  */
1783   if (direction < 0)
1784     base_pat = pat_end - 1;
1785
1786   /* A character that does not appear in the pattern induces a
1787      stride equal to the pattern length.  */
1788   for (i = 0; i < 0400; i++)
1789     BM_tab[i] = dirlen;
1790
1791   /* We use this for translation, instead of TRT itself.
1792      We fill this in to handle the characters that actually
1793      occur in the pattern.  Others don't matter anyway!  */
1794   for (i = 0; i < 0400; i++)
1795     simple_translate[i] = i;
1796
1797   if (char_base)
1798     {
1799       /* Set up translate_prev_byte1/2/3 from CHAR_BASE.  Only a
1800          byte following them is a target of translation.  */
1801       unsigned char str[MAX_MULTIBYTE_LENGTH];
1802       int cblen = CHAR_STRING (char_base, str);
1803
1804       translate_prev_byte1 = str[cblen - 2];  /* Presumably cblen >= 2, CHAR_BASE being non-ASCII.  */
1805       if (cblen > 2)
1806         {
1807           translate_prev_byte2 = str[cblen - 3];
1808           if (cblen > 3)
1809             translate_prev_byte3 = str[cblen - 4];
1810         }
1811     }
1812
1813   i = 0;  /* Signed offset from BASE_PAT, stepped by DIRECTION until DIRLEN.  */
1814   while (i != dirlen)
1815     {
1816       unsigned char *ptr = base_pat + i;
1817       i += direction;
1818       if (! NILP (trt))
1819         {
1820           /* If the byte we are currently looking at is the last byte
1821              of a character whose case-equivalents must be checked, set
1822              CH to that character.  An ASCII character and a non-ASCII
1823              character matching CHAR_BASE are the ones to check.  */
1824           int ch = -1;
1825
1826           if (ASCII_CHAR_P (*ptr) || ! multibyte)
1827             ch = *ptr;
1828           else if (char_base
1829                    && ((pat_end - ptr) == 1 || CHAR_HEAD_P (ptr[1])))
1830             {
1831               unsigned char *charstart = ptr - 1;
1832
1833               while (! (CHAR_HEAD_P (*charstart)))
1834                 charstart--;
1835               ch = STRING_CHAR (charstart);
1836               if (char_base != (ch & ~0x3F))  /* Differs above the low 6 bits.  */
1837                 ch = -1;
1838             }
1839
1840           if (ch >= 0200 && multibyte)
1841             j = (ch & 0x3F) | 0200;  /* Presumably CH's trailing byte.  */
1842           else
1843             j = *ptr;
1844
1845           if (i == dirlen)
1846             stride_for_teases = BM_tab[j];
1847
1848           BM_tab[j] = dirlen - i;
1849           /* A translation table is accompanied by its inverse -- see
1850              comment following downcase_table for details.  */
1851           if (ch >= 0)
1852             {
1853               int starting_ch = ch;
1854               int starting_j = j;
1855
1856               while (1)
1857                 {
1858                   TRANSLATE (ch, inverse_trt, ch);
1859                   if (ch >= 0200 && multibyte)
1860                     j = (ch & 0x3F) | 0200;
1861                   else
1862                     j = ch;
1863
1864                   /* For all the characters that map into CH,
1865                      set up simple_translate to map the last byte
1866                      into STARTING_J.  */
1867                   simple_translate[j] = starting_j;
1868                   if (ch == starting_ch)
1869                     break;
1870                   BM_tab[j] = dirlen - i;
1871                 }
1872             }
1873         }
1874       else
1875         {
1876           j = *ptr;
1877
1878           if (i == dirlen)
1879             stride_for_teases = BM_tab[j];
1880           BM_tab[j] = dirlen - i;
1881         }
1882       /* stride_for_teases tells how much to stride if we get a
1883          match on the far character but are subsequently
1884          disappointed, by recording what the stride would have been
1885          for that character if the last character had been
1886          different.  */
1887     }
1888   pos_byte += dirlen - ((direction > 0) ? direction : 0);
1889   /* loop invariant - POS_BYTE points at where last char (first
1890      char if reverse) of pattern would align in a possible match.  */
1891   while (n != 0)
1892     {
1893       ptrdiff_t tail_end;
1894       unsigned char *tail_end_ptr;
1895
1896       /* It's been reported that some (broken) compiler thinks that
1897          Boolean expressions in an arithmetic context are unsigned.
1898          Using an explicit ?1:0 prevents this.  */
1899       if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1900           < 0)
1901         return (n * (0 - direction));  /* That is, -|n|.  */
1902       /* First we do the part we can by pointers (maybe nothing) */
1903       QUIT;
1904       pat = base_pat;
1905       limit = pos_byte - dirlen + direction;
1906       if (direction > 0)
1907         {
1908           limit = BUFFER_CEILING_OF (limit);
1909           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1910              can take on without hitting edge of buffer or the gap.  */
1911           limit = min (limit, pos_byte + 20000);
1912           limit = min (limit, lim_byte - 1);
1913         }
1914       else
1915         {
1916           limit = BUFFER_FLOOR_OF (limit);
1917           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1918              can take on without hitting edge of buffer or the gap.  */
1919           limit = max (limit, pos_byte - 20000);
1920           limit = max (limit, lim_byte);
1921         }
1922       tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1923       tail_end_ptr = BYTE_POS_ADDR (tail_end);
1924
1925       if ((limit - pos_byte) * direction > 20)  /* Room for the pointer loop.  */
1926         {
1927           unsigned char *p2;
1928
1929           p_limit = BYTE_POS_ADDR (limit);
1930           p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1931           /* In this loop, pos_byte + cursor - p2 is the surrogate for pos_byte.  */
1932           while (1)             /* use one cursor setting as long as i can */
1933             {
1934               if (direction > 0) /* worth duplicating */
1935                 {
1936                   while (cursor <= p_limit)
1937                     {
1938                       if (BM_tab[*cursor] == 0)
1939                         goto hit;
1940                       cursor += BM_tab[*cursor];
1941                     }
1942                 }
1943               else
1944                 {
1945                   while (cursor >= p_limit)
1946                     {
1947                       if (BM_tab[*cursor] == 0)
1948                         goto hit;
1949                       cursor += BM_tab[*cursor];
1950                     }
1951                 }
1952               /* If you are here, cursor is beyond the end of the
1953                  searched region.  You fail to match within the
1954                  permitted region and would otherwise try a character
1955                  beyond that region.  */
1956               break;
1957
1958             hit:
1959               i = dirlen - direction;  /* Far byte matched; now check the rest.  */
1960               if (! NILP (trt))
1961                 {
1962                   while ((i -= direction) + direction != 0)
1963                     {
1964                       int ch;
1965                       cursor -= direction;
1966                       /* Translate only the last byte of a character.  */
1967                       if (! multibyte
1968                           || ((cursor == tail_end_ptr
1969                                || CHAR_HEAD_P (cursor[1]))
1970                               && (CHAR_HEAD_P (cursor[0])
1971                                   /* Check if this is the last byte of
1972                                      a translatable character.  */
1973                                   || (translate_prev_byte1 == cursor[-1]
1974                                       && (CHAR_HEAD_P (translate_prev_byte1)
1975                                           || (translate_prev_byte2 == cursor[-2]
1976                                               && (CHAR_HEAD_P (translate_prev_byte2)
1977                                                   || (translate_prev_byte3 == cursor[-3]))))))))
1978                         ch = simple_translate[*cursor];
1979                       else
1980                         ch = *cursor;
1981                       if (pat[i] != ch)
1982                         break;
1983                     }
1984                 }
1985               else
1986                 {
1987                   while ((i -= direction) + direction != 0)
1988                     {
1989                       cursor -= direction;
1990                       if (pat[i] != *cursor)
1991                         break;
1992                     }
1993                 }
1994               cursor += dirlen - i - direction; /* fix cursor */
1995               if (i + direction == 0)  /* Every pattern byte matched.  */
1996                 {
1997                   ptrdiff_t position, start, end;
1998
1999                   cursor -= direction;
2000
2001                   position = pos_byte + cursor - p2 + ((direction > 0)
2002                                                        ? 1 - len_byte : 0);
2003                   set_search_regs (position, len_byte);
2004
2005                   if (NILP (Vinhibit_changing_match_data))
2006                     {
2007                       start = search_regs.start[0];
2008                       end = search_regs.end[0];
2009                     }
2010                   else
2011                     /* If Vinhibit_changing_match_data is non-nil,
2012                        search_regs will not be changed.  So let's
2013                        compute start and end here.  */
2014                     {
2015                       start = BYTE_TO_CHAR (position);
2016                       end = BYTE_TO_CHAR (position + len_byte);
2017                     }
2018
2019                   if ((n -= direction) != 0)
2020                     cursor += dirlen; /* to resume search */
2021                   else
2022                     return direction > 0 ? end : start;
2023                 }
2024               else
2025                 cursor += stride_for_teases; /* <sigh> we lose -  */
2026             }
2027           pos_byte += cursor - p2;
2028         }
2029       else
2030         /* Now we'll pick up a clump that has to be done the hard
2031            way because it covers a discontinuity.  */
2032         {
2033           limit = ((direction > 0)
2034                    ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
2035                    : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
2036           limit = ((direction > 0)
2037                    ? min (limit + len_byte, lim_byte - 1)
2038                    : max (limit - len_byte, lim_byte));
2039           /* LIMIT is now the last value POS_BYTE can have
2040              and still be valid for a possible match.  */
2041           while (1)
2042             {
2043               /* This loop can be coded for space rather than
2044                  speed because it will usually run only once.
2045                  (the reach is at most len + 21, and typically
2046                  does not exceed len).  */
2047               while ((limit - pos_byte) * direction >= 0)
2048                 {
2049                   int ch = FETCH_BYTE (pos_byte);
2050                   if (BM_tab[ch] == 0)
2051                     goto hit2;
2052                   pos_byte += BM_tab[ch];
2053                 }
2054               break;    /* ran off the end */
2055
2056             hit2:
2057               /* Found what might be a match.  */
2058               i = dirlen - direction;
2059               while ((i -= direction) + direction != 0)
2060                 {
2061                   int ch;
2062                   unsigned char *ptr;
2063                   pos_byte -= direction;
2064                   ptr = BYTE_POS_ADDR (pos_byte);
2065                   /* Translate only the last byte of a character.  */
2066                   if (! multibyte
2067                       || ((ptr == tail_end_ptr
2068                            || CHAR_HEAD_P (ptr[1]))
2069                           && (CHAR_HEAD_P (ptr[0])
2070                               /* Check if this is the last byte of a
2071                                  translatable character.  */
2072                               || (translate_prev_byte1 == ptr[-1]
2073                                   && (CHAR_HEAD_P (translate_prev_byte1)
2074                                       || (translate_prev_byte2 == ptr[-2]
2075                                           && (CHAR_HEAD_P (translate_prev_byte2)
2076                                               || translate_prev_byte3 == ptr[-3])))))))
2077                     ch = simple_translate[*ptr];
2078                   else
2079                     ch = *ptr;
2080                   if (pat[i] != ch)
2081                     break;
2082                 }
2083               /* Above loop has moved POS_BYTE part or all the way
2084                  back to the first pos (last pos if reverse).
2085                  Set it once again at the last (first if reverse) char.  */
2086               pos_byte += dirlen - i - direction;
2087               if (i + direction == 0)  /* Every pattern byte matched.  */
2088                 {
2089                   ptrdiff_t position, start, end;
2090                   pos_byte -= direction;
2091
2092                   position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
2093                   set_search_regs (position, len_byte);
2094
2095                   if (NILP (Vinhibit_changing_match_data))
2096                     {
2097                       start = search_regs.start[0];
2098                       end = search_regs.end[0];
2099                     }
2100                   else
2101                     /* If Vinhibit_changing_match_data is non-nil,
2102                        search_regs will not be changed.  So let's
2103                        compute start and end here.  */
2104                     {
2105                       start = BYTE_TO_CHAR (position);
2106                       end = BYTE_TO_CHAR (position + len_byte);
2107                     }
2108
2109                   if ((n -= direction) != 0)
2110                     pos_byte += dirlen; /* to resume search */
2111                   else
2112                     return direction > 0 ? end : start;
2113                 }
2114               else
2115                 pos_byte += stride_for_teases;
2116             }
2117           }
2118       /* We have done one clump.  Can we continue? */
2119       if ((lim_byte - pos_byte) * direction < 0)
2120         return ((0 - n) * direction);  /* That is, -|n|.  */
2121     }
2122   return BYTE_TO_CHAR (pos_byte);  /* Reached only if N was 0 on entry.  */
2123 }
2124
2125 /* Store in search_regs the overall match just found in the current
2126    buffer: it starts at byte position BEG_BYTE and spans NBYTES bytes.
2127    Registers 1 and up are marked as holding no match.  */
2128
2129 static void
2130 set_search_regs (ptrdiff_t beg_byte, ptrdiff_t nbytes)
2131 {
2132   ptrdiff_t reg;
2133
2134   /* Leave the match data alone while changes to it are inhibited.  */
2135   if (! NILP (Vinhibit_changing_match_data))
2136     return;
2137
2138   /* On first use, allocate room for two registers.  */
2139   if (search_regs.num_regs == 0)
2140     {
2141       search_regs.start = xmalloc (2 * sizeof (regoff_t));
2142       search_regs.end = xmalloc (2 * sizeof (regoff_t));
2143       search_regs.num_regs = 2;
2144     }
2145
2146   /* Mark every register after the first as unmatched.  */
2147   for (reg = 1; reg < search_regs.num_regs; reg++)
2148     search_regs.start[reg] = search_regs.end[reg] = -1;
2149
2150   /* Register 0 gets the whole match, converted to character positions.  */
2151   search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
2152   search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
2153
2154   /* Note the current buffer as the last thing searched.  */
2155   XSETBUFFER (last_thing_searched, current_buffer);
2156 }
2157 \f
2158 DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
2159        "MSearch backward: ",
2160        doc: /* Search backward from point for STRING.
2161 Set point to the beginning of the occurrence found, and return point.
2162 An optional second argument bounds the search; it is a buffer position.
2163 The match found must not extend before that position.
2164 Optional third argument, if t, means if fail just return nil (no error).
2165  If not nil and not t, position at limit of search and return nil.
2166 Optional fourth argument COUNT, if non-nil, means to search for COUNT
2167  successive occurrences.  If COUNT is negative, search forward,
2168  instead of backward, for -COUNT occurrences.
2169
2170 Search case-sensitivity is determined by the value of the variable
2171 `case-fold-search', which see.
2172
2173 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2174   (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2175 { /* Trailing args: -1 = backward; the 0s presumably mean not a regexp, not POSIX (cf. sibling commands).  */
2176   return search_command (string, bound, noerror, count, -1, 0, 0);
2177 }
2178
2179 DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
2180        doc: /* Search forward from point for STRING.
2181 Set point to the end of the occurrence found, and return point.
2182 An optional second argument bounds the search; it is a buffer position.
2183 The match found must not extend after that position.  A value of nil is
2184   equivalent to (point-max).
2185 Optional third argument, if t, means if fail just return nil (no error).
2186   If not nil and not t, move to limit of search and return nil.
2187 Optional fourth argument COUNT, if non-nil, means to search for COUNT
2188  successive occurrences.  If COUNT is negative, search backward,
2189  instead of forward, for -COUNT occurrences.
2190
2191 Search case-sensitivity is determined by the value of the variable
2192 `case-fold-search', which see.
2193
2194 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2195   (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2196 { /* Trailing args: 1 = forward; the 0s presumably mean not a regexp, not POSIX (cf. sibling commands).  */
2197   return search_command (string, bound, noerror, count, 1, 0, 0);
2198 }
2199
2200 DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
2201        "sRE search backward: ",
2202        doc: /* Search backward from point for match for regular expression REGEXP.
2203 Set point to the beginning of the match, and return point.
2204 The match found is the one starting last in the buffer
2205 and yet ending before the origin of the search.
2206 An optional second argument bounds the search; it is a buffer position.
2207 The match found must start at or after that position.
2208 Optional third argument, if t, means if fail just return nil (no error).
2209   If not nil and not t, move to limit of search and return nil.
2210 Optional fourth argument is repeat count--search for successive occurrences.
2211
2212 Search case-sensitivity is determined by the value of the variable
2213 `case-fold-search', which see.
2214
2215 See also the functions `match-beginning', `match-end', `match-string',
2216 and `replace-match'.  */)
2217   (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2218 { /* Trailing args: -1 = backward; 1, 0 presumably mean regexp search, not POSIX (cf. sibling commands).  */
2219   return search_command (regexp, bound, noerror, count, -1, 1, 0);
2220 }
2221
2222 DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
2223        "sRE search: ",
2224        doc: /* Search forward from point for regular expression REGEXP.
2225 Set point to the end of the occurrence found, and return point.
2226 An optional second argument bounds the search; it is a buffer position.
2227 The match found must not extend after that position.
2228 Optional third argument, if t, means if fail just return nil (no error).
2229   If not nil and not t, move to limit of search and return nil.
2230 Optional fourth argument is repeat count--search for successive occurrences.
2231
2232 Search case-sensitivity is determined by the value of the variable
2233 `case-fold-search', which see.
2234
2235 See also the functions `match-beginning', `match-end', `match-string',
2236 and `replace-match'.  */)
2237   (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2238 { /* Trailing args: 1 = forward; 1, 0 presumably mean regexp search, not POSIX (cf. sibling commands).  */
2239   return search_command (regexp, bound, noerror, count, 1, 1, 0);
2240 }
2241
2242 DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
2243        "sPosix search backward: ",
2244        doc: /* Search backward from point for match for regular expression REGEXP.
2245 Find the longest match in accord with Posix regular expression rules.
2246 Set point to the beginning of the match, and return point.
2247 The match found is the one starting last in the buffer
2248 and yet ending before the origin of the search.
2249 An optional second argument bounds the search; it is a buffer position.
2250 The match found must start at or after that position.
2251 Optional third argument, if t, means if fail just return nil (no error).
2252   If not nil and not t, move to limit of search and return nil.
2253 Optional fourth argument is repeat count--search for successive occurrences.
2254
2255 Search case-sensitivity is determined by the value of the variable
2256 `case-fold-search', which see.
2257
2258 See also the functions `match-beginning', `match-end', `match-string',
2259 and `replace-match'.  */)
2260   (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2261 { /* Trailing args: -1 = backward; 1, 1 presumably mean regexp search with POSIX rules (cf. sibling commands).  */
2262   return search_command (regexp, bound, noerror, count, -1, 1, 1);
2263 }
2264
2265 DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
2266        "sPosix search: ",
2267        doc: /* Search forward from point for regular expression REGEXP.
2268 Find the longest match in accord with Posix regular expression rules.
2269 Set point to the end of the occurrence found, and return point.
2270 An optional second argument bounds the search; it is a buffer position.
2271 The match found must not extend after that position.
2272 Optional third argument, if t, means if fail just return nil (no error).
2273   If not nil and not t, move to limit of search and return nil.
2274 Optional fourth argument is repeat count--search for successive occurrences.
2275
2276 Search case-sensitivity is determined by the value of the variable
2277 `case-fold-search', which see.
2278
2279 See also the functions `match-beginning', `match-end', `match-string',
2280 and `replace-match'.  */)
2281   (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2282 { /* Trailing args: 1 = forward; 1, 1 presumably mean regexp search with POSIX rules (cf. sibling commands).  */
2283   return search_command (regexp, bound, noerror, count, 1, 1, 1);
2284 }
2285 \f
2286 DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
2287        doc: /* Replace text matched by last search with NEWTEXT.
2288 Leave point at the end of the replacement text.
2289
2290 If optional second arg FIXEDCASE is non-nil, do not alter the case of
2291 the replacement text.  Otherwise, maybe capitalize the whole text, or
2292 maybe just word initials, based on the replaced text.  If the replaced
2293 text has only capital letters and has at least one multiletter word,
2294 convert NEWTEXT to all caps.  Otherwise if all words are capitalized
2295 in the replaced text, capitalize each word in NEWTEXT.
2296
2297 If optional third arg LITERAL is non-nil, insert NEWTEXT literally.
2298 Otherwise treat `\\' as special:
2299   `\\&' in NEWTEXT means substitute original matched text.
2300   `\\N' means substitute what matched the Nth `\\(...\\)'.
2301        If Nth parens didn't match, substitute nothing.
2302   `\\\\' means insert one `\\'.
2303   `\\?' is treated literally
2304        (for compatibility with `query-replace-regexp').
2305   Any other character following `\\' signals an error.
2306 Case conversion does not apply to these substitutions.
2307
2308 If optional fourth argument STRING is non-nil, it should be a string
2309 to act on; this should be the string on which the previous match was
2310 done via `string-match'.  In this case, `replace-match' creates and
2311 returns a new string, made by copying STRING and replacing the part of
2312 STRING that was matched (the original STRING itself is not altered).
2313
2314 The optional fifth argument SUBEXP specifies a subexpression;
2315 it says to replace just that subexpression with NEWTEXT,
2316 rather than replacing the entire matched text.
2317 This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
2318 `\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
2319 NEWTEXT in place of subexp N.
2320 This is useful only after a regular expression search or match,
2321 since only regular expressions have distinguished subexpressions.  */)
2322   (Lisp_Object newtext, Lisp_Object fixedcase, Lisp_Object literal, Lisp_Object string, Lisp_Object subexp)
2323 {
2324   enum { nochange, all_caps, cap_initial } case_action;
2325   ptrdiff_t pos, pos_byte;
2326   bool some_multiletter_word;
2327   bool some_lowercase;
2328   bool some_uppercase;
2329   bool some_nonuppercase_initial;
2330   int c, prevc;
2331   ptrdiff_t sub;
2332   ptrdiff_t opoint, newpoint;
2333
2334   CHECK_STRING (newtext);
2335
2336   if (! NILP (string))
2337     CHECK_STRING (string);
2338
2339   case_action = nochange;       /* We tried an initialization */
2340                                 /* but some C compilers blew it */
2341
2342   if (search_regs.num_regs <= 0)
2343     error ("`replace-match' called before any match found");
2344
2345   if (NILP (subexp))
2346     sub = 0;
2347   else
2348     {
2349       CHECK_NUMBER (subexp);
2350       if (! (0 <= XINT (subexp) && XINT (subexp) < search_regs.num_regs))
2351         args_out_of_range (subexp, make_number (search_regs.num_regs));
2352       sub = XINT (subexp);
2353     }
2354
2355   if (NILP (string))
2356     {
2357       if (search_regs.start[sub] < BEGV
2358           || search_regs.start[sub] > search_regs.end[sub]
2359           || search_regs.end[sub] > ZV)
2360         args_out_of_range (make_number (search_regs.start[sub]),
2361                            make_number (search_regs.end[sub]));
2362     }
2363   else
2364     {
2365       if (search_regs.start[sub] < 0
2366           || search_regs.start[sub] > search_regs.end[sub]
2367           || search_regs.end[sub] > SCHARS (string))
2368         args_out_of_range (make_number (search_regs.start[sub]),
2369                            make_number (search_regs.end[sub]));
2370     }
2371
2372   if (NILP (fixedcase))
2373     {
2374       /* Decide how to casify by examining the matched text. */
2375       ptrdiff_t last;
2376
2377       pos = search_regs.start[sub];
2378       last = search_regs.end[sub];
2379
2380       if (NILP (string))
2381         pos_byte = CHAR_TO_BYTE (pos);
2382       else
2383         pos_byte = string_char_to_byte (string, pos);
2384
2385       prevc = '\n';
2386       case_action = all_caps;
2387
2388       /* some_multiletter_word is set nonzero if any original word
2389          is more than one letter long. */
2390       some_multiletter_word = 0;
2391       some_lowercase = 0;
2392       some_nonuppercase_initial = 0;
2393       some_uppercase = 0;
2394
2395       while (pos < last)
2396         {
2397           if (NILP (string))
2398             {
2399               c = FETCH_CHAR_AS_MULTIBYTE (pos_byte);
2400               INC_BOTH (pos, pos_byte);
2401             }
2402           else
2403             FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, pos, pos_byte);
2404
2405           if (lowercasep (c))
2406             {
2407               /* Cannot be all caps if any original char is lower case */
2408
2409               some_lowercase = 1;
2410               if (SYNTAX (prevc) != Sword)
2411                 some_nonuppercase_initial = 1;
2412               else
2413                 some_multiletter_word = 1;
2414             }
2415           else if (uppercasep (c))
2416             {
2417               some_uppercase = 1;
2418               if (SYNTAX (prevc) != Sword)
2419                 ;
2420               else
2421                 some_multiletter_word = 1;
2422             }
2423           else
2424             {
2425               /* If the initial is a caseless word constituent,
2426                  treat that like a lowercase initial.  */
2427               if (SYNTAX (prevc) != Sword)
2428                 some_nonuppercase_initial = 1;
2429             }
2430
2431           prevc = c;
2432         }
2433
2434       /* Convert to all caps if the old text is all caps
2435          and has at least one multiletter word.  */
2436       if (! some_lowercase && some_multiletter_word)
2437         case_action = all_caps;
2438       /* Capitalize each word, if the old text has all capitalized words.  */
2439       else if (!some_nonuppercase_initial && some_multiletter_word)
2440         case_action = cap_initial;
2441       else if (!some_nonuppercase_initial && some_uppercase)
2442         /* Should x -> yz, operating on X, give Yz or YZ?
2443            We'll assume the latter.  */
2444         case_action = all_caps;
2445       else
2446         case_action = nochange;
2447     }
2448
2449   /* Do replacement in a string.  */
2450   if (!NILP (string))
2451     {
2452       Lisp_Object before, after;
2453
2454       before = Fsubstring (string, make_number (0),
2455                            make_number (search_regs.start[sub]));
2456       after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
2457
2458       /* Substitute parts of the match into NEWTEXT
2459          if desired.  */
2460       if (NILP (literal))
2461         {
2462           ptrdiff_t lastpos = 0;
2463           ptrdiff_t lastpos_byte = 0;
2464           /* We build up the substituted string in ACCUM.  */
2465           Lisp_Object accum;
2466           Lisp_Object middle;
2467           ptrdiff_t length = SBYTES (newtext);
2468
2469           accum = Qnil;
2470
2471           for (pos_byte = 0, pos = 0; pos_byte < length;)
2472             {
2473               ptrdiff_t substart = -1;
2474               ptrdiff_t subend = 0;
2475               bool delbackslash = 0;
2476
2477               FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2478
2479               if (c == '\\')
2480                 {
2481                   FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2482
2483                   if (c == '&')
2484                     {
2485                       substart = search_regs.start[sub];
2486                       subend = search_regs.end[sub];
2487                     }
2488                   else if (c >= '1' && c <= '9')
2489                     {
2490                       if (c - '0' < search_regs.num_regs
2491                           && search_regs.start[c - '0'] >= 0)
2492                         {
2493                           substart = search_regs.start[c - '0'];
2494                           subend = search_regs.end[c - '0'];
2495                         }
2496                       else
2497                         {
2498                           /* If that subexp did not match,
2499                              replace \\N with nothing.  */
2500                           substart = 0;
2501                           subend = 0;
2502                         }
2503                     }
2504                   else if (c == '\\')
2505                     delbackslash = 1;
2506                   else if (c != '?')
2507                     error ("Invalid use of `\\' in replacement text");
2508                 }
2509               if (substart >= 0)
2510                 {
2511                   if (pos - 2 != lastpos)
2512                     middle = substring_both (newtext, lastpos,
2513                                              lastpos_byte,
2514                                              pos - 2, pos_byte - 2);
2515                   else
2516                     middle = Qnil;
2517                   accum = concat3 (accum, middle,
2518                                    Fsubstring (string,
2519                                                make_number (substart),
2520                                                make_number (subend)));
2521                   lastpos = pos;
2522                   lastpos_byte = pos_byte;
2523                 }
2524               else if (delbackslash)
2525                 {
2526                   middle = substring_both (newtext, lastpos,
2527                                            lastpos_byte,
2528                                            pos - 1, pos_byte - 1);
2529
2530                   accum = concat2 (accum, middle);
2531                   lastpos = pos;
2532                   lastpos_byte = pos_byte;
2533                 }
2534             }
2535
2536           if (pos != lastpos)
2537             middle = substring_both (newtext, lastpos,
2538                                      lastpos_byte,
2539                                      pos, pos_byte);
2540           else
2541             middle = Qnil;
2542
2543           newtext = concat2 (accum, middle);
2544         }
2545
2546       /* Do case substitution in NEWTEXT if desired.  */
2547       if (case_action == all_caps)
2548         newtext = Fupcase (newtext);
2549       else if (case_action == cap_initial)
2550         newtext = Fupcase_initials (newtext);
2551
2552       return concat3 (before, newtext, after);
2553     }
2554
2555   /* Record point, then move (quietly) to the start of the match.  */
2556   if (PT >= search_regs.end[sub])
2557     opoint = PT - ZV;
2558   else if (PT > search_regs.start[sub])
2559     opoint = search_regs.end[sub] - ZV;
2560   else
2561     opoint = PT;
2562
2563   /* If we want non-literal replacement,
2564      perform substitution on the replacement string.  */
2565   if (NILP (literal))
2566     {
2567       ptrdiff_t length = SBYTES (newtext);
2568       unsigned char *substed;
2569       ptrdiff_t substed_alloc_size, substed_len;
2570       bool buf_multibyte = !NILP (BVAR (current_buffer, enable_multibyte_characters));
2571       bool str_multibyte = STRING_MULTIBYTE (newtext);
2572       bool really_changed = 0;
2573
2574       substed_alloc_size = (length <= (STRING_BYTES_BOUND - 100) / 2
2575                             ? length * 2 + 100
2576                             : STRING_BYTES_BOUND);
2577       substed = xmalloc (substed_alloc_size);
2578       substed_len = 0;
2579
2580       /* Go thru NEWTEXT, producing the actual text to insert in
2581          SUBSTED while adjusting multibyteness to that of the current
2582          buffer.  */
2583
2584       for (pos_byte = 0, pos = 0; pos_byte < length;)
2585         {
2586           unsigned char str[MAX_MULTIBYTE_LENGTH];
2587           const unsigned char *add_stuff = NULL;
2588           ptrdiff_t add_len = 0;
2589           ptrdiff_t idx = -1;
2590
2591           if (str_multibyte)
2592             {
2593               FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte);
2594               if (!buf_multibyte)
2595                 c = CHAR_TO_BYTE8 (c);
2596             }
2597           else
2598             {
2599               /* Note that we don't have to increment POS.  */
2600               c = SREF (newtext, pos_byte++);
2601               if (buf_multibyte)
2602                 MAKE_CHAR_MULTIBYTE (c);
2603             }
2604
2605           /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2606              or set IDX to a match index, which means put that part
2607              of the buffer text into SUBSTED.  */
2608
2609           if (c == '\\')
2610             {
2611               really_changed = 1;
2612
2613               if (str_multibyte)
2614                 {
2615                   FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext,
2616                                                       pos, pos_byte);
2617                   if (!buf_multibyte && !ASCII_CHAR_P (c))
2618                     c = CHAR_TO_BYTE8 (c);
2619                 }
2620               else
2621                 {
2622                   c = SREF (newtext, pos_byte++);
2623                   if (buf_multibyte)
2624                     MAKE_CHAR_MULTIBYTE (c);
2625                 }
2626
2627               if (c == '&')
2628                 idx = sub;
2629               else if (c >= '1' && c <= '9' && c - '0' < search_regs.num_regs)
2630                 {
2631                   if (search_regs.start[c - '0'] >= 1)
2632                     idx = c - '0';
2633                 }
2634               else if (c == '\\')
2635                 add_len = 1, add_stuff = (unsigned char *) "\\";
2636               else
2637                 {
2638                   xfree (substed);
2639                   error ("Invalid use of `\\' in replacement text");
2640                 }
2641             }
2642           else
2643             {
2644               add_len = CHAR_STRING (c, str);
2645               add_stuff = str;
2646             }
2647
2648           /* If we want to copy part of a previous match,
2649              set up ADD_STUFF and ADD_LEN to point to it.  */
2650           if (idx >= 0)
2651             {
2652               ptrdiff_t begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
2653               add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
2654               if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
2655                 move_gap_both (search_regs.start[idx], begbyte);
2656               add_stuff = BYTE_POS_ADDR (begbyte);
2657             }
2658
2659           /* Now the stuff we want to add to SUBSTED
2660              is invariably ADD_LEN bytes starting at ADD_STUFF.  */
2661
2662           /* Make sure SUBSTED is big enough.  */
2663           if (substed_alloc_size - substed_len < add_len)
2664             substed =
2665               xpalloc (substed, &substed_alloc_size,
2666                        add_len - (substed_alloc_size - substed_len),
2667                        STRING_BYTES_BOUND, 1);
2668
2669           /* Now add to the end of SUBSTED.  */
2670           if (add_stuff)
2671             {
2672               memcpy (substed + substed_len, add_stuff, add_len);
2673               substed_len += add_len;
2674             }
2675         }
2676
2677       if (really_changed)
2678         newtext = make_specified_string ((const char *) substed, -1,
2679                                          substed_len, buf_multibyte);
2680       xfree (substed);
2681     }
2682
2683   /* Replace the old text with the new in the cleanest possible way.  */
2684   replace_range (search_regs.start[sub], search_regs.end[sub],
2685                  newtext, 1, 0, 1);
2686   newpoint = search_regs.start[sub] + SCHARS (newtext);
2687
2688   if (case_action == all_caps)
2689     Fupcase_region (make_number (search_regs.start[sub]),
2690                     make_number (newpoint));
2691   else if (case_action == cap_initial)
2692     Fupcase_initials_region (make_number (search_regs.start[sub]),
2693                              make_number (newpoint));
2694
2695   /* Adjust search data for this change.  */
2696   {
2697     ptrdiff_t oldend = search_regs.end[sub];
2698     ptrdiff_t oldstart = search_regs.start[sub];
2699     ptrdiff_t change = newpoint - search_regs.end[sub];
2700     ptrdiff_t i;
2701
2702     for (i = 0; i < search_regs.num_regs; i++)
2703       {
2704         if (search_regs.start[i] >= oldend)
2705           search_regs.start[i] += change;
2706         else if (search_regs.start[i] > oldstart)
2707           search_regs.start[i] = oldstart;
2708         if (search_regs.end[i] >= oldend)
2709           search_regs.end[i] += change;
2710         else if (search_regs.end[i] > oldstart)
2711           search_regs.end[i] = oldstart;
2712       }
2713   }
2714
2715   /* Put point back where it was in the text.  */
2716   if (opoint <= 0)
2717     TEMP_SET_PT (opoint + ZV);
2718   else
2719     TEMP_SET_PT (opoint);
2720
2721   /* Now move point "officially" to the start of the inserted replacement.  */
2722   move_if_not_intangible (newpoint);
2723
2724   return Qnil;
2725 }
2726 \f
2727 static Lisp_Object
2728 match_limit (Lisp_Object num, bool beginningp)
2729 {
2730   EMACS_INT n;
2731
2732   CHECK_NUMBER (num);
2733   n = XINT (num);
2734   if (n < 0)
2735     args_out_of_range (num, make_number (0));
2736   if (search_regs.num_regs <= 0)
2737     error ("No match data, because no search succeeded");
2738   if (n >= search_regs.num_regs
2739       || search_regs.start[n] < 0)
2740     return Qnil;
2741   return (make_number ((beginningp) ? search_regs.start[n]
2742                                     : search_regs.end[n]));
2743 }
2744
2745 DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
2746        doc: /* Return position of start of text matched by last search.
2747 SUBEXP, a number, specifies which parenthesized expression in the last
2748   regexp.
2749 Value is nil if SUBEXPth pair didn't match, or there were less than
2750   SUBEXP pairs.
2751 Zero means the entire text matched by the whole regexp or whole string.  */)
2752   (Lisp_Object subexp)
2753 {
2754   return match_limit (subexp, 1);
2755 }
2756
2757 DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
2758        doc: /* Return position of end of text matched by last search.
2759 SUBEXP, a number, specifies which parenthesized expression in the last
2760   regexp.
2761 Value is nil if SUBEXPth pair didn't match, or there were less than
2762   SUBEXP pairs.
2763 Zero means the entire text matched by the whole regexp or whole string.  */)
2764   (Lisp_Object subexp)
2765 {
2766   return match_limit (subexp, 0);
2767 }
2768
2769 DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 3, 0,
2770        doc: /* Return a list containing all info on what the last search matched.
2771 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2772 All the elements are markers or nil (nil if the Nth pair didn't match)
2773 if the last match was on a buffer; integers or nil if a string was matched.
2774 Use `set-match-data' to reinstate the data in this list.
2775
2776 If INTEGERS (the optional first argument) is non-nil, always use
2777 integers \(rather than markers) to represent buffer positions.  In
2778 this case, and if the last match was in a buffer, the buffer will get
2779 stored as one additional element at the end of the list.
2780
2781 If REUSE is a list, reuse it as part of the value.  If REUSE is long
2782 enough to hold all the values, and if INTEGERS is non-nil, no consing
2783 is done.
2784
2785 If optional third arg RESEAT is non-nil, any previous markers on the
2786 REUSE list will be modified to point to nowhere.
2787
2788 Return value is undefined if the last search failed.  */)
2789   (Lisp_Object integers, Lisp_Object reuse, Lisp_Object reseat)
2790 {
2791   Lisp_Object tail, prev;
2792   Lisp_Object *data;
2793   ptrdiff_t i, len;
2794
2795   if (!NILP (reseat))
2796     for (tail = reuse; CONSP (tail); tail = XCDR (tail))
2797       if (MARKERP (XCAR (tail)))
2798         {
2799           unchain_marker (XMARKER (XCAR (tail)));
2800           XSETCAR (tail, Qnil);
2801         }
2802
2803   if (NILP (last_thing_searched))
2804     return Qnil;
2805
2806   prev = Qnil;
2807
2808   USE_SAFE_ALLOCA;
2809   SAFE_NALLOCA (data, 1, 2 * search_regs.num_regs + 1);
2810
2811   len = 0;
2812   for (i = 0; i < search_regs.num_regs; i++)
2813     {
2814       ptrdiff_t start = search_regs.start[i];
2815       if (start >= 0)
2816         {
2817           if (EQ (last_thing_searched, Qt)
2818               || ! NILP (integers))
2819             {
2820               XSETFASTINT (data[2 * i], start);
2821               XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
2822             }
2823           else if (BUFFERP (last_thing_searched))
2824             {
2825               data[2 * i] = Fmake_marker ();
2826               Fset_marker (data[2 * i],
2827                            make_number (start),
2828                            last_thing_searched);
2829               data[2 * i + 1] = Fmake_marker ();
2830               Fset_marker (data[2 * i + 1],
2831                            make_number (search_regs.end[i]),
2832                            last_thing_searched);
2833             }
2834           else
2835             /* last_thing_searched must always be Qt, a buffer, or Qnil.  */
2836             emacs_abort ();
2837
2838           len = 2 * i + 2;
2839         }
2840       else
2841         data[2 * i] = data[2 * i + 1] = Qnil;
2842     }
2843
2844   if (BUFFERP (last_thing_searched) && !NILP (integers))
2845     {
2846       data[len] = last_thing_searched;
2847       len++;
2848     }
2849
2850   /* If REUSE is not usable, cons up the values and return them.  */
2851   if (! CONSP (reuse))
2852     reuse = Flist (len, data);
2853   else
2854     {
2855       /* If REUSE is a list, store as many value elements as will fit
2856          into the elements of REUSE.  */
2857       for (i = 0, tail = reuse; CONSP (tail);
2858            i++, tail = XCDR (tail))
2859         {
2860           if (i < len)
2861             XSETCAR (tail, data[i]);
2862           else
2863             XSETCAR (tail, Qnil);
2864           prev = tail;
2865         }
2866
2867       /* If we couldn't fit all value elements into REUSE,
2868          cons up the rest of them and add them to the end of REUSE.  */
2869       if (i < len)
2870         XSETCDR (prev, Flist (len - i, data + i));
2871     }
2872
2873   SAFE_FREE ();
2874   return reuse;
2875 }
2876
2877 /* We used to have an internal use variant of `reseat' described as:
2878
2879       If RESEAT is `evaporate', put the markers back on the free list
2880       immediately.  No other references to the markers must exist in this
2881       case, so it is used only internally on the unwind stack and
2882       save-match-data from Lisp.
2883
2884    But it was ill-conceived: those supposedly-internal markers get exposed via
2885    the undo-list, so freeing them here is unsafe.  */
2886
DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 2, 0,
       doc: /* Set internal data on last search match from elements of LIST.
LIST should have been created by calling `match-data' previously.

If optional arg RESEAT is non-nil, make markers on LIST point nowhere.  */)
  (register Lisp_Object list, Lisp_Object reseat)
{
  ptrdiff_t i;
  register Lisp_Object marker;

  /* When asynchronous code is running, set the current match data
     aside first so that it is not overwritten here.  */
  if (running_asynch_code)
    save_search_regs ();

  CHECK_LIST (list);

  /* Unless we find a marker with a buffer or an explicit buffer
     in LIST, assume that this match data came from a string.  */
  last_thing_searched = Qt;

  /* Allocate registers if they don't already exist.  */
  {
    /* Number of start/end pairs that LIST can supply.  */
    EMACS_INT length = XFASTINT (Flength (list)) / 2;

    if (length > search_regs.num_regs)
      {
        /* NUM_REGS is passed by address to xpalloc; the value it holds
           afterwards is used both to size the END array and as the new
           register count.  */
        ptrdiff_t num_regs = search_regs.num_regs;
        if (PTRDIFF_MAX < length)
          memory_full (SIZE_MAX);
        search_regs.start =
          xpalloc (search_regs.start, &num_regs, length - num_regs,
                   min (PTRDIFF_MAX, UINT_MAX), sizeof (regoff_t));
        search_regs.end =
          xrealloc (search_regs.end, num_regs * sizeof (regoff_t));

        /* Mark the newly added registers as unmatched.  */
        for (i = search_regs.num_regs; i < num_regs; i++)
          search_regs.start[i] = -1;

        search_regs.num_regs = num_regs;
      }

    /* Walk LIST; register I takes its start and end from two
       consecutive elements.  */
    for (i = 0; CONSP (list); i++)
      {
        marker = XCAR (list);
        /* A buffer element names the buffer that was searched and ends
           the scan.  */
        if (BUFFERP (marker))
          {
            last_thing_searched = marker;
            break;
          }
        if (i >= length)
          break;
        if (NILP (marker))
          {
            /* A nil start means register I did not match.  Step past
               the start here; the matching end element is stepped past
               at the bottom of the loop.  */
            search_regs.start[i] = -1;
            list = XCDR (list);
          }
        else
          {
            Lisp_Object from;
            Lisp_Object m;

            /* M keeps the original list element, because MARKER is
               read as a plain number (via XINT) after
               CHECK_NUMBER_COERCE_MARKER below.  */
            m = marker;
            if (MARKERP (marker))
              {
                /* A marker that points nowhere is treated as position
                   0; otherwise its buffer becomes the thing searched.  */
                if (XMARKER (marker)->buffer == 0)
                  XSETFASTINT (marker, 0);
                else
                  XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
              }

            CHECK_NUMBER_COERCE_MARKER (marker);
            from = marker;

            /* With RESEAT, detach the start marker and clear its slot
               in LIST.  */
            if (!NILP (reseat) && MARKERP (m))
              {
                unchain_marker (XMARKER (m));
                XSETCAR (list, Qnil);
              }

            /* Advance to the end element; stop if LIST has run out.  */
            if ((list = XCDR (list), !CONSP (list)))
              break;

            m = marker = XCAR (list);

            if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
              XSETFASTINT (marker, 0);

            CHECK_NUMBER_COERCE_MARKER (marker);
            /* Store the pair only if both values fit in regoff_t;
               otherwise record the register as unmatched.  */
            if ((XINT (from) < 0
                 ? TYPE_MINIMUM (regoff_t) <= XINT (from)
                 : XINT (from) <= TYPE_MAXIMUM (regoff_t))
                && (XINT (marker) < 0
                    ? TYPE_MINIMUM (regoff_t) <= XINT (marker)
                    : XINT (marker) <= TYPE_MAXIMUM (regoff_t)))
              {
                search_regs.start[i] = XINT (from);
                search_regs.end[i] = XINT (marker);
              }
            else
              {
                search_regs.start[i] = -1;
              }

            /* With RESEAT, detach the end marker as well.  */
            if (!NILP (reseat) && MARKERP (m))
              {
                unchain_marker (XMARKER (m));
                XSETCAR (list, Qnil);
              }
          }
        list = XCDR (list);
      }

    /* Any registers not supplied by LIST are marked unmatched.  */
    for (; i < search_regs.num_regs; i++)
      search_regs.start[i] = -1;
  }

  return Qnil;
}
3004
/* If true the match data have been saved in saved_search_regs
   during the execution of a sentinel or filter. */
static bool search_regs_saved;
/* Register count and start/end arrays set aside by save_search_regs
   and handed back by restore_search_regs.  */
static struct re_registers saved_search_regs;
/* Value of last_thing_searched set aside together with
   saved_search_regs.  */
static Lisp_Object saved_last_thing_searched;
3010
3011 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
3012    if asynchronous code (filter or sentinel) is running. */
3013 static void
3014 save_search_regs (void)
3015 {
3016   if (!search_regs_saved)
3017     {
3018       saved_search_regs.num_regs = search_regs.num_regs;
3019       saved_search_regs.start = search_regs.start;
3020       saved_search_regs.end = search_regs.end;
3021       saved_last_thing_searched = last_thing_searched;
3022       last_thing_searched = Qnil;
3023       search_regs.num_regs = 0;
3024       search_regs.start = 0;
3025       search_regs.end = 0;
3026
3027       search_regs_saved = 1;
3028     }
3029 }
3030
3031 /* Called upon exit from filters and sentinels. */
3032 void
3033 restore_search_regs (void)
3034 {
3035   if (search_regs_saved)
3036     {
3037       if (search_regs.num_regs > 0)
3038         {
3039           xfree (search_regs.start);
3040           xfree (search_regs.end);
3041         }
3042       search_regs.num_regs = saved_search_regs.num_regs;
3043       search_regs.start = saved_search_regs.start;
3044       search_regs.end = saved_search_regs.end;
3045       last_thing_searched = saved_last_thing_searched;
3046       saved_last_thing_searched = Qnil;
3047       search_regs_saved = 0;
3048     }
3049 }
3050
3051 static void
3052 unwind_set_match_data (Lisp_Object list)
3053 {
3054   /* It is NOT ALWAYS safe to free (evaporate) the markers immediately.  */
3055   Fset_match_data (list, Qt);
3056 }
3057
3058 /* Called to unwind protect the match data.  */
3059 void
3060 record_unwind_save_match_data (void)
3061 {
3062   record_unwind_protect (unwind_set_match_data,
3063                          Fmatch_data (Qnil, Qnil, Qnil));
3064 }
3065
3066 /* Quote a string to deactivate reg-expr chars */
3067
3068 DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
3069        doc: /* Return a regexp string which matches exactly STRING and nothing else.  */)
3070   (Lisp_Object string)
3071 {
3072   char *in, *out, *end;
3073   char *temp;
3074   ptrdiff_t backslashes_added = 0;
3075
3076   CHECK_STRING (string);
3077
3078   USE_SAFE_ALLOCA;
3079   SAFE_NALLOCA (temp, 2, SBYTES (string));
3080
3081   /* Now copy the data into the new string, inserting escapes. */
3082
3083   in = SSDATA (string);
3084   end = in + SBYTES (string);
3085   out = temp;
3086
3087   for (; in != end; in++)
3088     {
3089       if (*in == '['
3090           || *in == '*' || *in == '.' || *in == '\\'
3091           || *in == '?' || *in == '+'
3092           || *in == '^' || *in == '$')
3093         *out++ = '\\', backslashes_added++;
3094       *out++ = *in;
3095     }
3096
3097   Lisp_Object result
3098     = make_specified_string (temp,
3099                              SCHARS (string) + backslashes_added,
3100                              out - temp,
3101                              STRING_MULTIBYTE (string));
3102   SAFE_FREE ();
3103   return result;
3104 }
3105
/* Like find_newline, but doesn't use the cache, and only searches forward.

   Scan from character position START (byte position START_BYTE)
   towards END (byte position END_BYTE) for the COUNTth newline.
   START_BYTE or END_BYTE may be -1, in which case it is computed from
   the character position.  If END is zero, it defaults to ZV when
   COUNT is positive and to BEGV otherwise.  No scanning is done unless
   COUNT is positive.

   If the COUNTth newline is found, return the character position just
   after it.  Otherwise return the position where scanning stopped and,
   if SHORTAGE is non-null, store in *SHORTAGE the number of newlines
   still wanted (it is set to 0 on success).  If BYTEPOS is non-null,
   store in *BYTEPOS the byte position matching the return value.

   ALLOW_QUIT is copied into immediate_quit for the duration of the
   scan; immediate_quit is reset to 0 before returning.  */
static ptrdiff_t
find_newline1 (ptrdiff_t start, ptrdiff_t start_byte, ptrdiff_t end,
               ptrdiff_t end_byte, ptrdiff_t count, ptrdiff_t *shortage,
               ptrdiff_t *bytepos, bool allow_quit)
{
  /* Apply the default for END according to the sign of COUNT.  */
  if (count > 0)
    {
      if (!end)
        end = ZV, end_byte = ZV_BYTE;
    }
  else
    {
      if (!end)
        end = BEGV, end_byte = BEGV_BYTE;
    }
  if (end_byte == -1)
    end_byte = CHAR_TO_BYTE (end);

  if (shortage != 0)
    *shortage = 0;

  immediate_quit = allow_quit;

  if (count > 0)
    while (start != end)
      {
        /* Our innermost scanning loop is very simple; it doesn't know
           about gaps, buffer ends, or the newline cache.  ceiling is
           the position of the last character before the next such
           obstacle --- the last character the dumb search loop should
           examine.  */
        ptrdiff_t tem, ceiling_byte = end_byte - 1;

        if (start_byte == -1)
          start_byte = CHAR_TO_BYTE (start);

        /* The dumb loop can only scan text stored in contiguous
           bytes. BUFFER_CEILING_OF returns the last character
           position that is contiguous, so the ceiling is the
           position after that.  */
        tem = BUFFER_CEILING_OF (start_byte);
        ceiling_byte = min (tem, ceiling_byte);

        {
          /* The termination address of the dumb loop.  */
          unsigned char *lim_addr = BYTE_POS_ADDR (ceiling_byte) + 1;
          ptrdiff_t lim_byte = ceiling_byte + 1;

          /* Nonpositive offsets (relative to LIM_ADDR and LIM_BYTE)
             of the base, the cursor, and the next line.  */
          ptrdiff_t base = start_byte - lim_byte;
          ptrdiff_t cursor, next;

          for (cursor = base; cursor < 0; cursor = next)
            {
              /* The dumb loop.  */
              unsigned char *nl = memchr (lim_addr + cursor, '\n', - cursor);
              next = nl ? nl - lim_addr : 0;

              /* No newline in the rest of this contiguous stretch.  */
              if (! nl)
                break;
              /* Step past the newline just found.  */
              next++;

              if (--count == 0)
                {
                  /* Found the newline we were asked for.  */
                  immediate_quit = 0;
                  if (bytepos)
                    *bytepos = lim_byte + next;
                  return BYTE_TO_CHAR (lim_byte + next);
                }
            }

          /* Continue with the next stretch, starting at the limit of
             this one.  */
          start_byte = lim_byte;
          start = BYTE_TO_CHAR (start_byte);
        }
      }

  /* Not enough newlines: report how many are still missing.  */
  immediate_quit = 0;
  if (shortage)
    *shortage = count;
  if (bytepos)
    {
      *bytepos = start_byte == -1 ? CHAR_TO_BYTE (start) : start_byte;
      eassert (*bytepos == CHAR_TO_BYTE (start));
    }
  return start;
}
3194
3195 DEFUN ("newline-cache-check", Fnewline_cache_check, Snewline_cache_check,
3196        0, 1, 0,
3197        doc: /* Check the newline cache of BUFFER against buffer contents.
3198
3199 BUFFER defaults to the current buffer.
3200
3201 Value is an array of 2 sub-arrays of buffer positions for newlines,
3202 the first based on the cache, the second based on actually scanning
3203 the buffer.  If the buffer doesn't have a cache, the value is nil.  */)
3204   (Lisp_Object buffer)
3205 {
3206   struct buffer *buf, *old = NULL;
3207   ptrdiff_t shortage, nl_count_cache, nl_count_buf;
3208   Lisp_Object cache_newlines, buf_newlines, val;
3209   ptrdiff_t from, found, i;
3210
3211   if (NILP (buffer))
3212     buf = current_buffer;
3213   else
3214     {
3215       CHECK_BUFFER (buffer);
3216       buf = XBUFFER (buffer);
3217       old = current_buffer;
3218     }
3219   if (buf->base_buffer)
3220     buf = buf->base_buffer;
3221
3222   /* If the buffer doesn't have a newline cache, return nil.  */
3223   if (NILP (BVAR (buf, cache_long_scans))
3224       || buf->newline_cache == NULL)
3225     return Qnil;
3226
3227   /* find_newline can only work on the current buffer.  */
3228   if (old != NULL)
3229     set_buffer_internal_1 (buf);
3230
3231   /* How many newlines are there according to the cache?  */
3232   find_newline (BEGV, BEGV_BYTE, ZV, ZV_BYTE,
3233                 TYPE_MAXIMUM (ptrdiff_t), &shortage, NULL, true);
3234   nl_count_cache = TYPE_MAXIMUM (ptrdiff_t) - shortage;
3235
3236   /* Create vector and populate it.  */
3237   cache_newlines = make_uninit_vector (nl_count_cache);
3238
3239   if (nl_count_cache)
3240     {
3241       for (from = BEGV, found = from, i = 0; from < ZV; from = found, i++)
3242         {
3243           ptrdiff_t from_byte = CHAR_TO_BYTE (from);
3244
3245           found = find_newline (from, from_byte, 0, -1, 1, &shortage,
3246                                 NULL, true);
3247           if (shortage != 0 || i >= nl_count_cache)
3248             break;
3249           ASET (cache_newlines, i, make_number (found - 1));
3250         }
3251       /* Fill the rest of slots with an invalid position.  */
3252       for ( ; i < nl_count_cache; i++)
3253         ASET (cache_newlines, i, make_number (-1));
3254     }
3255
3256   /* Now do the same, but without using the cache.  */
3257   find_newline1 (BEGV, BEGV_BYTE, ZV, ZV_BYTE,
3258                  TYPE_MAXIMUM (ptrdiff_t), &shortage, NULL, true);
3259   nl_count_buf = TYPE_MAXIMUM (ptrdiff_t) - shortage;
3260   buf_newlines = make_uninit_vector (nl_count_buf);
3261   if (nl_count_buf)
3262     {
3263       for (from = BEGV, found = from, i = 0; from < ZV; from = found, i++)
3264         {
3265           ptrdiff_t from_byte = CHAR_TO_BYTE (from);
3266
3267           found = find_newline1 (from, from_byte, 0, -1, 1, &shortage,
3268                                  NULL, true);
3269           if (shortage != 0 || i >= nl_count_buf)
3270             break;
3271           ASET (buf_newlines, i, make_number (found - 1));
3272         }
3273       for ( ; i < nl_count_buf; i++)
3274         ASET (buf_newlines, i, make_number (-1));
3275     }
3276
3277   /* Construct the value and return it.  */
3278   val = make_uninit_vector (2);
3279   ASET (val, 0, cache_newlines);
3280   ASET (val, 1, buf_newlines);
3281
3282   if (old != NULL)
3283     set_buffer_internal_1 (old);
3284   return val;
3285 }
3286 \f
3287 void
3288 syms_of_search (void)
3289 {
3290   register int i;
3291
3292   for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
3293     {
3294       searchbufs[i].buf.allocated = 100;
3295       searchbufs[i].buf.buffer = xmalloc (100);
3296       searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
3297       searchbufs[i].regexp = Qnil;
3298       searchbufs[i].whitespace_regexp = Qnil;
3299       searchbufs[i].syntax_table = Qnil;
3300       staticpro (&searchbufs[i].regexp);
3301       staticpro (&searchbufs[i].whitespace_regexp);
3302       staticpro (&searchbufs[i].syntax_table);
3303       searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
3304     }
3305   searchbuf_head = &searchbufs[0];
3306
3307   /* Error condition used for failing searches.  */
3308   DEFSYM (Qsearch_failed, "search-failed");
3309
3310   /* Error condition signaled when regexp compile_pattern fails.  */
3311   DEFSYM (Qinvalid_regexp, "invalid-regexp");
3312
3313   Fput (Qsearch_failed, Qerror_conditions,
3314         listn (CONSTYPE_PURE, 2, Qsearch_failed, Qerror));
3315   Fput (Qsearch_failed, Qerror_message,
3316         build_pure_c_string ("Search failed"));
3317
3318   Fput (Qinvalid_regexp, Qerror_conditions,
3319         listn (CONSTYPE_PURE, 2, Qinvalid_regexp, Qerror));
3320   Fput (Qinvalid_regexp, Qerror_message,
3321         build_pure_c_string ("Invalid regexp"));
3322
3323   last_thing_searched = Qnil;
3324   staticpro (&last_thing_searched);
3325
3326   saved_last_thing_searched = Qnil;
3327   staticpro (&saved_last_thing_searched);
3328
3329   DEFVAR_LISP ("search-spaces-regexp", Vsearch_spaces_regexp,
3330       doc: /* Regexp to substitute for bunches of spaces in regexp search.
3331 Some commands use this for user-specified regexps.
3332 Spaces that occur inside character classes or repetition operators
3333 or other such regexp constructs are not replaced with this.
3334 A value of nil (which is the normal value) means treat spaces literally.  */);
3335   Vsearch_spaces_regexp = Qnil;
3336
3337   DEFVAR_LISP ("inhibit-changing-match-data", Vinhibit_changing_match_data,
3338       doc: /* Internal use only.
3339 If non-nil, the primitive searching and matching functions
3340 such as `looking-at', `string-match', `re-search-forward', etc.,
3341 do not set the match data.  The proper way to use this variable
3342 is to bind it with `let' around a small expression.  */);
3343   Vinhibit_changing_match_data = Qnil;
3344
3345   defsubr (&Slooking_at);
3346   defsubr (&Sposix_looking_at);
3347   defsubr (&Sstring_match);
3348   defsubr (&Sposix_string_match);
3349   defsubr (&Ssearch_forward);
3350   defsubr (&Ssearch_backward);
3351   defsubr (&Sre_search_forward);
3352   defsubr (&Sre_search_backward);
3353   defsubr (&Sposix_search_forward);
3354   defsubr (&Sposix_search_backward);
3355   defsubr (&Sreplace_match);
3356   defsubr (&Smatch_beginning);
3357   defsubr (&Smatch_end);
3358   defsubr (&Smatch_data);
3359   defsubr (&Sset_match_data);
3360   defsubr (&Sregexp_quote);
3361   defsubr (&Snewline_cache_check);
3362 }