src/search.c

   1 /* String search routines for GNU Emacs.
   2
   3 Copyright (C) 1985-1987, 1993-1994, 1997-1999, 2001-2015 Free Software
   4 Foundation, Inc.
   5
   6 This file is part of GNU Emacs.
   7
   8 GNU Emacs is free software: you can redistribute it and/or modify
   9 it under the terms of the GNU General Public License as published by
  10 the Free Software Foundation, either version 3 of the License, or
  11 (at your option) any later version.
  12
  13 GNU Emacs is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
  20
  21
  22 #include <config.h>
  23
  24 #include "lisp.h"
  25 #include "category.h"
  26 #include "character.h"
  27 #include "buffer.h"
  28 #include "syntax.h"
  29 #include "charset.h"
  30 #include "region-cache.h"
  31 #include "commands.h"
  32 #include "blockinput.h"
  33 #include "intervals.h"
  34
  35 #include <sys/types.h>
  36 #include "regex.h"
  37
  38 #define REGEXP_CACHE_SIZE 20
  39
  40 /* If the regexp is non-nil, then the buffer contains the compiled form
  41    of that regexp, suitable for searching.  */
  42 struct regexp_cache
  43 {
  44   struct regexp_cache *next;
  45   Lisp_Object regexp, whitespace_regexp;
  46   /* Syntax table for which the regexp applies.  We need this because
  47      of character classes.  If this is t, then the compiled pattern is valid
  48      for any syntax-table.  */
  49   Lisp_Object syntax_table;
  50   struct re_pattern_buffer buf;
  51   char fastmap[0400];
  52   /* True means regexp was compiled to do full POSIX backtracking.  */
  53   bool posix;
  54 };
  55
  56 /* The instances of that struct.  */
  57 static struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
  58
  59 /* The head of the linked list; points to the most recently used buffer.  */
  60 static struct regexp_cache *searchbuf_head;
  61
  62
  63 /* Every call to re_match, etc., must pass &search_regs as the regs
  64    argument unless you can show it is unnecessary (i.e., if re_match
  65    is certainly going to be called again before region-around-match
  66    can be called).
  67
  68    Since the registers are now dynamically allocated, we need to make
  69    sure not to refer to the Nth register before checking that it has
  70    been allocated by checking search_regs.num_regs.
  71
  72    The regex code keeps track of whether it has allocated the search
  73    buffer using bits in the re_pattern_buffer.  This means that whenever
  74    you compile a new pattern, it completely forgets whether it has
  75    allocated any registers, and will allocate new registers the next
  76    time you call a searching or matching function.  Therefore, we need
  77    to call re_set_registers after compiling a new pattern or after
  78    setting the match registers, so that the regex functions will be
  79    able to free or re-allocate it properly.  */
  80 static struct re_registers search_regs;
  81
  82 /* The buffer in which the last search was performed, or
  83    Qt if the last search was done in a string;
  84    Qnil if no searching has been done yet.  */
  85 static Lisp_Object last_thing_searched;
  86
  87 static void set_search_regs (ptrdiff_t, ptrdiff_t);
  88 static void save_search_regs (void);
  89 static EMACS_INT simple_search (EMACS_INT, unsigned char *, ptrdiff_t,
  90                                 ptrdiff_t, Lisp_Object, ptrdiff_t, ptrdiff_t,
  91                                 ptrdiff_t, ptrdiff_t);
  92 static EMACS_INT boyer_moore (EMACS_INT, unsigned char *, ptrdiff_t,
  93                               Lisp_Object, Lisp_Object, ptrdiff_t,
  94                               ptrdiff_t, int);
  95 static EMACS_INT search_buffer (Lisp_Object, ptrdiff_t, ptrdiff_t,
  96                                 ptrdiff_t, ptrdiff_t, EMACS_INT, int,
  97                                 Lisp_Object, Lisp_Object, bool);
  98
  99 static _Noreturn void
 100 matcher_overflow (void)
 101 {
 102   error ("Stack overflow in regexp matcher");
 103 }
 104
 105 /* Compile a regexp and signal a Lisp error if anything goes wrong.
 106    PATTERN is the pattern to compile.
 107    CP is the place to put the result.
 108    TRANSLATE is a translation table for ignoring case, or nil for none.
 109    POSIX is true if we want full backtracking (POSIX style) for this pattern.
 110    False means backtrack only enough to get a valid match.
 111
 112    The behavior also depends on Vsearch_spaces_regexp.  */
 113
 114 static void
 115 compile_pattern_1 (struct regexp_cache *cp, Lisp_Object pattern,
 116                    Lisp_Object translate, bool posix)
 117 {
 118   char *val;
 119   reg_syntax_t old;
 120
 121   cp->regexp = Qnil;
 122   cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
 123   cp->posix = posix;
 124   cp->buf.multibyte = STRING_MULTIBYTE (pattern);
 125   cp->buf.charset_unibyte = charset_unibyte;
 126   if (STRINGP (Vsearch_spaces_regexp))
 127     cp->whitespace_regexp = Vsearch_spaces_regexp;
 128   else
 129     cp->whitespace_regexp = Qnil;
 130
 131   /* rms: I think BLOCK_INPUT is not needed here any more,
 132      because regex.c defines malloc to call xmalloc.
 133      Using BLOCK_INPUT here means the debugger won't run if an error occurs.
 134      So let's turn it off.  */
 135   /*  BLOCK_INPUT;  */
 136   old = re_set_syntax (RE_SYNTAX_EMACS
 137                        | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
 138
 139   if (STRINGP (Vsearch_spaces_regexp))
 140     re_set_whitespace_regexp (SSDATA (Vsearch_spaces_regexp));
 141   else
 142     re_set_whitespace_regexp (NULL);
 143
 144   val = (char *) re_compile_pattern (SSDATA (pattern),
 145                                      SBYTES (pattern), &cp->buf);
 146
 147   /* If the compiled pattern hard codes some of the contents of the
 148      syntax-table, it can only be reused with *this* syntax table.  */
 149   cp->syntax_table = cp->buf.used_syntax ? BVAR (current_buffer, syntax_table) : Qt;
 150
 151   re_set_whitespace_regexp (NULL);
 152
 153   re_set_syntax (old);
 154   /* unblock_input ();  */
 155   if (val)
 156     xsignal1 (Qinvalid_regexp, build_string (val));
 157
 158   cp->regexp = Fcopy_sequence (pattern);
 159 }
 160
 161 /* Shrink each compiled regexp buffer in the cache
 162    to the size actually used right now.
 163    This is called from garbage collection.  */
 164
 165 void
 166 shrink_regexp_cache (void)
 167 {
 168   struct regexp_cache *cp;
 169
 170   for (cp = searchbuf_head; cp != 0; cp = cp->next)
 171     {
 172       cp->buf.allocated = cp->buf.used;
 173       cp->buf.buffer = xrealloc (cp->buf.buffer, cp->buf.used);
 174     }
 175 }
 176
 177 /* Clear the regexp cache w.r.t. a particular syntax table,
 178    because it was changed.
 179    There is no danger of memory leak here because re_compile_pattern
 180    automagically manages the memory in each re_pattern_buffer struct,
 181    based on its `allocated' and `buffer' values.  */
 182 void
 183 clear_regexp_cache (void)
 184 {
 185   int i;
 186
 187   for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
 188     /* It's tempting to compare with the syntax-table we've actually changed,
 189        but it's not sufficient because char-table inheritance means that
 190        modifying one syntax-table can change others at the same time.  */
 191     if (!EQ (searchbufs[i].syntax_table, Qt))
 192       searchbufs[i].regexp = Qnil;
 193 }
 194
 195 /* Compile a regexp if necessary, but first check to see if there's one in
 196    the cache.
 197    PATTERN is the pattern to compile.
 198    TRANSLATE is a translation table for ignoring case, or nil for none.
 199    REGP is the structure that says where to store the "register"
 200    values that will result from matching this pattern.
 201    If it is 0, we should compile the pattern not to record any
 202    subexpression bounds.
 203    POSIX is true if we want full backtracking (POSIX style) for this pattern.
 204    False means backtrack only enough to get a valid match.  */
 205
 206 struct re_pattern_buffer *
 207 compile_pattern (Lisp_Object pattern, struct re_registers *regp,
 208                  Lisp_Object translate, bool posix, bool multibyte)
 209 {
 210   struct regexp_cache *cp, **cpp;
 211
 212   for (cpp = &searchbuf_head; ; cpp = &cp->next)
 213     {
 214       cp = *cpp;
 215       /* Entries are initialized to nil, and may be set to nil by
 216          compile_pattern_1 if the pattern isn't valid.  Don't apply
 217          string accessors in those cases.  However, compile_pattern_1
 218          is only applied to the cache entry we pick here to reuse.  So
 219          nil should never appear before a non-nil entry.  */
 220       if (NILP (cp->regexp))
 221         goto compile_it;
 222       if (SCHARS (cp->regexp) == SCHARS (pattern)
 223           && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
 224           && !NILP (Fstring_equal (cp->regexp, pattern))
 225           && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
 226           && cp->posix == posix
 227           && (EQ (cp->syntax_table, Qt)
 228               || EQ (cp->syntax_table, BVAR (current_buffer, syntax_table)))
 229           && !NILP (Fequal (cp->whitespace_regexp, Vsearch_spaces_regexp))
 230           && cp->buf.charset_unibyte == charset_unibyte)
 231         break;
 232
 233       /* If we're at the end of the cache, compile into the nil cell
 234          we found, or the last (least recently used) cell with a
 235          string value.  */
 236       if (cp->next == 0)
 237         {
 238         compile_it:
 239           compile_pattern_1 (cp, pattern, translate, posix);
 240           break;
 241         }
 242     }
 243
 244   /* When we get here, cp (aka *cpp) contains the compiled pattern,
 245      either because we found it in the cache or because we just compiled it.
 246      Move it to the front of the queue to mark it as most recently used.  */
 247   *cpp = cp->next;
 248   cp->next = searchbuf_head;
 249   searchbuf_head = cp;
 250
 251   /* Advise the searching functions about the space we have allocated
 252      for register data.  */
 253   if (regp)
 254     re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
 255
 256   /* The compiled pattern can be used both for multibyte and unibyte
 257      target.  But, we have to tell which the pattern is used for. */
 258   cp->buf.target_multibyte = multibyte;
 259
 260   return &cp->buf;
 261 }
 262
 263 \f
 264 static Lisp_Object
 265 looking_at_1 (Lisp_Object string, bool posix)
 266 {
 267   Lisp_Object val;
 268   unsigned char *p1, *p2;
 269   ptrdiff_t s1, s2;
 270   register ptrdiff_t i;
 271   struct re_pattern_buffer *bufp;
 272
 273   if (running_asynch_code)
 274     save_search_regs ();
 275
 276   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
 277   set_char_table_extras (BVAR (current_buffer, case_canon_table), 2,
 278                          BVAR (current_buffer, case_eqv_table));
 279
 280   CHECK_STRING (string);
 281   bufp = compile_pattern (string,
 282                           (NILP (Vinhibit_changing_match_data)
 283                            ? &search_regs : NULL),
 284                           (!NILP (BVAR (current_buffer, case_fold_search))
 285                            ? BVAR (current_buffer, case_canon_table) : Qnil),
 286                           posix,
 287                           !NILP (BVAR (current_buffer, enable_multibyte_characters)));
 288
 289   immediate_quit = 1;
 290   QUIT;                 /* Do a pending quit right away, to avoid paradoxical behavior */
 291
 292   /* Get pointers and sizes of the two strings
 293      that make up the visible portion of the buffer. */
 294
 295   p1 = BEGV_ADDR;
 296   s1 = GPT_BYTE - BEGV_BYTE;
 297   p2 = GAP_END_ADDR;
 298   s2 = ZV_BYTE - GPT_BYTE;
 299   if (s1 < 0)
 300     {
 301       p2 = p1;
 302       s2 = ZV_BYTE - BEGV_BYTE;
 303       s1 = 0;
 304     }
 305   if (s2 < 0)
 306     {
 307       s1 = ZV_BYTE - BEGV_BYTE;
 308       s2 = 0;
 309     }
 310
 311   re_match_object = Qnil;
 312
 313   i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
 314                   PT_BYTE - BEGV_BYTE,
 315                   (NILP (Vinhibit_changing_match_data)
 316                    ? &search_regs : NULL),
 317                   ZV_BYTE - BEGV_BYTE);
 318   immediate_quit = 0;
 319
 320   if (i == -2)
 321     matcher_overflow ();
 322
 323   val = (i >= 0 ? Qt : Qnil);
 324   if (NILP (Vinhibit_changing_match_data) && i >= 0)
 325   {
 326     for (i = 0; i < search_regs.num_regs; i++)
 327       if (search_regs.start[i] >= 0)
 328         {
 329           search_regs.start[i]
 330             = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
 331          search_regs.end[i]
 332            = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
 333        }
 334     /* Set last_thing_searched only when match data is changed.  */
 335     XSETBUFFER (last_thing_searched, current_buffer);
 336   }
 337
 338   return val;
 339 }
 340
 341 DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
 342        doc: /* Return t if text after point matches regular expression REGEXP.
 343 This function modifies the match data that `match-beginning',
 344 `match-end' and `match-data' access; save and restore the match
 345 data if you want to preserve them.  */)
 346   (Lisp_Object regexp)
 347 {
 348   return looking_at_1 (regexp, 0);
 349 }
 350
 351 DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
 352        doc: /* Return t if text after point matches regular expression REGEXP.
 353 Find the longest match, in accord with Posix regular expression rules.
 354 This function modifies the match data that `match-beginning',
 355 `match-end' and `match-data' access; save and restore the match
 356 data if you want to preserve them.  */)
 357   (Lisp_Object regexp)
 358 {
 359   return looking_at_1 (regexp, 1);
 360 }
 361 \f
 362 static Lisp_Object
 363 string_match_1 (Lisp_Object regexp, Lisp_Object string, Lisp_Object start,
 364                 bool posix)
 365 {
 366   ptrdiff_t val;
 367   struct re_pattern_buffer *bufp;
 368   EMACS_INT pos;
 369   ptrdiff_t pos_byte, i;
 370
 371   if (running_asynch_code)
 372     save_search_regs ();
 373
 374   CHECK_STRING (regexp);
 375   CHECK_STRING (string);
 376
 377   if (NILP (start))
 378     pos = 0, pos_byte = 0;
 379   else
 380     {
 381       ptrdiff_t len = SCHARS (string);
 382
 383       CHECK_NUMBER (start);
 384       pos = XINT (start);
 385       if (pos < 0 && -pos <= len)
 386         pos = len + pos;
 387       else if (0 > pos || pos > len)
 388         args_out_of_range (string, start);
 389       pos_byte = string_char_to_byte (string, pos);
 390     }
 391
 392   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
 393   set_char_table_extras (BVAR (current_buffer, case_canon_table), 2,
 394                          BVAR (current_buffer, case_eqv_table));
 395
 396   bufp = compile_pattern (regexp,
 397                           (NILP (Vinhibit_changing_match_data)
 398                            ? &search_regs : NULL),
 399                           (!NILP (BVAR (current_buffer, case_fold_search))
 400                            ? BVAR (current_buffer, case_canon_table) : Qnil),
 401                           posix,
 402                           STRING_MULTIBYTE (string));
 403   immediate_quit = 1;
 404   re_match_object = string;
 405
 406   val = re_search (bufp, SSDATA (string),
 407                    SBYTES (string), pos_byte,
 408                    SBYTES (string) - pos_byte,
 409                    (NILP (Vinhibit_changing_match_data)
 410                     ? &search_regs : NULL));
 411   immediate_quit = 0;
 412
 413   /* Set last_thing_searched only when match data is changed.  */
 414   if (NILP (Vinhibit_changing_match_data))
 415     last_thing_searched = Qt;
 416
 417   if (val == -2)
 418     matcher_overflow ();
 419   if (val < 0) return Qnil;
 420
 421   if (NILP (Vinhibit_changing_match_data))
 422     for (i = 0; i < search_regs.num_regs; i++)
 423       if (search_regs.start[i] >= 0)
 424         {
 425           search_regs.start[i]
 426             = string_byte_to_char (string, search_regs.start[i]);
 427           search_regs.end[i]
 428             = string_byte_to_char (string, search_regs.end[i]);
 429         }
 430
 431   return make_number (string_byte_to_char (string, val));
 432 }
 433
 434 DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
 435        doc: /* Return index of start of first match for REGEXP in STRING, or nil.
 436 Matching ignores case if `case-fold-search' is non-nil.
 437 If third arg START is non-nil, start search at that index in STRING.
 438 For index of first char beyond the match, do (match-end 0).
 439 `match-end' and `match-beginning' also give indices of substrings
 440 matched by parenthesis constructs in the pattern.
 441
 442 You can use the function `match-string' to extract the substrings
 443 matched by the parenthesis constructions in REGEXP. */)
 444   (Lisp_Object regexp, Lisp_Object string, Lisp_Object start)
 445 {
 446   return string_match_1 (regexp, string, start, 0);
 447 }
 448
 449 DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
 450        doc: /* Return index of start of first match for REGEXP in STRING, or nil.
 451 Find the longest match, in accord with Posix regular expression rules.
 452 Case is ignored if `case-fold-search' is non-nil in the current buffer.
 453 If third arg START is non-nil, start search at that index in STRING.
 454 For index of first char beyond the match, do (match-end 0).
 455 `match-end' and `match-beginning' also give indices of substrings
 456 matched by parenthesis constructs in the pattern.  */)
 457   (Lisp_Object regexp, Lisp_Object string, Lisp_Object start)
 458 {
 459   return string_match_1 (regexp, string, start, 1);
 460 }
 461
 462 /* Match REGEXP against STRING using translation table TABLE,
 463    searching all of STRING, and return the index of the match,
 464    or negative on failure.  This does not clobber the match data.  */
 465
 466 ptrdiff_t
 467 fast_string_match_internal (Lisp_Object regexp, Lisp_Object string,
 468                             Lisp_Object table)
 469 {
 470   ptrdiff_t val;
 471   struct re_pattern_buffer *bufp;
 472
 473   bufp = compile_pattern (regexp, 0, table,
 474                           0, STRING_MULTIBYTE (string));
 475   immediate_quit = 1;
 476   re_match_object = string;
 477
 478   val = re_search (bufp, SSDATA (string),
 479                    SBYTES (string), 0,
 480                    SBYTES (string), 0);
 481   immediate_quit = 0;
 482   return val;
 483 }
 484
 485 /* Match REGEXP against STRING, searching all of STRING ignoring case,
 486    and return the index of the match, or negative on failure.
 487    This does not clobber the match data.
 488    We assume that STRING contains single-byte characters.  */
 489
 490 ptrdiff_t
 491 fast_c_string_match_ignore_case (Lisp_Object regexp,
 492                                  const char *string, ptrdiff_t len)
 493 {
 494   ptrdiff_t val;
 495   struct re_pattern_buffer *bufp;
 496
 497   regexp = string_make_unibyte (regexp);
 498   re_match_object = Qt;
 499   bufp = compile_pattern (regexp, 0,
 500                           Vascii_canon_table, 0,
 501                           0);
 502   immediate_quit = 1;
 503   val = re_search (bufp, string, len, 0, len, 0);
 504   immediate_quit = 0;
 505   return val;
 506 }
 507
 508 /* Match REGEXP against the characters after POS to LIMIT, and return
 509    the number of matched characters.  If STRING is non-nil, match
 510    against the characters in it.  In that case, POS and LIMIT are
 511    indices into the string.  This function doesn't modify the match
 512    data.  */
 513
 514 ptrdiff_t
 515 fast_looking_at (Lisp_Object regexp, ptrdiff_t pos, ptrdiff_t pos_byte,
 516                  ptrdiff_t limit, ptrdiff_t limit_byte, Lisp_Object string)
 517 {
 518   bool multibyte;
 519   struct re_pattern_buffer *buf;
 520   unsigned char *p1, *p2;
 521   ptrdiff_t s1, s2;
 522   ptrdiff_t len;
 523
 524   if (STRINGP (string))
 525     {
 526       if (pos_byte < 0)
 527         pos_byte = string_char_to_byte (string, pos);
 528       if (limit_byte < 0)
 529         limit_byte = string_char_to_byte (string, limit);
 530       p1 = NULL;
 531       s1 = 0;
 532       p2 = SDATA (string);
 533       s2 = SBYTES (string);
 534       re_match_object = string;
 535       multibyte = STRING_MULTIBYTE (string);
 536     }
 537   else
 538     {
 539       if (pos_byte < 0)
 540         pos_byte = CHAR_TO_BYTE (pos);
 541       if (limit_byte < 0)
 542         limit_byte = CHAR_TO_BYTE (limit);
 543       pos_byte -= BEGV_BYTE;
 544       limit_byte -= BEGV_BYTE;
 545       p1 = BEGV_ADDR;
 546       s1 = GPT_BYTE - BEGV_BYTE;
 547       p2 = GAP_END_ADDR;
 548       s2 = ZV_BYTE - GPT_BYTE;
 549       if (s1 < 0)
 550         {
 551           p2 = p1;
 552           s2 = ZV_BYTE - BEGV_BYTE;
 553           s1 = 0;
 554         }
 555       if (s2 < 0)
 556         {
 557           s1 = ZV_BYTE - BEGV_BYTE;
 558           s2 = 0;
 559         }
 560       re_match_object = Qnil;
 561       multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
 562     }
 563
 564   buf = compile_pattern (regexp, 0, Qnil, 0, multibyte);
 565   immediate_quit = 1;
 566   len = re_match_2 (buf, (char *) p1, s1, (char *) p2, s2,
 567                     pos_byte, NULL, limit_byte);
 568   immediate_quit = 0;
 569
 570   return len;
 571 }
 572
 573 \f
 574 /* The newline cache: remembering which sections of text have no newlines.  */
 575
 576 /* If the user has requested the long scans caching, make sure it's on.
 577    Otherwise, make sure it's off.
 578    This is our cheezy way of associating an action with the change of
 579    state of a buffer-local variable.  */
 580 static struct region_cache *
 581 newline_cache_on_off (struct buffer *buf)
 582 {
 583   struct buffer *base_buf = buf;
 584   bool indirect_p = false;
 585
 586   if (buf->base_buffer)
 587     {
 588       base_buf = buf->base_buffer;
 589       indirect_p = true;
 590     }
 591
 592   /* Don't turn on or off the cache in the base buffer, if the value
 593      of cache-long-scans of the base buffer is inconsistent with that.
 594      This is because doing so will just make the cache pure overhead,
 595      since if we turn it on via indirect buffer, it will be
 596      immediately turned off by its base buffer.  */
 597   if (NILP (BVAR (buf, cache_long_scans)))
 598     {
 599       if (!indirect_p
 600           || NILP (BVAR (base_buf, cache_long_scans)))
 601         {
 602           /* It should be off.  */
 603           if (base_buf->newline_cache)
 604             {
 605               free_region_cache (base_buf->newline_cache);
 606               base_buf->newline_cache = 0;
 607             }
 608         }
 609       return NULL;
 610     }
 611   else
 612     {
 613       if (!indirect_p
 614           || !NILP (BVAR (base_buf, cache_long_scans)))
 615         {
 616           /* It should be on.  */
 617           if (base_buf->newline_cache == 0)
 618             base_buf->newline_cache = new_region_cache ();
 619         }
 620       return base_buf->newline_cache;
 621     }
 622 }
 623
 624 \f
 625 /* Search for COUNT newlines between START/START_BYTE and END/END_BYTE.
 626
 627    If COUNT is positive, search forwards; END must be >= START.
 628    If COUNT is negative, search backwards for the -COUNTth instance;
 629       END must be <= START.
 630    If COUNT is zero, do anything you please; run rogue, for all I care.
 631
 632    If END is zero, use BEGV or ZV instead, as appropriate for the
 633    direction indicated by COUNT.
 634
 635    If we find COUNT instances, set *SHORTAGE to zero, and return the
 636    position past the COUNTth match.  Note that for reverse motion
 637    this is not the same as the usual convention for Emacs motion commands.
 638
 639    If we don't find COUNT instances before reaching END, set *SHORTAGE
 640    to the number of newlines left unfound, and return END.
 641
 642    If BYTEPOS is not NULL, set *BYTEPOS to the byte position corresponding
 643    to the returned character position.
 644
 645    If ALLOW_QUIT, set immediate_quit.  That's good to do
 646    except when inside redisplay.  */
 647
 648 ptrdiff_t
 649 find_newline (ptrdiff_t start, ptrdiff_t start_byte, ptrdiff_t end,
 650               ptrdiff_t end_byte, ptrdiff_t count, ptrdiff_t *shortage,
 651               ptrdiff_t *bytepos, bool allow_quit)
 652 {
 653   struct region_cache *newline_cache;
 654   int direction;
 655   struct buffer *cache_buffer;
 656
 657   if (count > 0)
 658     {
 659       direction = 1;
 660       if (!end)
 661         end = ZV, end_byte = ZV_BYTE;
 662     }
 663   else
 664     {
 665       direction = -1;
 666       if (!end)
 667         end = BEGV, end_byte = BEGV_BYTE;
 668     }
 669   if (end_byte == -1)
 670     end_byte = CHAR_TO_BYTE (end);
 671
 672   newline_cache = newline_cache_on_off (current_buffer);
 673   if (current_buffer->base_buffer)
 674     cache_buffer = current_buffer->base_buffer;
 675   else
 676     cache_buffer = current_buffer;
 677
 678   if (shortage != 0)
 679     *shortage = 0;
 680
 681   immediate_quit = allow_quit;
 682
 683   if (count > 0)
 684     while (start != end)
 685       {
 686         /* Our innermost scanning loop is very simple; it doesn't know
 687            about gaps, buffer ends, or the newline cache.  ceiling is
 688            the position of the last character before the next such
 689            obstacle --- the last character the dumb search loop should
 690            examine.  */
 691         ptrdiff_t tem, ceiling_byte = end_byte - 1;
 692
 693         /* If we're using the newline cache, consult it to see whether
 694            we can avoid some scanning.  */
 695         if (newline_cache)
 696           {
 697             ptrdiff_t next_change;
 698             int result = 1;
 699
 700             immediate_quit = 0;
 701             while (start < end && result)
 702               {
 703                 ptrdiff_t lim1;
 704
 705                 result = region_cache_forward (cache_buffer, newline_cache,
 706                                                start, &next_change);
 707                 if (result)
 708                   {
 709                     /* When the cache revalidation is deferred,
 710                        next-change might point beyond ZV, which will
 711                        cause assertion violation in CHAR_TO_BYTE below.
 712                        Limit next_change to ZV to avoid that.  */
 713                     if (next_change > ZV)
 714                       next_change = ZV;
 715                     start = next_change;
 716                     lim1 = next_change = end;
 717                   }
 718                 else
 719                   lim1 = min (next_change, end);
 720
 721                 /* The cache returned zero for this region; see if
 722                    this is because the region is known and includes
 723                    only newlines.  While at that, count any newlines
  724                    we bump into, and exit if we found enough of them.  */
 725                 start_byte = CHAR_TO_BYTE (start);
 726                 while (start < lim1
 727                        && FETCH_BYTE (start_byte) == '\n')
 728                   {
 729                     start_byte++;
 730                     start++;
 731                     if (--count == 0)
 732                       {
 733                         if (bytepos)
 734                           *bytepos = start_byte;
 735                         return start;
 736                       }
 737                   }
 738                 /* If we found a non-newline character before hitting
 739                    position where the cache will again return non-zero
 740                    (i.e. no newlines beyond that position), it means
 741                    this region is not yet known to the cache, and we
 742                    must resort to the "dumb loop" method.  */
 743                 if (start < next_change && !result)
 744                   break;
 745                 result = 1;
 746               }
 747             if (start >= end)
 748               {
 749                 start = end;
 750                 start_byte = end_byte;
 751                 break;
 752               }
 753             immediate_quit = allow_quit;
 754
 755             /* START should never be after END.  */
 756             if (start_byte > ceiling_byte)
 757               start_byte = ceiling_byte;
 758
 759             /* Now the text after start is an unknown region, and
 760                next_change is the position of the next known region. */
 761             ceiling_byte = min (CHAR_TO_BYTE (next_change) - 1, ceiling_byte);
 762           }
 763         else if (start_byte == -1)
 764           start_byte = CHAR_TO_BYTE (start);
 765
 766         /* The dumb loop can only scan text stored in contiguous
 767            bytes. BUFFER_CEILING_OF returns the last character
 768            position that is contiguous, so the ceiling is the
 769            position after that.  */
 770         tem = BUFFER_CEILING_OF (start_byte);
 771         ceiling_byte = min (tem, ceiling_byte);
 772
 773         {
 774           /* The termination address of the dumb loop.  */
 775           unsigned char *lim_addr = BYTE_POS_ADDR (ceiling_byte) + 1;
 776           ptrdiff_t lim_byte = ceiling_byte + 1;
 777
 778           /* Nonpositive offsets (relative to LIM_ADDR and LIM_BYTE)
 779              of the base, the cursor, and the next line.  */
 780           ptrdiff_t base = start_byte - lim_byte;
 781           ptrdiff_t cursor, next;
 782
 783           for (cursor = base; cursor < 0; cursor = next)
 784             {
 785               /* The dumb loop.  */
 786               unsigned char *nl = memchr (lim_addr + cursor, '\n', - cursor);
 787               next = nl ? nl - lim_addr : 0;
 788
 789               /* If we're using the newline cache, cache the fact that
 790                  the region we just traversed is free of newlines. */
 791               if (newline_cache && cursor != next)
 792                 {
 793                   know_region_cache (cache_buffer, newline_cache,
 794                                      BYTE_TO_CHAR (lim_byte + cursor),
 795                                      BYTE_TO_CHAR (lim_byte + next));
 796                   /* know_region_cache can relocate buffer text.  */
 797                   lim_addr = BYTE_POS_ADDR (ceiling_byte) + 1;
 798                 }
 799
 800               if (! nl)
 801                 break;
 802               next++;
 803
 804               if (--count == 0)
 805                 {
 806                   immediate_quit = 0;
 807                   if (bytepos)
 808                     *bytepos = lim_byte + next;
 809                   return BYTE_TO_CHAR (lim_byte + next);
 810                 }
 811             }
 812
 813           start_byte = lim_byte;
 814           start = BYTE_TO_CHAR (start_byte);
 815         }
 816       }
 817   else
 818     while (start > end)
 819       {
 820         /* The last character to check before the next obstacle.  */
 821         ptrdiff_t tem, ceiling_byte = end_byte;
 822
 823         /* Consult the newline cache, if appropriate.  */
 824         if (newline_cache)
 825           {
 826             ptrdiff_t next_change;
 827             int result = 1;
 828
 829             immediate_quit = 0;
 830             while (start > end && result)
 831               {
 832                 ptrdiff_t lim1;
 833
 834                 result = region_cache_backward (cache_buffer, newline_cache,
 835                                                 start, &next_change);
 836                 if (result)
 837                   {
 838                     start = next_change;
 839                     lim1 = next_change = end;
 840                   }
 841                 else
 842                   lim1 = max (next_change, end);
 843                 start_byte = CHAR_TO_BYTE (start);
 844                 while (start > lim1
 845                        && FETCH_BYTE (start_byte - 1) == '\n')
 846                   {
 847                     if (++count == 0)
 848                       {
 849                         if (bytepos)
 850                           *bytepos = start_byte;
 851                         return start;
 852                       }
 853                     start_byte--;
 854                     start--;
 855                   }
 856                 if (start > next_change && !result)
 857                   break;
 858                 result = 1;
 859               }
 860             if (start <= end)
 861               {
 862                 start = end;
 863                 start_byte = end_byte;
 864                 break;
 865               }
 866             immediate_quit = allow_quit;
 867
 868             /* Start should never be at or before end.  */
 869             if (start_byte <= ceiling_byte)
 870               start_byte = ceiling_byte + 1;
 871
 872             /* Now the text before start is an unknown region, and
 873                next_change is the position of the next known region. */
 874             ceiling_byte = max (CHAR_TO_BYTE (next_change), ceiling_byte);
 875           }
 876         else if (start_byte == -1)
 877           start_byte = CHAR_TO_BYTE (start);
 878
 879         /* Stop scanning before the gap.  */
 880         tem = BUFFER_FLOOR_OF (start_byte - 1);
 881         ceiling_byte = max (tem, ceiling_byte);
 882
 883         {
 884           /* The termination address of the dumb loop.  */
 885           unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
 886
 887           /* Offsets (relative to CEILING_ADDR and CEILING_BYTE) of
 888              the base, the cursor, and the previous line.  These
 889              offsets are at least -1.  */
 890           ptrdiff_t base = start_byte - ceiling_byte;
 891           ptrdiff_t cursor, prev;
 892
 893           for (cursor = base; 0 < cursor; cursor = prev)
 894             {
 895               unsigned char *nl = memrchr (ceiling_addr, '\n', cursor);
 896               prev = nl ? nl - ceiling_addr : -1;
 897
 898               /* If we're looking for newlines, cache the fact that
 899                  this line's region is free of them. */
 900               if (newline_cache && cursor != prev + 1)
 901                 {
 902                   know_region_cache (cache_buffer, newline_cache,
 903                                      BYTE_TO_CHAR (ceiling_byte + prev + 1),
 904                                      BYTE_TO_CHAR (ceiling_byte + cursor));
 905                   /* know_region_cache can relocate buffer text.  */
 906                   ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
 907                 }
 908
 909               if (! nl)
 910                 break;
 911
 912               if (++count >= 0)
 913                 {
 914                   immediate_quit = 0;
 915                   if (bytepos)
 916                     *bytepos = ceiling_byte + prev + 1;
 917                   return BYTE_TO_CHAR (ceiling_byte + prev + 1);
 918                 }
 919             }
 920
 921           start_byte = ceiling_byte;
 922           start = BYTE_TO_CHAR (start_byte);
 923         }
 924       }
 925
 926   immediate_quit = 0;
 927   if (shortage)
 928     *shortage = count * direction;
 929   if (bytepos)
 930     {
 931       *bytepos = start_byte == -1 ? CHAR_TO_BYTE (start) : start_byte;
 932       eassert (*bytepos == CHAR_TO_BYTE (start));
 933     }
 934   return start;
 935 }
 936 \f
 937 /* Search for COUNT instances of a line boundary.
 938    Start at START.  If COUNT is negative, search backwards.
 939
 940    We report the resulting position by calling TEMP_SET_PT_BOTH.
 941
 942    If we find COUNT instances. we position after (always after,
 943    even if scanning backwards) the COUNTth match, and return 0.
 944
 945    If we don't find COUNT instances before reaching the end of the
 946    buffer (or the beginning, if scanning backwards), we return
 947    the number of line boundaries left unfound, and position at
 948    the limit we bumped up against.
 949
 950    If ALLOW_QUIT, set immediate_quit.  That's good to do
 951    except in special cases.  */
 952
 953 ptrdiff_t
 954 scan_newline (ptrdiff_t start, ptrdiff_t start_byte,
 955               ptrdiff_t limit, ptrdiff_t limit_byte,
 956               ptrdiff_t count, bool allow_quit)
 957 {
 958   ptrdiff_t charpos, bytepos, shortage;
 959
 960   charpos = find_newline (start, start_byte, limit, limit_byte,
 961                           count, &shortage, &bytepos, allow_quit);
 962   if (shortage)
 963     TEMP_SET_PT_BOTH (limit, limit_byte);
 964   else
 965     TEMP_SET_PT_BOTH (charpos, bytepos);
 966   return shortage;
 967 }
 968
 969 /* Like above, but always scan from point and report the
 970    resulting position in *CHARPOS and *BYTEPOS.  */
 971
 972 ptrdiff_t
 973 scan_newline_from_point (ptrdiff_t count, ptrdiff_t *charpos,
 974                          ptrdiff_t *bytepos)
 975 {
 976   ptrdiff_t shortage;
 977
 978   if (count <= 0)
 979     *charpos = find_newline (PT, PT_BYTE, BEGV, BEGV_BYTE, count - 1,
 980                              &shortage, bytepos, 1);
 981   else
 982     *charpos = find_newline (PT, PT_BYTE, ZV, ZV_BYTE, count,
 983                              &shortage, bytepos, 1);
 984   return shortage;
 985 }
 986
 987 /* Like find_newline, but doesn't allow QUITting and doesn't return
 988    SHORTAGE.  */
 989 ptrdiff_t
 990 find_newline_no_quit (ptrdiff_t from, ptrdiff_t frombyte,
 991                       ptrdiff_t cnt, ptrdiff_t *bytepos)
 992 {
 993   return find_newline (from, frombyte, 0, -1, cnt, NULL, bytepos, 0);
 994 }
 995
 996 /* Like find_newline, but returns position before the newline, not
 997    after, and only search up to TO.
 998    This isn't just find_newline_no_quit (...)-1, because you might hit TO.  */
 999
1000 ptrdiff_t
1001 find_before_next_newline (ptrdiff_t from, ptrdiff_t to,
1002                           ptrdiff_t cnt, ptrdiff_t *bytepos)
1003 {
1004   ptrdiff_t shortage;
1005   ptrdiff_t pos = find_newline (from, -1, to, -1, cnt, &shortage, bytepos, 1);
1006
1007   if (shortage == 0)
1008     {
1009       if (bytepos)
1010         DEC_BOTH (pos, *bytepos);
1011       else
1012         pos--;
1013     }
1014   return pos;
1015 }
1016 \f
1017 /* Subroutines of Lisp buffer search functions. */
1018
1019 static Lisp_Object
1020 search_command (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror,
1021                 Lisp_Object count, int direction, int RE, bool posix)
1022 {
1023   EMACS_INT np;
1024   EMACS_INT lim;
1025   ptrdiff_t lim_byte;
1026   EMACS_INT n = direction;
1027
1028   if (!NILP (count))
1029     {
1030       CHECK_NUMBER (count);
1031       n *= XINT (count);
1032     }
1033
1034   CHECK_STRING (string);
1035   if (NILP (bound))
1036     {
1037       if (n > 0)
1038         lim = ZV, lim_byte = ZV_BYTE;
1039       else
1040         lim = BEGV, lim_byte = BEGV_BYTE;
1041     }
1042   else
1043     {
1044       CHECK_NUMBER_COERCE_MARKER (bound);
1045       lim = XINT (bound);
1046       if (n > 0 ? lim < PT : lim > PT)
1047         error ("Invalid search bound (wrong side of point)");
1048       if (lim > ZV)
1049         lim = ZV, lim_byte = ZV_BYTE;
1050       else if (lim < BEGV)
1051         lim = BEGV, lim_byte = BEGV_BYTE;
1052       else
1053         lim_byte = CHAR_TO_BYTE (lim);
1054     }
1055
1056   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
1057   set_char_table_extras (BVAR (current_buffer, case_canon_table), 2,
1058                          BVAR (current_buffer, case_eqv_table));
1059
1060   np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
1061                       (!NILP (BVAR (current_buffer, case_fold_search))
1062                        ? BVAR (current_buffer, case_canon_table)
1063                        : Qnil),
1064                       (!NILP (BVAR (current_buffer, case_fold_search))
1065                        ? BVAR (current_buffer, case_eqv_table)
1066                        : Qnil),
1067                       posix);
1068   if (np <= 0)
1069     {
1070       if (NILP (noerror))
1071         xsignal1 (Qsearch_failed, string);
1072
1073       if (!EQ (noerror, Qt))
1074         {
1075           eassert (BEGV <= lim && lim <= ZV);
1076           SET_PT_BOTH (lim, lim_byte);
1077           return Qnil;
1078 #if 0 /* This would be clean, but maybe programs depend on
1079          a value of nil here.  */
1080           np = lim;
1081 #endif
1082         }
1083       else
1084         return Qnil;
1085     }
1086
1087   eassert (BEGV <= np && np <= ZV);
1088   SET_PT (np);
1089
1090   return make_number (np);
1091 }
1092 \f
1093 /* Return true if REGEXP it matches just one constant string.  */
1094
1095 static bool
1096 trivial_regexp_p (Lisp_Object regexp)
1097 {
1098   ptrdiff_t len = SBYTES (regexp);
1099   unsigned char *s = SDATA (regexp);
1100   while (--len >= 0)
1101     {
1102       switch (*s++)
1103         {
1104         case '.': case '*': case '+': case '?': case '[': case '^': case '$':
1105           return 0;
1106         case '\\':
1107           if (--len < 0)
1108             return 0;
1109           switch (*s++)
1110             {
1111             case '|': case '(': case ')': case '`': case '\'': case 'b':
1112             case 'B': case '<': case '>': case 'w': case 'W': case 's':
1113             case 'S': case '=': case '{': case '}': case '_':
1114             case 'c': case 'C': /* for categoryspec and notcategoryspec */
1115             case '1': case '2': case '3': case '4': case '5':
1116             case '6': case '7': case '8': case '9':
1117               return 0;
1118             }
1119         }
1120     }
1121   return 1;
1122 }
1123
1124 /* Search for the n'th occurrence of STRING in the current buffer,
1125    starting at position POS and stopping at position LIM,
1126    treating STRING as a literal string if RE is false or as
1127    a regular expression if RE is true.
1128
1129    If N is positive, searching is forward and LIM must be greater than POS.
1130    If N is negative, searching is backward and LIM must be less than POS.
1131
1132    Returns -x if x occurrences remain to be found (x > 0),
1133    or else the position at the beginning of the Nth occurrence
1134    (if searching backward) or the end (if searching forward).
1135
1136    POSIX is nonzero if we want full backtracking (POSIX style)
1137    for this pattern.  0 means backtrack only enough to get a valid match.  */
1138
/* Store in OUT the image of character D under translation table TRT.
   If TRT is nil, or the element of TRT for D is not an integer, OUT
   simply receives D.  OUT may be the same lvalue as D.  D can be
   evaluated more than once, so it must have no side effects.  */
#define TRANSLATE(out, trt, d)                                          \
do                                                                      \
  {                                                                     \
    if (NILP (trt))                                                     \
      out = d;                                                          \
    else                                                                \
      {                                                                 \
        Lisp_Object translate_entry_ = Faref (trt, make_number (d));    \
        out = (INTEGERP (translate_entry_)                              \
               ? XINT (translate_entry_) : d);                          \
      }                                                                 \
  }                                                                     \
while (0)
1155
1156 /* Only used in search_buffer, to record the end position of the match
1157    when searching regexps and SEARCH_REGS should not be changed
1158    (i.e. Vinhibit_changing_match_data is non-nil).  */
1159 static struct re_registers search_regs_1;
1160
1161 static EMACS_INT
1162 search_buffer (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte,
1163                ptrdiff_t lim, ptrdiff_t lim_byte, EMACS_INT n,
1164                int RE, Lisp_Object trt, Lisp_Object inverse_trt, bool posix)
1165 {
1166   ptrdiff_t len = SCHARS (string);
1167   ptrdiff_t len_byte = SBYTES (string);
1168   register ptrdiff_t i;
1169
1170   if (running_asynch_code)
1171     save_search_regs ();
1172
1173   /* Searching 0 times means don't move.  */
1174   /* Null string is found at starting position.  */
1175   if (len == 0 || n == 0)
1176     {
1177       set_search_regs (pos_byte, 0);
1178       return pos;
1179     }
1180
1181   if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp)))
1182     {
1183       unsigned char *p1, *p2;
1184       ptrdiff_t s1, s2;
1185       struct re_pattern_buffer *bufp;
1186
1187       bufp = compile_pattern (string,
1188                               (NILP (Vinhibit_changing_match_data)
1189                                ? &search_regs : &search_regs_1),
1190                               trt, posix,
1191                               !NILP (BVAR (current_buffer, enable_multibyte_characters)));
1192
1193       immediate_quit = 1;       /* Quit immediately if user types ^G,
1194                                    because letting this function finish
1195                                    can take too long. */
1196       QUIT;                     /* Do a pending quit right away,
1197                                    to avoid paradoxical behavior */
1198       /* Get pointers and sizes of the two strings
1199          that make up the visible portion of the buffer. */
1200
1201       p1 = BEGV_ADDR;
1202       s1 = GPT_BYTE - BEGV_BYTE;
1203       p2 = GAP_END_ADDR;
1204       s2 = ZV_BYTE - GPT_BYTE;
1205       if (s1 < 0)
1206         {
1207           p2 = p1;
1208           s2 = ZV_BYTE - BEGV_BYTE;
1209           s1 = 0;
1210         }
1211       if (s2 < 0)
1212         {
1213           s1 = ZV_BYTE - BEGV_BYTE;
1214           s2 = 0;
1215         }
1216       re_match_object = Qnil;
1217
1218       while (n < 0)
1219         {
1220           ptrdiff_t val;
1221
1222           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1223                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1224                              (NILP (Vinhibit_changing_match_data)
1225                               ? &search_regs : &search_regs_1),
1226                              /* Don't allow match past current point */
1227                              pos_byte - BEGV_BYTE);
1228           if (val == -2)
1229             {
1230               matcher_overflow ();
1231             }
1232           if (val >= 0)
1233             {
1234               if (NILP (Vinhibit_changing_match_data))
1235                 {
1236                   pos_byte = search_regs.start[0] + BEGV_BYTE;
1237                   for (i = 0; i < search_regs.num_regs; i++)
1238                     if (search_regs.start[i] >= 0)
1239                       {
1240                         search_regs.start[i]
1241                           = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1242                         search_regs.end[i]
1243                           = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1244                       }
1245                   XSETBUFFER (last_thing_searched, current_buffer);
1246                   /* Set pos to the new position. */
1247                   pos = search_regs.start[0];
1248                 }
1249               else
1250                 {
1251                   pos_byte = search_regs_1.start[0] + BEGV_BYTE;
1252                   /* Set pos to the new position.  */
1253                   pos = BYTE_TO_CHAR (search_regs_1.start[0] + BEGV_BYTE);
1254                 }
1255             }
1256           else
1257             {
1258               immediate_quit = 0;
1259               return (n);
1260             }
1261           n++;
1262         }
1263       while (n > 0)
1264         {
1265           ptrdiff_t val;
1266
1267           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1268                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1269                              (NILP (Vinhibit_changing_match_data)
1270                               ? &search_regs : &search_regs_1),
1271                              lim_byte - BEGV_BYTE);
1272           if (val == -2)
1273             {
1274               matcher_overflow ();
1275             }
1276           if (val >= 0)
1277             {
1278               if (NILP (Vinhibit_changing_match_data))
1279                 {
1280                   pos_byte = search_regs.end[0] + BEGV_BYTE;
1281                   for (i = 0; i < search_regs.num_regs; i++)
1282                     if (search_regs.start[i] >= 0)
1283                       {
1284                         search_regs.start[i]
1285                           = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1286                         search_regs.end[i]
1287                           = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1288                       }
1289                   XSETBUFFER (last_thing_searched, current_buffer);
1290                   pos = search_regs.end[0];
1291                 }
1292               else
1293                 {
1294                   pos_byte = search_regs_1.end[0] + BEGV_BYTE;
1295                   pos = BYTE_TO_CHAR (search_regs_1.end[0] + BEGV_BYTE);
1296                 }
1297             }
1298           else
1299             {
1300               immediate_quit = 0;
1301               return (0 - n);
1302             }
1303           n--;
1304         }
1305       immediate_quit = 0;
1306       return (pos);
1307     }
1308   else                          /* non-RE case */
1309     {
1310       unsigned char *raw_pattern, *pat;
1311       ptrdiff_t raw_pattern_size;
1312       ptrdiff_t raw_pattern_size_byte;
1313       unsigned char *patbuf;
1314       bool multibyte = !NILP (BVAR (current_buffer, enable_multibyte_characters));
1315       unsigned char *base_pat;
1316       /* Set to positive if we find a non-ASCII char that need
1317          translation.  Otherwise set to zero later.  */
1318       int char_base = -1;
1319       bool boyer_moore_ok = 1;
1320       USE_SAFE_ALLOCA;
1321
1322       /* MULTIBYTE says whether the text to be searched is multibyte.
1323          We must convert PATTERN to match that, or we will not really
1324          find things right.  */
1325
1326       if (multibyte == STRING_MULTIBYTE (string))
1327         {
1328           raw_pattern = SDATA (string);
1329           raw_pattern_size = SCHARS (string);
1330           raw_pattern_size_byte = SBYTES (string);
1331         }
1332       else if (multibyte)
1333         {
1334           raw_pattern_size = SCHARS (string);
1335           raw_pattern_size_byte
1336             = count_size_as_multibyte (SDATA (string),
1337                                        raw_pattern_size);
1338           raw_pattern = SAFE_ALLOCA (raw_pattern_size_byte + 1);
1339           copy_text (SDATA (string), raw_pattern,
1340                      SCHARS (string), 0, 1);
1341         }
1342       else
1343         {
1344           /* Converting multibyte to single-byte.
1345
1346              ??? Perhaps this conversion should be done in a special way
1347              by subtracting nonascii-insert-offset from each non-ASCII char,
1348              so that only the multibyte chars which really correspond to
1349              the chosen single-byte character set can possibly match.  */
1350           raw_pattern_size = SCHARS (string);
1351           raw_pattern_size_byte = SCHARS (string);
1352           raw_pattern = SAFE_ALLOCA (raw_pattern_size + 1);
1353           copy_text (SDATA (string), raw_pattern,
1354                      SBYTES (string), 1, 0);
1355         }
1356
1357       /* Copy and optionally translate the pattern.  */
1358       len = raw_pattern_size;
1359       len_byte = raw_pattern_size_byte;
1360       SAFE_NALLOCA (patbuf, MAX_MULTIBYTE_LENGTH, len);
1361       pat = patbuf;
1362       base_pat = raw_pattern;
1363       if (multibyte)
1364         {
1365           /* Fill patbuf by translated characters in STRING while
1366              checking if we can use boyer-moore search.  If TRT is
1367              non-nil, we can use boyer-moore search only if TRT can be
1368              represented by the byte array of 256 elements.  For that,
1369              all non-ASCII case-equivalents of all case-sensitive
1370              characters in STRING must belong to the same character
1371              group (two characters belong to the same group iff their
1372              multibyte forms are the same except for the last byte;
1373              i.e. every 64 characters form a group; U+0000..U+003F,
1374              U+0040..U+007F, U+0080..U+00BF, ...).  */
1375
1376           while (--len >= 0)
1377             {
1378               unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str;
1379               int c, translated, inverse;
1380               int in_charlen, charlen;
1381
1382               /* If we got here and the RE flag is set, it's because we're
1383                  dealing with a regexp known to be trivial, so the backslash
1384                  just quotes the next character.  */
1385               if (RE && *base_pat == '\\')
1386                 {
1387                   len--;
1388                   raw_pattern_size--;
1389                   len_byte--;
1390                   base_pat++;
1391                 }
1392
1393               c = STRING_CHAR_AND_LENGTH (base_pat, in_charlen);
1394
1395               if (NILP (trt))
1396                 {
1397                   str = base_pat;
1398                   charlen = in_charlen;
1399                 }
1400               else
1401                 {
1402                   /* Translate the character.  */
1403                   TRANSLATE (translated, trt, c);
1404                   charlen = CHAR_STRING (translated, str_base);
1405                   str = str_base;
1406
1407                   /* Check if C has any other case-equivalents.  */
1408                   TRANSLATE (inverse, inverse_trt, c);
1409                   /* If so, check if we can use boyer-moore.  */
1410                   if (c != inverse && boyer_moore_ok)
1411                     {
1412                       /* Check if all equivalents belong to the same
1413                          group of characters.  Note that the check of C
1414                          itself is done by the last iteration.  */
1415                       int this_char_base = -1;
1416
1417                       while (boyer_moore_ok)
1418                         {
1419                           if (ASCII_CHAR_P (inverse))
1420                             {
1421                               if (this_char_base > 0)
1422                                 boyer_moore_ok = 0;
1423                               else
1424                                 this_char_base = 0;
1425                             }
1426                           else if (CHAR_BYTE8_P (inverse))
1427                             /* Boyer-moore search can't handle a
1428                                translation of an eight-bit
1429                                character.  */
1430                             boyer_moore_ok = 0;
1431                           else if (this_char_base < 0)
1432                             {
1433                               this_char_base = inverse & ~0x3F;
1434                               if (char_base < 0)
1435                                 char_base = this_char_base;
1436                               else if (this_char_base != char_base)
1437                                 boyer_moore_ok = 0;
1438                             }
1439                           else if ((inverse & ~0x3F) != this_char_base)
1440                             boyer_moore_ok = 0;
1441                           if (c == inverse)
1442                             break;
1443                           TRANSLATE (inverse, inverse_trt, inverse);
1444                         }
1445                     }
1446                 }
1447
1448               /* Store this character into the translated pattern.  */
1449               memcpy (pat, str, charlen);
1450               pat += charlen;
1451               base_pat += in_charlen;
1452               len_byte -= in_charlen;
1453             }
1454
1455           /* If char_base is still negative we didn't find any translated
1456              non-ASCII characters.  */
1457           if (char_base < 0)
1458             char_base = 0;
1459         }
1460       else
1461         {
1462           /* Unibyte buffer.  */
1463           char_base = 0;
1464           while (--len >= 0)
1465             {
1466               int c, translated, inverse;
1467
1468               /* If we got here and the RE flag is set, it's because we're
1469                  dealing with a regexp known to be trivial, so the backslash
1470                  just quotes the next character.  */
1471               if (RE && *base_pat == '\\')
1472                 {
1473                   len--;
1474                   raw_pattern_size--;
1475                   base_pat++;
1476                 }
1477               c = *base_pat++;
1478               TRANSLATE (translated, trt, c);
1479               *pat++ = translated;
1480               /* Check that none of C's equivalents violates the
1481                  assumptions of boyer_moore.  */
1482               TRANSLATE (inverse, inverse_trt, c);
1483               while (1)
1484                 {
1485                   if (inverse >= 0200)
1486                     {
1487                       boyer_moore_ok = 0;
1488                       break;
1489                     }
1490                   if (c == inverse)
1491                     break;
1492                   TRANSLATE (inverse, inverse_trt, inverse);
1493                 }
1494             }
1495         }
1496
1497       len_byte = pat - patbuf;
1498       pat = base_pat = patbuf;
1499
1500       EMACS_INT result
1501         = (boyer_moore_ok
1502            ? boyer_moore (n, pat, len_byte, trt, inverse_trt,
1503                           pos_byte, lim_byte,
1504                           char_base)
1505            : simple_search (n, pat, raw_pattern_size, len_byte, trt,
1506                             pos, pos_byte, lim, lim_byte));
1507       SAFE_FREE ();
1508       return result;
1509     }
1510 }
1511 \f
1512 /* Do a simple string search N times for the string PAT,
1513    whose length is LEN/LEN_BYTE,
1514    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1515    TRT is the translation table.
1516
1517    Return the character position where the match is found.
1518    Otherwise, if M matches remained to be found, return -M.
1519
1520    This kind of search works regardless of what is in PAT and
1521    regardless of what is in TRT.  It is used in cases where
1522    boyer_moore cannot work.  */
1523
1524 static EMACS_INT
1525 simple_search (EMACS_INT n, unsigned char *pat,
1526                ptrdiff_t len, ptrdiff_t len_byte, Lisp_Object trt,
1527                ptrdiff_t pos, ptrdiff_t pos_byte,
1528                ptrdiff_t lim, ptrdiff_t lim_byte)
1529 {
1530   bool multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
1531   bool forward = n > 0;
1532   /* Number of buffer bytes matched.  Note that this may be different
1533      from len_byte in a multibyte buffer.  */
1534   ptrdiff_t match_byte = PTRDIFF_MIN;
1535
1536   if (lim > pos && multibyte)
1537     while (n > 0)
1538       {
1539         while (1)
1540           {
1541             /* Try matching at position POS.  */
1542             ptrdiff_t this_pos = pos;
1543             ptrdiff_t this_pos_byte = pos_byte;
1544             ptrdiff_t this_len = len;
1545             unsigned char *p = pat;
1546             if (pos + len > lim || pos_byte + len_byte > lim_byte)
1547               goto stop;
1548
1549             while (this_len > 0)
1550               {
1551                 int charlen, buf_charlen;
1552                 int pat_ch, buf_ch;
1553
1554                 pat_ch = STRING_CHAR_AND_LENGTH (p, charlen);
1555                 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1556                                                  buf_charlen);
1557                 TRANSLATE (buf_ch, trt, buf_ch);
1558
1559                 if (buf_ch != pat_ch)
1560                   break;
1561
1562                 this_len--;
1563                 p += charlen;
1564
1565                 this_pos_byte += buf_charlen;
1566                 this_pos++;
1567               }
1568
1569             if (this_len == 0)
1570               {
1571                 match_byte = this_pos_byte - pos_byte;
1572                 pos += len;
1573                 pos_byte += match_byte;
1574                 break;
1575               }
1576
1577             INC_BOTH (pos, pos_byte);
1578           }
1579
1580         n--;
1581       }
1582   else if (lim > pos)
1583     while (n > 0)
1584       {
1585         while (1)
1586           {
1587             /* Try matching at position POS.  */
1588             ptrdiff_t this_pos = pos;
1589             ptrdiff_t this_len = len;
1590             unsigned char *p = pat;
1591
1592             if (pos + len > lim)
1593               goto stop;
1594
1595             while (this_len > 0)
1596               {
1597                 int pat_ch = *p++;
1598                 int buf_ch = FETCH_BYTE (this_pos);
1599                 TRANSLATE (buf_ch, trt, buf_ch);
1600
1601                 if (buf_ch != pat_ch)
1602                   break;
1603
1604                 this_len--;
1605                 this_pos++;
1606               }
1607
1608             if (this_len == 0)
1609               {
1610                 match_byte = len;
1611                 pos += len;
1612                 break;
1613               }
1614
1615             pos++;
1616           }
1617
1618         n--;
1619       }
1620   /* Backwards search.  */
1621   else if (lim < pos && multibyte)
1622     while (n < 0)
1623       {
1624         while (1)
1625           {
1626             /* Try matching at position POS.  */
1627             ptrdiff_t this_pos = pos;
1628             ptrdiff_t this_pos_byte = pos_byte;
1629             ptrdiff_t this_len = len;
1630             const unsigned char *p = pat + len_byte;
1631
1632             if (this_pos - len < lim || (pos_byte - len_byte) < lim_byte)
1633               goto stop;
1634
1635             while (this_len > 0)
1636               {
1637                 int pat_ch, buf_ch;
1638
1639                 DEC_BOTH (this_pos, this_pos_byte);
1640                 PREV_CHAR_BOUNDARY (p, pat);
1641                 pat_ch = STRING_CHAR (p);
1642                 buf_ch = STRING_CHAR (BYTE_POS_ADDR (this_pos_byte));
1643                 TRANSLATE (buf_ch, trt, buf_ch);
1644
1645                 if (buf_ch != pat_ch)
1646                   break;
1647
1648                 this_len--;
1649               }
1650
1651             if (this_len == 0)
1652               {
1653                 match_byte = pos_byte - this_pos_byte;
1654                 pos = this_pos;
1655                 pos_byte = this_pos_byte;
1656                 break;
1657               }
1658
1659             DEC_BOTH (pos, pos_byte);
1660           }
1661
1662         n++;
1663       }
1664   else if (lim < pos)
1665     while (n < 0)
1666       {
1667         while (1)
1668           {
1669             /* Try matching at position POS.  */
1670             ptrdiff_t this_pos = pos - len;
1671             ptrdiff_t this_len = len;
1672             unsigned char *p = pat;
1673
1674             if (this_pos < lim)
1675               goto stop;
1676
1677             while (this_len > 0)
1678               {
1679                 int pat_ch = *p++;
1680                 int buf_ch = FETCH_BYTE (this_pos);
1681                 TRANSLATE (buf_ch, trt, buf_ch);
1682
1683                 if (buf_ch != pat_ch)
1684                   break;
1685                 this_len--;
1686                 this_pos++;
1687               }
1688
1689             if (this_len == 0)
1690               {
1691                 match_byte = len;
1692                 pos -= len;
1693                 break;
1694               }
1695
1696             pos--;
1697           }
1698
1699         n++;
1700       }
1701
1702  stop:
1703   if (n == 0)
1704     {
1705       eassert (match_byte != PTRDIFF_MIN);
1706       if (forward)
1707         set_search_regs ((multibyte ? pos_byte : pos) - match_byte, match_byte);
1708       else
1709         set_search_regs (multibyte ? pos_byte : pos, match_byte);
1710
1711       return pos;
1712     }
1713   else if (n > 0)
1714     return -n;
1715   else
1716     return n;
1717 }
1718 \f
1719 /* Do Boyer-Moore search N times for the string BASE_PAT,
1720    whose length is LEN_BYTE,
1721    from buffer position POS_BYTE until LIM_BYTE.
1722    The sign of N says which direction we search in: a positive N
        searches forward, otherwise the search goes backward.
1723    TRT and INVERSE_TRT are translation tables.
1724    Characters in BASE_PAT are already translated by TRT.
1725
1726    This kind of search works if all the characters in BASE_PAT that
1727    have nontrivial translation are the same aside from the last byte.
1728    This makes it possible to translate just the last byte of a
1729    character, and do so after just a simple test of the context.
1730    CHAR_BASE is nonzero if there is such a non-ASCII character.
1731
1732    If that criterion is not satisfied, do not call this function.

        Every match found is recorded with set_search_regs.  Once all N
        matches have been found, return the character position of the
        end of the last match (forward search) or of its start (backward
        search).  If the search runs out of room first, return a
        negative number whose magnitude is the count of matches that
        were still to be found.  */
1733
1734 static EMACS_INT
1735 boyer_moore (EMACS_INT n, unsigned char *base_pat,
1736              ptrdiff_t len_byte,
1737              Lisp_Object trt, Lisp_Object inverse_trt,
1738              ptrdiff_t pos_byte, ptrdiff_t lim_byte,
1739              int char_base)
1740 {
1741   int direction = ((n > 0) ? 1 : -1);  /* +1 forward, -1 backward.  */
1742   register ptrdiff_t dirlen;
1743   ptrdiff_t limit;
1744   int stride_for_teases = 0;
1745   int BM_tab[0400];
1746   register unsigned char *cursor, *p_limit;
1747   register ptrdiff_t i;
1748   register int j;
1749   unsigned char *pat, *pat_end;
1750   bool multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
1751
1752   unsigned char simple_translate[0400];
1753   /* These are set to the preceding bytes of a byte to be translated
1754      if char_base is nonzero.  As the maximum byte length of a
1755      multibyte character is 5, we have to check at most four previous
1756      bytes.
          NOTE(review): only three previous bytes are recorded here and
          compared in the match loops below; confirm whether a fourth
          can ever be needed.  */
1757   int translate_prev_byte1 = 0;
1758   int translate_prev_byte2 = 0;
1759   int translate_prev_byte3 = 0;
1760
1761   /* The general approach is that we are going to maintain that we know
1762      the first (closest to the present position, in whatever direction
1763      we're searching) character that could possibly be the last
1764      (furthest from present position) character of a valid match.  We
1765      advance the state of our knowledge by looking at that character
1766      and seeing whether it indeed matches the last character of the
1767      pattern.  If it does, we take a closer look.  If it does not, we
1768      move our pointer (to putative last characters) as far as is
1769      logically possible.  This amount of movement, which I call a
1770      stride, will be the length of the pattern if the actual character
1771      appears nowhere in the pattern, otherwise it will be the distance
1772      from the last occurrence of that character to the end of the
1773      pattern.  If the amount is zero we have a possible match.  */
1774
1775   /* Here we make a "mickey mouse" BM table.  The stride of the search
1776      is determined only by the last character of the putative match.
1777      If that character does not match, we will stride the proper
1778      distance to propose a match that superimposes it on the last
1779      instance of a character that matches it (per trt), or misses
1780      it entirely if there is none. */
1781
1782   dirlen = len_byte * direction;  /* Pattern length, signed by direction.  */
1783
1784   /* Record position after the end of the pattern.  */
1785   pat_end = base_pat + len_byte;
1786   /* BASE_PAT points to a character that we start scanning from.
1787      It is the first character in a forward search,
1788      the last character in a backward search.  */
1789   if (direction < 0)
1790     base_pat = pat_end - 1;
1791
1792   /* A character that does not appear in the pattern induces a
1793      stride equal to the pattern length.  */
1794   for (i = 0; i < 0400; i++)
1795     BM_tab[i] = dirlen;
1796
1797   /* We use this for translation, instead of TRT itself.
1798      We fill this in to handle the characters that actually
1799      occur in the pattern.  Others don't matter anyway!  */
1800   for (i = 0; i < 0400; i++)
1801     simple_translate[i] = i;
1802
1803   if (char_base)
1804     {
1805       /* Set up translate_prev_byte1/2/3 from CHAR_BASE.  Only a
1806          byte following them is the target of translation.  */
1807       unsigned char str[MAX_MULTIBYTE_LENGTH];
1808       int cblen = CHAR_STRING (char_base, str);
1809
1810       translate_prev_byte1 = str[cblen - 2];
1811       if (cblen > 2)
1812         {
1813           translate_prev_byte2 = str[cblen - 3];
1814           if (cblen > 3)
1815             translate_prev_byte3 = str[cblen - 4];
1816         }
1817     }
1818
       /* Walk the pattern one byte at a time in the search direction,
          filling in BM_tab (and simple_translate when TRT is in use).  */
1819   i = 0;
1820   while (i != dirlen)
1821     {
1822       unsigned char *ptr = base_pat + i;
1823       i += direction;
1824       if (! NILP (trt))
1825         {
1826           /* If the byte currently looking at is the last of a
1827              character to check case-equivalents, set CH to that
1828              character.  An ASCII character and a non-ASCII character
1829              matching with CHAR_BASE are to be checked.  */
1830           int ch = -1;
1831
1832           if (ASCII_CHAR_P (*ptr) || ! multibyte)
1833             ch = *ptr;
1834           else if (char_base
1835                    && ((pat_end - ptr) == 1 || CHAR_HEAD_P (ptr[1])))
1836             {
1837               unsigned char *charstart = ptr - 1;
1838
1839               while (! (CHAR_HEAD_P (*charstart)))
1840                 charstart--;
1841               ch = STRING_CHAR (charstart);
1842               if (char_base != (ch & ~0x3F))
1843                 ch = -1;
1844             }
1845
1846           if (ch >= 0200 && multibyte)
1847             j = (ch & 0x3F) | 0200;
1848           else
1849             j = *ptr;
1850
1851           if (i == dirlen)
1852             stride_for_teases = BM_tab[j];
1853
1854           BM_tab[j] = dirlen - i;
1855           /* A translation table is accompanied by its inverse -- see
1856              comment following downcase_table for details.  */
1857           if (ch >= 0)
1858             {
1859               int starting_ch = ch;
1860               int starting_j = j;
1861
1862               while (1)
1863                 {
1864                   TRANSLATE (ch, inverse_trt, ch);
1865                   if (ch >= 0200 && multibyte)
1866                     j = (ch & 0x3F) | 0200;
1867                   else
1868                     j = ch;
1869
1870                   /* For all the characters that map into CH,
1871                      set up simple_translate to map the last byte
1872                      into STARTING_J.  */
1873                   simple_translate[j] = starting_j;
1874                   if (ch == starting_ch)
1875                     break;
1876                   BM_tab[j] = dirlen - i;
1877                 }
1878             }
1879         }
1880       else
1881         {
1882           j = *ptr;
1883
1884           if (i == dirlen)
1885             stride_for_teases = BM_tab[j];
1886           BM_tab[j] = dirlen - i;
1887         }
1888       /* stride_for_teases tells how much to stride if we get a
1889          match on the far character but are subsequently
1890          disappointed, by recording what the stride would have been
1891          for that character if the last character had been
1892          different.  */
1893     }
1894   pos_byte += dirlen - ((direction > 0) ? direction : 0);
1895   /* loop invariant - POS_BYTE points at where last char (first
1896      char if reverse) of pattern would align in a possible match.  */
1897   while (n != 0)
1898     {
1899       ptrdiff_t tail_end;
1900       unsigned char *tail_end_ptr;
1901
1902       /* It's been reported that some (broken) compiler thinks that
1903          Boolean expressions in an arithmetic context are unsigned.
1904          Using an explicit ?1:0 prevents this.  */
1905       if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1906           < 0)
1907         return (n * (0 - direction));  /* Negative; magnitude is the matches still unfound.  */
1908       /* First we do the part we can by pointers (maybe nothing) */
1909       QUIT;
1910       pat = base_pat;
1911       limit = pos_byte - dirlen + direction;
1912       if (direction > 0)
1913         {
1914           limit = BUFFER_CEILING_OF (limit);
1915           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1916              can take on without hitting edge of buffer or the gap.  */
1917           limit = min (limit, pos_byte + 20000);  /* At most 20000 bytes per clump; QUIT above runs once per clump.  */
1918           limit = min (limit, lim_byte - 1);
1919         }
1920       else
1921         {
1922           limit = BUFFER_FLOOR_OF (limit);
1923           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1924              can take on without hitting edge of buffer or the gap.  */
1925           limit = max (limit, pos_byte - 20000);
1926           limit = max (limit, lim_byte);
1927         }
1928       tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1929       tail_end_ptr = BYTE_POS_ADDR (tail_end);
1930
           /* Take the raw-pointer path only when more than 20 bytes can
              be scanned before LIMIT; otherwise fall to the slow path.  */
1931       if ((limit - pos_byte) * direction > 20)
1932         {
1933           unsigned char *p2;
1934
1935           p_limit = BYTE_POS_ADDR (limit);
1936           p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1937           /* In this loop, pos + cursor - p2 is the surrogate for pos.  */
1938           while (1)             /* use one cursor setting as long as i can */
1939             {
1940               if (direction > 0) /* worth duplicating */
1941                 {
1942                   while (cursor <= p_limit)
1943                     {
1944                       if (BM_tab[*cursor] == 0)
1945                         goto hit;
1946                       cursor += BM_tab[*cursor];
1947                     }
1948                 }
1949               else
1950                 {
1951                   while (cursor >= p_limit)
1952                     {
1953                       if (BM_tab[*cursor] == 0)
1954                         goto hit;
1955                       cursor += BM_tab[*cursor];
1956                     }
1957                 }
1958               /* If you are here, cursor is beyond the end of the
1959                  searched region.  You fail to match within the
1960                  permitted region and would otherwise try a character
1961                  beyond that region.  */
1962               break;
1963
1964             hit:
                   /* The far byte matched; compare the remaining pattern
                      bytes, walking CURSOR against the search direction.  */
1965               i = dirlen - direction;
1966               if (! NILP (trt))
1967                 {
1968                   while ((i -= direction) + direction != 0)
1969                     {
1970                       int ch;
1971                       cursor -= direction;
1972                       /* Translate only the last byte of a character.  */
1973                       if (! multibyte
1974                           || ((cursor == tail_end_ptr
1975                                || CHAR_HEAD_P (cursor[1]))
1976                               && (CHAR_HEAD_P (cursor[0])
1977                                   /* Check if this is the last byte of
1978                                      a translatable character.  */
1979                                   || (translate_prev_byte1 == cursor[-1]
1980                                       && (CHAR_HEAD_P (translate_prev_byte1)
1981                                           || (translate_prev_byte2 == cursor[-2]
1982                                               && (CHAR_HEAD_P (translate_prev_byte2)
1983                                                   || (translate_prev_byte3 == cursor[-3]))))))))
1984                         ch = simple_translate[*cursor];
1985                       else
1986                         ch = *cursor;
1987                       if (pat[i] != ch)
1988                         break;
1989                     }
1990                 }
1991               else
1992                 {
1993                   while ((i -= direction) + direction != 0)
1994                     {
1995                       cursor -= direction;
1996                       if (pat[i] != *cursor)
1997                         break;
1998                     }
1999                 }
2000               cursor += dirlen - i - direction; /* fix cursor */
2001               if (i + direction == 0)
2002                 {
2003                   ptrdiff_t position, start, end;
2004
2005                   cursor -= direction;
2006
2007                   position = pos_byte + cursor - p2 + ((direction > 0)
2008                                                        ? 1 - len_byte : 0);
2009                   set_search_regs (position, len_byte);
2010
2011                   if (NILP (Vinhibit_changing_match_data))
2012                     {
2013                       start = search_regs.start[0];
2014                       end = search_regs.end[0];
2015                     }
2016                   else
2017                     /* If Vinhibit_changing_match_data is non-nil,
2018                        search_regs will not be changed.  So let's
2019                        compute start and end here.  */
2020                     {
2021                       start = BYTE_TO_CHAR (position);
2022                       end = BYTE_TO_CHAR (position + len_byte);
2023                     }
2024
2025                   if ((n -= direction) != 0)
2026                     cursor += dirlen; /* to resume search */
2027                   else
2028                     return direction > 0 ? end : start;
2029                 }
2030               else
2031                 cursor += stride_for_teases; /* <sigh> we lose -  */
2032             }
2033           pos_byte += cursor - p2;
2034         }
2035       else
2036         /* Now we'll pick up a clump that has to be done the hard
2037            way because it covers a discontinuity.  */
2038         {
2039           limit = ((direction > 0)
2040                    ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
2041                    : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
2042           limit = ((direction > 0)
2043                    ? min (limit + len_byte, lim_byte - 1)
2044                    : max (limit - len_byte, lim_byte));
2045           /* LIMIT is now the last value POS_BYTE can have
2046              and still be valid for a possible match.  */
2047           while (1)
2048             {
2049               /* This loop can be coded for space rather than
2050                  speed because it will usually run only once.
2051                  (the reach is at most len + 21, and typically
2052                  does not exceed len).  */
2053               while ((limit - pos_byte) * direction >= 0)
2054                 {
2055                   int ch = FETCH_BYTE (pos_byte);
2056                   if (BM_tab[ch] == 0)
2057                     goto hit2;
2058                   pos_byte += BM_tab[ch];
2059                 }
2060               break;    /* ran off the end */
2061
2062             hit2:
2063               /* Found what might be a match.  */
2064               i = dirlen - direction;
2065               while ((i -= direction) + direction != 0)
2066                 {
2067                   int ch;
2068                   unsigned char *ptr;
2069                   pos_byte -= direction;
2070                   ptr = BYTE_POS_ADDR (pos_byte);
2071                   /* Translate only the last byte of a character.  */
2072                   if (! multibyte
2073                       || ((ptr == tail_end_ptr
2074                            || CHAR_HEAD_P (ptr[1]))
2075                           && (CHAR_HEAD_P (ptr[0])
2076                               /* Check if this is the last byte of a
2077                                  translatable character.  */
2078                               || (translate_prev_byte1 == ptr[-1]
2079                                   && (CHAR_HEAD_P (translate_prev_byte1)
2080                                       || (translate_prev_byte2 == ptr[-2]
2081                                           && (CHAR_HEAD_P (translate_prev_byte2)
2082                                               || translate_prev_byte3 == ptr[-3])))))))
2083                     ch = simple_translate[*ptr];
2084                   else
2085                     ch = *ptr;
2086                   if (pat[i] != ch)
2087                     break;
2088                 }
2089               /* Above loop has moved POS_BYTE part or all the way
2090                  back to the first pos (last pos if reverse).
2091                  Set it once again at the last (first if reverse) char.  */
2092               pos_byte += dirlen - i - direction;
2093               if (i + direction == 0)
2094                 {
2095                   ptrdiff_t position, start, end;
2096                   pos_byte -= direction;
2097
2098                   position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
2099                   set_search_regs (position, len_byte);
2100
2101                   if (NILP (Vinhibit_changing_match_data))
2102                     {
2103                       start = search_regs.start[0];
2104                       end = search_regs.end[0];
2105                     }
2106                   else
2107                     /* If Vinhibit_changing_match_data is non-nil,
2108                        search_regs will not be changed.  So let's
2109                        compute start and end here.  */
2110                     {
2111                       start = BYTE_TO_CHAR (position);
2112                       end = BYTE_TO_CHAR (position + len_byte);
2113                     }
2114
2115                   if ((n -= direction) != 0)
2116                     pos_byte += dirlen; /* to resume search */
2117                   else
2118                     return direction > 0 ? end : start;
2119                 }
2120               else
2121                 pos_byte += stride_for_teases;
2122             }
2123           }
2124       /* We have done one clump.  Can we continue? */
2125       if ((lim_byte - pos_byte) * direction < 0)
2126         return ((0 - n) * direction);  /* Same failure value as the check at the top of the loop.  */
2127     }
       /* Reached only when N was zero on entry: inside the loop, every
          path that brings N to zero returns directly.  */
2128   return BYTE_TO_CHAR (pos_byte);
2129 }
2130
2131 /* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
2132    for the overall match just found in the current buffer.
2133    Also clear out the match data for registers 1 and up.

        BEG_BYTE and NBYTES are byte quantities; both ends are converted
        with BYTE_TO_CHAR before being stored in register 0.  Also set
        last_thing_searched to the current buffer.  Do nothing at all
        if Vinhibit_changing_match_data is non-nil.  */
2134
2135 static void
2136 set_search_regs (ptrdiff_t beg_byte, ptrdiff_t nbytes)
2137 {
2138   ptrdiff_t i;
2139
2140   if (!NILP (Vinhibit_changing_match_data))
2141     return;
2142
2143   /* Make sure we have registers in which to store
2144      the match position.  */
2145   if (search_regs.num_regs == 0)
2146     {
2147       search_regs.start = xmalloc (2 * sizeof (regoff_t));
2148       search_regs.end = xmalloc (2 * sizeof (regoff_t));
2149       search_regs.num_regs = 2;
2150     }
2151
2152   /* Clear out the other registers.  */
2153   for (i = 1; i < search_regs.num_regs; i++)
2154     {
2155       search_regs.start[i] = -1;
2156       search_regs.end[i] = -1;
2157     }
2158
       /* Register 0 holds the whole match, as character positions.  */
2159   search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
2160   search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
2161   XSETBUFFER (last_thing_searched, current_buffer);
2162 }
2163 \f
2164 DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
2165        "MSearch backward: ",
2166        doc: /* Search backward from point for STRING.
2167 Set point to the beginning of the occurrence found, and return point.
2168 An optional second argument bounds the search; it is a buffer position.
2169 The match found must not extend before that position.
2170 Optional third argument, if t, means if fail just return nil (no error).
2171  If not nil and not t, position at limit of search and return nil.
2172 Optional fourth argument COUNT, if non-nil, means to search for COUNT
2173  successive occurrences.  If COUNT is negative, search forward,
2174  instead of backward, for -COUNT occurrences.
2175
2176 Search case-sensitivity is determined by the value of the variable
2177 `case-fold-search', which see.
2178
2179 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2180   (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2181 {
       /* All the work is delegated to search_command.  The three
          trailing arguments presumably select the direction (-1 =
          backward), regexp matching (0 = plain string) and POSIX
          matching (0 = off) -- confirm against search_command.  */
2182   return search_command (string, bound, noerror, count, -1, 0, 0);
2183 }
2184
2185 DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
2186        doc: /* Search forward from point for STRING.
2187 Set point to the end of the occurrence found, and return point.
2188 An optional second argument bounds the search; it is a buffer position.
2189 The match found must not extend after that position.  A value of nil is
2190   equivalent to (point-max).
2191 Optional third argument, if t, means if fail just return nil (no error).
2192   If not nil and not t, move to limit of search and return nil.
2193 Optional fourth argument COUNT, if non-nil, means to search for COUNT
2194  successive occurrences.  If COUNT is negative, search backward,
2195  instead of forward, for -COUNT occurrences.
2196
2197 Search case-sensitivity is determined by the value of the variable
2198 `case-fold-search', which see.
2199
2200 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2201   (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2202 {
       /* All the work is delegated to search_command.  The three
          trailing arguments presumably select the direction (1 =
          forward), regexp matching (0 = plain string) and POSIX
          matching (0 = off) -- confirm against search_command.  */
2203   return search_command (string, bound, noerror, count, 1, 0, 0);
2204 }
2205
2206 DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
2207        "sRE search backward: ",
2208        doc: /* Search backward from point for match for regular expression REGEXP.
2209 Set point to the beginning of the match, and return point.
2210 The match found is the one starting last in the buffer
2211 and yet ending before the origin of the search.
2212 An optional second argument bounds the search; it is a buffer position.
2213 The match found must start at or after that position.
2214 Optional third argument, if t, means if fail just return nil (no error).
2215   If not nil and not t, move to limit of search and return nil.
2216 Optional fourth argument is repeat count--search for successive occurrences.
2217
2218 Search case-sensitivity is determined by the value of the variable
2219 `case-fold-search', which see.
2220
2221 See also the functions `match-beginning', `match-end', `match-string',
2222 and `replace-match'.  */)
2223   (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2224 {
       /* All the work is delegated to search_command.  The three
          trailing arguments presumably select the direction (-1 =
          backward), regexp matching (1 = on) and POSIX matching
          (0 = off) -- confirm against search_command.  */
2225   return search_command (regexp, bound, noerror, count, -1, 1, 0);
2226 }
2227
2228 DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
2229        "sRE search: ",
2230        doc: /* Search forward from point for regular expression REGEXP.
2231 Set point to the end of the occurrence found, and return point.
2232 An optional second argument bounds the search; it is a buffer position.
2233 The match found must not extend after that position.
2234 Optional third argument, if t, means if fail just return nil (no error).
2235   If not nil and not t, move to limit of search and return nil.
2236 Optional fourth argument is repeat count--search for successive occurrences.
2237
2238 Search case-sensitivity is determined by the value of the variable
2239 `case-fold-search', which see.
2240
2241 See also the functions `match-beginning', `match-end', `match-string',
2242 and `replace-match'.  */)
2243   (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2244 {
       /* All the work is delegated to search_command.  The three
          trailing arguments presumably select the direction (1 =
          forward), regexp matching (1 = on) and POSIX matching
          (0 = off) -- confirm against search_command.  */
2245   return search_command (regexp, bound, noerror, count, 1, 1, 0);
2246 }
2247
2248 DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
2249        "sPosix search backward: ",
2250        doc: /* Search backward from point for match for regular expression REGEXP.
2251 Find the longest match in accord with Posix regular expression rules.
2252 Set point to the beginning of the match, and return point.
2253 The match found is the one starting last in the buffer
2254 and yet ending before the origin of the search.
2255 An optional second argument bounds the search; it is a buffer position.
2256 The match found must start at or after that position.
2257 Optional third argument, if t, means if fail just return nil (no error).
2258   If not nil and not t, move to limit of search and return nil.
2259 Optional fourth argument is repeat count--search for successive occurrences.
2260
2261 Search case-sensitivity is determined by the value of the variable
2262 `case-fold-search', which see.
2263
2264 See also the functions `match-beginning', `match-end', `match-string',
2265 and `replace-match'.  */)
2266   (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2267 {
       /* All the work is delegated to search_command.  The three
          trailing arguments presumably select the direction (-1 =
          backward), regexp matching (1 = on) and POSIX matching
          (1 = on) -- confirm against search_command.  */
2268   return search_command (regexp, bound, noerror, count, -1, 1, 1);
2269 }
2270
2271 DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
2272        "sPosix search: ",
2273        doc: /* Search forward from point for regular expression REGEXP.
2274 Find the longest match in accord with Posix regular expression rules.
2275 Set point to the end of the occurrence found, and return point.
2276 An optional second argument bounds the search; it is a buffer position.
2277 The match found must not extend after that position.
2278 Optional third argument, if t, means if fail just return nil (no error).
2279   If not nil and not t, move to limit of search and return nil.
2280 Optional fourth argument is repeat count--search for successive occurrences.
2281
2282 Search case-sensitivity is determined by the value of the variable
2283 `case-fold-search', which see.
2284
2285 See also the functions `match-beginning', `match-end', `match-string',
2286 and `replace-match'.  */)
2287   (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2288 {
       /* All the work is delegated to search_command.  The three
          trailing arguments presumably select the direction (1 =
          forward), regexp matching (1 = on) and POSIX matching
          (1 = on) -- confirm against search_command.  */
2289   return search_command (regexp, bound, noerror, count, 1, 1, 1);
2290 }
2291 \f
2292 DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
2293        doc: /* Replace text matched by last search with NEWTEXT.
2294 Leave point at the end of the replacement text.
2295
2296 If optional second arg FIXEDCASE is non-nil, do not alter the case of
2297 the replacement text.  Otherwise, maybe capitalize the whole text, or
2298 maybe just word initials, based on the replaced text.  If the replaced
2299 text has only capital letters and has at least one multiletter word,
2300 convert NEWTEXT to all caps.  Otherwise if all words are capitalized
2301 in the replaced text, capitalize each word in NEWTEXT.
2302
2303 If optional third arg LITERAL is non-nil, insert NEWTEXT literally.
2304 Otherwise treat `\\' as special:
2305   `\\&' in NEWTEXT means substitute original matched text.
2306   `\\N' means substitute what matched the Nth `\\(...\\)'.
2307        If Nth parens didn't match, substitute nothing.
2308   `\\\\' means insert one `\\'.
2309   `\\?' is treated literally
2310        (for compatibility with `query-replace-regexp').
2311   Any other character following `\\' signals an error.
2312 Case conversion does not apply to these substitutions.
2313
2314 If optional fourth argument STRING is non-nil, it should be a string
2315 to act on; this should be the string on which the previous match was
2316 done via `string-match'.  In this case, `replace-match' creates and
2317 returns a new string, made by copying STRING and replacing the part of
2318 STRING that was matched (the original STRING itself is not altered).
2319
2320 The optional fifth argument SUBEXP specifies a subexpression;
2321 it says to replace just that subexpression with NEWTEXT,
2322 rather than replacing the entire matched text.
2323 This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
2324 `\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
2325 NEWTEXT in place of subexp N.
2326 This is useful only after a regular expression search or match,
2327 since only regular expressions have distinguished subexpressions.  */)
2328   (Lisp_Object newtext, Lisp_Object fixedcase, Lisp_Object literal, Lisp_Object string, Lisp_Object subexp)
2329 {
2330   enum { nochange, all_caps, cap_initial } case_action;
2331   ptrdiff_t pos, pos_byte;
2332   bool some_multiletter_word;
2333   bool some_lowercase;
2334   bool some_uppercase;
2335   bool some_nonuppercase_initial;
2336   int c, prevc;
2337   ptrdiff_t sub;
2338   ptrdiff_t opoint, newpoint;
2339
2340   CHECK_STRING (newtext);
2341
2342   if (! NILP (string))
2343     CHECK_STRING (string);
2344
2345   case_action = nochange;       /* We tried an initialization */
2346                                 /* but some C compilers blew it */
2347
2348   if (search_regs.num_regs <= 0)
2349     error ("`replace-match' called before any match found");
2350
2351   if (NILP (subexp))
2352     sub = 0;
2353   else
2354     {
2355       CHECK_NUMBER (subexp);
2356       if (! (0 <= XINT (subexp) && XINT (subexp) < search_regs.num_regs))
2357         args_out_of_range (subexp, make_number (search_regs.num_regs));
2358       sub = XINT (subexp);
2359     }
2360
2361   if (NILP (string))
2362     {
2363       if (search_regs.start[sub] < BEGV
2364           || search_regs.start[sub] > search_regs.end[sub]
2365           || search_regs.end[sub] > ZV)
2366         args_out_of_range (make_number (search_regs.start[sub]),
2367                            make_number (search_regs.end[sub]));
2368     }
2369   else
2370     {
2371       if (search_regs.start[sub] < 0
2372           || search_regs.start[sub] > search_regs.end[sub]
2373           || search_regs.end[sub] > SCHARS (string))
2374         args_out_of_range (make_number (search_regs.start[sub]),
2375                            make_number (search_regs.end[sub]));
2376     }
2377
2378   if (NILP (fixedcase))
2379     {
2380       /* Decide how to casify by examining the matched text. */
2381       ptrdiff_t last;
2382
2383       pos = search_regs.start[sub];
2384       last = search_regs.end[sub];
2385
2386       if (NILP (string))
2387         pos_byte = CHAR_TO_BYTE (pos);
2388       else
2389         pos_byte = string_char_to_byte (string, pos);
2390
2391       prevc = '\n';
2392       case_action = all_caps;
2393
2394       /* some_multiletter_word is set nonzero if any original word
2395          is more than one letter long. */
2396       some_multiletter_word = 0;
2397       some_lowercase = 0;
2398       some_nonuppercase_initial = 0;
2399       some_uppercase = 0;
2400
2401       while (pos < last)
2402         {
2403           if (NILP (string))
2404             {
2405               c = FETCH_CHAR_AS_MULTIBYTE (pos_byte);
2406               INC_BOTH (pos, pos_byte);
2407             }
2408           else
2409             FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, pos, pos_byte);
2410
2411           if (lowercasep (c))
2412             {
2413               /* Cannot be all caps if any original char is lower case */
2414
2415               some_lowercase = 1;
2416               if (SYNTAX (prevc) != Sword)
2417                 some_nonuppercase_initial = 1;
2418               else
2419                 some_multiletter_word = 1;
2420             }
2421           else if (uppercasep (c))
2422             {
2423               some_uppercase = 1;
2424               if (SYNTAX (prevc) != Sword)
2425                 ;
2426               else
2427                 some_multiletter_word = 1;
2428             }
2429           else
2430             {
2431               /* If the initial is a caseless word constituent,
2432                  treat that like a lowercase initial.  */
2433               if (SYNTAX (prevc) != Sword)
2434                 some_nonuppercase_initial = 1;
2435             }
2436
2437           prevc = c;
2438         }
2439
2440       /* Convert to all caps if the old text is all caps
2441          and has at least one multiletter word.  */
2442       if (! some_lowercase && some_multiletter_word)
2443         case_action = all_caps;
2444       /* Capitalize each word, if the old text has all capitalized words.  */
2445       else if (!some_nonuppercase_initial && some_multiletter_word)
2446         case_action = cap_initial;
2447       else if (!some_nonuppercase_initial && some_uppercase)
2448         /* Should x -> yz, operating on X, give Yz or YZ?
2449            We'll assume the latter.  */
2450         case_action = all_caps;
2451       else
2452         case_action = nochange;
2453     }
2454
2455   /* Do replacement in a string.  */
2456   if (!NILP (string))
2457     {
2458       Lisp_Object before, after;
2459
2460       before = Fsubstring (string, make_number (0),
2461                            make_number (search_regs.start[sub]));
2462       after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
2463
2464       /* Substitute parts of the match into NEWTEXT
2465          if desired.  */
2466       if (NILP (literal))
2467         {
2468           ptrdiff_t lastpos = 0;
2469           ptrdiff_t lastpos_byte = 0;
2470           /* We build up the substituted string in ACCUM.  */
2471           Lisp_Object accum;
2472           Lisp_Object middle;
2473           ptrdiff_t length = SBYTES (newtext);
2474
2475           accum = Qnil;
2476
2477           for (pos_byte = 0, pos = 0; pos_byte < length;)
2478             {
2479               ptrdiff_t substart = -1;
2480               ptrdiff_t subend = 0;
2481               bool delbackslash = 0;
2482
2483               FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2484
2485               if (c == '\\')
2486                 {
2487                   FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2488
2489                   if (c == '&')
2490                     {
2491                       substart = search_regs.start[sub];
2492                       subend = search_regs.end[sub];
2493                     }
2494                   else if (c >= '1' && c <= '9')
2495                     {
2496                       if (c - '0' < search_regs.num_regs
2497                           && search_regs.start[c - '0'] >= 0)
2498                         {
2499                           substart = search_regs.start[c - '0'];
2500                           subend = search_regs.end[c - '0'];
2501                         }
2502                       else
2503                         {
2504                           /* If that subexp did not match,
2505                              replace \\N with nothing.  */
2506                           substart = 0;
2507                           subend = 0;
2508                         }
2509                     }
2510                   else if (c == '\\')
2511                     delbackslash = 1;
2512                   else if (c != '?')
2513                     error ("Invalid use of `\\' in replacement text");
2514                 }
2515               if (substart >= 0)
2516                 {
2517                   if (pos - 2 != lastpos)
2518                     middle = substring_both (newtext, lastpos,
2519                                              lastpos_byte,
2520                                              pos - 2, pos_byte - 2);
2521                   else
2522                     middle = Qnil;
2523                   accum = concat3 (accum, middle,
2524                                    Fsubstring (string,
2525                                                make_number (substart),
2526                                                make_number (subend)));
2527                   lastpos = pos;
2528                   lastpos_byte = pos_byte;
2529                 }
2530               else if (delbackslash)
2531                 {
2532                   middle = substring_both (newtext, lastpos,
2533                                            lastpos_byte,
2534                                            pos - 1, pos_byte - 1);
2535
2536                   accum = concat2 (accum, middle);
2537                   lastpos = pos;
2538                   lastpos_byte = pos_byte;
2539                 }
2540             }
2541
2542           if (pos != lastpos)
2543             middle = substring_both (newtext, lastpos,
2544                                      lastpos_byte,
2545                                      pos, pos_byte);
2546           else
2547             middle = Qnil;
2548
2549           newtext = concat2 (accum, middle);
2550         }
2551
2552       /* Do case substitution in NEWTEXT if desired.  */
2553       if (case_action == all_caps)
2554         newtext = Fupcase (newtext);
2555       else if (case_action == cap_initial)
2556         newtext = Fupcase_initials (newtext);
2557
2558       return concat3 (before, newtext, after);
2559     }
2560
2561   /* Record point, then move (quietly) to the start of the match.  */
2562   if (PT >= search_regs.end[sub])
2563     opoint = PT - ZV;
2564   else if (PT > search_regs.start[sub])
2565     opoint = search_regs.end[sub] - ZV;
2566   else
2567     opoint = PT;
2568
2569   /* If we want non-literal replacement,
2570      perform substitution on the replacement string.  */
2571   if (NILP (literal))
2572     {
2573       ptrdiff_t length = SBYTES (newtext);
2574       unsigned char *substed;
2575       ptrdiff_t substed_alloc_size, substed_len;
2576       bool buf_multibyte = !NILP (BVAR (current_buffer, enable_multibyte_characters));
2577       bool str_multibyte = STRING_MULTIBYTE (newtext);
2578       bool really_changed = 0;
2579
2580       substed_alloc_size = (length <= (STRING_BYTES_BOUND - 100) / 2
2581                             ? length * 2 + 100
2582                             : STRING_BYTES_BOUND);
2583       substed = xmalloc (substed_alloc_size);
2584       substed_len = 0;
2585
2586       /* Go thru NEWTEXT, producing the actual text to insert in
2587          SUBSTED while adjusting multibyteness to that of the current
2588          buffer.  */
2589
2590       for (pos_byte = 0, pos = 0; pos_byte < length;)
2591         {
2592           unsigned char str[MAX_MULTIBYTE_LENGTH];
2593           const unsigned char *add_stuff = NULL;
2594           ptrdiff_t add_len = 0;
2595           ptrdiff_t idx = -1;
2596
2597           if (str_multibyte)
2598             {
2599               FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte);
2600               if (!buf_multibyte)
2601                 c = CHAR_TO_BYTE8 (c);
2602             }
2603           else
2604             {
2605               /* Note that we don't have to increment POS.  */
2606               c = SREF (newtext, pos_byte++);
2607               if (buf_multibyte)
2608                 MAKE_CHAR_MULTIBYTE (c);
2609             }
2610
2611           /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2612              or set IDX to a match index, which means put that part
2613              of the buffer text into SUBSTED.  */
2614
2615           if (c == '\\')
2616             {
2617               really_changed = 1;
2618
2619               if (str_multibyte)
2620                 {
2621                   FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext,
2622                                                       pos, pos_byte);
2623                   if (!buf_multibyte && !ASCII_CHAR_P (c))
2624                     c = CHAR_TO_BYTE8 (c);
2625                 }
2626               else
2627                 {
2628                   c = SREF (newtext, pos_byte++);
2629                   if (buf_multibyte)
2630                     MAKE_CHAR_MULTIBYTE (c);
2631                 }
2632
2633               if (c == '&')
2634                 idx = sub;
2635               else if (c >= '1' && c <= '9' && c - '0' < search_regs.num_regs)
2636                 {
2637                   if (search_regs.start[c - '0'] >= 1)
2638                     idx = c - '0';
2639                 }
2640               else if (c == '\\')
2641                 add_len = 1, add_stuff = (unsigned char *) "\\";
2642               else
2643                 {
2644                   xfree (substed);
2645                   error ("Invalid use of `\\' in replacement text");
2646                 }
2647             }
2648           else
2649             {
2650               add_len = CHAR_STRING (c, str);
2651               add_stuff = str;
2652             }
2653
2654           /* If we want to copy part of a previous match,
2655              set up ADD_STUFF and ADD_LEN to point to it.  */
2656           if (idx >= 0)
2657             {
2658               ptrdiff_t begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
2659               add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
2660               if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
2661                 move_gap_both (search_regs.start[idx], begbyte);
2662               add_stuff = BYTE_POS_ADDR (begbyte);
2663             }
2664
2665           /* Now the stuff we want to add to SUBSTED
2666              is invariably ADD_LEN bytes starting at ADD_STUFF.  */
2667
2668           /* Make sure SUBSTED is big enough.  */
2669           if (substed_alloc_size - substed_len < add_len)
2670             substed =
2671               xpalloc (substed, &substed_alloc_size,
2672                        add_len - (substed_alloc_size - substed_len),
2673                        STRING_BYTES_BOUND, 1);
2674
2675           /* Now add to the end of SUBSTED.  */
2676           if (add_stuff)
2677             {
2678               memcpy (substed + substed_len, add_stuff, add_len);
2679               substed_len += add_len;
2680             }
2681         }
2682
2683       if (really_changed)
2684         newtext = make_specified_string ((const char *) substed, -1,
2685                                          substed_len, buf_multibyte);
2686       xfree (substed);
2687     }
2688
2689   /* Replace the old text with the new in the cleanest possible way.  */
2690   replace_range (search_regs.start[sub], search_regs.end[sub],
2691                  newtext, 1, 0, 1);
2692   newpoint = search_regs.start[sub] + SCHARS (newtext);
2693
2694   if (case_action == all_caps)
2695     Fupcase_region (make_number (search_regs.start[sub]),
2696                     make_number (newpoint));
2697   else if (case_action == cap_initial)
2698     Fupcase_initials_region (make_number (search_regs.start[sub]),
2699                              make_number (newpoint));
2700
2701   /* Adjust search data for this change.  */
2702   {
2703     ptrdiff_t oldend = search_regs.end[sub];
2704     ptrdiff_t oldstart = search_regs.start[sub];
2705     ptrdiff_t change = newpoint - search_regs.end[sub];
2706     ptrdiff_t i;
2707
2708     for (i = 0; i < search_regs.num_regs; i++)
2709       {
2710         if (search_regs.start[i] >= oldend)
2711           search_regs.start[i] += change;
2712         else if (search_regs.start[i] > oldstart)
2713           search_regs.start[i] = oldstart;
2714         if (search_regs.end[i] >= oldend)
2715           search_regs.end[i] += change;
2716         else if (search_regs.end[i] > oldstart)
2717           search_regs.end[i] = oldstart;
2718       }
2719   }
2720
2721   /* Put point back where it was in the text.  */
2722   if (opoint <= 0)
2723     TEMP_SET_PT (opoint + ZV);
2724   else
2725     TEMP_SET_PT (opoint);
2726
2727   /* Now move point "officially" to the start of the inserted replacement.  */
2728   move_if_not_intangible (newpoint);
2729
2730   return Qnil;
2731 }
2732 \f
2733 static Lisp_Object
2734 match_limit (Lisp_Object num, bool beginningp)
2735 {
2736   EMACS_INT n;
2737
2738   CHECK_NUMBER (num);
2739   n = XINT (num);
2740   if (n < 0)
2741     args_out_of_range (num, make_number (0));
2742   if (search_regs.num_regs <= 0)
2743     error ("No match data, because no search succeeded");
2744   if (n >= search_regs.num_regs
2745       || search_regs.start[n] < 0)
2746     return Qnil;
2747   return (make_number ((beginningp) ? search_regs.start[n]
2748                                     : search_regs.end[n]));
2749 }
2750
2751 DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
2752        doc: /* Return position of start of text matched by last search.
2753 SUBEXP, a number, specifies which parenthesized expression in the last
2754   regexp.
2755 Value is nil if SUBEXPth pair didn't match, or there were less than
2756   SUBEXP pairs.
2757 Zero means the entire text matched by the whole regexp or whole string.  */)
2758   (Lisp_Object subexp)
2759 {
2760   return match_limit (subexp, 1);
2761 }
2762
2763 DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
2764        doc: /* Return position of end of text matched by last search.
2765 SUBEXP, a number, specifies which parenthesized expression in the last
2766   regexp.
2767 Value is nil if SUBEXPth pair didn't match, or there were less than
2768   SUBEXP pairs.
2769 Zero means the entire text matched by the whole regexp or whole string.  */)
2770   (Lisp_Object subexp)
2771 {
2772   return match_limit (subexp, 0);
2773 }
2774
2775 DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 3, 0,
2776        doc: /* Return a list containing all info on what the last search matched.
2777 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2778 All the elements are markers or nil (nil if the Nth pair didn't match)
2779 if the last match was on a buffer; integers or nil if a string was matched.
2780 Use `set-match-data' to reinstate the data in this list.
2781
2782 If INTEGERS (the optional first argument) is non-nil, always use
2783 integers \(rather than markers) to represent buffer positions.  In
2784 this case, and if the last match was in a buffer, the buffer will get
2785 stored as one additional element at the end of the list.
2786
2787 If REUSE is a list, reuse it as part of the value.  If REUSE is long
2788 enough to hold all the values, and if INTEGERS is non-nil, no consing
2789 is done.
2790
2791 If optional third arg RESEAT is non-nil, any previous markers on the
2792 REUSE list will be modified to point to nowhere.
2793
2794 Return value is undefined if the last search failed.  */)
2795   (Lisp_Object integers, Lisp_Object reuse, Lisp_Object reseat)
2796 {
2797   Lisp_Object tail, prev;
2798   Lisp_Object *data;
2799   ptrdiff_t i, len;
2800
2801   if (!NILP (reseat))
2802     for (tail = reuse; CONSP (tail); tail = XCDR (tail))
2803       if (MARKERP (XCAR (tail)))
2804         {
2805           unchain_marker (XMARKER (XCAR (tail)));
2806           XSETCAR (tail, Qnil);
2807         }
2808
2809   if (NILP (last_thing_searched))
2810     return Qnil;
2811
2812   prev = Qnil;
2813
2814   USE_SAFE_ALLOCA;
2815   SAFE_NALLOCA (data, 1, 2 * search_regs.num_regs + 1);
2816
2817   len = 0;
2818   for (i = 0; i < search_regs.num_regs; i++)
2819     {
2820       ptrdiff_t start = search_regs.start[i];
2821       if (start >= 0)
2822         {
2823           if (EQ (last_thing_searched, Qt)
2824               || ! NILP (integers))
2825             {
2826               XSETFASTINT (data[2 * i], start);
2827               XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
2828             }
2829           else if (BUFFERP (last_thing_searched))
2830             {
2831               data[2 * i] = Fmake_marker ();
2832               Fset_marker (data[2 * i],
2833                            make_number (start),
2834                            last_thing_searched);
2835               data[2 * i + 1] = Fmake_marker ();
2836               Fset_marker (data[2 * i + 1],
2837                            make_number (search_regs.end[i]),
2838                            last_thing_searched);
2839             }
2840           else
2841             /* last_thing_searched must always be Qt, a buffer, or Qnil.  */
2842             emacs_abort ();
2843
2844           len = 2 * i + 2;
2845         }
2846       else
2847         data[2 * i] = data[2 * i + 1] = Qnil;
2848     }
2849
2850   if (BUFFERP (last_thing_searched) && !NILP (integers))
2851     {
2852       data[len] = last_thing_searched;
2853       len++;
2854     }
2855
2856   /* If REUSE is not usable, cons up the values and return them.  */
2857   if (! CONSP (reuse))
2858     reuse = Flist (len, data);
2859   else
2860     {
2861       /* If REUSE is a list, store as many value elements as will fit
2862          into the elements of REUSE.  */
2863       for (i = 0, tail = reuse; CONSP (tail);
2864            i++, tail = XCDR (tail))
2865         {
2866           if (i < len)
2867             XSETCAR (tail, data[i]);
2868           else
2869             XSETCAR (tail, Qnil);
2870           prev = tail;
2871         }
2872
2873       /* If we couldn't fit all value elements into REUSE,
2874          cons up the rest of them and add them to the end of REUSE.  */
2875       if (i < len)
2876         XSETCDR (prev, Flist (len - i, data + i));
2877     }
2878
2879   SAFE_FREE ();
2880   return reuse;
2881 }
2882
2883 /* We used to have an internal use variant of `reseat' described as:
2884
2885       If RESEAT is `evaporate', put the markers back on the free list
2886       immediately.  No other references to the markers must exist in this
2887       case, so it is used only internally on the unwind stack and
2888       save-match-data from Lisp.
2889
2890    But it was ill-conceived: those supposedly-internal markers get exposed via
2891    the undo-list, so freeing them here is unsafe.  */
2892
2893 DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 2, 0,
2894        doc: /* Set internal data on last search match from elements of LIST.
2895 LIST should have been created by calling `match-data' previously.
2896
2897 If optional arg RESEAT is non-nil, make markers on LIST point nowhere.  */)
2898   (register Lisp_Object list, Lisp_Object reseat)
2899 {
2900   ptrdiff_t i;
2901   register Lisp_Object marker;
2902
       /* When asynchronous code is running, set the current match data
          aside (save_search_regs) before it is overwritten below.  */
2903   if (running_asynch_code)
2904     save_search_regs ();
2905
2906   CHECK_LIST (list);
2907
2908   /* Unless we find a marker with a buffer or an explicit buffer
2909      in LIST, assume that this match data came from a string.  */
2910   last_thing_searched = Qt;
2911
2912   /* Allocate registers if they don't already exist.  */
2913   {
         /* Each register takes two list elements (start and end), so LIST
            describes at most LENGTH registers.  */
2914     EMACS_INT length = XFASTINT (Flength (list)) / 2;
2915
2916     if (length > search_regs.num_regs)
2917       {
2918         ptrdiff_t num_regs = search_regs.num_regs;
2919         if (PTRDIFF_MAX < length)
2920           memory_full (SIZE_MAX);
             /* Grow the START array; xpalloc is handed &num_regs, and the
                resulting NUM_REGS is then used to size END and as the new
                register count.  */
2921         search_regs.start =
2922           xpalloc (search_regs.start, &num_regs, length - num_regs,
2923                    min (PTRDIFF_MAX, UINT_MAX), sizeof (regoff_t));
2924         search_regs.end =
2925           xrealloc (search_regs.end, num_regs * sizeof (regoff_t));
2926
             /* Mark the newly added registers as unmatched.  */
2927         for (i = search_regs.num_regs; i < num_regs; i++)
2928           search_regs.start[i] = -1;
2929
2930         search_regs.num_regs = num_regs;
2931       }
2932
         /* Walk LIST two elements at a time; I is the register being
            filled.  LIST is advanced inside the loop body.  */
2933     for (i = 0; CONSP (list); i++)
2934       {
2935         marker = XCAR (list);
             /* A buffer object in LIST names the buffer that was searched
                and ends the scan.  */
2936         if (BUFFERP (marker))
2937           {
2938             last_thing_searched = marker;
2939             break;
2940           }
2941         if (i >= length)
2942           break;
2943         if (NILP (marker))
2944           {
                 /* A nil start means register I did not match.  Step over
                    the start here; the end element is stepped over by the
                    XCDR at the bottom of the loop.  */
2945             search_regs.start[i] = -1;
2946             list = XCDR (list);
2947           }
2948         else
2949           {
2950             Lisp_Object from;
2951             Lisp_Object m;
2952
                 /* M keeps the original list element: MARKER itself may be
                    replaced by the integer 0 just below, and the RESEAT
                    step needs to know whether the element was a marker.  */
2953             m = marker;
2954             if (MARKERP (marker))
2955               {
                     /* A marker with no buffer is treated as position 0;
                        otherwise its buffer becomes the thing searched.  */
2956                 if (XMARKER (marker)->buffer == 0)
2957                   XSETFASTINT (marker, 0);
2958                 else
2959                   XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
2960               }
2961
                 /* After this check MARKER is read with XINT below, i.e. as
                    an integer position.  */
2962             CHECK_NUMBER_COERCE_MARKER (marker);
2963             from = marker;
2964
                 /* With RESEAT, detach the start marker and clear its slot
                    in LIST.  */
2965             if (!NILP (reseat) && MARKERP (m))
2966               {
2967                 unchain_marker (XMARKER (m));
2968                 XSETCAR (list, Qnil);
2969               }
2970
                 /* If LIST ends here (a start with no end element), stop
                    without storing register I; the loop after this one
                    marks it, and every register above it, as unmatched.  */
2971             if ((list = XCDR (list), !CONSP (list)))
2972               break;
2973
                 /* Now handle the end element of the pair the same way.  */
2974             m = marker = XCAR (list);
2975
2976             if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
2977               XSETFASTINT (marker, 0);
2978
2979             CHECK_NUMBER_COERCE_MARKER (marker);
                 /* Store the pair only if both values fit in regoff_t;
                    otherwise mark register I as unmatched.  */
2980             if ((XINT (from) < 0
2981                  ? TYPE_MINIMUM (regoff_t) <= XINT (from)
2982                  : XINT (from) <= TYPE_MAXIMUM (regoff_t))
2983                 && (XINT (marker) < 0
2984                     ? TYPE_MINIMUM (regoff_t) <= XINT (marker)
2985                     : XINT (marker) <= TYPE_MAXIMUM (regoff_t)))
2986               {
2987                 search_regs.start[i] = XINT (from);
2988                 search_regs.end[i] = XINT (marker);
2989               }
2990             else
2991               {
2992                 search_regs.start[i] = -1;
2993               }
2994
                 /* With RESEAT, detach the end marker and clear its slot
                    in LIST.  */
2995             if (!NILP (reseat) && MARKERP (m))
2996               {
2997                 unchain_marker (XMARKER (m));
2998                 XSETCAR (list, Qnil);
2999               }
3000           }
             /* Step past the second element of the pair just handled.  */
3001         list = XCDR (list);
3002       }
3003
         /* Registers that LIST did not describe are marked unmatched.  */
3004     for (; i < search_regs.num_regs; i++)
3005       search_regs.start[i] = -1;
3006   }
3007
3008   return Qnil;
3009 }
3010
3011 /* If true the match data have been saved in saved_search_regs
3012    during the execution of a sentinel or filter.  Set by
        save_search_regs and cleared by restore_search_regs.  */
3013 static bool search_regs_saved;
     /* Register count and start/end array pointers taken over from
        search_regs by save_search_regs; restore_search_regs puts them
        back into search_regs.  */
3014 static struct re_registers saved_search_regs;
     /* Value last_thing_searched had when the match data were saved;
        reset to Qnil by restore_search_regs.  */
3015 static Lisp_Object saved_last_thing_searched;
3016
3017 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
3018    if asynchronous code (filter or sentinel) is running. */
3019 static void
3020 save_search_regs (void)
3021 {
3022   if (!search_regs_saved)
3023     {
3024       saved_search_regs.num_regs = search_regs.num_regs;
3025       saved_search_regs.start = search_regs.start;
3026       saved_search_regs.end = search_regs.end;
3027       saved_last_thing_searched = last_thing_searched;
3028       last_thing_searched = Qnil;
3029       search_regs.num_regs = 0;
3030       search_regs.start = 0;
3031       search_regs.end = 0;
3032
3033       search_regs_saved = 1;
3034     }
3035 }
3036
3037 /* Called upon exit from filters and sentinels. */
3038 void
3039 restore_search_regs (void)
3040 {
3041   if (search_regs_saved)
3042     {
3043       if (search_regs.num_regs > 0)
3044         {
3045           xfree (search_regs.start);
3046           xfree (search_regs.end);
3047         }
3048       search_regs.num_regs = saved_search_regs.num_regs;
3049       search_regs.start = saved_search_regs.start;
3050       search_regs.end = saved_search_regs.end;
3051       last_thing_searched = saved_last_thing_searched;
3052       saved_last_thing_searched = Qnil;
3053       search_regs_saved = 0;
3054     }
3055 }
3056
3057 static void
3058 unwind_set_match_data (Lisp_Object list)
3059 {
3060   /* It is NOT ALWAYS safe to free (evaporate) the markers immediately.  */
3061   Fset_match_data (list, Qt);
3062 }
3063
3064 /* Called to unwind protect the match data.  */
3065 void
3066 record_unwind_save_match_data (void)
3067 {
3068   record_unwind_protect (unwind_set_match_data,
3069                          Fmatch_data (Qnil, Qnil, Qnil));
3070 }
3071
3072 /* Quote a string to deactivate reg-expr chars */
3073
3074 DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
3075        doc: /* Return a regexp string which matches exactly STRING and nothing else.  */)
3076   (Lisp_Object string)
3077 {
3078   char *in, *out, *end;
3079   char *temp;
3080   ptrdiff_t backslashes_added = 0;
3081
3082   CHECK_STRING (string);
3083
3084   USE_SAFE_ALLOCA;
3085   SAFE_NALLOCA (temp, 2, SBYTES (string));
3086
3087   /* Now copy the data into the new string, inserting escapes. */
3088
3089   in = SSDATA (string);
3090   end = in + SBYTES (string);
3091   out = temp;
3092
3093   for (; in != end; in++)
3094     {
3095       if (*in == '['
3096           || *in == '*' || *in == '.' || *in == '\\'
3097           || *in == '?' || *in == '+'
3098           || *in == '^' || *in == '$')
3099         *out++ = '\\', backslashes_added++;
3100       *out++ = *in;
3101     }
3102
3103   Lisp_Object result
3104     = make_specified_string (temp,
3105                              SCHARS (string) + backslashes_added,
3106                              out - temp,
3107                              STRING_MULTIBYTE (string));
3108   SAFE_FREE ();
3109   return result;
3110 }
3111
3112 /* Like find_newline, but doesn't use the cache, and only searches forward.

        Scan from START (byte position START_BYTE, or -1 to have it
        computed) toward END for COUNT newlines.  END == 0 means ZV when
        COUNT is positive and BEGV otherwise; END_BYTE == -1 means compute
        it from END.

        If the COUNTth newline is found, return the character position
        just after it and, when BYTEPOS is non-null, store the matching
        byte position in *BYTEPOS.  Otherwise return the position where
        scanning stopped, store the number of newlines still missing in
        *SHORTAGE and the byte position in *BYTEPOS (each only when the
        pointer is non-null).  No scanning is done unless COUNT is
        positive.

        immediate_quit is set to ALLOW_QUIT while scanning and reset to 0
        before returning.  */
3113 static ptrdiff_t
3114 find_newline1 (ptrdiff_t start, ptrdiff_t start_byte, ptrdiff_t end,
3115                ptrdiff_t end_byte, ptrdiff_t count, ptrdiff_t *shortage,
3116                ptrdiff_t *bytepos, bool allow_quit)
3117 {
       /* Fill in the default END when the caller passed 0.  */
3118   if (count > 0)
3119     {
3120       if (!end)
3121         end = ZV, end_byte = ZV_BYTE;
3122     }
3123   else
3124     {
3125       if (!end)
3126         end = BEGV, end_byte = BEGV_BYTE;
3127     }
3128   if (end_byte == -1)
3129     end_byte = CHAR_TO_BYTE (end);
3130
       /* Assume success until the scan runs out of text.  */
3131   if (shortage != 0)
3132     *shortage = 0;
3133
3134   immediate_quit = allow_quit;
3135
3136   if (count > 0)
3137     while (start != end)
3138       {
3139         /* Our innermost scanning loop is very simple; it doesn't know
3140            about gaps, buffer ends, or the newline cache.  ceiling is
3141            the position of the last character before the next such
3142            obstacle --- the last character the dumb search loop should
3143            examine.  */
3144         ptrdiff_t tem, ceiling_byte = end_byte - 1;
3145
3146         if (start_byte == -1)
3147           start_byte = CHAR_TO_BYTE (start);
3148
3149         /* The dumb loop can only scan text stored in contiguous
3150            bytes. BUFFER_CEILING_OF returns the last character
3151            position that is contiguous, so the ceiling is the
3152            position after that.  */
3153         tem = BUFFER_CEILING_OF (start_byte);
3154         ceiling_byte = min (tem, ceiling_byte);
3155
3156         {
3157           /* The termination address of the dumb loop.  */
3158           unsigned char *lim_addr = BYTE_POS_ADDR (ceiling_byte) + 1;
3159           ptrdiff_t lim_byte = ceiling_byte + 1;
3160
3161           /* Nonpositive offsets (relative to LIM_ADDR and LIM_BYTE)
3162              of the base, the cursor, and the next line.  */
3163           ptrdiff_t base = start_byte - lim_byte;
3164           ptrdiff_t cursor, next;
3165
3166           for (cursor = base; cursor < 0; cursor = next)
3167             {
3168               /* The dumb loop.  */
                   /* -CURSOR is the number of bytes left before LIM_ADDR.  */
3169               unsigned char *nl = memchr (lim_addr + cursor, '\n', - cursor);
3170               next = nl ? nl - lim_addr : 0;
3171
                   /* No newline in the rest of this stretch.  */
3172               if (! nl)
3173                 break;
                   /* Step past the newline just found.  */
3174               next++;
3175
3176               if (--count == 0)
3177                 {
                       /* Found the last newline wanted: report the position
                          just after it.  */
3178                   immediate_quit = 0;
3179                   if (bytepos)
3180                     *bytepos = lim_byte + next;
3181                   return BYTE_TO_CHAR (lim_byte + next);
3182                 }
3183             }
3184
               /* This stretch is exhausted; resume at the first byte
                  beyond it.  */
3185           start_byte = lim_byte;
3186           start = BYTE_TO_CHAR (start_byte);
3187         }
3188       }
3189
       /* Fewer than COUNT newlines were found (or COUNT was not positive):
          report how many are still missing and where we stopped.  */
3190   immediate_quit = 0;
3191   if (shortage)
3192     *shortage = count;
3193   if (bytepos)
3194     {
3195       *bytepos = start_byte == -1 ? CHAR_TO_BYTE (start) : start_byte;
3196       eassert (*bytepos == CHAR_TO_BYTE (start));
3197     }
3198   return start;
3199 }
3200
3201 DEFUN ("newline-cache-check", Fnewline_cache_check, Snewline_cache_check,
3202        0, 1, 0,
3203        doc: /* Check the newline cache of BUFFER against buffer contents.
3204
3205 BUFFER defaults to the current buffer.
3206
3207 Value is an array of 2 sub-arrays of buffer positions for newlines,
3208 the first based on the cache, the second based on actually scanning
3209 the buffer.  If the buffer doesn't have a cache, the value is nil.  */)
3210   (Lisp_Object buffer)
3211 {
3212   struct buffer *buf, *old = NULL;
3213   ptrdiff_t shortage, nl_count_cache, nl_count_buf;
3214   Lisp_Object cache_newlines, buf_newlines, val;
3215   ptrdiff_t from, found, i;
3216
3217   if (NILP (buffer))
3218     buf = current_buffer;
3219   else
3220     {
3221       CHECK_BUFFER (buffer);
3222       buf = XBUFFER (buffer);
3223       old = current_buffer;
3224     }
3225   if (buf->base_buffer)
3226     buf = buf->base_buffer;
3227
3228   /* If the buffer doesn't have a newline cache, return nil.  */
3229   if (NILP (BVAR (buf, cache_long_scans))
3230       || buf->newline_cache == NULL)
3231     return Qnil;
3232
3233   /* find_newline can only work on the current buffer.  */
3234   if (old != NULL)
3235     set_buffer_internal_1 (buf);
3236
3237   /* How many newlines are there according to the cache?  */
3238   find_newline (BEGV, BEGV_BYTE, ZV, ZV_BYTE,
3239                 TYPE_MAXIMUM (ptrdiff_t), &shortage, NULL, true);
3240   nl_count_cache = TYPE_MAXIMUM (ptrdiff_t) - shortage;
3241
3242   /* Create vector and populate it.  */
3243   cache_newlines = make_uninit_vector (nl_count_cache);
3244
3245   if (nl_count_cache)
3246     {
3247       for (from = BEGV, found = from, i = 0; from < ZV; from = found, i++)
3248         {
3249           ptrdiff_t from_byte = CHAR_TO_BYTE (from);
3250
3251           found = find_newline (from, from_byte, 0, -1, 1, &shortage,
3252                                 NULL, true);
3253           if (shortage != 0 || i >= nl_count_cache)
3254             break;
3255           ASET (cache_newlines, i, make_number (found - 1));
3256         }
3257       /* Fill the rest of slots with an invalid position.  */
3258       for ( ; i < nl_count_cache; i++)
3259         ASET (cache_newlines, i, make_number (-1));
3260     }
3261
3262   /* Now do the same, but without using the cache.  */
3263   find_newline1 (BEGV, BEGV_BYTE, ZV, ZV_BYTE,
3264                  TYPE_MAXIMUM (ptrdiff_t), &shortage, NULL, true);
3265   nl_count_buf = TYPE_MAXIMUM (ptrdiff_t) - shortage;
3266   buf_newlines = make_uninit_vector (nl_count_buf);
3267   if (nl_count_buf)
3268     {
3269       for (from = BEGV, found = from, i = 0; from < ZV; from = found, i++)
3270         {
3271           ptrdiff_t from_byte = CHAR_TO_BYTE (from);
3272
3273           found = find_newline1 (from, from_byte, 0, -1, 1, &shortage,
3274                                  NULL, true);
3275           if (shortage != 0 || i >= nl_count_buf)
3276             break;
3277           ASET (buf_newlines, i, make_number (found - 1));
3278         }
3279       for ( ; i < nl_count_buf; i++)
3280         ASET (buf_newlines, i, make_number (-1));
3281     }
3282
3283   /* Construct the value and return it.  */
3284   val = make_uninit_vector (2);
3285   ASET (val, 0, cache_newlines);
3286   ASET (val, 1, buf_newlines);
3287
3288   if (old != NULL)
3289     set_buffer_internal_1 (old);
3290   return val;
3291 }
3292 \f
3293 void
3294 syms_of_search (void)
3295 {
3296   register int i;
3297
3298   for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
3299     {
3300       searchbufs[i].buf.allocated = 100;
3301       searchbufs[i].buf.buffer = xmalloc (100);
3302       searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
3303       searchbufs[i].regexp = Qnil;
3304       searchbufs[i].whitespace_regexp = Qnil;
3305       searchbufs[i].syntax_table = Qnil;
3306       staticpro (&searchbufs[i].regexp);
3307       staticpro (&searchbufs[i].whitespace_regexp);
3308       staticpro (&searchbufs[i].syntax_table);
3309       searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
3310     }
3311   searchbuf_head = &searchbufs[0];
3312
3313   /* Error condition used for failing searches.  */
3314   DEFSYM (Qsearch_failed, "search-failed");
3315
3316   /* Error condition signaled when regexp compile_pattern fails.  */
3317   DEFSYM (Qinvalid_regexp, "invalid-regexp");
3318
3319   Fput (Qsearch_failed, Qerror_conditions,
3320         listn (CONSTYPE_PURE, 2, Qsearch_failed, Qerror));
3321   Fput (Qsearch_failed, Qerror_message,
3322         build_pure_c_string ("Search failed"));
3323
3324   Fput (Qinvalid_regexp, Qerror_conditions,
3325         listn (CONSTYPE_PURE, 2, Qinvalid_regexp, Qerror));
3326   Fput (Qinvalid_regexp, Qerror_message,
3327         build_pure_c_string ("Invalid regexp"));
3328
3329   last_thing_searched = Qnil;
3330   staticpro (&last_thing_searched);
3331
3332   saved_last_thing_searched = Qnil;
3333   staticpro (&saved_last_thing_searched);
3334
3335   DEFVAR_LISP ("search-spaces-regexp", Vsearch_spaces_regexp,
3336       doc: /* Regexp to substitute for bunches of spaces in regexp search.
3337 Some commands use this for user-specified regexps.
3338 Spaces that occur inside character classes or repetition operators
3339 or other such regexp constructs are not replaced with this.
3340 A value of nil (which is the normal value) means treat spaces literally.  */);
3341   Vsearch_spaces_regexp = Qnil;
3342
3343   DEFVAR_LISP ("inhibit-changing-match-data", Vinhibit_changing_match_data,
3344       doc: /* Internal use only.
3345 If non-nil, the primitive searching and matching functions
3346 such as `looking-at', `string-match', `re-search-forward', etc.,
3347 do not set the match data.  The proper way to use this variable
3348 is to bind it with `let' around a small expression.  */);
3349   Vinhibit_changing_match_data = Qnil;
3350
3351   defsubr (&Slooking_at);
3352   defsubr (&Sposix_looking_at);
3353   defsubr (&Sstring_match);
3354   defsubr (&Sposix_string_match);
3355   defsubr (&Ssearch_forward);
3356   defsubr (&Ssearch_backward);
3357   defsubr (&Sre_search_forward);
3358   defsubr (&Sre_search_backward);
3359   defsubr (&Sposix_search_forward);
3360   defsubr (&Sposix_search_backward);
3361   defsubr (&Sreplace_match);
3362   defsubr (&Smatch_beginning);
3363   defsubr (&Smatch_end);
3364   defsubr (&Smatch_data);
3365   defsubr (&Sset_match_data);
3366   defsubr (&Sregexp_quote);
3367   defsubr (&Snewline_cache_check);
3368 }