src/search.c

   1 /* String search routines for GNU Emacs.
   2
   3 Copyright (C) 1985-1987, 1993-1994, 1997-1999, 2001-2016 Free Software
   4 Foundation, Inc.
   5
   6 This file is part of GNU Emacs.
   7
   8 GNU Emacs is free software: you can redistribute it and/or modify
   9 it under the terms of the GNU General Public License as published by
  10 the Free Software Foundation, either version 3 of the License, or (at
  11 your option) any later version.
  12
  13 GNU Emacs is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
  20
  21
  22 #include <config.h>
  23
  24 #include "lisp.h"
  25 #include "character.h"
  26 #include "buffer.h"
  27 #include "syntax.h"
  28 #include "charset.h"
  29 #include "region-cache.h"
  30 #include "blockinput.h"
  31 #include "intervals.h"
  32
  33 #include <sys/types.h>
  34 #include "regex.h"
   35
      /* Number of entries in the compiled-regexp cache, i.e. the length
         of the searchbufs array.  */
   36 #define REGEXP_CACHE_SIZE 20
   37
   38 /* One entry of the compiled-regexp cache.
   39    If the regexp is non-nil, then the buffer contains the compiled form
         of that regexp, suitable for searching.  */
   40 struct regexp_cache
   41 {
        /* Next entry in the list that starts at searchbuf_head.  */
   42   struct regexp_cache *next;
        /* REGEXP is a copy of the pattern string this entry was compiled
           from, or nil if the entry holds no valid compiled pattern.
           WHITESPACE_REGEXP is the value Vsearch_spaces_regexp had at
           compile time if that was a string, and nil otherwise.  */
   43   Lisp_Object regexp, whitespace_regexp;
   44   /* Syntax table for which the regexp applies.  We need this because
   45      of character classes.  If this is t, then the compiled pattern is valid
   46      for any syntax-table.  */
   47   Lisp_Object syntax_table;
        /* The compiled pattern itself.  */
   48   struct re_pattern_buffer buf;
        /* 0400 is octal, i.e. 256 bytes.  Presumably the fastmap storage
           for BUF; nothing in view attaches it -- TODO confirm where it
           is set up.  */
   49   char fastmap[0400];
   50   /* True means regexp was compiled to do full POSIX backtracking.  */
   51   bool posix;
   52 };
  53
   54 /* Storage for the cache entries (the instances of struct regexp_cache).  */
   55 static struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
   56
   57 /* The head of the linked list; points to the most recently used buffer.
         compile_pattern moves the entry it returns to the front of this list.  */
   58 static struct regexp_cache *searchbuf_head;
  59
  60
   61 /* Every call to re_match, etc., must pass &search_regs as the regs
   62    argument unless you can show it is unnecessary (i.e., if re_match
   63    is certainly going to be called again before region-around-match
   64    can be called).
   65
   66    Since the registers are now dynamically allocated, we need to make
   67    sure not to refer to the Nth register before checking that it has
   68    been allocated by checking search_regs.num_regs.
   69
   70    The regex code keeps track of whether it has allocated the search
   71    buffer using bits in the re_pattern_buffer.  This means that whenever
   72    you compile a new pattern, it completely forgets whether it has
   73    allocated any registers, and will allocate new registers the next
   74    time you call a searching or matching function.  Therefore, we need
   75    to call re_set_registers after compiling a new pattern or after
   76    setting the match registers, so that the regex functions will be
   77    able to free or re-allocate it properly.

         looking_at_1 and string_match_1 let the matcher store byte offsets
         here and then convert them in place to character positions.  */
   78 static struct re_registers search_regs;
   79
   80 /* The buffer in which the last search was performed, or
   81    Qt if the last search was done in a string;
   82    Qnil if no searching has been done yet.
         looking_at_1 and string_match_1 set it only when
         Vinhibit_changing_match_data is nil.  */
   83 static Lisp_Object last_thing_searched;
   84
      /* Prototypes for functions that are static to this file.  */
   85 static void set_search_regs (ptrdiff_t, ptrdiff_t);
   86 static void save_search_regs (void);
   87 static EMACS_INT simple_search (EMACS_INT, unsigned char *, ptrdiff_t,
   88                                 ptrdiff_t, Lisp_Object, ptrdiff_t, ptrdiff_t,
   89                                 ptrdiff_t, ptrdiff_t);
   90 static EMACS_INT boyer_moore (EMACS_INT, unsigned char *, ptrdiff_t,
   91                               Lisp_Object, Lisp_Object, ptrdiff_t,
   92                               ptrdiff_t, int);
   93 static EMACS_INT search_buffer (Lisp_Object, ptrdiff_t, ptrdiff_t,
   94                                 ptrdiff_t, ptrdiff_t, EMACS_INT, int,
   95                                 Lisp_Object, Lisp_Object, bool);
   96
      /* Signal a Lisp error saying that the regexp matcher overflowed its
         stack.  The callers in this file invoke it when re_match_2 or
         re_search returns -2.  Does not return.  */
   97 static _Noreturn void
   98 matcher_overflow (void)
   99 {
  100   error ("Stack overflow in regexp matcher");
  101 }
 102
 103 /* Compile a regexp and signal a Lisp error if anything goes wrong.
 104    PATTERN is the pattern to compile.
 105    CP is the place to put the result.
 106    TRANSLATE is a translation table for ignoring case, or nil for none.
 107    POSIX is true if we want full backtracking (POSIX style) for this pattern.
 108    False means backtrack only enough to get a valid match.
 109
 110    The behavior also depends on Vsearch_spaces_regexp.  */
 111
 112 static void
 113 compile_pattern_1 (struct regexp_cache *cp, Lisp_Object pattern,
 114                    Lisp_Object translate, bool posix)
 115 {
 116   const char *whitespace_regexp;
 117   char *val;
 118
 119   cp->regexp = Qnil;
 120   cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
 121   cp->posix = posix;
 122   cp->buf.multibyte = STRING_MULTIBYTE (pattern);
 123   cp->buf.charset_unibyte = charset_unibyte;
 124   if (STRINGP (Vsearch_spaces_regexp))
 125     cp->whitespace_regexp = Vsearch_spaces_regexp;
 126   else
 127     cp->whitespace_regexp = Qnil;
 128
 129   /* rms: I think BLOCK_INPUT is not needed here any more,
 130      because regex.c defines malloc to call xmalloc.
 131      Using BLOCK_INPUT here means the debugger won't run if an error occurs.
 132      So let's turn it off.  */
 133   /*  BLOCK_INPUT;  */
 134
 135   whitespace_regexp = STRINGP (Vsearch_spaces_regexp) ?
 136     SSDATA (Vsearch_spaces_regexp) : NULL;
 137
 138   val = (char *) re_compile_pattern (SSDATA (pattern), SBYTES (pattern),
 139                                      posix, whitespace_regexp, &cp->buf);
 140
 141   /* If the compiled pattern hard codes some of the contents of the
 142      syntax-table, it can only be reused with *this* syntax table.  */
 143   cp->syntax_table = cp->buf.used_syntax ? BVAR (current_buffer, syntax_table) : Qt;
 144
 145   /* unblock_input ();  */
 146   if (val)
 147     xsignal1 (Qinvalid_regexp, build_string (val));
 148
 149   cp->regexp = Fcopy_sequence (pattern);
 150 }
 151
 152 /* Shrink each compiled regexp buffer in the cache
 153    to the size actually used right now.
 154    This is called from garbage collection.  */
 155
 156 void
 157 shrink_regexp_cache (void)
 158 {
 159   struct regexp_cache *cp;
 160
 161   for (cp = searchbuf_head; cp != 0; cp = cp->next)
 162     {
 163       cp->buf.allocated = cp->buf.used;
 164       cp->buf.buffer = xrealloc (cp->buf.buffer, cp->buf.used);
 165     }
 166 }
 167
 168 /* Clear the regexp cache w.r.t. a particular syntax table,
 169    because it was changed.
 170    There is no danger of memory leak here because re_compile_pattern
 171    automagically manages the memory in each re_pattern_buffer struct,
 172    based on its `allocated' and `buffer' values.  */
 173 void
 174 clear_regexp_cache (void)
 175 {
 176   int i;
 177
 178   for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
 179     /* It's tempting to compare with the syntax-table we've actually changed,
 180        but it's not sufficient because char-table inheritance means that
 181        modifying one syntax-table can change others at the same time.  */
 182     if (!EQ (searchbufs[i].syntax_table, Qt))
 183       searchbufs[i].regexp = Qnil;
 184 }
 185
  186 /* Compile a regexp if necessary, but first check to see if there's one in
  187    the cache.
  188    PATTERN is the pattern to compile.
  189    TRANSLATE is a translation table for ignoring case, or nil for none.
  190    REGP is the structure that says where to store the "register"
  191    values that will result from matching this pattern.
  192    If it is 0, we should compile the pattern not to record any
  193    subexpression bounds.
  194    POSIX is true if we want full backtracking (POSIX style) for this pattern.
  195    False means backtrack only enough to get a valid match.
         MULTIBYTE says whether the text to be matched is multibyte; it is
         stored in the pattern buffer's target_multibyte field on every call.

         Return a pointer to the pattern buffer inside the cache entry that
         was found or (re)compiled.  That entry is moved to the front of the
         list headed by searchbuf_head.  */
  196
  197 struct re_pattern_buffer *
  198 compile_pattern (Lisp_Object pattern, struct re_registers *regp,
  199                  Lisp_Object translate, bool posix, bool multibyte)
  200 {
  201   struct regexp_cache *cp, **cpp;
  202
        /* CPP always points at the link (searchbuf_head or some entry's
           `next' field) that refers to CP, so CP can be unlinked below.  */
  203   for (cpp = &searchbuf_head; ; cpp = &cp->next)
  204     {
  205       cp = *cpp;
  206       /* Entries are initialized to nil, and may be set to nil by
  207          compile_pattern_1 if the pattern isn't valid.  Don't apply
  208          string accessors in those cases.  However, compile_pattern_1
  209          is only applied to the cache entry we pick here to reuse.  So
  210          nil should never appear before a non-nil entry.  */
            /* NOTE(review): clear_regexp_cache sets `regexp' to nil on any
               entry that depends on a syntax table, wherever it sits in the
               list, so a nil entry apparently can precede non-nil ones.
               That looks like it only causes an earlier recompile here, not
               a wrong match -- confirm.  */
  211       if (NILP (cp->regexp))
  212         goto compile_it;
            /* A cache hit requires the same pattern text and multibyteness,
               the same translation table, POSIX flag, whitespace regexp and
               unibyte charset, and a syntax table that is either t or the
               current buffer's.  */
  213       if (SCHARS (cp->regexp) == SCHARS (pattern)
  214           && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
  215           && !NILP (Fstring_equal (cp->regexp, pattern))
  216           && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
  217           && cp->posix == posix
  218           && (EQ (cp->syntax_table, Qt)
  219               || EQ (cp->syntax_table, BVAR (current_buffer, syntax_table)))
  220           && !NILP (Fequal (cp->whitespace_regexp, Vsearch_spaces_regexp))
  221           && cp->buf.charset_unibyte == charset_unibyte)
  222         break;
  223
  224       /* If we're at the end of the cache, compile into the nil cell
  225          we found, or the last (least recently used) cell with a
  226          string value.  */
  227       if (cp->next == 0)
  228         {
              /* The nil-entry check above jumps straight to this label.  */
  229         compile_it:
  230           compile_pattern_1 (cp, pattern, translate, posix);
  231           break;
  232         }
  233     }
  234
  235   /* When we get here, cp (aka *cpp) contains the compiled pattern,
  236      either because we found it in the cache or because we just compiled it.
  237      Move it to the front of the queue to mark it as most recently used.  */
        /* Unlink CP first, then push it at the head; the order matters when
           CP already is the head (CPP is then &searchbuf_head).  */
  238   *cpp = cp->next;
  239   cp->next = searchbuf_head;
  240   searchbuf_head = cp;
  241
  242   /* Advise the searching functions about the space we have allocated
  243      for register data.  */
  244   if (regp)
  245     re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
  246
  247   /* The compiled pattern can be used both for multibyte and unibyte
  248      target.  But, we have to tell which the pattern is used for. */
  249   cp->buf.target_multibyte = multibyte;
  250
  251   return &cp->buf;
  252 }
 253
  254 \f
      /* Subroutine of Flooking_at and Fposix_looking_at.
         Match the regexp STRING against the visible portion of the current
         buffer, starting at point.  Return t if it matches there, nil
         otherwise.  POSIX is passed on to compile_pattern to select
         POSIX-style backtracking.
         Unless Vinhibit_changing_match_data is non-nil, a successful match
         leaves character positions in search_regs and records the current
         buffer in last_thing_searched.  A matcher result of -2 is reported
         through matcher_overflow.  */
  255 static Lisp_Object
  256 looking_at_1 (Lisp_Object string, bool posix)
  257 {
  258   Lisp_Object val;
  259   unsigned char *p1, *p2;
  260   ptrdiff_t s1, s2;
  261   register ptrdiff_t i;
  262   struct re_pattern_buffer *bufp;
  263
  264   if (running_asynch_code)
  265     save_search_regs ();
  266
  267   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
  268   set_char_table_extras (BVAR (current_buffer, case_canon_table), 2,
  269                          BVAR (current_buffer, case_eqv_table));
  270
  271   CHECK_STRING (string);
        /* Registers are requested only if the match data may be changed;
           case folding uses the buffer's canonicalization table.  */
  272   bufp = compile_pattern (string,
  273                           (NILP (Vinhibit_changing_match_data)
  274                            ? &search_regs : NULL),
  275                           (!NILP (BVAR (current_buffer, case_fold_search))
  276                            ? BVAR (current_buffer, case_canon_table) : Qnil),
  277                           posix,
  278                           !NILP (BVAR (current_buffer, enable_multibyte_characters)));
  279
  280   immediate_quit = 1;
  281   QUIT;                 /* Do a pending quit right away, to avoid paradoxical behavior */
  282
  283   /* Get pointers and sizes of the two strings
  284      that make up the visible portion of the buffer. */
  285
  286   p1 = BEGV_ADDR;
  287   s1 = GPT_BYTE - BEGV_BYTE;
  288   p2 = GAP_END_ADDR;
  289   s2 = ZV_BYTE - GPT_BYTE;
        /* S1 < 0 means GPT_BYTE is below BEGV_BYTE (the gap lies before the
           visible text): treat all visible text as the second string.  */
  290   if (s1 < 0)
  291     {
  292       p2 = p1;
  293       s2 = ZV_BYTE - BEGV_BYTE;
  294       s1 = 0;
  295     }
        /* S2 < 0 means GPT_BYTE is above ZV_BYTE (the gap lies after the
           visible text): treat all visible text as the first string.  */
  296   if (s2 < 0)
  297     {
  298       s1 = ZV_BYTE - BEGV_BYTE;
  299       s2 = 0;
  300     }
  301
  302   re_match_object = Qnil;
  303
        /* Match at point only; positions are byte offsets from BEGV_BYTE.  */
  304   i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
  305                   PT_BYTE - BEGV_BYTE,
  306                   (NILP (Vinhibit_changing_match_data)
  307                    ? &search_regs : NULL),
  308                   ZV_BYTE - BEGV_BYTE);
  309   immediate_quit = 0;
  310
  311   if (i == -2)
  312     matcher_overflow ();
  313
  314   val = (i >= 0 ? Qt : Qnil);
  315   if (NILP (Vinhibit_changing_match_data) && i >= 0)
  316   {
          /* I is reused as the loop index; VAL already holds the result.
             The registers hold byte offsets relative to BEGV_BYTE, so
             convert them to character positions.  */
  317     for (i = 0; i < search_regs.num_regs; i++)
  318       if (search_regs.start[i] >= 0)
  319         {
  320           search_regs.start[i]
  321             = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
  322          search_regs.end[i]
  323            = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
  324        }
  325     /* Set last_thing_searched only when match data is changed.  */
  326     XSETBUFFER (last_thing_searched, current_buffer);
  327   }
  328
  329   return val;
  330 }
  331
      /* Lisp primitive `looking-at': looking_at_1 with POSIX backtracking
         turned off.  */
  332 DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
  333        doc: /* Return t if text after point matches regular expression REGEXP.
  334 This function modifies the match data that `match-beginning',
  335 `match-end' and `match-data' access; save and restore the match
  336 data if you want to preserve them.  */)
  337   (Lisp_Object regexp)
  338 {
  339   return looking_at_1 (regexp, 0);
  340 }
  341
      /* Lisp primitive `posix-looking-at': looking_at_1 with POSIX
         backtracking turned on.  */
  342 DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
  343        doc: /* Return t if text after point matches regular expression REGEXP.
  344 Find the longest match, in accord with Posix regular expression rules.
  345 This function modifies the match data that `match-beginning',
  346 `match-end' and `match-data' access; save and restore the match
  347 data if you want to preserve them.  */)
  348   (Lisp_Object regexp)
  349 {
  350   return looking_at_1 (regexp, 1);
  351 }
  352 \f
      /* Subroutine of Fstring_match and Fposix_string_match.
         Search STRING for REGEXP, starting at character index START (nil
         means 0; a negative START no smaller than minus the length of
         STRING counts back from its end; any other out-of-range START goes
         to args_out_of_range).  Return the character index of the start of
         the match as a Lisp integer, or nil if nothing matched.  POSIX is
         passed on to compile_pattern to select POSIX-style backtracking.
         Unless Vinhibit_changing_match_data is non-nil, set
         last_thing_searched to Qt and, on a match, leave character indices
         in search_regs.  A search result of -2 is reported through
         matcher_overflow.  */
  353 static Lisp_Object
  354 string_match_1 (Lisp_Object regexp, Lisp_Object string, Lisp_Object start,
  355                 bool posix)
  356 {
  357   ptrdiff_t val;
  358   struct re_pattern_buffer *bufp;
  359   EMACS_INT pos;
  360   ptrdiff_t pos_byte, i;
  361
  362   if (running_asynch_code)
  363     save_search_regs ();
  364
  365   CHECK_STRING (regexp);
  366   CHECK_STRING (string);
  367
  368   if (NILP (start))
  369     pos = 0, pos_byte = 0;
  370   else
  371     {
  372       ptrdiff_t len = SCHARS (string);
  373
  374       CHECK_NUMBER (start);
  375       pos = XINT (start);
            /* A negative START within range is an offset from the end.  */
  376       if (pos < 0 && -pos <= len)
  377         pos = len + pos;
  378       else if (0 > pos || pos > len)
  379         args_out_of_range (string, start);
  380       pos_byte = string_char_to_byte (string, pos);
  381     }
  382
  383   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
  384   set_char_table_extras (BVAR (current_buffer, case_canon_table), 2,
  385                          BVAR (current_buffer, case_eqv_table));
  386
        /* Case folding follows the current buffer's case-fold-search even
           though the text searched is a string.  */
  387   bufp = compile_pattern (regexp,
  388                           (NILP (Vinhibit_changing_match_data)
  389                            ? &search_regs : NULL),
  390                           (!NILP (BVAR (current_buffer, case_fold_search))
  391                            ? BVAR (current_buffer, case_canon_table) : Qnil),
  392                           posix,
  393                           STRING_MULTIBYTE (string));
  394   immediate_quit = 1;
  395   re_match_object = string;
  396
        /* Search forward from POS_BYTE through the rest of STRING.  */
  397   val = re_search (bufp, SSDATA (string),
  398                    SBYTES (string), pos_byte,
  399                    SBYTES (string) - pos_byte,
  400                    (NILP (Vinhibit_changing_match_data)
  401                     ? &search_regs : NULL));
  402   immediate_quit = 0;
  403
  404   /* Set last_thing_searched only when match data is changed.  */
        /* This assignment comes before VAL is examined, so it also happens
           when the search failed or overflowed.  */
  405   if (NILP (Vinhibit_changing_match_data))
  406     last_thing_searched = Qt;
  407
  408   if (val == -2)
  409     matcher_overflow ();
  410   if (val < 0) return Qnil;
  411
        /* The registers hold byte offsets into STRING; convert them to
           character indices.  */
  412   if (NILP (Vinhibit_changing_match_data))
  413     for (i = 0; i < search_regs.num_regs; i++)
  414       if (search_regs.start[i] >= 0)
  415         {
  416           search_regs.start[i]
  417             = string_byte_to_char (string, search_regs.start[i]);
  418           search_regs.end[i]
  419             = string_byte_to_char (string, search_regs.end[i]);
  420         }
  421
  422   return make_number (string_byte_to_char (string, val));
  423 }
  424
      /* Lisp primitive `string-match': string_match_1 with POSIX
         backtracking turned off.  */
  425 DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
  426        doc: /* Return index of start of first match for REGEXP in STRING, or nil.
  427 Matching ignores case if `case-fold-search' is non-nil.
  428 If third arg START is non-nil, start search at that index in STRING.
  429 For index of first char beyond the match, do (match-end 0).
  430 `match-end' and `match-beginning' also give indices of substrings
  431 matched by parenthesis constructs in the pattern.
  432
  433 You can use the function `match-string' to extract the substrings
  434 matched by the parenthesis constructions in REGEXP. */)
  435   (Lisp_Object regexp, Lisp_Object string, Lisp_Object start)
  436 {
  437   return string_match_1 (regexp, string, start, 0);
  438 }
  439
      /* Lisp primitive `posix-string-match': string_match_1 with POSIX
         backtracking turned on.  */
  440 DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
  441        doc: /* Return index of start of first match for REGEXP in STRING, or nil.
  442 Find the longest match, in accord with Posix regular expression rules.
  443 Case is ignored if `case-fold-search' is non-nil in the current buffer.
  444 If third arg START is non-nil, start search at that index in STRING.
  445 For index of first char beyond the match, do (match-end 0).
  446 `match-end' and `match-beginning' also give indices of substrings
  447 matched by parenthesis constructs in the pattern.  */)
  448   (Lisp_Object regexp, Lisp_Object string, Lisp_Object start)
  449 {
  450   return string_match_1 (regexp, string, start, 1);
  451 }
 452
 453 /* Match REGEXP against STRING using translation table TABLE,
 454    searching all of STRING, and return the index of the match,
 455    or negative on failure.  This does not clobber the match data.  */
 456
 457 ptrdiff_t
 458 fast_string_match_internal (Lisp_Object regexp, Lisp_Object string,
 459                             Lisp_Object table)
 460 {
 461   ptrdiff_t val;
 462   struct re_pattern_buffer *bufp;
 463
 464   bufp = compile_pattern (regexp, 0, table,
 465                           0, STRING_MULTIBYTE (string));
 466   immediate_quit = 1;
 467   re_match_object = string;
 468
 469   val = re_search (bufp, SSDATA (string),
 470                    SBYTES (string), 0,
 471                    SBYTES (string), 0);
 472   immediate_quit = 0;
 473   return val;
 474 }
 475
 476 /* Match REGEXP against STRING, searching all of STRING ignoring case,
 477    and return the index of the match, or negative on failure.
 478    This does not clobber the match data.
 479    We assume that STRING contains single-byte characters.  */
 480
 481 ptrdiff_t
 482 fast_c_string_match_ignore_case (Lisp_Object regexp,
 483                                  const char *string, ptrdiff_t len)
 484 {
 485   ptrdiff_t val;
 486   struct re_pattern_buffer *bufp;
 487
 488   regexp = string_make_unibyte (regexp);
 489   re_match_object = Qt;
 490   bufp = compile_pattern (regexp, 0,
 491                           Vascii_canon_table, 0,
 492                           0);
 493   immediate_quit = 1;
 494   val = re_search (bufp, string, len, 0, len, 0);
 495   immediate_quit = 0;
 496   return val;
 497 }
 498
  499 /* Match REGEXP against the characters after POS to LIMIT, and return
  500    the number of matched characters.  If STRING is non-nil, match
  501    against the characters in it.  In that case, POS and LIMIT are
  502    indices into the string.  This function doesn't modify the match
  503    data.
         POS_BYTE and LIMIT_BYTE are the byte positions for POS and LIMIT;
         a negative value means compute it here from POS or LIMIT.
         The pattern is compiled with no translation table and without
         POSIX backtracking.
         NOTE(review): the value returned is re_match_2's result passed
         through unchanged, so a failed match presumably comes back as a
         negative number and the length is presumably in bytes -- confirm
         against re_match_2 and the callers.  */
  504
  505 ptrdiff_t
  506 fast_looking_at (Lisp_Object regexp, ptrdiff_t pos, ptrdiff_t pos_byte,
  507                  ptrdiff_t limit, ptrdiff_t limit_byte, Lisp_Object string)
  508 {
  509   bool multibyte;
  510   struct re_pattern_buffer *buf;
  511   unsigned char *p1, *p2;
  512   ptrdiff_t s1, s2;
  513   ptrdiff_t len;
  514
  515   if (STRINGP (string))
  516     {
  517       if (pos_byte < 0)
  518         pos_byte = string_char_to_byte (string, pos);
  519       if (limit_byte < 0)
  520         limit_byte = string_char_to_byte (string, limit);
            /* The whole string is handed over as the second text; the
               first one is empty.  */
  521       p1 = NULL;
  522       s1 = 0;
  523       p2 = SDATA (string);
  524       s2 = SBYTES (string);
  525       re_match_object = string;
  526       multibyte = STRING_MULTIBYTE (string);
  527     }
  528   else
  529     {
  530       if (pos_byte < 0)
  531         pos_byte = CHAR_TO_BYTE (pos);
  532       if (limit_byte < 0)
  533         limit_byte = CHAR_TO_BYTE (limit);
            /* Make both positions byte offsets relative to BEGV_BYTE.  */
  534       pos_byte -= BEGV_BYTE;
  535       limit_byte -= BEGV_BYTE;
  536       p1 = BEGV_ADDR;
  537       s1 = GPT_BYTE - BEGV_BYTE;
  538       p2 = GAP_END_ADDR;
  539       s2 = ZV_BYTE - GPT_BYTE;
            /* S1 < 0: GPT_BYTE is below BEGV_BYTE, so all of the text from
               BEGV to ZV goes into the second string.  */
  540       if (s1 < 0)
  541         {
  542           p2 = p1;
  543           s2 = ZV_BYTE - BEGV_BYTE;
  544           s1 = 0;
  545         }
            /* S2 < 0: GPT_BYTE is above ZV_BYTE, so all of the text from
               BEGV to ZV goes into the first string.  */
  546       if (s2 < 0)
  547         {
  548           s1 = ZV_BYTE - BEGV_BYTE;
  549           s2 = 0;
  550         }
  551       re_match_object = Qnil;
  552       multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
  553     }
  554
  555   buf = compile_pattern (regexp, 0, Qnil, 0, multibyte);
  556   immediate_quit = 1;
        /* No registers are passed (NULL), so the match data is untouched.  */
  557   len = re_match_2 (buf, (char *) p1, s1, (char *) p2, s2,
  558                     pos_byte, NULL, limit_byte);
  559   immediate_quit = 0;
  560
  561   return len;
  562 }
 563
 564 \f
 565 /* The newline cache: remembering which sections of text have no newlines.  */
 566
 567 /* If the user has requested the long scans caching, make sure it's on.
 568    Otherwise, make sure it's off.
 569    This is our cheezy way of associating an action with the change of
 570    state of a buffer-local variable.  */
 571 static struct region_cache *
 572 newline_cache_on_off (struct buffer *buf)
 573 {
 574   struct buffer *base_buf = buf;
 575   bool indirect_p = false;
 576
 577   if (buf->base_buffer)
 578     {
 579       base_buf = buf->base_buffer;
 580       indirect_p = true;
 581     }
 582
 583   /* Don't turn on or off the cache in the base buffer, if the value
 584      of cache-long-scans of the base buffer is inconsistent with that.
 585      This is because doing so will just make the cache pure overhead,
 586      since if we turn it on via indirect buffer, it will be
 587      immediately turned off by its base buffer.  */
 588   if (NILP (BVAR (buf, cache_long_scans)))
 589     {
 590       if (!indirect_p
 591           || NILP (BVAR (base_buf, cache_long_scans)))
 592         {
 593           /* It should be off.  */
 594           if (base_buf->newline_cache)
 595             {
 596               free_region_cache (base_buf->newline_cache);
 597               base_buf->newline_cache = 0;
 598             }
 599         }
 600       return NULL;
 601     }
 602   else
 603     {
 604       if (!indirect_p
 605           || !NILP (BVAR (base_buf, cache_long_scans)))
 606         {
 607           /* It should be on.  */
 608           if (base_buf->newline_cache == 0)
 609             base_buf->newline_cache = new_region_cache ();
 610         }
 611       return base_buf->newline_cache;
 612     }
 613 }
 614
 615 \f
 616 /* Search for COUNT newlines between START/START_BYTE and END/END_BYTE.
 617
 618    If COUNT is positive, search forwards; END must be >= START.
 619    If COUNT is negative, search backwards for the -COUNTth instance;
 620       END must be <= START.
 621    If COUNT is zero, do anything you please; run rogue, for all I care.
 622
 623    If END is zero, use BEGV or ZV instead, as appropriate for the
 624    direction indicated by COUNT.
 625
 626    If we find COUNT instances, set *SHORTAGE to zero, and return the
 627    position past the COUNTth match.  Note that for reverse motion
 628    this is not the same as the usual convention for Emacs motion commands.
 629
 630    If we don't find COUNT instances before reaching END, set *SHORTAGE
 631    to the number of newlines left unfound, and return END.
 632
 633    If BYTEPOS is not NULL, set *BYTEPOS to the byte position corresponding
 634    to the returned character position.
 635
 636    If ALLOW_QUIT, set immediate_quit.  That's good to do
 637    except when inside redisplay.  */
 638
 639 ptrdiff_t
 640 find_newline (ptrdiff_t start, ptrdiff_t start_byte, ptrdiff_t end,
 641               ptrdiff_t end_byte, ptrdiff_t count, ptrdiff_t *shortage,
 642               ptrdiff_t *bytepos, bool allow_quit)
 643 {
 644   struct region_cache *newline_cache;
 645   int direction;
 646   struct buffer *cache_buffer;
 647
 648   if (count > 0)
 649     {
 650       direction = 1;
 651       if (!end)
 652         end = ZV, end_byte = ZV_BYTE;
 653     }
 654   else
 655     {
 656       direction = -1;
 657       if (!end)
 658         end = BEGV, end_byte = BEGV_BYTE;
 659     }
 660   if (end_byte == -1)
 661     end_byte = CHAR_TO_BYTE (end);
 662
 663   newline_cache = newline_cache_on_off (current_buffer);
 664   if (current_buffer->base_buffer)
 665     cache_buffer = current_buffer->base_buffer;
 666   else
 667     cache_buffer = current_buffer;
 668
 669   if (shortage != 0)
 670     *shortage = 0;
 671
 672   immediate_quit = allow_quit;
 673
 674   if (count > 0)
 675     while (start != end)
 676       {
 677         /* Our innermost scanning loop is very simple; it doesn't know
 678            about gaps, buffer ends, or the newline cache.  ceiling is
 679            the position of the last character before the next such
 680            obstacle --- the last character the dumb search loop should
 681            examine.  */
 682         ptrdiff_t tem, ceiling_byte = end_byte - 1;
 683
 684         /* If we're using the newline cache, consult it to see whether
 685            we can avoid some scanning.  */
 686         if (newline_cache)
 687           {
 688             ptrdiff_t next_change;
 689             int result = 1;
 690
 691             immediate_quit = 0;
 692             while (start < end && result)
 693               {
 694                 ptrdiff_t lim1;
 695
 696                 result = region_cache_forward (cache_buffer, newline_cache,
 697                                                start, &next_change);
 698                 if (result)
 699                   {
 700                     /* When the cache revalidation is deferred,
 701                        next-change might point beyond ZV, which will
 702                        cause assertion violation in CHAR_TO_BYTE below.
 703                        Limit next_change to ZV to avoid that.  */
 704                     if (next_change > ZV)
 705                       next_change = ZV;
 706                     start = next_change;
 707                     lim1 = next_change = end;
 708                   }
 709                 else
 710                   lim1 = min (next_change, end);
 711
 712                 /* The cache returned zero for this region; see if
 713                    this is because the region is known and includes
 714                    only newlines.  While at that, count any newlines
 715                    we bump into, and exit if we found enough off them.  */
 716                 start_byte = CHAR_TO_BYTE (start);
 717                 while (start < lim1
 718                        && FETCH_BYTE (start_byte) == '\n')
 719                   {
 720                     start_byte++;
 721                     start++;
 722                     if (--count == 0)
 723                       {
 724                         if (bytepos)
 725                           *bytepos = start_byte;
 726                         return start;
 727                       }
 728                   }
 729                 /* If we found a non-newline character before hitting
 730                    position where the cache will again return non-zero
 731                    (i.e. no newlines beyond that position), it means
 732                    this region is not yet known to the cache, and we
 733                    must resort to the "dumb loop" method.  */
 734                 if (start < next_change && !result)
 735                   break;
 736                 result = 1;
 737               }
 738             if (start >= end)
 739               {
 740                 start = end;
 741                 start_byte = end_byte;
 742                 break;
 743               }
 744             immediate_quit = allow_quit;
 745
 746             /* START should never be after END.  */
 747             if (start_byte > ceiling_byte)
 748               start_byte = ceiling_byte;
 749
 750             /* Now the text after start is an unknown region, and
 751                next_change is the position of the next known region. */
 752             ceiling_byte = min (CHAR_TO_BYTE (next_change) - 1, ceiling_byte);
 753           }
 754         else if (start_byte == -1)
 755           start_byte = CHAR_TO_BYTE (start);
 756
 757         /* The dumb loop can only scan text stored in contiguous
 758            bytes. BUFFER_CEILING_OF returns the last character
 759            position that is contiguous, so the ceiling is the
 760            position after that.  */
 761         tem = BUFFER_CEILING_OF (start_byte);
 762         ceiling_byte = min (tem, ceiling_byte);
 763
 764         {
 765           /* The termination address of the dumb loop.  */
 766           unsigned char *lim_addr = BYTE_POS_ADDR (ceiling_byte) + 1;
 767           ptrdiff_t lim_byte = ceiling_byte + 1;
 768
 769           /* Nonpositive offsets (relative to LIM_ADDR and LIM_BYTE)
 770              of the base, the cursor, and the next line.  */
 771           ptrdiff_t base = start_byte - lim_byte;
 772           ptrdiff_t cursor, next;
 773
 774           for (cursor = base; cursor < 0; cursor = next)
 775             {
 776               /* The dumb loop.  */
 777               unsigned char *nl = memchr (lim_addr + cursor, '\n', - cursor);
 778               next = nl ? nl - lim_addr : 0;
 779
 780               /* If we're using the newline cache, cache the fact that
 781                  the region we just traversed is free of newlines. */
 782               if (newline_cache && cursor != next)
 783                 {
 784                   know_region_cache (cache_buffer, newline_cache,
 785                                      BYTE_TO_CHAR (lim_byte + cursor),
 786                                      BYTE_TO_CHAR (lim_byte + next));
 787                   /* know_region_cache can relocate buffer text.  */
 788                   lim_addr = BYTE_POS_ADDR (ceiling_byte) + 1;
 789                 }
 790
 791               if (! nl)
 792                 break;
 793               next++;
 794
 795               if (--count == 0)
 796                 {
 797                   immediate_quit = 0;
 798                   if (bytepos)
 799                     *bytepos = lim_byte + next;
 800                   return BYTE_TO_CHAR (lim_byte + next);
 801                 }
 802             }
 803
 804           start_byte = lim_byte;
 805           start = BYTE_TO_CHAR (start_byte);
 806         }
 807       }
 808   else
 809     while (start > end)
 810       {
 811         /* The last character to check before the next obstacle.  */
 812         ptrdiff_t tem, ceiling_byte = end_byte;
 813
 814         /* Consult the newline cache, if appropriate.  */
 815         if (newline_cache)
 816           {
 817             ptrdiff_t next_change;
 818             int result = 1;
 819
 820             immediate_quit = 0;
 821             while (start > end && result)
 822               {
 823                 ptrdiff_t lim1;
 824
 825                 result = region_cache_backward (cache_buffer, newline_cache,
 826                                                 start, &next_change);
 827                 if (result)
 828                   {
 829                     start = next_change;
 830                     lim1 = next_change = end;
 831                   }
 832                 else
 833                   lim1 = max (next_change, end);
 834                 start_byte = CHAR_TO_BYTE (start);
 835                 while (start > lim1
 836                        && FETCH_BYTE (start_byte - 1) == '\n')
 837                   {
 838                     if (++count == 0)
 839                       {
 840                         if (bytepos)
 841                           *bytepos = start_byte;
 842                         return start;
 843                       }
 844                     start_byte--;
 845                     start--;
 846                   }
 847                 if (start > next_change && !result)
 848                   break;
 849                 result = 1;
 850               }
 851             if (start <= end)
 852               {
 853                 start = end;
 854                 start_byte = end_byte;
 855                 break;
 856               }
 857             immediate_quit = allow_quit;
 858
 859             /* Start should never be at or before end.  */
 860             if (start_byte <= ceiling_byte)
 861               start_byte = ceiling_byte + 1;
 862
 863             /* Now the text before start is an unknown region, and
 864                next_change is the position of the next known region. */
 865             ceiling_byte = max (CHAR_TO_BYTE (next_change), ceiling_byte);
 866           }
 867         else if (start_byte == -1)
 868           start_byte = CHAR_TO_BYTE (start);
 869
 870         /* Stop scanning before the gap.  */
 871         tem = BUFFER_FLOOR_OF (start_byte - 1);
 872         ceiling_byte = max (tem, ceiling_byte);
 873
 874         {
 875           /* The termination address of the dumb loop.  */
 876           unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
 877
 878           /* Offsets (relative to CEILING_ADDR and CEILING_BYTE) of
 879              the base, the cursor, and the previous line.  These
 880              offsets are at least -1.  */
 881           ptrdiff_t base = start_byte - ceiling_byte;
 882           ptrdiff_t cursor, prev;
 883
 884           for (cursor = base; 0 < cursor; cursor = prev)
 885             {
 886               unsigned char *nl = memrchr (ceiling_addr, '\n', cursor);
 887               prev = nl ? nl - ceiling_addr : -1;
 888
 889               /* If we're looking for newlines, cache the fact that
 890                  this line's region is free of them. */
 891               if (newline_cache && cursor != prev + 1)
 892                 {
 893                   know_region_cache (cache_buffer, newline_cache,
 894                                      BYTE_TO_CHAR (ceiling_byte + prev + 1),
 895                                      BYTE_TO_CHAR (ceiling_byte + cursor));
 896                   /* know_region_cache can relocate buffer text.  */
 897                   ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
 898                 }
 899
 900               if (! nl)
 901                 break;
 902
 903               if (++count >= 0)
 904                 {
 905                   immediate_quit = 0;
 906                   if (bytepos)
 907                     *bytepos = ceiling_byte + prev + 1;
 908                   return BYTE_TO_CHAR (ceiling_byte + prev + 1);
 909                 }
 910             }
 911
 912           start_byte = ceiling_byte;
 913           start = BYTE_TO_CHAR (start_byte);
 914         }
 915       }
 916
 917   immediate_quit = 0;
 918   if (shortage)
 919     *shortage = count * direction;
 920   if (bytepos)
 921     {
 922       *bytepos = start_byte == -1 ? CHAR_TO_BYTE (start) : start_byte;
 923       eassert (*bytepos == CHAR_TO_BYTE (start));
 924     }
 925   return start;
 926 }
 927 \f
 /* Move point across COUNT line boundaries, scanning from
    START / START_BYTE towards LIMIT / LIMIT_BYTE.  A negative COUNT
    scans backwards.  The scan itself is delegated to find_newline and
    the outcome is installed with TEMP_SET_PT_BOTH.

    If all COUNT boundaries are found, point ends up after the COUNTth
    one (always after, even when scanning backwards) and 0 is returned.

    If the scan runs out of text first, point is put at
    LIMIT / LIMIT_BYTE and the return value is the shortage reported by
    find_newline, i.e. the number of line boundaries left unfound.

    ALLOW_QUIT is handed to find_newline unchanged; when true,
    immediate_quit is set during the scan.  That is the right thing to
    do except in special cases.  */

 ptrdiff_t
 scan_newline (ptrdiff_t start, ptrdiff_t start_byte,
               ptrdiff_t limit, ptrdiff_t limit_byte,
               ptrdiff_t count, bool allow_quit)
 {
   ptrdiff_t unfound;
   ptrdiff_t found_byte;
   ptrdiff_t found_char = find_newline (start, start_byte,
                                        limit, limit_byte,
                                        count, &unfound, &found_byte,
                                        allow_quit);

   if (unfound == 0)
     {
       /* Every requested boundary was located.  */
       TEMP_SET_PT_BOTH (found_char, found_byte);
     }
   else
     {
       /* Ran into the limit first; park point there.  */
       TEMP_SET_PT_BOTH (limit, limit_byte);
     }

   return unfound;
 }
 959
 960 /* Like above, but always scan from point and report the
 961    resulting position in *CHARPOS and *BYTEPOS.  */
 962
 963 ptrdiff_t
 964 scan_newline_from_point (ptrdiff_t count, ptrdiff_t *charpos,
 965                          ptrdiff_t *bytepos)
 966 {
 967   ptrdiff_t shortage;
 968
 969   if (count <= 0)
 970     *charpos = find_newline (PT, PT_BYTE, BEGV, BEGV_BYTE, count - 1,
 971                              &shortage, bytepos, 1);
 972   else
 973     *charpos = find_newline (PT, PT_BYTE, ZV, ZV_BYTE, count,
 974                              &shortage, bytepos, 1);
 975   return shortage;
 976 }
 977
 /* Convenience wrapper around find_newline: scan for CNT newlines
    starting at FROM / FROMBYTE with quitting disabled and without
    reporting a shortage.  The byte position of the result is stored in
    *BYTEPOS by find_newline; the character position is returned.

    The limit is passed as character position 0 with byte position -1.
    NOTE(review): presumably this makes find_newline choose the limit
    itself -- confirm against find_newline's contract.  */
 ptrdiff_t
 find_newline_no_quit (ptrdiff_t from, ptrdiff_t frombyte,
                       ptrdiff_t cnt, ptrdiff_t *bytepos)
 {
   ptrdiff_t *no_shortage = NULL;
   bool allow_quit = 0;

   return find_newline (from, frombyte, 0, -1, cnt,
                        no_shortage, bytepos, allow_quit);
 }
 986
 /* Like find_newline, except that the search stops at TO and the
    position returned is the one before the newline rather than the one
    after it.  When BYTEPOS is non-null, *BYTEPOS is adjusted the same
    way.

    This is not simply find_newline_no_quit (...) - 1: if the search
    hits TO before finding CNT newlines, the position find_newline
    stopped at is returned unadjusted.  */

 ptrdiff_t
 find_before_next_newline (ptrdiff_t from, ptrdiff_t to,
                           ptrdiff_t cnt, ptrdiff_t *bytepos)
 {
   ptrdiff_t unfound;
   ptrdiff_t pos = find_newline (from, -1, to, -1, cnt,
                                 &unfound, bytepos, 1);

   /* Stopped short: there is no newline to step back over.  */
   if (unfound != 0)
     return pos;

   if (bytepos)
     {
       DEC_BOTH (pos, *bytepos);
     }
   else
     {
       pos--;
     }
   return pos;
 }
1007 \f
1008 /* Subroutines of Lisp buffer search functions. */
1009
1010 static Lisp_Object
1011 search_command (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror,
1012                 Lisp_Object count, int direction, int RE, bool posix)
1013 {
1014   EMACS_INT np;
1015   EMACS_INT lim;
1016   ptrdiff_t lim_byte;
1017   EMACS_INT n = direction;
1018
1019   if (!NILP (count))
1020     {
1021       CHECK_NUMBER (count);
1022       n *= XINT (count);
1023     }
1024
1025   CHECK_STRING (string);
1026   if (NILP (bound))
1027     {
1028       if (n > 0)
1029         lim = ZV, lim_byte = ZV_BYTE;
1030       else
1031         lim = BEGV, lim_byte = BEGV_BYTE;
1032     }
1033   else
1034     {
1035       CHECK_NUMBER_COERCE_MARKER (bound);
1036       lim = XINT (bound);
1037       if (n > 0 ? lim < PT : lim > PT)
1038         error ("Invalid search bound (wrong side of point)");
1039       if (lim > ZV)
1040         lim = ZV, lim_byte = ZV_BYTE;
1041       else if (lim < BEGV)
1042         lim = BEGV, lim_byte = BEGV_BYTE;
1043       else
1044         lim_byte = CHAR_TO_BYTE (lim);
1045     }
1046
1047   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
1048   set_char_table_extras (BVAR (current_buffer, case_canon_table), 2,
1049                          BVAR (current_buffer, case_eqv_table));
1050
1051   np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
1052                       (!NILP (BVAR (current_buffer, case_fold_search))
1053                        ? BVAR (current_buffer, case_canon_table)
1054                        : Qnil),
1055                       (!NILP (BVAR (current_buffer, case_fold_search))
1056                        ? BVAR (current_buffer, case_eqv_table)
1057                        : Qnil),
1058                       posix);
1059   if (np <= 0)
1060     {
1061       if (NILP (noerror))
1062         xsignal1 (Qsearch_failed, string);
1063
1064       if (!EQ (noerror, Qt))
1065         {
1066           eassert (BEGV <= lim && lim <= ZV);
1067           SET_PT_BOTH (lim, lim_byte);
1068           return Qnil;
1069 #if 0 /* This would be clean, but maybe programs depend on
1070          a value of nil here.  */
1071           np = lim;
1072 #endif
1073         }
1074       else
1075         return Qnil;
1076     }
1077
1078   eassert (BEGV <= np && np <= ZV);
1079   SET_PT (np);
1080
1081   return make_number (np);
1082 }
1083 \f
1084 /* Return true if REGEXP it matches just one constant string.  */
1085
1086 static bool
1087 trivial_regexp_p (Lisp_Object regexp)
1088 {
1089   ptrdiff_t len = SBYTES (regexp);
1090   unsigned char *s = SDATA (regexp);
1091   while (--len >= 0)
1092     {
1093       switch (*s++)
1094         {
1095         case '.': case '*': case '+': case '?': case '[': case '^': case '$':
1096           return 0;
1097         case '\\':
1098           if (--len < 0)
1099             return 0;
1100           switch (*s++)
1101             {
1102             case '|': case '(': case ')': case '`': case '\'': case 'b':
1103             case 'B': case '<': case '>': case 'w': case 'W': case 's':
1104             case 'S': case '=': case '{': case '}': case '_':
1105             case 'c': case 'C': /* for categoryspec and notcategoryspec */
1106             case '1': case '2': case '3': case '4': case '5':
1107             case '6': case '7': case '8': case '9':
1108               return 0;
1109             }
1110         }
1111     }
1112   return 1;
1113 }
1114
1115 /* Search for the n'th occurrence of STRING in the current buffer,
1116    starting at position POS and stopping at position LIM,
1117    treating STRING as a literal string if RE is false or as
1118    a regular expression if RE is true.
1119
1120    If N is positive, searching is forward and LIM must be greater than POS.
1121    If N is negative, searching is backward and LIM must be less than POS.
1122
1123    Returns -x if x occurrences remain to be found (x > 0),
1124    or else the position at the beginning of the Nth occurrence
1125    (if searching backward) or the end (if searching forward).
1126
1127    POSIX is nonzero if we want full backtracking (POSIX style)
1128    for this pattern.  0 means backtrack only enough to get a valid match.  */
1129
/* Store in OUT the translation of character code D under the
   translation table TRT.  When TRT is nil, or when its entry for D
   (looked up with Faref) is not an integer, OUT simply receives D.
   OUT must be an lvalue.  D is expanded more than once, and callers
   may pass the same variable for both OUT and D.  */
#define TRANSLATE(out, trt, d)                                  \
  do                                                            \
    {                                                           \
      if (NILP (trt))                                           \
        out = d;                                                \
      else                                                      \
        {                                                       \
          Lisp_Object trt_entry_ = Faref (trt, make_number (d)); \
          if (INTEGERP (trt_entry_))                            \
            out = XINT (trt_entry_);                            \
          else                                                  \
            out = d;                                            \
        }                                                       \
    }                                                           \
  while (0)
1146
1147 /* Only used in search_buffer, to record the end position of the match
1148    when searching regexps and SEARCH_REGS should not be changed
1149    (i.e. Vinhibit_changing_match_data is non-nil).  */
1150 static struct re_registers search_regs_1;
1151
1152 static EMACS_INT
1153 search_buffer (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte,
1154                ptrdiff_t lim, ptrdiff_t lim_byte, EMACS_INT n,
1155                int RE, Lisp_Object trt, Lisp_Object inverse_trt, bool posix)
1156 {
1157   ptrdiff_t len = SCHARS (string);
1158   ptrdiff_t len_byte = SBYTES (string);
1159   register ptrdiff_t i;
1160
1161   if (running_asynch_code)
1162     save_search_regs ();
1163
1164   /* Searching 0 times means don't move.  */
1165   /* Null string is found at starting position.  */
1166   if (len == 0 || n == 0)
1167     {
1168       set_search_regs (pos_byte, 0);
1169       return pos;
1170     }
1171
1172   if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp)))
1173     {
1174       unsigned char *p1, *p2;
1175       ptrdiff_t s1, s2;
1176       struct re_pattern_buffer *bufp;
1177
1178       bufp = compile_pattern (string,
1179                               (NILP (Vinhibit_changing_match_data)
1180                                ? &search_regs : &search_regs_1),
1181                               trt, posix,
1182                               !NILP (BVAR (current_buffer, enable_multibyte_characters)));
1183
1184       immediate_quit = 1;       /* Quit immediately if user types ^G,
1185                                    because letting this function finish
1186                                    can take too long. */
1187       QUIT;                     /* Do a pending quit right away,
1188                                    to avoid paradoxical behavior */
1189       /* Get pointers and sizes of the two strings
1190          that make up the visible portion of the buffer. */
1191
1192       p1 = BEGV_ADDR;
1193       s1 = GPT_BYTE - BEGV_BYTE;
1194       p2 = GAP_END_ADDR;
1195       s2 = ZV_BYTE - GPT_BYTE;
1196       if (s1 < 0)
1197         {
1198           p2 = p1;
1199           s2 = ZV_BYTE - BEGV_BYTE;
1200           s1 = 0;
1201         }
1202       if (s2 < 0)
1203         {
1204           s1 = ZV_BYTE - BEGV_BYTE;
1205           s2 = 0;
1206         }
1207       re_match_object = Qnil;
1208
1209       while (n < 0)
1210         {
1211           ptrdiff_t val;
1212
1213           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1214                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1215                              (NILP (Vinhibit_changing_match_data)
1216                               ? &search_regs : &search_regs_1),
1217                              /* Don't allow match past current point */
1218                              pos_byte - BEGV_BYTE);
1219           if (val == -2)
1220             {
1221               matcher_overflow ();
1222             }
1223           if (val >= 0)
1224             {
1225               if (NILP (Vinhibit_changing_match_data))
1226                 {
1227                   pos_byte = search_regs.start[0] + BEGV_BYTE;
1228                   for (i = 0; i < search_regs.num_regs; i++)
1229                     if (search_regs.start[i] >= 0)
1230                       {
1231                         search_regs.start[i]
1232                           = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1233                         search_regs.end[i]
1234                           = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1235                       }
1236                   XSETBUFFER (last_thing_searched, current_buffer);
1237                   /* Set pos to the new position. */
1238                   pos = search_regs.start[0];
1239                 }
1240               else
1241                 {
1242                   pos_byte = search_regs_1.start[0] + BEGV_BYTE;
1243                   /* Set pos to the new position.  */
1244                   pos = BYTE_TO_CHAR (search_regs_1.start[0] + BEGV_BYTE);
1245                 }
1246             }
1247           else
1248             {
1249               immediate_quit = 0;
1250               return (n);
1251             }
1252           n++;
1253         }
1254       while (n > 0)
1255         {
1256           ptrdiff_t val;
1257
1258           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1259                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1260                              (NILP (Vinhibit_changing_match_data)
1261                               ? &search_regs : &search_regs_1),
1262                              lim_byte - BEGV_BYTE);
1263           if (val == -2)
1264             {
1265               matcher_overflow ();
1266             }
1267           if (val >= 0)
1268             {
1269               if (NILP (Vinhibit_changing_match_data))
1270                 {
1271                   pos_byte = search_regs.end[0] + BEGV_BYTE;
1272                   for (i = 0; i < search_regs.num_regs; i++)
1273                     if (search_regs.start[i] >= 0)
1274                       {
1275                         search_regs.start[i]
1276                           = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1277                         search_regs.end[i]
1278                           = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1279                       }
1280                   XSETBUFFER (last_thing_searched, current_buffer);
1281                   pos = search_regs.end[0];
1282                 }
1283               else
1284                 {
1285                   pos_byte = search_regs_1.end[0] + BEGV_BYTE;
1286                   pos = BYTE_TO_CHAR (search_regs_1.end[0] + BEGV_BYTE);
1287                 }
1288             }
1289           else
1290             {
1291               immediate_quit = 0;
1292               return (0 - n);
1293             }
1294           n--;
1295         }
1296       immediate_quit = 0;
1297       return (pos);
1298     }
1299   else                          /* non-RE case */
1300     {
1301       unsigned char *raw_pattern, *pat;
1302       ptrdiff_t raw_pattern_size;
1303       ptrdiff_t raw_pattern_size_byte;
1304       unsigned char *patbuf;
1305       bool multibyte = !NILP (BVAR (current_buffer, enable_multibyte_characters));
1306       unsigned char *base_pat;
1307       /* Set to positive if we find a non-ASCII char that need
1308          translation.  Otherwise set to zero later.  */
1309       int char_base = -1;
1310       bool boyer_moore_ok = 1;
1311       USE_SAFE_ALLOCA;
1312
1313       /* MULTIBYTE says whether the text to be searched is multibyte.
1314          We must convert PATTERN to match that, or we will not really
1315          find things right.  */
1316
1317       if (multibyte == STRING_MULTIBYTE (string))
1318         {
1319           raw_pattern = SDATA (string);
1320           raw_pattern_size = SCHARS (string);
1321           raw_pattern_size_byte = SBYTES (string);
1322         }
1323       else if (multibyte)
1324         {
1325           raw_pattern_size = SCHARS (string);
1326           raw_pattern_size_byte
1327             = count_size_as_multibyte (SDATA (string),
1328                                        raw_pattern_size);
1329           raw_pattern = SAFE_ALLOCA (raw_pattern_size_byte + 1);
1330           copy_text (SDATA (string), raw_pattern,
1331                      SCHARS (string), 0, 1);
1332         }
1333       else
1334         {
1335           /* Converting multibyte to single-byte.
1336
1337              ??? Perhaps this conversion should be done in a special way
1338              by subtracting nonascii-insert-offset from each non-ASCII char,
1339              so that only the multibyte chars which really correspond to
1340              the chosen single-byte character set can possibly match.  */
1341           raw_pattern_size = SCHARS (string);
1342           raw_pattern_size_byte = SCHARS (string);
1343           raw_pattern = SAFE_ALLOCA (raw_pattern_size + 1);
1344           copy_text (SDATA (string), raw_pattern,
1345                      SBYTES (string), 1, 0);
1346         }
1347
1348       /* Copy and optionally translate the pattern.  */
1349       len = raw_pattern_size;
1350       len_byte = raw_pattern_size_byte;
1351       SAFE_NALLOCA (patbuf, MAX_MULTIBYTE_LENGTH, len);
1352       pat = patbuf;
1353       base_pat = raw_pattern;
1354       if (multibyte)
1355         {
1356           /* Fill patbuf by translated characters in STRING while
1357              checking if we can use boyer-moore search.  If TRT is
1358              non-nil, we can use boyer-moore search only if TRT can be
1359              represented by the byte array of 256 elements.  For that,
1360              all non-ASCII case-equivalents of all case-sensitive
1361              characters in STRING must belong to the same character
1362              group (two characters belong to the same group iff their
1363              multibyte forms are the same except for the last byte;
1364              i.e. every 64 characters form a group; U+0000..U+003F,
1365              U+0040..U+007F, U+0080..U+00BF, ...).  */
1366
1367           while (--len >= 0)
1368             {
1369               unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str;
1370               int c, translated, inverse;
1371               int in_charlen, charlen;
1372
1373               /* If we got here and the RE flag is set, it's because we're
1374                  dealing with a regexp known to be trivial, so the backslash
1375                  just quotes the next character.  */
1376               if (RE && *base_pat == '\\')
1377                 {
1378                   len--;
1379                   raw_pattern_size--;
1380                   len_byte--;
1381                   base_pat++;
1382                 }
1383
1384               c = STRING_CHAR_AND_LENGTH (base_pat, in_charlen);
1385
1386               if (NILP (trt))
1387                 {
1388                   str = base_pat;
1389                   charlen = in_charlen;
1390                 }
1391               else
1392                 {
1393                   /* Translate the character.  */
1394                   TRANSLATE (translated, trt, c);
1395                   charlen = CHAR_STRING (translated, str_base);
1396                   str = str_base;
1397
1398                   /* Check if C has any other case-equivalents.  */
1399                   TRANSLATE (inverse, inverse_trt, c);
1400                   /* If so, check if we can use boyer-moore.  */
1401                   if (c != inverse && boyer_moore_ok)
1402                     {
1403                       /* Check if all equivalents belong to the same
1404                          group of characters.  Note that the check of C
1405                          itself is done by the last iteration.  */
1406                       int this_char_base = -1;
1407
1408                       while (boyer_moore_ok)
1409                         {
1410                           if (ASCII_CHAR_P (inverse))
1411                             {
1412                               if (this_char_base > 0)
1413                                 boyer_moore_ok = 0;
1414                               else
1415                                 this_char_base = 0;
1416                             }
1417                           else if (CHAR_BYTE8_P (inverse))
1418                             /* Boyer-moore search can't handle a
1419                                translation of an eight-bit
1420                                character.  */
1421                             boyer_moore_ok = 0;
1422                           else if (this_char_base < 0)
1423                             {
1424                               this_char_base = inverse & ~0x3F;
1425                               if (char_base < 0)
1426                                 char_base = this_char_base;
1427                               else if (this_char_base != char_base)
1428                                 boyer_moore_ok = 0;
1429                             }
1430                           else if ((inverse & ~0x3F) != this_char_base)
1431                             boyer_moore_ok = 0;
1432                           if (c == inverse)
1433                             break;
1434                           TRANSLATE (inverse, inverse_trt, inverse);
1435                         }
1436                     }
1437                 }
1438
1439               /* Store this character into the translated pattern.  */
1440               memcpy (pat, str, charlen);
1441               pat += charlen;
1442               base_pat += in_charlen;
1443               len_byte -= in_charlen;
1444             }
1445
1446           /* If char_base is still negative we didn't find any translated
1447              non-ASCII characters.  */
1448           if (char_base < 0)
1449             char_base = 0;
1450         }
1451       else
1452         {
1453           /* Unibyte buffer.  */
1454           char_base = 0;
1455           while (--len >= 0)
1456             {
1457               int c, translated, inverse;
1458
1459               /* If we got here and the RE flag is set, it's because we're
1460                  dealing with a regexp known to be trivial, so the backslash
1461                  just quotes the next character.  */
1462               if (RE && *base_pat == '\\')
1463                 {
1464                   len--;
1465                   raw_pattern_size--;
1466                   base_pat++;
1467                 }
1468               c = *base_pat++;
1469               TRANSLATE (translated, trt, c);
1470               *pat++ = translated;
1471               /* Check that none of C's equivalents violates the
1472                  assumptions of boyer_moore.  */
1473               TRANSLATE (inverse, inverse_trt, c);
1474               while (1)
1475                 {
1476                   if (inverse >= 0200)
1477                     {
1478                       boyer_moore_ok = 0;
1479                       break;
1480                     }
1481                   if (c == inverse)
1482                     break;
1483                   TRANSLATE (inverse, inverse_trt, inverse);
1484                 }
1485             }
1486         }
1487
1488       len_byte = pat - patbuf;
1489       pat = base_pat = patbuf;
1490
1491       EMACS_INT result
1492         = (boyer_moore_ok
1493            ? boyer_moore (n, pat, len_byte, trt, inverse_trt,
1494                           pos_byte, lim_byte,
1495                           char_base)
1496            : simple_search (n, pat, raw_pattern_size, len_byte, trt,
1497                             pos, pos_byte, lim, lim_byte));
1498       SAFE_FREE ();
1499       return result;
1500     }
1501 }
1502 \f
1503 /* Do a simple string search N times for the string PAT,
1504    whose length is LEN/LEN_BYTE,
1505    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1506    TRT is the translation table.
1507
1508    Return the character position where the match is found.
1509    Otherwise, if M matches remained to be found, return -M.
1510
1511    This kind of search works regardless of what is in PAT and
1512    regardless of what is in TRT.  It is used in cases where
1513    boyer_moore cannot work.  */
1514
1515 static EMACS_INT
1516 simple_search (EMACS_INT n, unsigned char *pat,
1517                ptrdiff_t len, ptrdiff_t len_byte, Lisp_Object trt,
1518                ptrdiff_t pos, ptrdiff_t pos_byte,
1519                ptrdiff_t lim, ptrdiff_t lim_byte)
1520 {
1521   bool multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
1522   bool forward = n > 0;
1523   /* Number of buffer bytes matched.  Note that this may be different
1524      from len_byte in a multibyte buffer.  */
1525   ptrdiff_t match_byte = PTRDIFF_MIN;
1526
1527   if (lim > pos && multibyte)
1528     while (n > 0)
1529       {
1530         while (1)
1531           {
1532             /* Try matching at position POS.  */
1533             ptrdiff_t this_pos = pos;
1534             ptrdiff_t this_pos_byte = pos_byte;
1535             ptrdiff_t this_len = len;
1536             unsigned char *p = pat;
1537             if (pos + len > lim || pos_byte + len_byte > lim_byte)
1538               goto stop;
1539
1540             while (this_len > 0)
1541               {
1542                 int charlen, buf_charlen;
1543                 int pat_ch, buf_ch;
1544
1545                 pat_ch = STRING_CHAR_AND_LENGTH (p, charlen);
1546                 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1547                                                  buf_charlen);
1548                 TRANSLATE (buf_ch, trt, buf_ch);
1549
1550                 if (buf_ch != pat_ch)
1551                   break;
1552
1553                 this_len--;
1554                 p += charlen;
1555
1556                 this_pos_byte += buf_charlen;
1557                 this_pos++;
1558               }
1559
1560             if (this_len == 0)
1561               {
1562                 match_byte = this_pos_byte - pos_byte;
1563                 pos += len;
1564                 pos_byte += match_byte;
1565                 break;
1566               }
1567
1568             INC_BOTH (pos, pos_byte);
1569           }
1570
1571         n--;
1572       }
1573   else if (lim > pos)
1574     while (n > 0)
1575       {
1576         while (1)
1577           {
1578             /* Try matching at position POS.  */
1579             ptrdiff_t this_pos = pos;
1580             ptrdiff_t this_len = len;
1581             unsigned char *p = pat;
1582
1583             if (pos + len > lim)
1584               goto stop;
1585
1586             while (this_len > 0)
1587               {
1588                 int pat_ch = *p++;
1589                 int buf_ch = FETCH_BYTE (this_pos);
1590                 TRANSLATE (buf_ch, trt, buf_ch);
1591
1592                 if (buf_ch != pat_ch)
1593                   break;
1594
1595                 this_len--;
1596                 this_pos++;
1597               }
1598
1599             if (this_len == 0)
1600               {
1601                 match_byte = len;
1602                 pos += len;
1603                 break;
1604               }
1605
1606             pos++;
1607           }
1608
1609         n--;
1610       }
1611   /* Backwards search.  */
1612   else if (lim < pos && multibyte)
1613     while (n < 0)
1614       {
1615         while (1)
1616           {
1617             /* Try matching at position POS.  */
1618             ptrdiff_t this_pos = pos;
1619             ptrdiff_t this_pos_byte = pos_byte;
1620             ptrdiff_t this_len = len;
1621             const unsigned char *p = pat + len_byte;
1622
1623             if (this_pos - len < lim || (pos_byte - len_byte) < lim_byte)
1624               goto stop;
1625
1626             while (this_len > 0)
1627               {
1628                 int pat_ch, buf_ch;
1629
1630                 DEC_BOTH (this_pos, this_pos_byte);
1631                 PREV_CHAR_BOUNDARY (p, pat);
1632                 pat_ch = STRING_CHAR (p);
1633                 buf_ch = STRING_CHAR (BYTE_POS_ADDR (this_pos_byte));
1634                 TRANSLATE (buf_ch, trt, buf_ch);
1635
1636                 if (buf_ch != pat_ch)
1637                   break;
1638
1639                 this_len--;
1640               }
1641
1642             if (this_len == 0)
1643               {
1644                 match_byte = pos_byte - this_pos_byte;
1645                 pos = this_pos;
1646                 pos_byte = this_pos_byte;
1647                 break;
1648               }
1649
1650             DEC_BOTH (pos, pos_byte);
1651           }
1652
1653         n++;
1654       }
1655   else if (lim < pos)
1656     while (n < 0)
1657       {
1658         while (1)
1659           {
1660             /* Try matching at position POS.  */
1661             ptrdiff_t this_pos = pos - len;
1662             ptrdiff_t this_len = len;
1663             unsigned char *p = pat;
1664
1665             if (this_pos < lim)
1666               goto stop;
1667
1668             while (this_len > 0)
1669               {
1670                 int pat_ch = *p++;
1671                 int buf_ch = FETCH_BYTE (this_pos);
1672                 TRANSLATE (buf_ch, trt, buf_ch);
1673
1674                 if (buf_ch != pat_ch)
1675                   break;
1676                 this_len--;
1677                 this_pos++;
1678               }
1679
1680             if (this_len == 0)
1681               {
1682                 match_byte = len;
1683                 pos -= len;
1684                 break;
1685               }
1686
1687             pos--;
1688           }
1689
1690         n++;
1691       }
1692
1693  stop:
1694   if (n == 0)
1695     {
1696       eassert (match_byte != PTRDIFF_MIN);
1697       if (forward)
1698         set_search_regs ((multibyte ? pos_byte : pos) - match_byte, match_byte);
1699       else
1700         set_search_regs (multibyte ? pos_byte : pos, match_byte);
1701
1702       return pos;
1703     }
1704   else if (n > 0)
1705     return -n;
1706   else
1707     return n;
1708 }
1709 \f
1710 /* Do Boyer-Moore search N times for the string BASE_PAT,
1711    whose length is LEN_BYTE,
1712    from buffer position POS_BYTE until LIM_BYTE.
1713    The sign of N says which direction we search in: N > 0 searches
        forward, anything else searches backward (there is no separate
        DIRECTION argument).
1714    TRT and INVERSE_TRT are translation tables.
1715    Characters in BASE_PAT are already translated by TRT.
1716
1717    This kind of search works if all the characters in BASE_PAT that
1718    have nontrivial translation are the same aside from the last byte.
1719    This makes it possible to translate just the last byte of a
1720    character, and do so after just a simple test of the context.
1721    CHAR_BASE is nonzero if there is such a non-ASCII character.
1722
1723    If that criterion is not satisfied, do not call this function.

        Every match found is recorded with set_search_regs.  When the
        last requested match is found, return the character position of
        its end (forward search) or of its start (backward search).  If
        the search passes LIM_BYTE first, return a negative number whose
        magnitude is the count of matches still wanted.  */
1724
1725 static EMACS_INT
1726 boyer_moore (EMACS_INT n, unsigned char *base_pat,
1727              ptrdiff_t len_byte,
1728              Lisp_Object trt, Lisp_Object inverse_trt,
1729              ptrdiff_t pos_byte, ptrdiff_t lim_byte,
1730              int char_base)
1731 {
1732   int direction = ((n > 0) ? 1 : -1);
1733   register ptrdiff_t dirlen;
1734   ptrdiff_t limit;
1735   int stride_for_teases = 0;
1736   int BM_tab[0400];
1737   register unsigned char *cursor, *p_limit;
1738   register ptrdiff_t i;
1739   register int j;
1740   unsigned char *pat, *pat_end;
1741   bool multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
1742
1743   unsigned char simple_translate[0400];
1744   /* These are set to the preceding bytes of a byte to be translated
1745      if char_base is nonzero.  The maximum byte length of a
1746      multibyte character is 5, which would mean up to four previous
1747      bytes, but only three are recorded here.
          NOTE(review): presumably no character with a 5-byte form has a
          nontrivial translation, so three suffice -- confirm.  */
1748   int translate_prev_byte1 = 0;
1749   int translate_prev_byte2 = 0;
1750   int translate_prev_byte3 = 0;
1751
1752   /* The general approach is that we are going to maintain that we know
1753      the first (closest to the present position, in whatever direction
1754      we're searching) character that could possibly be the last
1755      (furthest from present position) character of a valid match.  We
1756      advance the state of our knowledge by looking at that character
1757      and seeing whether it indeed matches the last character of the
1758      pattern.  If it does, we take a closer look.  If it does not, we
1759      move our pointer (to putative last characters) as far as is
1760      logically possible.  This amount of movement, which I call a
1761      stride, will be the length of the pattern if the actual character
1762      appears nowhere in the pattern, otherwise it will be the distance
1763      from the last occurrence of that character to the end of the
1764      pattern.  If the amount is zero we have a possible match.  */
1765
1766   /* Here we make a "mickey mouse" BM table.  The stride of the search
1767      is determined only by the last character of the putative match.
1768      If that character does not match, we will stride the proper
1769      distance to propose a match that superimposes it on the last
1770      instance of a character that matches it (per trt), or misses
1771      it entirely if there is none. */
1772
       /* DIRLEN is the pattern length in bytes, carrying the sign of
          DIRECTION.  */
1773   dirlen = len_byte * direction;
1774
1775   /* Record position after the end of the pattern.  */
1776   pat_end = base_pat + len_byte;
1777   /* BASE_PAT points to a character that we start scanning from.
1778      It is the first character in a forward search,
1779      the last character in a backward search.  */
1780   if (direction < 0)
1781     base_pat = pat_end - 1;
1782
1783   /* A character that does not appear in the pattern induces a
1784      stride equal to the pattern length.  */
1785   for (i = 0; i < 0400; i++)
1786     BM_tab[i] = dirlen;
1787
1788   /* We use this for translation, instead of TRT itself.
1789      We fill this in to handle the characters that actually
1790      occur in the pattern.  Others don't matter anyway!  */
1791   for (i = 0; i < 0400; i++)
1792     simple_translate[i] = i;
1793
1794   if (char_base)
1795     {
1796       /* Set up translate_prev_byte1/2/3 from CHAR_BASE.  Only a
1797          byte following them is the target of translation.
              STR[CBLEN - 2] assumes CBLEN is at least 2, i.e. that
              CHAR_BASE is non-ASCII as the function's contract states.  */
1798       unsigned char str[MAX_MULTIBYTE_LENGTH];
1799       int cblen = CHAR_STRING (char_base, str);
1800
1801       translate_prev_byte1 = str[cblen - 2];
1802       if (cblen > 2)
1803         {
1804           translate_prev_byte2 = str[cblen - 3];
1805           if (cblen > 3)
1806             translate_prev_byte3 = str[cblen - 4];
1807         }
1808     }
1809
       /* Fill in BM_tab (and, when translating, simple_translate) by
          walking the pattern one byte at a time from BASE_PAT in the
          search direction; I runs from 0 to DIRLEN.  */
1810   i = 0;
1811   while (i != dirlen)
1812     {
1813       unsigned char *ptr = base_pat + i;
1814       i += direction;
1815       if (! NILP (trt))
1816         {
1817           /* If the byte we are currently looking at is the last byte
1818              of a character whose case-equivalents must be checked,
1819              set CH to that character.  ASCII characters and non-ASCII
1820              characters matching CHAR_BASE are the ones to be checked.  */
1821           int ch = -1;
1822
1823           if (ASCII_CHAR_P (*ptr) || ! multibyte)
1824             ch = *ptr;
1825           else if (char_base
1826                    && ((pat_end - ptr) == 1 || CHAR_HEAD_P (ptr[1])))
1827             {
1828               unsigned char *charstart = ptr - 1;
1829
1830               while (! (CHAR_HEAD_P (*charstart)))
1831                 charstart--;
1832               ch = STRING_CHAR (charstart);
1833               if (char_base != (ch & ~0x3F))
1834                 ch = -1;
1835             }
1836
               /* J is the byte value used to index the tables: for a
                  non-ASCII CH in a multibyte buffer, its low six bits
                  with 0200 set; otherwise the pattern byte itself.  */
1837           if (ch >= 0200 && multibyte)
1838             j = (ch & 0x3F) | 0200;
1839           else
1840             j = *ptr;
1841
1842           if (i == dirlen)
1843             stride_for_teases = BM_tab[j];
1844
1845           BM_tab[j] = dirlen - i;
1846           /* A translation table is accompanied by its inverse -- see
1847              comment following downcase_table for details.  */
1848           if (ch >= 0)
1849             {
1850               int starting_ch = ch;
1851               int starting_j = j;
1852
1853               while (1)
1854                 {
1855                   TRANSLATE (ch, inverse_trt, ch);
1856                   if (ch >= 0200 && multibyte)
1857                     j = (ch & 0x3F) | 0200;
1858                   else
1859                     j = ch;
1860
1861                   /* For all the characters that map into CH,
1862                      set up simple_translate to map the last byte
1863                      into STARTING_J.  */
1864                   simple_translate[j] = starting_j;
1865                   if (ch == starting_ch)
1866                     break;
1867                   BM_tab[j] = dirlen - i;
1868                 }
1869             }
1870         }
1871       else
1872         {
1873           j = *ptr;
1874
1875           if (i == dirlen)
1876             stride_for_teases = BM_tab[j];
1877           BM_tab[j] = dirlen - i;
1878         }
1879       /* stride_for_teases tells how much to stride if we get a
1880          match on the far character but are subsequently
1881          disappointed, by recording what the stride would have been
1882          for that character if the last character had been
1883          different.  */
1884     }
1885   pos_byte += dirlen - ((direction > 0) ? direction : 0);
1886   /* loop invariant - POS_BYTE points at where last char (first
1887      char if reverse) of pattern would align in a possible match.  */
1888   while (n != 0)
1889     {
1890       ptrdiff_t tail_end;
1891       unsigned char *tail_end_ptr;
1892
           /* If POS_BYTE is already beyond LIM_BYTE, fail: the value
              returned is minus the number of matches still wanted.  */
1893       /* It's been reported that some (broken) compiler thinks that
1894          Boolean expressions in an arithmetic context are unsigned.
1895          Using an explicit ?1:0 prevents this.  */
1896       if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1897           < 0)
1898         return (n * (0 - direction));
1899       /* First we do the part we can by pointers (maybe nothing) */
1900       QUIT;
1901       pat = base_pat;
1902       limit = pos_byte - dirlen + direction;
1903       if (direction > 0)
1904         {
1905           limit = BUFFER_CEILING_OF (limit);
1906           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1907              can take on without hitting edge of buffer or the gap.  */
1908           limit = min (limit, pos_byte + 20000);
1909           limit = min (limit, lim_byte - 1);
1910         }
1911       else
1912         {
1913           limit = BUFFER_FLOOR_OF (limit);
1914           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1915              can take on without hitting edge of buffer or the gap.  */
1916           limit = max (limit, pos_byte - 20000);
1917           limit = max (limit, lim_byte);
1918         }
           /* TAIL_END is one more than BUFFER_CEILING_OF (POS_BYTE) --
              presumably the first position past the contiguous stretch
              of text holding POS_BYTE.  The hit loops below compare a
              byte's address with TAIL_END_PTR before they look at the
              byte after it.  */
1919       tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1920       tail_end_ptr = BYTE_POS_ADDR (tail_end);
1921
1922       if ((limit - pos_byte) * direction > 20)
1923         {
1924           unsigned char *p2;
1925
1926           p_limit = BYTE_POS_ADDR (limit);
1927           p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1928           /* In this loop, pos + cursor - p2 is the surrogate for pos.  */
1929           while (1)             /* use one cursor setting as long as i can */
1930             {
1931               if (direction > 0) /* worth duplicating */
1932                 {
1933                   while (cursor <= p_limit)
1934                     {
1935                       if (BM_tab[*cursor] == 0)
1936                         goto hit;
1937                       cursor += BM_tab[*cursor];
1938                     }
1939                 }
1940               else
1941                 {
1942                   while (cursor >= p_limit)
1943                     {
1944                       if (BM_tab[*cursor] == 0)
1945                         goto hit;
1946                       cursor += BM_tab[*cursor];
1947                     }
1948                 }
1949               /* If you are here, cursor is beyond the end of the
1950                  searched region.  You fail to match within the
1951                  permitted region and would otherwise try a character
1952                  beyond that region.  */
1953               break;
1954
1955             hit:
                   /* The far byte matched; now compare the remaining
                      pattern bytes, stepping CURSOR against DIRECTION.  */
1956               i = dirlen - direction;
1957               if (! NILP (trt))
1958                 {
1959                   while ((i -= direction) + direction != 0)
1960                     {
1961                       int ch;
1962                       cursor -= direction;
1963                       /* Translate only the last byte of a character.  */
1964                       if (! multibyte
1965                           || ((cursor == tail_end_ptr
1966                                || CHAR_HEAD_P (cursor[1]))
1967                               && (CHAR_HEAD_P (cursor[0])
1968                                   /* Check if this is the last byte of
1969                                      a translatable character.  */
1970                                   || (translate_prev_byte1 == cursor[-1]
1971                                       && (CHAR_HEAD_P (translate_prev_byte1)
1972                                           || (translate_prev_byte2 == cursor[-2]
1973                                               && (CHAR_HEAD_P (translate_prev_byte2)
1974                                                   || (translate_prev_byte3 == cursor[-3]))))))))
1975                         ch = simple_translate[*cursor];
1976                       else
1977                         ch = *cursor;
1978                       if (pat[i] != ch)
1979                         break;
1980                     }
1981                 }
1982               else
1983                 {
1984                   while ((i -= direction) + direction != 0)
1985                     {
1986                       cursor -= direction;
1987                       if (pat[i] != *cursor)
1988                         break;
1989                     }
1990                 }
1991               cursor += dirlen - i - direction; /* fix cursor */
1992               if (i + direction == 0)
1993                 {
1994                   ptrdiff_t position, start, end;
1995
1996                   cursor -= direction;
1997
1998                   position = pos_byte + cursor - p2 + ((direction > 0)
1999                                                        ? 1 - len_byte : 0);
2000                   set_search_regs (position, len_byte);
2001
2002                   if (NILP (Vinhibit_changing_match_data))
2003                     {
2004                       start = search_regs.start[0];
2005                       end = search_regs.end[0];
2006                     }
2007                   else
2008                     /* If Vinhibit_changing_match_data is non-nil,
2009                        search_regs will not be changed.  So let's
2010                        compute start and end here.  */
2011                     {
2012                       start = BYTE_TO_CHAR (position);
2013                       end = BYTE_TO_CHAR (position + len_byte);
2014                     }
2015
2016                   if ((n -= direction) != 0)
2017                     cursor += dirlen; /* to resume search */
2018                   else
2019                     return direction > 0 ? end : start;
2020                 }
2021               else
2022                 cursor += stride_for_teases; /* <sigh> we lose -  */
2023             }
2024           pos_byte += cursor - p2;
2025         }
2026       else
2027         /* Now we'll pick up a clump that has to be done the hard
2028            way because it covers a discontinuity.  */
2029         {
2030           limit = ((direction > 0)
2031                    ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
2032                    : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
2033           limit = ((direction > 0)
2034                    ? min (limit + len_byte, lim_byte - 1)
2035                    : max (limit - len_byte, lim_byte));
2036           /* LIMIT is now the last value POS_BYTE can have
2037              and still be valid for a possible match.  */
2038           while (1)
2039             {
2040               /* This loop can be coded for space rather than
2041                  speed because it will usually run only once.
2042                  (the reach is at most len + 21, and typically
2043                  does not exceed len).  */
2044               while ((limit - pos_byte) * direction >= 0)
2045                 {
2046                   int ch = FETCH_BYTE (pos_byte);
2047                   if (BM_tab[ch] == 0)
2048                     goto hit2;
2049                   pos_byte += BM_tab[ch];
2050                 }
2051               break;    /* ran off the end */
2052
2053             hit2:
2054               /* Found what might be a match.  */
2055               i = dirlen - direction;
2056               while ((i -= direction) + direction != 0)
2057                 {
2058                   int ch;
2059                   unsigned char *ptr;
2060                   pos_byte -= direction;
2061                   ptr = BYTE_POS_ADDR (pos_byte);
2062                   /* Translate only the last byte of a character.  */
2063                   if (! multibyte
2064                       || ((ptr == tail_end_ptr
2065                            || CHAR_HEAD_P (ptr[1]))
2066                           && (CHAR_HEAD_P (ptr[0])
2067                               /* Check if this is the last byte of a
2068                                  translatable character.  */
2069                               || (translate_prev_byte1 == ptr[-1]
2070                                   && (CHAR_HEAD_P (translate_prev_byte1)
2071                                       || (translate_prev_byte2 == ptr[-2]
2072                                           && (CHAR_HEAD_P (translate_prev_byte2)
2073                                               || translate_prev_byte3 == ptr[-3])))))))
2074                     ch = simple_translate[*ptr];
2075                   else
2076                     ch = *ptr;
2077                   if (pat[i] != ch)
2078                     break;
2079                 }
2080               /* Above loop has moved POS_BYTE part or all the way
2081                  back to the first pos (last pos if reverse).
2082                  Set it once again at the last (first if reverse) char.  */
2083               pos_byte += dirlen - i - direction;
2084               if (i + direction == 0)
2085                 {
2086                   ptrdiff_t position, start, end;
2087                   pos_byte -= direction;
2088
2089                   position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
2090                   set_search_regs (position, len_byte);
2091
2092                   if (NILP (Vinhibit_changing_match_data))
2093                     {
2094                       start = search_regs.start[0];
2095                       end = search_regs.end[0];
2096                     }
2097                   else
2098                     /* If Vinhibit_changing_match_data is non-nil,
2099                        search_regs will not be changed.  So let's
2100                        compute start and end here.  */
2101                     {
2102                       start = BYTE_TO_CHAR (position);
2103                       end = BYTE_TO_CHAR (position + len_byte);
2104                     }
2105
2106                   if ((n -= direction) != 0)
2107                     pos_byte += dirlen; /* to resume search */
2108                   else
2109                     return direction > 0 ? end : start;
2110                 }
2111               else
2112                 pos_byte += stride_for_teases;
2113             }
2114           }
2115       /* We have done one clump.  Can we continue? */
2116       if ((lim_byte - pos_byte) * direction < 0)
2117         return ((0 - n) * direction);
2118     }
       /* Reached only if N was 0 on entry: inside the loop N changes
          only where a match is counted, and reaching 0 there returns
          directly.  */
2119   return BYTE_TO_CHAR (pos_byte);
2120 }
2121
2122 /* Store in search_regs the overall match that begins at byte
2123    position BEG_BYTE of the current buffer and spans NBYTES bytes,
2124    and reset every register from 1 upward.  */
2125
2126 static void
2127 set_search_regs (ptrdiff_t beg_byte, ptrdiff_t nbytes)
2128 {
2129   /* When match data is frozen, do nothing at all.  */
2130   if (NILP (Vinhibit_changing_match_data))
2131     {
2132       ptrdiff_t reg = 1;
2133
2134       /* No register storage yet: make room for two registers.  */
2135       if (search_regs.num_regs == 0)
2136         {
2137           search_regs.start = xmalloc (2 * sizeof (regoff_t));
2138           search_regs.end = xmalloc (2 * sizeof (regoff_t));
2139           search_regs.num_regs = 2;
2140         }
2141
2142       /* Every register other than 0 is reset to -1.  */
2143       for (; reg < search_regs.num_regs; reg++)
2144         {
2145           search_regs.start[reg] = -1;
2146           search_regs.end[reg] = -1;
2147         }
2148
2149       search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
2150       search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
2151       XSETBUFFER (last_thing_searched, current_buffer);
2152     }
2153 }
2154 \f
2155 DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
2156        "MSearch backward: ",
2157        doc: /* Search backward from point for STRING.
2158 Set point to the beginning of the occurrence found, and return point.
2159 An optional second argument bounds the search; it is a buffer position.
2160   The match found must not begin before that position.  A value of nil
2161   means search to the beginning of the accessible portion of the buffer.
2162 Optional third argument, if t, means if fail just return nil (no error).
2163   If not nil and not t, position at limit of search and return nil.
2164 Optional fourth argument COUNT, if a positive number, means to search
2165   for COUNT successive occurrences.  If COUNT is negative, search
2166   forward, instead of backward, for -COUNT occurrences.  A value of
2167   nil means the same as 1.
2168 With COUNT positive, the match found is the COUNTth to last one (or
2169   last, if COUNT is 1 or nil) in the buffer located entirely before
2170   the origin of the search; correspondingly with COUNT negative.
2171
2172 Search case-sensitivity is determined by the value of the variable
2173 `case-fold-search', which see.
2174
2175 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2176   (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2177 {
       /* search_command does all the work.  Judging from the sibling
          search commands, the three trailing arguments select the
          direction (-1: backward), regexp matching (0: STRING is taken
          literally) and POSIX matching (0: off) -- confirm against
          search_command's definition.  */
2178   return search_command (string, bound, noerror, count, -1, 0, 0);
2179 }
2180
2181 DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
2182        doc: /* Search forward from point for STRING.
2183 Set point to the end of the occurrence found, and return point.
2184 An optional second argument bounds the search; it is a buffer position.
2185   The match found must not end after that position.  A value of nil
2186   means search to the end of the accessible portion of the buffer.
2187 Optional third argument, if t, means if fail just return nil (no error).
2188   If not nil and not t, move to limit of search and return nil.
2189 Optional fourth argument COUNT, if a positive number, means to search
2190   for COUNT successive occurrences.  If COUNT is negative, search
2191   backward, instead of forward, for -COUNT occurrences.  A value of
2192   nil means the same as 1.
2193 With COUNT positive, the match found is the COUNTth one (or first,
2194   if COUNT is 1 or nil) in the buffer located entirely after the
2195   origin of the search; correspondingly with COUNT negative.
2196
2197 Search case-sensitivity is determined by the value of the variable
2198 `case-fold-search', which see.
2199
2200 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2201   (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2202 {
       /* search_command does all the work.  Judging from the sibling
          search commands, the three trailing arguments select the
          direction (1: forward), regexp matching (0: STRING is taken
          literally) and POSIX matching (0: off) -- confirm against
          search_command's definition.  */
2203   return search_command (string, bound, noerror, count, 1, 0, 0);
2204 }
2205
2206 DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
2207        "sRE search backward: ",
2208        doc: /* Search backward from point for match for regular expression REGEXP.
2209 Set point to the beginning of the occurrence found, and return point.
2210 An optional second argument bounds the search; it is a buffer position.
2211   The match found must not begin before that position.  A value of nil
2212   means search to the beginning of the accessible portion of the buffer.
2213 Optional third argument, if t, means if fail just return nil (no error).
2214   If not nil and not t, position at limit of search and return nil.
2215 Optional fourth argument COUNT, if a positive number, means to search
2216   for COUNT successive occurrences.  If COUNT is negative, search
2217   forward, instead of backward, for -COUNT occurrences.  A value of
2218   nil means the same as 1.
2219 With COUNT positive, the match found is the COUNTth to last one (or
2220   last, if COUNT is 1 or nil) in the buffer located entirely before
2221   the origin of the search; correspondingly with COUNT negative.
2222
2223 Search case-sensitivity is determined by the value of the variable
2224 `case-fold-search', which see.
2225
2226 See also the functions `match-beginning', `match-end', `match-string',
2227 and `replace-match'.  */)
2228   (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2229 {
       /* search_command does all the work.  Judging from the sibling
          search commands, the three trailing arguments select the
          direction (-1: backward), regexp matching (1: on) and POSIX
          matching (0: off) -- confirm against search_command's
          definition.  */
2230   return search_command (regexp, bound, noerror, count, -1, 1, 0);
2231 }
2232
2233 DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
2234        "sRE search: ",
2235        doc: /* Search forward from point for regular expression REGEXP.
2236 Set point to the end of the occurrence found, and return point.
2237 An optional second argument bounds the search; it is a buffer position.
2238   The match found must not end after that position.  A value of nil
2239   means search to the end of the accessible portion of the buffer.
2240 Optional third argument, if t, means if fail just return nil (no error).
2241   If not nil and not t, move to limit of search and return nil.
2242 Optional fourth argument COUNT, if a positive number, means to search
2243   for COUNT successive occurrences.  If COUNT is negative, search
2244   backward, instead of forward, for -COUNT occurrences.  A value of
2245   nil means the same as 1.
2246 With COUNT positive, the match found is the COUNTth one (or first,
2247   if COUNT is 1 or nil) in the buffer located entirely after the
2248   origin of the search; correspondingly with COUNT negative.
2249
2250 Search case-sensitivity is determined by the value of the variable
2251 `case-fold-search', which see.
2252
2253 See also the functions `match-beginning', `match-end', `match-string',
2254 and `replace-match'.  */)
2255   (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2256 {
       /* search_command does all the work.  Judging from the sibling
          search commands, the three trailing arguments select the
          direction (1: forward), regexp matching (1: on) and POSIX
          matching (0: off) -- confirm against search_command's
          definition.  */
2257   return search_command (regexp, bound, noerror, count, 1, 1, 0);
2258 }
2259
2260 DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
2261        "sPosix search backward: ",
2262        doc: /* Search backward from point for match for regular expression REGEXP.
2263 Find the longest match in accord with Posix regular expression rules.
2264 Set point to the beginning of the occurrence found, and return point.
2265 An optional second argument bounds the search; it is a buffer position.
2266   The match found must not begin before that position.  A value of nil
2267   means search to the beginning of the accessible portion of the buffer.
2268 Optional third argument, if t, means if fail just return nil (no error).
2269   If not nil and not t, position at limit of search and return nil.
2270 Optional fourth argument COUNT, if a positive number, means to search
2271   for COUNT successive occurrences.  If COUNT is negative, search
2272   forward, instead of backward, for -COUNT occurrences.  A value of
2273   nil means the same as 1.
2274 With COUNT positive, the match found is the COUNTth to last one (or
2275   last, if COUNT is 1 or nil) in the buffer located entirely before
2276   the origin of the search; correspondingly with COUNT negative.
2277
2278 Search case-sensitivity is determined by the value of the variable
2279 `case-fold-search', which see.
2280
2281 See also the functions `match-beginning', `match-end', `match-string',
2282 and `replace-match'.  */)
2283   (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2284 {
       /* search_command does all the work.  Judging from the sibling
          search commands, the three trailing arguments select the
          direction (-1: backward), regexp matching (1: on) and POSIX
          matching (1: on) -- confirm against search_command's
          definition.  */
2285   return search_command (regexp, bound, noerror, count, -1, 1, 1);
2286 }
2287
2288 DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
2289        "sPosix search: ",
2290        doc: /* Search forward from point for regular expression REGEXP.
2291 Find the longest match in accord with Posix regular expression rules.
2292 Set point to the end of the occurrence found, and return point.
2293 An optional second argument bounds the search; it is a buffer position.
2294   The match found must not end after that position.  A value of nil
2295   means search to the end of the accessible portion of the buffer.
2296 Optional third argument, if t, means if fail just return nil (no error).
2297   If not nil and not t, move to limit of search and return nil.
2298 Optional fourth argument COUNT, if a positive number, means to search
2299   for COUNT successive occurrences.  If COUNT is negative, search
2300   backward, instead of forward, for -COUNT occurrences.  A value of
2301   nil means the same as 1.
2302 With COUNT positive, the match found is the COUNTth one (or first,
2303   if COUNT is 1 or nil) in the buffer located entirely after the
2304   origin of the search; correspondingly with COUNT negative.
2305
2306 Search case-sensitivity is determined by the value of the variable
2307 `case-fold-search', which see.
2308
2309 See also the functions `match-beginning', `match-end', `match-string',
2310 and `replace-match'.  */)
2311   (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2312 {
       /* search_command does all the work.  Judging from the sibling
          search commands, the three trailing arguments select the
          direction (1: forward), regexp matching (1: on) and POSIX
          matching (1: on) -- confirm against search_command's
          definition.  */
2313   return search_command (regexp, bound, noerror, count, 1, 1, 1);
2314 }
2315 \f
2316 DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
2317        doc: /* Replace text matched by last search with NEWTEXT.
2318 Leave point at the end of the replacement text.
2319
2320 If optional second arg FIXEDCASE is non-nil, do not alter the case of
2321 the replacement text.  Otherwise, maybe capitalize the whole text, or
2322 maybe just word initials, based on the replaced text.  If the replaced
2323 text has only capital letters and has at least one multiletter word,
2324 convert NEWTEXT to all caps.  Otherwise if all words are capitalized
2325 in the replaced text, capitalize each word in NEWTEXT.
2326
2327 If optional third arg LITERAL is non-nil, insert NEWTEXT literally.
2328 Otherwise treat `\\' as special:
2329   `\\&' in NEWTEXT means substitute original matched text.
2330   `\\N' means substitute what matched the Nth `\\(...\\)'.
2331        If Nth parens didn't match, substitute nothing.
2332   `\\\\' means insert one `\\'.
2333   `\\?' is treated literally
2334        (for compatibility with `query-replace-regexp').
2335   Any other character following `\\' signals an error.
2336 Case conversion does not apply to these substitutions.
2337
2338 If optional fourth argument STRING is non-nil, it should be a string
2339 to act on; this should be the string on which the previous match was
2340 done via `string-match'.  In this case, `replace-match' creates and
2341 returns a new string, made by copying STRING and replacing the part of
2342 STRING that was matched (the original STRING itself is not altered).
2343
2344 The optional fifth argument SUBEXP specifies a subexpression;
2345 it says to replace just that subexpression with NEWTEXT,
2346 rather than replacing the entire matched text.
2347 This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
2348 `\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
2349 NEWTEXT in place of subexp N.
2350 This is useful only after a regular expression search or match,
2351 since only regular expressions have distinguished subexpressions.  */)
2352   (Lisp_Object newtext, Lisp_Object fixedcase, Lisp_Object literal, Lisp_Object string, Lisp_Object subexp)
2353 {
2354   enum { nochange, all_caps, cap_initial } case_action;
2355   ptrdiff_t pos, pos_byte;
2356   bool some_multiletter_word;
2357   bool some_lowercase;
2358   bool some_uppercase;
2359   bool some_nonuppercase_initial;
2360   int c, prevc;
2361   ptrdiff_t sub;
2362   ptrdiff_t opoint, newpoint;
2363
2364   CHECK_STRING (newtext);
2365
2366   if (! NILP (string))
2367     CHECK_STRING (string);
2368
2369   case_action = nochange;       /* We tried an initialization */
2370                                 /* but some C compilers blew it */
2371
2372   if (search_regs.num_regs <= 0)
2373     error ("`replace-match' called before any match found");
2374
2375   if (NILP (subexp))
2376     sub = 0;
2377   else
2378     {
2379       CHECK_NUMBER (subexp);
2380       if (! (0 <= XINT (subexp) && XINT (subexp) < search_regs.num_regs))
2381         args_out_of_range (subexp, make_number (search_regs.num_regs));
2382       sub = XINT (subexp);
2383     }
2384
2385   if (NILP (string))
2386     {
2387       if (search_regs.start[sub] < BEGV
2388           || search_regs.start[sub] > search_regs.end[sub]
2389           || search_regs.end[sub] > ZV)
2390         args_out_of_range (make_number (search_regs.start[sub]),
2391                            make_number (search_regs.end[sub]));
2392     }
2393   else
2394     {
2395       if (search_regs.start[sub] < 0
2396           || search_regs.start[sub] > search_regs.end[sub]
2397           || search_regs.end[sub] > SCHARS (string))
2398         args_out_of_range (make_number (search_regs.start[sub]),
2399                            make_number (search_regs.end[sub]));
2400     }
2401
2402   if (NILP (fixedcase))
2403     {
2404       /* Decide how to casify by examining the matched text. */
2405       ptrdiff_t last;
2406
2407       pos = search_regs.start[sub];
2408       last = search_regs.end[sub];
2409
2410       if (NILP (string))
2411         pos_byte = CHAR_TO_BYTE (pos);
2412       else
2413         pos_byte = string_char_to_byte (string, pos);
2414
2415       prevc = '\n';
2416       case_action = all_caps;
2417
2418       /* some_multiletter_word is set nonzero if any original word
2419          is more than one letter long. */
2420       some_multiletter_word = 0;
2421       some_lowercase = 0;
2422       some_nonuppercase_initial = 0;
2423       some_uppercase = 0;
2424
2425       while (pos < last)
2426         {
2427           if (NILP (string))
2428             {
2429               c = FETCH_CHAR_AS_MULTIBYTE (pos_byte);
2430               INC_BOTH (pos, pos_byte);
2431             }
2432           else
2433             FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, pos, pos_byte);
2434
2435           if (lowercasep (c))
2436             {
2437               /* Cannot be all caps if any original char is lower case */
2438
2439               some_lowercase = 1;
2440               if (SYNTAX (prevc) != Sword)
2441                 some_nonuppercase_initial = 1;
2442               else
2443                 some_multiletter_word = 1;
2444             }
2445           else if (uppercasep (c))
2446             {
2447               some_uppercase = 1;
2448               if (SYNTAX (prevc) != Sword)
2449                 ;
2450               else
2451                 some_multiletter_word = 1;
2452             }
2453           else
2454             {
2455               /* If the initial is a caseless word constituent,
2456                  treat that like a lowercase initial.  */
2457               if (SYNTAX (prevc) != Sword)
2458                 some_nonuppercase_initial = 1;
2459             }
2460
2461           prevc = c;
2462         }
2463
2464       /* Convert to all caps if the old text is all caps
2465          and has at least one multiletter word.  */
2466       if (! some_lowercase && some_multiletter_word)
2467         case_action = all_caps;
2468       /* Capitalize each word, if the old text has all capitalized words.  */
2469       else if (!some_nonuppercase_initial && some_multiletter_word)
2470         case_action = cap_initial;
2471       else if (!some_nonuppercase_initial && some_uppercase)
2472         /* Should x -> yz, operating on X, give Yz or YZ?
2473            We'll assume the latter.  */
2474         case_action = all_caps;
2475       else
2476         case_action = nochange;
2477     }
2478
2479   /* Do replacement in a string.  */
2480   if (!NILP (string))
2481     {
2482       Lisp_Object before, after;
2483
2484       before = Fsubstring (string, make_number (0),
2485                            make_number (search_regs.start[sub]));
2486       after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
2487
2488       /* Substitute parts of the match into NEWTEXT
2489          if desired.  */
2490       if (NILP (literal))
2491         {
2492           ptrdiff_t lastpos = 0;
2493           ptrdiff_t lastpos_byte = 0;
2494           /* We build up the substituted string in ACCUM.  */
2495           Lisp_Object accum;
2496           Lisp_Object middle;
2497           ptrdiff_t length = SBYTES (newtext);
2498
2499           accum = Qnil;
2500
2501           for (pos_byte = 0, pos = 0; pos_byte < length;)
2502             {
2503               ptrdiff_t substart = -1;
2504               ptrdiff_t subend = 0;
2505               bool delbackslash = 0;
2506
2507               FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2508
2509               if (c == '\\')
2510                 {
2511                   FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2512
2513                   if (c == '&')
2514                     {
2515                       substart = search_regs.start[sub];
2516                       subend = search_regs.end[sub];
2517                     }
2518                   else if (c >= '1' && c <= '9')
2519                     {
2520                       if (c - '0' < search_regs.num_regs
2521                           && search_regs.start[c - '0'] >= 0)
2522                         {
2523                           substart = search_regs.start[c - '0'];
2524                           subend = search_regs.end[c - '0'];
2525                         }
2526                       else
2527                         {
2528                           /* If that subexp did not match,
2529                              replace \\N with nothing.  */
2530                           substart = 0;
2531                           subend = 0;
2532                         }
2533                     }
2534                   else if (c == '\\')
2535                     delbackslash = 1;
2536                   else if (c != '?')
2537                     error ("Invalid use of `\\' in replacement text");
2538                 }
2539               if (substart >= 0)
2540                 {
2541                   if (pos - 2 != lastpos)
2542                     middle = substring_both (newtext, lastpos,
2543                                              lastpos_byte,
2544                                              pos - 2, pos_byte - 2);
2545                   else
2546                     middle = Qnil;
2547                   accum = concat3 (accum, middle,
2548                                    Fsubstring (string,
2549                                                make_number (substart),
2550                                                make_number (subend)));
2551                   lastpos = pos;
2552                   lastpos_byte = pos_byte;
2553                 }
2554               else if (delbackslash)
2555                 {
2556                   middle = substring_both (newtext, lastpos,
2557                                            lastpos_byte,
2558                                            pos - 1, pos_byte - 1);
2559
2560                   accum = concat2 (accum, middle);
2561                   lastpos = pos;
2562                   lastpos_byte = pos_byte;
2563                 }
2564             }
2565
2566           if (pos != lastpos)
2567             middle = substring_both (newtext, lastpos,
2568                                      lastpos_byte,
2569                                      pos, pos_byte);
2570           else
2571             middle = Qnil;
2572
2573           newtext = concat2 (accum, middle);
2574         }
2575
2576       /* Do case substitution in NEWTEXT if desired.  */
2577       if (case_action == all_caps)
2578         newtext = Fupcase (newtext);
2579       else if (case_action == cap_initial)
2580         newtext = Fupcase_initials (newtext);
2581
2582       return concat3 (before, newtext, after);
2583     }
2584
2585   /* Record point, then move (quietly) to the start of the match.  */
2586   if (PT >= search_regs.end[sub])
2587     opoint = PT - ZV;
2588   else if (PT > search_regs.start[sub])
2589     opoint = search_regs.end[sub] - ZV;
2590   else
2591     opoint = PT;
2592
2593   /* If we want non-literal replacement,
2594      perform substitution on the replacement string.  */
2595   if (NILP (literal))
2596     {
2597       ptrdiff_t length = SBYTES (newtext);
2598       unsigned char *substed;
2599       ptrdiff_t substed_alloc_size, substed_len;
2600       bool buf_multibyte = !NILP (BVAR (current_buffer, enable_multibyte_characters));
2601       bool str_multibyte = STRING_MULTIBYTE (newtext);
2602       bool really_changed = 0;
2603
2604       substed_alloc_size = (length <= (STRING_BYTES_BOUND - 100) / 2
2605                             ? length * 2 + 100
2606                             : STRING_BYTES_BOUND);
2607       substed = xmalloc (substed_alloc_size);
2608       substed_len = 0;
2609
2610       /* Go thru NEWTEXT, producing the actual text to insert in
2611          SUBSTED while adjusting multibyteness to that of the current
2612          buffer.  */
2613
2614       for (pos_byte = 0, pos = 0; pos_byte < length;)
2615         {
2616           unsigned char str[MAX_MULTIBYTE_LENGTH];
2617           const unsigned char *add_stuff = NULL;
2618           ptrdiff_t add_len = 0;
2619           ptrdiff_t idx = -1;
2620
2621           if (str_multibyte)
2622             {
2623               FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte);
2624               if (!buf_multibyte)
2625                 c = CHAR_TO_BYTE8 (c);
2626             }
2627           else
2628             {
2629               /* Note that we don't have to increment POS.  */
2630               c = SREF (newtext, pos_byte++);
2631               if (buf_multibyte)
2632                 MAKE_CHAR_MULTIBYTE (c);
2633             }
2634
2635           /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2636              or set IDX to a match index, which means put that part
2637              of the buffer text into SUBSTED.  */
2638
2639           if (c == '\\')
2640             {
2641               really_changed = 1;
2642
2643               if (str_multibyte)
2644                 {
2645                   FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext,
2646                                                       pos, pos_byte);
2647                   if (!buf_multibyte && !ASCII_CHAR_P (c))
2648                     c = CHAR_TO_BYTE8 (c);
2649                 }
2650               else
2651                 {
2652                   c = SREF (newtext, pos_byte++);
2653                   if (buf_multibyte)
2654                     MAKE_CHAR_MULTIBYTE (c);
2655                 }
2656
2657               if (c == '&')
2658                 idx = sub;
2659               else if (c >= '1' && c <= '9' && c - '0' < search_regs.num_regs)
2660                 {
2661                   if (search_regs.start[c - '0'] >= 1)
2662                     idx = c - '0';
2663                 }
2664               else if (c == '\\')
2665                 add_len = 1, add_stuff = (unsigned char *) "\\";
2666               else
2667                 {
2668                   xfree (substed);
2669                   error ("Invalid use of `\\' in replacement text");
2670                 }
2671             }
2672           else
2673             {
2674               add_len = CHAR_STRING (c, str);
2675               add_stuff = str;
2676             }
2677
2678           /* If we want to copy part of a previous match,
2679              set up ADD_STUFF and ADD_LEN to point to it.  */
2680           if (idx >= 0)
2681             {
2682               ptrdiff_t begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
2683               add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
2684               if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
2685                 move_gap_both (search_regs.start[idx], begbyte);
2686               add_stuff = BYTE_POS_ADDR (begbyte);
2687             }
2688
2689           /* Now the stuff we want to add to SUBSTED
2690              is invariably ADD_LEN bytes starting at ADD_STUFF.  */
2691
2692           /* Make sure SUBSTED is big enough.  */
2693           if (substed_alloc_size - substed_len < add_len)
2694             substed =
2695               xpalloc (substed, &substed_alloc_size,
2696                        add_len - (substed_alloc_size - substed_len),
2697                        STRING_BYTES_BOUND, 1);
2698
2699           /* Now add to the end of SUBSTED.  */
2700           if (add_stuff)
2701             {
2702               memcpy (substed + substed_len, add_stuff, add_len);
2703               substed_len += add_len;
2704             }
2705         }
2706
2707       if (really_changed)
2708         newtext = make_specified_string ((const char *) substed, -1,
2709                                          substed_len, buf_multibyte);
2710       xfree (substed);
2711     }
2712
2713   /* The functions below modify the buffer, so they could trigger
2714      various modification hooks (see signal_before_change and
2715      signal_after_change).  If these hooks clobber the match data we
2716      error out since otherwise this will result in confusing bugs.  */
2717   ptrdiff_t sub_start = search_regs.start[sub];
2718   ptrdiff_t sub_end = search_regs.end[sub];
2719   unsigned  num_regs = search_regs.num_regs;
2720   newpoint = search_regs.start[sub] + SCHARS (newtext);
2721
2722   /* Replace the old text with the new in the cleanest possible way.  */
2723   replace_range (search_regs.start[sub], search_regs.end[sub],
2724                  newtext, 1, 0, 1, 1);
2725   /* Update saved data to match adjustment made by replace_range.  */
2726   {
2727     ptrdiff_t change = newpoint - sub_end;
2728     if (sub_start >= sub_end)
2729       sub_start += change;
2730     sub_end += change;
2731   }
2732
2733   if (case_action == all_caps)
2734     Fupcase_region (make_number (search_regs.start[sub]),
2735                     make_number (newpoint),
2736                     Qnil);
2737   else if (case_action == cap_initial)
2738     Fupcase_initials_region (make_number (search_regs.start[sub]),
2739                              make_number (newpoint));
2740
2741   if (search_regs.start[sub] != sub_start
2742       || search_regs.end[sub] != sub_end
2743       || search_regs.num_regs != num_regs)
2744     error ("Match data clobbered by buffer modification hooks");
2745
2746   /* Put point back where it was in the text.  */
2747   if (opoint <= 0)
2748     TEMP_SET_PT (opoint + ZV);
2749   else
2750     TEMP_SET_PT (opoint);
2751
2752   /* Now move point "officially" to the start of the inserted replacement.  */
2753   move_if_not_intangible (newpoint);
2754
2755   return Qnil;
2756 }
2757 \f
2758 static Lisp_Object
2759 match_limit (Lisp_Object num, bool beginningp)
2760 {
2761   EMACS_INT n;
2762
2763   CHECK_NUMBER (num);
2764   n = XINT (num);
2765   if (n < 0)
2766     args_out_of_range (num, make_number (0));
2767   if (search_regs.num_regs <= 0)
2768     error ("No match data, because no search succeeded");
2769   if (n >= search_regs.num_regs
2770       || search_regs.start[n] < 0)
2771     return Qnil;
2772   return (make_number ((beginningp) ? search_regs.start[n]
2773                                     : search_regs.end[n]));
2774 }
2775
/* Lisp primitive wrapping match_limit.  The second argument 1 makes
   match_limit report search_regs.start[SUBEXP] rather than
   search_regs.end[SUBEXP]; all validation of SUBEXP happens there.  */
2776 DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
2777        doc: /* Return position of start of text matched by last search.
2778 SUBEXP, a number, specifies which parenthesized expression in the last
2779   regexp.
2780 Value is nil if SUBEXPth pair didn't match, or there were less than
2781   SUBEXP pairs.
2782 Zero means the entire text matched by the whole regexp or whole string.
2783
2784 Return value is undefined if the last search failed.  */)
2785   (Lisp_Object subexp)
2786 {
2787   return match_limit (subexp, 1);
2788 }
2789
/* Lisp primitive wrapping match_limit.  The second argument 0 makes
   match_limit report search_regs.end[SUBEXP] rather than
   search_regs.start[SUBEXP]; all validation of SUBEXP happens there.  */
2790 DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
2791        doc: /* Return position of end of text matched by last search.
2792 SUBEXP, a number, specifies which parenthesized expression in the last
2793   regexp.
2794 Value is nil if SUBEXPth pair didn't match, or there were less than
2795   SUBEXP pairs.
2796 Zero means the entire text matched by the whole regexp or whole string.
2797
2798 Return value is undefined if the last search failed.  */)
2799   (Lisp_Object subexp)
2800 {
2801   return match_limit (subexp, 0);
2802 }
2803
/* Convert the registers in search_regs into the Lisp list described by
   the doc string below.  The values are first gathered in a scratch
   array DATA and then either consed into a fresh list or written into
   the conses of REUSE.  */
2804 DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 3, 0,
2805        doc: /* Return a list describing what the last search matched.
2806 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2807 All the elements are markers or nil (nil if the Nth pair didn't match)
2808 if the last match was on a buffer; integers or nil if a string was matched.
2809 Use `set-match-data' to reinstate the data in this list.
2810
2811 If INTEGERS (the optional first argument) is non-nil, always use
2812 integers (rather than markers) to represent buffer positions.  In
2813 this case, and if the last match was in a buffer, the buffer will get
2814 stored as one additional element at the end of the list.
2815
2816 If REUSE is a list, reuse it as part of the value.  If REUSE is long
2817 enough to hold all the values, and if INTEGERS is non-nil, no consing
2818 is done.
2819
2820 If optional third arg RESEAT is non-nil, any previous markers on the
2821 REUSE list will be modified to point to nowhere.
2822
2823 Return value is undefined if the last search failed.  */)
2824   (Lisp_Object integers, Lisp_Object reuse, Lisp_Object reseat)
2825 {
2826   Lisp_Object tail, prev;
2827   Lisp_Object *data;
2828   ptrdiff_t i, len;
2829
       /* With RESEAT, detach every marker found in REUSE and clear its
          slot.  This pass runs before the nil check below, so it takes
          effect even when nil is returned.  */
2830   if (!NILP (reseat))
2831     for (tail = reuse; CONSP (tail); tail = XCDR (tail))
2832       if (MARKERP (XCAR (tail)))
2833         {
2834           unchain_marker (XMARKER (XCAR (tail)));
2835           XSETCAR (tail, Qnil);
2836         }
2837
       /* A nil last_thing_searched yields nil; REUSE is not filled in.  */
2838   if (NILP (last_thing_searched))
2839     return Qnil;
2840
2841   prev = Qnil;
2842
       /* Two slots per register, plus one for the buffer element that
          may be appended after the loop.  */
2843   USE_SAFE_ALLOCA;
2844   SAFE_NALLOCA (data, 1, 2 * search_regs.num_regs + 1);
2845
       /* LEN ends up just past the last matched pair, so unmatched
          registers at the tail are left out of the result.  */
2846   len = 0;
2847   for (i = 0; i < search_regs.num_regs; i++)
2848     {
2849       ptrdiff_t start = search_regs.start[i];
2850       if (start >= 0)
2851         {
               /* Plain integers when last_thing_searched is t or when
                  INTEGERS was requested.  */
2852           if (EQ (last_thing_searched, Qt)
2853               || ! NILP (integers))
2854             {
2855               XSETFASTINT (data[2 * i], start);
2856               XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
2857             }
               /* Otherwise make a fresh pair of markers in the buffer
                  that was searched.  */
2858           else if (BUFFERP (last_thing_searched))
2859             {
2860               data[2 * i] = Fmake_marker ();
2861               Fset_marker (data[2 * i],
2862                            make_number (start),
2863                            last_thing_searched);
2864               data[2 * i + 1] = Fmake_marker ();
2865               Fset_marker (data[2 * i + 1],
2866                            make_number (search_regs.end[i]),
2867                            last_thing_searched);
2868             }
2869           else
2870             /* last_thing_searched must always be Qt, a buffer, or Qnil.  */
2871             emacs_abort ();
2872
2873           len = 2 * i + 2;
2874         }
2875       else
             /* Negative start: this register did not match.  */
2876         data[2 * i] = data[2 * i + 1] = Qnil;
2877     }
2878
       /* Integer positions from a buffer match: append the buffer itself
          so the list still identifies what was searched.  */
2879   if (BUFFERP (last_thing_searched) && !NILP (integers))
2880     {
2881       data[len] = last_thing_searched;
2882       len++;
2883     }
2884
2885   /* If REUSE is not usable, cons up the values and return them.  */
2886   if (! CONSP (reuse))
2887     reuse = Flist (len, data);
2888   else
2889     {
2890       /* If REUSE is a list, store as many value elements as will fit
2891          into the elements of REUSE.  */
2892       for (i = 0, tail = reuse; CONSP (tail);
2893            i++, tail = XCDR (tail))
2894         {
2895           if (i < len)
2896             XSETCAR (tail, data[i]);
2897           else
                 /* Surplus conses of REUSE are cleared, not removed.  */
2898             XSETCAR (tail, Qnil);
               /* Remember the last cons so a remainder can be appended.  */
2899           prev = tail;
2900         }
2901
2902       /* If we couldn't fit all value elements into REUSE,
2903          cons up the rest of them and add them to the end of REUSE.  */
2904       if (i < len)
2905         XSETCDR (prev, Flist (len - i, data + i));
2906     }
2907
2908   SAFE_FREE ();
2909   return reuse;
2910 }
2911
2912 /* We used to have an internal use variant of `reseat' described as:
2913
2914       If RESEAT is `evaporate', put the markers back on the free list
2915       immediately.  No other references to the markers must exist in this
2916       case, so it is used only internally on the unwind stack and
2917       save-match-data from Lisp.
2918
2919    But it was ill-conceived: those supposedly-internal markers get exposed via
2920    the undo-list, so freeing them here is unsafe.  */
2921
/* Install LIST as the current match data.  The register arrays in
   search_regs are grown when LIST describes more (start, end) pairs
   than they hold, and every register not set from LIST is marked
   unmatched by storing -1 as its start.  */
2922 DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 2, 0,
2923        doc: /* Set internal data on last search match from elements of LIST.
2924 LIST should have been created by calling `match-data' previously.
2925
2926 If optional arg RESEAT is non-nil, make markers on LIST point nowhere.  */)
2927   (register Lisp_Object list, Lisp_Object reseat)
2928 {
2929   ptrdiff_t i;
2930   register Lisp_Object marker;
2931
       /* When asynchronous code is running, park the existing match data
          (see save_search_regs) before overwriting it.  */
2932   if (running_asynch_code)
2933     save_search_regs ();
2934
2935   CHECK_LIST (list);
2936
2937   /* Unless we find a marker with a buffer or an explicit buffer
2938      in LIST, assume that this match data came from a string.  */
2939   last_thing_searched = Qt;
2940
2941   /* Allocate registers if they don't already exist.  */
2942   {
         /* Number of complete (start, end) pairs LIST can describe.  */
2943     EMACS_INT length = XFASTINT (Flength (list)) / 2;
2944
2945     if (length > search_regs.num_regs)
2946       {
2947         ptrdiff_t num_regs = search_regs.num_regs;
2948         if (PTRDIFF_MAX < length)
2949           memory_full (SIZE_MAX);
             /* NOTE(review): xpalloc is handed &num_regs and presumably
                raises it to the new capacity (possibly above LENGTH);
                the END array and the reset loop below are sized from
                that updated value.  */
2950         search_regs.start =
2951           xpalloc (search_regs.start, &num_regs, length - num_regs,
2952                    min (PTRDIFF_MAX, UINT_MAX), sizeof (regoff_t));
2953         search_regs.end =
2954           xrealloc (search_regs.end, num_regs * sizeof (regoff_t));
2955
             /* Newly added registers start out unmatched.  */
2956         for (i = search_regs.num_regs; i < num_regs; i++)
2957           search_regs.start[i] = -1;
2958
2959         search_regs.num_regs = num_regs;
2960       }
2961
         /* Each iteration consumes one pair of elements from LIST and
            fills register I.  */
2962     for (i = 0; CONSP (list); i++)
2963       {
2964         marker = XCAR (list);
             /* A buffer element names the buffer searched and ends the
                scan.  */
2965         if (BUFFERP (marker))
2966           {
2967             last_thing_searched = marker;
2968             break;
2969           }
2970         if (i >= length)
2971           break;
             /* Nil start: unmatched pair.  Step over the start here; the
                XCDR at the bottom of the loop steps over the end.  */
2972         if (NILP (marker))
2973           {
2974             search_regs.start[i] = -1;
2975             list = XCDR (list);
2976           }
2977         else
2978           {
2979             Lisp_Object from;
2980             Lisp_Object m;
2981
                 /* M keeps the original element so that it can still be
                    recognized as a marker after MARKER is rewritten.  */
2982             m = marker;
2983             if (MARKERP (marker))
2984               {
                     /* A marker pointing nowhere counts as position 0;
                        otherwise its buffer becomes the thing searched.  */
2985                 if (XMARKER (marker)->buffer == 0)
2986                   XSETFASTINT (marker, 0);
2987                 else
2988                   XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
2989               }
2990
                 /* NOTE(review): this macro presumably replaces a marker
                    with its integer position in place; MARKER is read
                    with XINT from here on.  */
2991             CHECK_NUMBER_COERCE_MARKER (marker);
2992             from = marker;
2993
2994             if (!NILP (reseat) && MARKERP (m))
2995               {
2996                 unchain_marker (XMARKER (m));
2997                 XSETCAR (list, Qnil);
2998               }
2999
                 /* A start without an end: stop.  I is not advanced, so
                    the final loop marks this register unmatched.  */
3000             if ((list = XCDR (list), !CONSP (list)))
3001               break;
3002
3003             m = marker = XCAR (list);
3004
3005             if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
3006               XSETFASTINT (marker, 0);
3007
3008             CHECK_NUMBER_COERCE_MARKER (marker);
                 /* Store the pair only if both values fit in regoff_t;
                    otherwise treat the register as unmatched.  */
3009             if ((XINT (from) < 0
3010                  ? TYPE_MINIMUM (regoff_t) <= XINT (from)
3011                  : XINT (from) <= TYPE_MAXIMUM (regoff_t))
3012                 && (XINT (marker) < 0
3013                     ? TYPE_MINIMUM (regoff_t) <= XINT (marker)
3014                     : XINT (marker) <= TYPE_MAXIMUM (regoff_t)))
3015               {
3016                 search_regs.start[i] = XINT (from);
3017                 search_regs.end[i] = XINT (marker);
3018               }
3019             else
3020               {
3021                 search_regs.start[i] = -1;
3022               }
3023
3024             if (!NILP (reseat) && MARKERP (m))
3025               {
3026                 unchain_marker (XMARKER (m));
3027                 XSETCAR (list, Qnil);
3028               }
3029           }
3030         list = XCDR (list);
3031       }
3032
         /* Invalidate every register that LIST did not set.  */
3033     for (; i < search_regs.num_regs; i++)
3034       search_regs.start[i] = -1;
3035   }
3036
3037   return Qnil;
3038 }
3039
3040 /* If true the match data have been saved in saved_search_regs
3041    during the execution of a sentinel or filter. */
     /* Set by save_search_regs and cleared by restore_search_regs.  */
3042 static bool search_regs_saved;
     /* Parked copy of the search_regs fields (num_regs, start, end);
        meaningful only while search_regs_saved is true.  */
3043 static struct re_registers saved_search_regs;
     /* Parked value of last_thing_searched; restore_search_regs resets
        it to Qnil after copying it back.  */
3044 static Lisp_Object saved_last_thing_searched;
3045
3046 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
3047    if asynchronous code (filter or sentinel) is running. */
3048 static void
3049 save_search_regs (void)
3050 {
3051   if (!search_regs_saved)
3052     {
3053       saved_search_regs.num_regs = search_regs.num_regs;
3054       saved_search_regs.start = search_regs.start;
3055       saved_search_regs.end = search_regs.end;
3056       saved_last_thing_searched = last_thing_searched;
3057       last_thing_searched = Qnil;
3058       search_regs.num_regs = 0;
3059       search_regs.start = 0;
3060       search_regs.end = 0;
3061
3062       search_regs_saved = 1;
3063     }
3064 }
3065
3066 /* Called upon exit from filters and sentinels. */
3067 void
3068 restore_search_regs (void)
3069 {
3070   if (search_regs_saved)
3071     {
3072       if (search_regs.num_regs > 0)
3073         {
3074           xfree (search_regs.start);
3075           xfree (search_regs.end);
3076         }
3077       search_regs.num_regs = saved_search_regs.num_regs;
3078       search_regs.start = saved_search_regs.start;
3079       search_regs.end = saved_search_regs.end;
3080       last_thing_searched = saved_last_thing_searched;
3081       saved_last_thing_searched = Qnil;
3082       search_regs_saved = 0;
3083     }
3084 }
3085
3086 /* Called from replace-match via replace_range.  */
3087 void
3088 update_search_regs (ptrdiff_t oldstart, ptrdiff_t oldend, ptrdiff_t newend)
3089 {
3090   /* Adjust search data for this change.  */
3091   ptrdiff_t change = newend - oldend;
3092   ptrdiff_t i;
3093
3094   for (i = 0; i < search_regs.num_regs; i++)
3095     {
3096       if (search_regs.start[i] >= oldend)
3097         search_regs.start[i] += change;
3098       else if (search_regs.start[i] > oldstart)
3099         search_regs.start[i] = oldstart;
3100       if (search_regs.end[i] >= oldend)
3101         search_regs.end[i] += change;
3102       else if (search_regs.end[i] > oldstart)
3103         search_regs.end[i] = oldstart;
3104     }
3105 }
3106
/* Unwind handler installed by record_unwind_save_match_data.  Restores
   the match data from LIST through Fset_match_data, passing t as
   RESEAT so that markers in LIST are detached once they have been
   read.  */
3107 static void
3108 unwind_set_match_data (Lisp_Object list)
3109 {
3110   /* It is NOT ALWAYS safe to free (evaporate) the markers immediately.  */
3111   Fset_match_data (list, Qt);
3112 }
3113
3114 /* Called to unwind protect the match data.  */
3115 void
3116 record_unwind_save_match_data (void)
3117 {
3118   record_unwind_protect (unwind_set_match_data,
3119                          Fmatch_data (Qnil, Qnil, Qnil));
3120 }
3121
3122 /* Quote a string to deactivate reg-expr chars */
3123
3124 DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
3125        doc: /* Return a regexp string which matches exactly STRING and nothing else.  */)
3126   (Lisp_Object string)
3127 {
3128   char *in, *out, *end;
3129   char *temp;
3130   ptrdiff_t backslashes_added = 0;
3131
3132   CHECK_STRING (string);
3133
3134   USE_SAFE_ALLOCA;
3135   SAFE_NALLOCA (temp, 2, SBYTES (string));
3136
3137   /* Now copy the data into the new string, inserting escapes. */
3138
3139   in = SSDATA (string);
3140   end = in + SBYTES (string);
3141   out = temp;
3142
3143   for (; in != end; in++)
3144     {
3145       if (*in == '['
3146           || *in == '*' || *in == '.' || *in == '\\'
3147           || *in == '?' || *in == '+'
3148           || *in == '^' || *in == '$')
3149         *out++ = '\\', backslashes_added++;
3150       *out++ = *in;
3151     }
3152
3153   Lisp_Object result
3154     = make_specified_string (temp,
3155                              SCHARS (string) + backslashes_added,
3156                              out - temp,
3157                              STRING_MULTIBYTE (string));
3158   SAFE_FREE ();
3159   return result;
3160 }
3161
3162 /* Like find_newline, but doesn't use the cache, and only searches forward.  */
     /* START/START_BYTE and END/END_BYTE bound the scan.  A zero END is
        replaced by ZV (or by BEGV when COUNT is not positive), and a
        byte position of -1 is computed from the character position.

        On finding the COUNTth newline, return the character position
        just after it and store the matching byte position in *BYTEPOS.
        Otherwise return START as advanced by the scan (END once the
        range is exhausted, the original START when COUNT is not
        positive), store the number of newlines still missing in
        *SHORTAGE and the byte position of the result in *BYTEPOS.
        SHORTAGE and BYTEPOS may be null.

        immediate_quit is set to ALLOW_QUIT for the duration of the scan
        and cleared before returning.  */
3163 static ptrdiff_t
3164 find_newline1 (ptrdiff_t start, ptrdiff_t start_byte, ptrdiff_t end,
3165                ptrdiff_t end_byte, ptrdiff_t count, ptrdiff_t *shortage,
3166                ptrdiff_t *bytepos, bool allow_quit)
3167 {
       /* Default a zero END according to the sign of COUNT.  */
3168   if (count > 0)
3169     {
3170       if (!end)
3171         end = ZV, end_byte = ZV_BYTE;
3172     }
3173   else
3174     {
3175       if (!end)
3176         end = BEGV, end_byte = BEGV_BYTE;
3177     }
3178   if (end_byte == -1)
3179     end_byte = CHAR_TO_BYTE (end);
3180
3181   if (shortage != 0)
3182     *shortage = 0;
3183
3184   immediate_quit = allow_quit;
3185
       /* Only a positive COUNT triggers a scan; otherwise control falls
          straight through to the epilogue.  */
3186   if (count > 0)
3187     while (start != end)
3188       {
3189         /* Our innermost scanning loop is very simple; it doesn't know
3190            about gaps, buffer ends, or the newline cache.  ceiling is
3191            the position of the last character before the next such
3192            obstacle --- the last character the dumb search loop should
3193            examine.  */
3194         ptrdiff_t tem, ceiling_byte = end_byte - 1;
3195
3196         if (start_byte == -1)
3197           start_byte = CHAR_TO_BYTE (start);
3198
3199         /* The dumb loop can only scan text stored in contiguous
3200            bytes. BUFFER_CEILING_OF returns the last character
3201            position that is contiguous, so the ceiling is the
3202            position after that.  */
3203         tem = BUFFER_CEILING_OF (start_byte);
3204         ceiling_byte = min (tem, ceiling_byte);
3205
3206         {
3207           /* The termination address of the dumb loop.  */
3208           unsigned char *lim_addr = BYTE_POS_ADDR (ceiling_byte) + 1;
3209           ptrdiff_t lim_byte = ceiling_byte + 1;
3210
3211           /* Nonpositive offsets (relative to LIM_ADDR and LIM_BYTE)
3212              of the base, the cursor, and the next line.  */
3213           ptrdiff_t base = start_byte - lim_byte;
3214           ptrdiff_t cursor, next;
3215
3216           for (cursor = base; cursor < 0; cursor = next)
3217             {
3218               /* The dumb loop.  */
                   /* -CURSOR is the number of bytes left in this chunk.  */
3219               unsigned char *nl = memchr (lim_addr + cursor, '\n', - cursor);
                   /* Offset of the newline found, or 0 if there is none.  */
3220               next = nl ? nl - lim_addr : 0;
3221
3222               if (! nl)
3223                 break;
                   /* Step past the newline.  */
3224               next++;
3225
3226               if (--count == 0)
3227                 {
3228                   immediate_quit = 0;
3229                   if (bytepos)
3230                     *bytepos = lim_byte + next;
3231                   return BYTE_TO_CHAR (lim_byte + next);
3232                 }
3233             }
3234
               /* Chunk exhausted: resume at the first byte after it.  */
3235           start_byte = lim_byte;
3236           start = BYTE_TO_CHAR (start_byte);
3237         }
3238       }
3239
       /* Fewer than COUNT newlines were found (or no scan was done).  */
3240   immediate_quit = 0;
3241   if (shortage)
3242     *shortage = count;
3243   if (bytepos)
3244     {
3245       *bytepos = start_byte == -1 ? CHAR_TO_BYTE (start) : start_byte;
3246       eassert (*bytepos == CHAR_TO_BYTE (start));
3247     }
3248   return start;
3249 }
3250
3251 DEFUN ("newline-cache-check", Fnewline_cache_check, Snewline_cache_check,
3252        0, 1, 0,
3253        doc: /* Check the newline cache of BUFFER against buffer contents.
3254
3255 BUFFER defaults to the current buffer.
3256
3257 Value is an array of 2 sub-arrays of buffer positions for newlines,
3258 the first based on the cache, the second based on actually scanning
3259 the buffer.  If the buffer doesn't have a cache, the value is nil.  */)
3260   (Lisp_Object buffer)
3261 {
       /* The newline positions between BEGV and ZV are collected twice,
          once through find_newline and once through find_newline1, and
          both vectors are returned so that the caller can compare them.

          OLD stays NULL unless BUFFER was given explicitly; when set, it
          is the buffer to switch back to before returning.  */
3262   struct buffer *buf, *old = NULL;
3263   ptrdiff_t shortage, nl_count_cache, nl_count_buf;
3264   Lisp_Object cache_newlines, buf_newlines, val;
3265   ptrdiff_t from, found, i;
3266
3267   if (NILP (buffer))
3268     buf = current_buffer;
3269   else
3270     {
3271       CHECK_BUFFER (buffer);
3272       buf = XBUFFER (buffer);
3273       old = current_buffer;
3274     }
       /* If BUF has a base buffer, examine that buffer instead.
          NOTE(review): when BUFFER is nil, OLD stays NULL, so no buffer
          switch happens below even if BUF was just redirected to its
          base buffer -- confirm that this is intended.  */
3275   if (buf->base_buffer)
3276     buf = buf->base_buffer;
3277
3278   /* If the buffer doesn't have a newline cache, return nil.  */
3279   if (NILP (BVAR (buf, cache_long_scans))
3280       || buf->newline_cache == NULL)
3281     return Qnil;
3282
3283   /* find_newline can only work on the current buffer.  */
3284   if (old != NULL)
3285     set_buffer_internal_1 (buf);
       /* NOTE(review): the scans below are called with their last
          argument true (presumably ALLOW_QUIT); if one of them exits
          non-locally while BUF is current, nothing here switches back
          to OLD -- confirm whether an unwind handler is needed.  */
3286
3287   /* How many newlines are there according to the cache?  */
3288   find_newline (BEGV, BEGV_BYTE, ZV, ZV_BYTE,
3289                 TYPE_MAXIMUM (ptrdiff_t), &shortage, NULL, true);
       /* The count requested above is the largest ptrdiff_t, so the
          number of newlines found is that maximum minus the reported
          shortage.  */
3290   nl_count_cache = TYPE_MAXIMUM (ptrdiff_t) - shortage;
3291
3292   /* Create vector and populate it.  */
3293   cache_newlines = make_uninit_vector (nl_count_cache);
       /* The vector comes from make_uninit_vector, so every slot has to
          be written; the two loops below do that.  */
3294
3295   if (nl_count_cache)
3296     {
3297       for (from = BEGV, found = from, i = 0; from < ZV; from = found, i++)
3298         {
3299           ptrdiff_t from_byte = CHAR_TO_BYTE (from);
3300
               /* Look for a single newline starting at FROM.  END is
                  passed as 0 with END_BYTE -1; presumably find_newline
                  then picks the scan limit itself -- confirm against
                  its definition.  */
3301           found = find_newline (from, from_byte, 0, -1, 1, &shortage,
3302                                 NULL, true);
               /* Stop when the scan came up short or all slots are
                  used.  */
3303           if (shortage != 0 || i >= nl_count_cache)
3304             break;
               /* FOUND - 1 is recorded; presumably FOUND is the position
                  just after the newline -- confirm in find_newline.  */
3305           ASET (cache_newlines, i, make_number (found - 1));
3306         }
3307       /* Fill the rest of slots with an invalid position.  */
3308       for ( ; i < nl_count_cache; i++)
3309         ASET (cache_newlines, i, make_number (-1));
3310     }
3311
3312   /* Now do the same, but without using the cache.  */
3313   find_newline1 (BEGV, BEGV_BYTE, ZV, ZV_BYTE,
3314                  TYPE_MAXIMUM (ptrdiff_t), &shortage, NULL, true);
3315   nl_count_buf = TYPE_MAXIMUM (ptrdiff_t) - shortage;
3316   buf_newlines = make_uninit_vector (nl_count_buf);
3317   if (nl_count_buf)
3318     {
3319       for (from = BEGV, found = from, i = 0; from < ZV; from = found, i++)
3320         {
3321           ptrdiff_t from_byte = CHAR_TO_BYTE (from);
3322
3323           found = find_newline1 (from, from_byte, 0, -1, 1, &shortage,
3324                                  NULL, true);
               /* Stop when the scan came up short or all slots are
                  used.  */
3325           if (shortage != 0 || i >= nl_count_buf)
3326             break;
               /* find_newline1 returns the position just after the
                  newline it found, so FOUND - 1 is the newline itself.  */
3327           ASET (buf_newlines, i, make_number (found - 1));
3328         }
           /* Fill the rest of slots with an invalid position.  */
3329       for ( ; i < nl_count_buf; i++)
3330         ASET (buf_newlines, i, make_number (-1));
3331     }
3332
3333   /* Construct the value and return it.  */
3334   val = make_uninit_vector (2);
3335   ASET (val, 0, cache_newlines);
3336   ASET (val, 1, buf_newlines);
3337
       /* Switch back to the buffer that was current on entry.  */
3338   if (old != NULL)
3339     set_buffer_internal_1 (old);
3340   return val;
3341 }
3342 \f
3343 void
3344 syms_of_search (void)
3345 {
       /* Initialize this file's static state: set up the regexp cache
          entries, define the error symbols and Lisp variables, and
          register the Lisp primitives with defsubr.  */
3346   register int i;
3347
       /* Give each cache entry a 100-byte pattern buffer from xmalloc,
          point the pattern's fastmap at the entry's own fastmap array,
          reset the Lisp_Object fields to nil and pass them to staticpro
          (presumably so the garbage collector keeps them alive), and
          link the entries in array order; the last entry's NEXT is
          null.  */
3348   for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
3349     {
3350       searchbufs[i].buf.allocated = 100;
3351       searchbufs[i].buf.buffer = xmalloc (100);
3352       searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
3353       searchbufs[i].regexp = Qnil;
3354       searchbufs[i].whitespace_regexp = Qnil;
3355       searchbufs[i].syntax_table = Qnil;
3356       staticpro (&searchbufs[i].regexp);
3357       staticpro (&searchbufs[i].whitespace_regexp);
3358       staticpro (&searchbufs[i].syntax_table);
3359       searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
3360     }
       /* The list initially starts at the first array element.  */
3361   searchbuf_head = &searchbufs[0];
3362
3363   /* Error condition used for failing searches.  */
3364   DEFSYM (Qsearch_failed, "search-failed");
3365
3366   /* Error condition signaled when regexp compile_pattern fails.  */
3367   DEFSYM (Qinvalid_regexp, "invalid-regexp");
3368
       /* Give each error symbol its condition list (the symbol itself
          followed by Qerror) and its message text.  */
3369   Fput (Qsearch_failed, Qerror_conditions,
3370         listn (CONSTYPE_PURE, 2, Qsearch_failed, Qerror));
3371   Fput (Qsearch_failed, Qerror_message,
3372         build_pure_c_string ("Search failed"));
3373
3374   Fput (Qinvalid_regexp, Qerror_conditions,
3375         listn (CONSTYPE_PURE, 2, Qinvalid_regexp, Qerror));
3376   Fput (Qinvalid_regexp, Qerror_message,
3377         build_pure_c_string ("Invalid regexp"));
3378
       /* These two Lisp_Object variables start out nil and are passed
          to staticpro.  */
3379   last_thing_searched = Qnil;
3380   staticpro (&last_thing_searched);
3381
3382   saved_last_thing_searched = Qnil;
3383   staticpro (&saved_last_thing_searched);
3384
       /* Lisp-visible variables; both default to nil.  */
3385   DEFVAR_LISP ("search-spaces-regexp", Vsearch_spaces_regexp,
3386       doc: /* Regexp to substitute for bunches of spaces in regexp search.
3387 Some commands use this for user-specified regexps.
3388 Spaces that occur inside character classes or repetition operators
3389 or other such regexp constructs are not replaced with this.
3390 A value of nil (which is the normal value) means treat spaces literally.  */);
3391   Vsearch_spaces_regexp = Qnil;
3392
3393   DEFSYM (Qinhibit_changing_match_data, "inhibit-changing-match-data");
3394   DEFVAR_LISP ("inhibit-changing-match-data", Vinhibit_changing_match_data,
3395       doc: /* Internal use only.
3396 If non-nil, the primitive searching and matching functions
3397 such as `looking-at', `string-match', `re-search-forward', etc.,
3398 do not set the match data.  The proper way to use this variable
3399 is to bind it with `let' around a small expression.  */);
3400   Vinhibit_changing_match_data = Qnil;
3401
       /* Register the Lisp primitives.  */
3402   defsubr (&Slooking_at);
3403   defsubr (&Sposix_looking_at);
3404   defsubr (&Sstring_match);
3405   defsubr (&Sposix_string_match);
3406   defsubr (&Ssearch_forward);
3407   defsubr (&Ssearch_backward);
3408   defsubr (&Sre_search_forward);
3409   defsubr (&Sre_search_backward);
3410   defsubr (&Sposix_search_forward);
3411   defsubr (&Sposix_search_backward);
3412   defsubr (&Sreplace_match);
3413   defsubr (&Smatch_beginning);
3414   defsubr (&Smatch_end);
3415   defsubr (&Smatch_data);
3416   defsubr (&Sset_match_data);
3417   defsubr (&Sregexp_quote);
3418   defsubr (&Snewline_cache_check);
3419 }