src/search.c

   1 /* String search routines for GNU Emacs.
   2    Copyright (C) 1985, 1986, 1987, 1993, 1994, 1997, 1998, 1999, 2001, 2002,
   3                  2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   4                  Free Software Foundation, Inc.
   5
   6 This file is part of GNU Emacs.
   7
   8 GNU Emacs is free software: you can redistribute it and/or modify
   9 it under the terms of the GNU General Public License as published by
  10 the Free Software Foundation, either version 3 of the License, or
  11 (at your option) any later version.
  12
  13 GNU Emacs is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
  20
  21
  22 #include <config.h>
  23 #include <setjmp.h>
  24 #include "lisp.h"
  25 #include "syntax.h"
  26 #include "category.h"
  27 #include "buffer.h"
  28 #include "character.h"
  29 #include "charset.h"
  30 #include "region-cache.h"
  31 #include "commands.h"
  32 #include "blockinput.h"
  33 #include "intervals.h"
  34
  35 #include <sys/types.h>
  36 #include "regex.h"
  37
  38 #define REGEXP_CACHE_SIZE 20
  39
  40 /* If the regexp is non-nil, then the buffer contains the compiled form
  41    of that regexp, suitable for searching.  */
  42 struct regexp_cache
  43 {
  44   struct regexp_cache *next;
  45   Lisp_Object regexp, whitespace_regexp;
  46   /* Syntax table for which the regexp applies.  We need this because
  47      of character classes.  If this is t, then the compiled pattern is valid
  48      for any syntax-table.  */
  49   Lisp_Object syntax_table;
  50   struct re_pattern_buffer buf;
  51   char fastmap[0400];
  52   /* Nonzero means regexp was compiled to do full POSIX backtracking.  */
  53   char posix;
  54 };
  55
/* The cache slots themselves: a fixed pool of REGEXP_CACHE_SIZE
   `struct regexp_cache' entries.  */
struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];

/* The head of the linked list threading through the slots; points to
   the most recently used one (compile_pattern moves the slot it
   returns to the front).  */
struct regexp_cache *searchbuf_head;
  61
  62
/* Every call to re_match, etc., must pass &search_regs as the regs
   argument unless you can show it is unnecessary (i.e., if re_match
   is certainly going to be called again before region-around-match
   can be called).

   Since the registers are now dynamically allocated, we need to make
   sure not to refer to the Nth register before checking that it has
   been allocated by checking search_regs.num_regs.

   The regex code keeps track of whether it has allocated the search
   buffer using bits in the re_pattern_buffer.  This means that whenever
   you compile a new pattern, it completely forgets whether it has
   allocated any registers, and will allocate new registers the next
   time you call a searching or matching function.  Therefore, we need
   to call re_set_registers after compiling a new pattern or after
   setting the match registers, so that the regex functions will be
   able to free or re-allocate it properly.  */
static struct re_registers search_regs;

/* The buffer in which the last search was performed, or
   Qt if the last search was done in a string;
   Qnil if no searching has been done yet.  */
static Lisp_Object last_thing_searched;

/* Error condition signaled when compiling a regexp fails
   (see compile_pattern_1).  */

Lisp_Object Qinvalid_regexp;

/* Error condition used for failing searches.  */
Lisp_Object Qsearch_failed;

/* When this is a string, compile_pattern_1 hands it to
   re_set_whitespace_regexp while the pattern is being compiled.
   Any other value is treated as "no whitespace regexp".  */
Lisp_Object Vsearch_spaces_regexp;

/* If non-nil, the match data will not be changed during call to
   searching or matching functions.  This variable is for internal use
   only.  */
Lisp_Object Vinhibit_changing_match_data;
 100
/* Forward declarations of file-local helpers.  Apart from
   matcher_overflow, their definitions are presumably further down in
   this file -- confirm there for the meaning of each argument.  */
static void set_search_regs (EMACS_INT, EMACS_INT);
static void save_search_regs (void);
static EMACS_INT simple_search (EMACS_INT, unsigned char *, EMACS_INT,
                                EMACS_INT, Lisp_Object, EMACS_INT, EMACS_INT,
                                EMACS_INT, EMACS_INT);
static EMACS_INT boyer_moore (EMACS_INT, unsigned char *, EMACS_INT, EMACS_INT,
                              Lisp_Object, Lisp_Object,
                              EMACS_INT, EMACS_INT,
                              EMACS_INT, EMACS_INT, int);
static EMACS_INT search_buffer (Lisp_Object, EMACS_INT, EMACS_INT,
                                EMACS_INT, EMACS_INT, EMACS_INT, int,
                                Lisp_Object, Lisp_Object, int);
/* Declared NO_RETURN: callers rely on control not coming back.  */
static void matcher_overflow (void) NO_RETURN;
 114
/* Report that the regexp matcher ran out of stack.  The callers in
   this file invoke it when re_match_2 or re_search returns -2.  It is
   declared NO_RETURN, so `error' is expected not to return here.  */
static void
matcher_overflow (void)
{
  error ("Stack overflow in regexp matcher");
}
 120
 121 /* Compile a regexp and signal a Lisp error if anything goes wrong.
 122    PATTERN is the pattern to compile.
 123    CP is the place to put the result.
 124    TRANSLATE is a translation table for ignoring case, or nil for none.
 125    REGP is the structure that says where to store the "register"
 126    values that will result from matching this pattern.
 127    If it is 0, we should compile the pattern not to record any
 128    subexpression bounds.
 129    POSIX is nonzero if we want full backtracking (POSIX style)
 130    for this pattern.  0 means backtrack only enough to get a valid match.
 131
 132    The behavior also depends on Vsearch_spaces_regexp.  */
 133
 134 static void
 135 compile_pattern_1 (struct regexp_cache *cp, Lisp_Object pattern, Lisp_Object translate, struct re_registers *regp, int posix)
 136 {
 137   char *val;
 138   reg_syntax_t old;
 139
 140   cp->regexp = Qnil;
 141   cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
 142   cp->posix = posix;
 143   cp->buf.multibyte = STRING_MULTIBYTE (pattern);
 144   cp->buf.charset_unibyte = charset_unibyte;
 145   if (STRINGP (Vsearch_spaces_regexp))
 146     cp->whitespace_regexp = Vsearch_spaces_regexp;
 147   else
 148     cp->whitespace_regexp = Qnil;
 149
 150   /* rms: I think BLOCK_INPUT is not needed here any more,
 151      because regex.c defines malloc to call xmalloc.
 152      Using BLOCK_INPUT here means the debugger won't run if an error occurs.
 153      So let's turn it off.  */
 154   /*  BLOCK_INPUT;  */
 155   old = re_set_syntax (RE_SYNTAX_EMACS
 156                        | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
 157
 158   if (STRINGP (Vsearch_spaces_regexp))
 159     re_set_whitespace_regexp (SDATA (Vsearch_spaces_regexp));
 160   else
 161     re_set_whitespace_regexp (NULL);
 162
 163   val = (char *) re_compile_pattern ((char *) SDATA (pattern),
 164                                      SBYTES (pattern), &cp->buf);
 165
 166   /* If the compiled pattern hard codes some of the contents of the
 167      syntax-table, it can only be reused with *this* syntax table.  */
 168   cp->syntax_table = cp->buf.used_syntax ? current_buffer->syntax_table : Qt;
 169
 170   re_set_whitespace_regexp (NULL);
 171
 172   re_set_syntax (old);
 173   /* UNBLOCK_INPUT;  */
 174   if (val)
 175     xsignal1 (Qinvalid_regexp, build_string (val));
 176
 177   cp->regexp = Fcopy_sequence (pattern);
 178 }
 179
 180 /* Shrink each compiled regexp buffer in the cache
 181    to the size actually used right now.
 182    This is called from garbage collection.  */
 183
 184 void
 185 shrink_regexp_cache (void)
 186 {
 187   struct regexp_cache *cp;
 188
 189   for (cp = searchbuf_head; cp != 0; cp = cp->next)
 190     {
 191       cp->buf.allocated = cp->buf.used;
 192       cp->buf.buffer
 193         = (unsigned char *) xrealloc (cp->buf.buffer, cp->buf.used);
 194     }
 195 }
 196
 197 /* Clear the regexp cache w.r.t. a particular syntax table,
 198    because it was changed.
 199    There is no danger of memory leak here because re_compile_pattern
 200    automagically manages the memory in each re_pattern_buffer struct,
 201    based on its `allocated' and `buffer' values.  */
 202 void
 203 clear_regexp_cache (void)
 204 {
 205   int i;
 206
 207   for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
 208     /* It's tempting to compare with the syntax-table we've actually changed,
 209        but it's not sufficient because char-table inheritance means that
 210        modifying one syntax-table can change others at the same time.  */
 211     if (!EQ (searchbufs[i].syntax_table, Qt))
 212       searchbufs[i].regexp = Qnil;
 213 }
 214
 215 /* Compile a regexp if necessary, but first check to see if there's one in
 216    the cache.
 217    PATTERN is the pattern to compile.
 218    TRANSLATE is a translation table for ignoring case, or nil for none.
 219    REGP is the structure that says where to store the "register"
 220    values that will result from matching this pattern.
 221    If it is 0, we should compile the pattern not to record any
 222    subexpression bounds.
 223    POSIX is nonzero if we want full backtracking (POSIX style)
 224    for this pattern.  0 means backtrack only enough to get a valid match.  */
 225
 226 struct re_pattern_buffer *
 227 compile_pattern (Lisp_Object pattern, struct re_registers *regp, Lisp_Object translate, int posix, int multibyte)
 228 {
 229   struct regexp_cache *cp, **cpp;
 230
 231   for (cpp = &searchbuf_head; ; cpp = &cp->next)
 232     {
 233       cp = *cpp;
 234       /* Entries are initialized to nil, and may be set to nil by
 235          compile_pattern_1 if the pattern isn't valid.  Don't apply
 236          string accessors in those cases.  However, compile_pattern_1
 237          is only applied to the cache entry we pick here to reuse.  So
 238          nil should never appear before a non-nil entry.  */
 239       if (NILP (cp->regexp))
 240         goto compile_it;
 241       if (SCHARS (cp->regexp) == SCHARS (pattern)
 242           && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
 243           && !NILP (Fstring_equal (cp->regexp, pattern))
 244           && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
 245           && cp->posix == posix
 246           && (EQ (cp->syntax_table, Qt)
 247               || EQ (cp->syntax_table, current_buffer->syntax_table))
 248           && !NILP (Fequal (cp->whitespace_regexp, Vsearch_spaces_regexp))
 249           && cp->buf.charset_unibyte == charset_unibyte)
 250         break;
 251
 252       /* If we're at the end of the cache, compile into the nil cell
 253          we found, or the last (least recently used) cell with a
 254          string value.  */
 255       if (cp->next == 0)
 256         {
 257         compile_it:
 258           compile_pattern_1 (cp, pattern, translate, regp, posix);
 259           break;
 260         }
 261     }
 262
 263   /* When we get here, cp (aka *cpp) contains the compiled pattern,
 264      either because we found it in the cache or because we just compiled it.
 265      Move it to the front of the queue to mark it as most recently used.  */
 266   *cpp = cp->next;
 267   cp->next = searchbuf_head;
 268   searchbuf_head = cp;
 269
 270   /* Advise the searching functions about the space we have allocated
 271      for register data.  */
 272   if (regp)
 273     re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
 274
 275   /* The compiled pattern can be used both for multibyte and unibyte
 276      target.  But, we have to tell which the pattern is used for. */
 277   cp->buf.target_multibyte = multibyte;
 278
 279   return &cp->buf;
 280 }
 281
 282 \f
 283 static Lisp_Object
 284 looking_at_1 (Lisp_Object string, int posix)
 285 {
 286   Lisp_Object val;
 287   unsigned char *p1, *p2;
 288   EMACS_INT s1, s2;
 289   register EMACS_INT i;
 290   struct re_pattern_buffer *bufp;
 291
 292   if (running_asynch_code)
 293     save_search_regs ();
 294
 295   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
 296   XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
 297     = current_buffer->case_eqv_table;
 298
 299   CHECK_STRING (string);
 300   bufp = compile_pattern (string,
 301                           (NILP (Vinhibit_changing_match_data)
 302                            ? &search_regs : NULL),
 303                           (!NILP (current_buffer->case_fold_search)
 304                            ? current_buffer->case_canon_table : Qnil),
 305                           posix,
 306                           !NILP (current_buffer->enable_multibyte_characters));
 307
 308   immediate_quit = 1;
 309   QUIT;                 /* Do a pending quit right away, to avoid paradoxical behavior */
 310
 311   /* Get pointers and sizes of the two strings
 312      that make up the visible portion of the buffer. */
 313
 314   p1 = BEGV_ADDR;
 315   s1 = GPT_BYTE - BEGV_BYTE;
 316   p2 = GAP_END_ADDR;
 317   s2 = ZV_BYTE - GPT_BYTE;
 318   if (s1 < 0)
 319     {
 320       p2 = p1;
 321       s2 = ZV_BYTE - BEGV_BYTE;
 322       s1 = 0;
 323     }
 324   if (s2 < 0)
 325     {
 326       s1 = ZV_BYTE - BEGV_BYTE;
 327       s2 = 0;
 328     }
 329
 330   re_match_object = Qnil;
 331
 332   i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
 333                   PT_BYTE - BEGV_BYTE,
 334                   (NILP (Vinhibit_changing_match_data)
 335                    ? &search_regs : NULL),
 336                   ZV_BYTE - BEGV_BYTE);
 337   immediate_quit = 0;
 338
 339   if (i == -2)
 340     matcher_overflow ();
 341
 342   val = (0 <= i ? Qt : Qnil);
 343   if (NILP (Vinhibit_changing_match_data) && i >= 0)
 344     for (i = 0; i < search_regs.num_regs; i++)
 345       if (search_regs.start[i] >= 0)
 346         {
 347           search_regs.start[i]
 348             = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
 349           search_regs.end[i]
 350             = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
 351         }
 352
 353   /* Set last_thing_searched only when match data is changed.  */
 354   if (NILP (Vinhibit_changing_match_data))
 355     XSETBUFFER (last_thing_searched, current_buffer);
 356
 357   return val;
 358 }
 359
/* Lisp primitive `looking-at': a thin wrapper around looking_at_1
   with POSIX backtracking turned off.  */
DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
       doc: /* Return t if text after point matches regular expression REGEXP.
This function modifies the match data that `match-beginning',
`match-end' and `match-data' access; save and restore the match
data if you want to preserve them.  */)
  (Lisp_Object regexp)
{
  /* 0 = backtrack only enough to get a valid match.  */
  return looking_at_1 (regexp, 0);
}
 369
/* Lisp primitive `posix-looking-at': a thin wrapper around
   looking_at_1 with POSIX backtracking turned on.  */
DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
       doc: /* Return t if text after point matches regular expression REGEXP.
Find the longest match, in accord with Posix regular expression rules.
This function modifies the match data that `match-beginning',
`match-end' and `match-data' access; save and restore the match
data if you want to preserve them.  */)
  (Lisp_Object regexp)
{
  /* 1 = full POSIX-style backtracking.  */
  return looking_at_1 (regexp, 1);
}
 380 \f
 381 static Lisp_Object
 382 string_match_1 (Lisp_Object regexp, Lisp_Object string, Lisp_Object start, int posix)
 383 {
 384   int val;
 385   struct re_pattern_buffer *bufp;
 386   EMACS_INT pos, pos_byte;
 387   int i;
 388
 389   if (running_asynch_code)
 390     save_search_regs ();
 391
 392   CHECK_STRING (regexp);
 393   CHECK_STRING (string);
 394
 395   if (NILP (start))
 396     pos = 0, pos_byte = 0;
 397   else
 398     {
 399       EMACS_INT len = SCHARS (string);
 400
 401       CHECK_NUMBER (start);
 402       pos = XINT (start);
 403       if (pos < 0 && -pos <= len)
 404         pos = len + pos;
 405       else if (0 > pos || pos > len)
 406         args_out_of_range (string, start);
 407       pos_byte = string_char_to_byte (string, pos);
 408     }
 409
 410   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
 411   XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
 412     = current_buffer->case_eqv_table;
 413
 414   bufp = compile_pattern (regexp,
 415                           (NILP (Vinhibit_changing_match_data)
 416                            ? &search_regs : NULL),
 417                           (!NILP (current_buffer->case_fold_search)
 418                            ? current_buffer->case_canon_table : Qnil),
 419                           posix,
 420                           STRING_MULTIBYTE (string));
 421   immediate_quit = 1;
 422   re_match_object = string;
 423
 424   val = re_search (bufp, (char *) SDATA (string),
 425                    SBYTES (string), pos_byte,
 426                    SBYTES (string) - pos_byte,
 427                    (NILP (Vinhibit_changing_match_data)
 428                     ? &search_regs : NULL));
 429   immediate_quit = 0;
 430
 431   /* Set last_thing_searched only when match data is changed.  */
 432   if (NILP (Vinhibit_changing_match_data))
 433     last_thing_searched = Qt;
 434
 435   if (val == -2)
 436     matcher_overflow ();
 437   if (val < 0) return Qnil;
 438
 439   if (NILP (Vinhibit_changing_match_data))
 440     for (i = 0; i < search_regs.num_regs; i++)
 441       if (search_regs.start[i] >= 0)
 442         {
 443           search_regs.start[i]
 444             = string_byte_to_char (string, search_regs.start[i]);
 445           search_regs.end[i]
 446             = string_byte_to_char (string, search_regs.end[i]);
 447         }
 448
 449   return make_number (string_byte_to_char (string, val));
 450 }
 451
/* Lisp primitive `string-match': a thin wrapper around
   string_match_1 with POSIX backtracking turned off.  */
DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
       doc: /* Return index of start of first match for REGEXP in STRING, or nil.
Matching ignores case if `case-fold-search' is non-nil.
If third arg START is non-nil, start search at that index in STRING.
For index of first char beyond the match, do (match-end 0).
`match-end' and `match-beginning' also give indices of substrings
matched by parenthesis constructs in the pattern.

You can use the function `match-string' to extract the substrings
matched by the parenthesis constructions in REGEXP. */)
  (Lisp_Object regexp, Lisp_Object string, Lisp_Object start)
{
  /* 0 = backtrack only enough to get a valid match.  */
  return string_match_1 (regexp, string, start, 0);
}
 466
/* Lisp primitive `posix-string-match': a thin wrapper around
   string_match_1 with POSIX backtracking turned on.  */
DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
       doc: /* Return index of start of first match for REGEXP in STRING, or nil.
Find the longest match, in accord with Posix regular expression rules.
Case is ignored if `case-fold-search' is non-nil in the current buffer.
If third arg START is non-nil, start search at that index in STRING.
For index of first char beyond the match, do (match-end 0).
`match-end' and `match-beginning' also give indices of substrings
matched by parenthesis constructs in the pattern.  */)
  (Lisp_Object regexp, Lisp_Object string, Lisp_Object start)
{
  /* 1 = full POSIX-style backtracking.  */
  return string_match_1 (regexp, string, start, 1);
}
 479
 480 /* Match REGEXP against STRING, searching all of STRING,
 481    and return the index of the match, or negative on failure.
 482    This does not clobber the match data.  */
 483
 484 int
 485 fast_string_match (Lisp_Object regexp, Lisp_Object string)
 486 {
 487   int val;
 488   struct re_pattern_buffer *bufp;
 489
 490   bufp = compile_pattern (regexp, 0, Qnil,
 491                           0, STRING_MULTIBYTE (string));
 492   immediate_quit = 1;
 493   re_match_object = string;
 494
 495   val = re_search (bufp, (char *) SDATA (string),
 496                    SBYTES (string), 0,
 497                    SBYTES (string), 0);
 498   immediate_quit = 0;
 499   return val;
 500 }
 501
 502 /* Match REGEXP against STRING, searching all of STRING ignoring case,
 503    and return the index of the match, or negative on failure.
 504    This does not clobber the match data.
 505    We assume that STRING contains single-byte characters.  */
 506
 507 int
 508 fast_c_string_match_ignore_case (Lisp_Object regexp, const char *string)
 509 {
 510   int val;
 511   struct re_pattern_buffer *bufp;
 512   size_t len = strlen (string);
 513
 514   regexp = string_make_unibyte (regexp);
 515   re_match_object = Qt;
 516   bufp = compile_pattern (regexp, 0,
 517                           Vascii_canon_table, 0,
 518                           0);
 519   immediate_quit = 1;
 520   val = re_search (bufp, string, len, 0, len, 0);
 521   immediate_quit = 0;
 522   return val;
 523 }
 524
 525 /* Like fast_string_match but ignore case.  */
 526
 527 int
 528 fast_string_match_ignore_case (Lisp_Object regexp, Lisp_Object string)
 529 {
 530   int val;
 531   struct re_pattern_buffer *bufp;
 532
 533   bufp = compile_pattern (regexp, 0, Vascii_canon_table,
 534                           0, STRING_MULTIBYTE (string));
 535   immediate_quit = 1;
 536   re_match_object = string;
 537
 538   val = re_search (bufp, (char *) SDATA (string),
 539                    SBYTES (string), 0,
 540                    SBYTES (string), 0);
 541   immediate_quit = 0;
 542   return val;
 543 }
 544 \f
 545 /* Match REGEXP against the characters after POS to LIMIT, and return
 546    the number of matched characters.  If STRING is non-nil, match
 547    against the characters in it.  In that case, POS and LIMIT are
 548    indices into the string.  This function doesn't modify the match
 549    data.  */
 550
 551 EMACS_INT
 552 fast_looking_at (Lisp_Object regexp, EMACS_INT pos, EMACS_INT pos_byte, EMACS_INT limit, EMACS_INT limit_byte, Lisp_Object string)
 553 {
 554   int multibyte;
 555   struct re_pattern_buffer *buf;
 556   unsigned char *p1, *p2;
 557   EMACS_INT s1, s2;
 558   EMACS_INT len;
 559
 560   if (STRINGP (string))
 561     {
 562       if (pos_byte < 0)
 563         pos_byte = string_char_to_byte (string, pos);
 564       if (limit_byte < 0)
 565         limit_byte = string_char_to_byte (string, limit);
 566       p1 = NULL;
 567       s1 = 0;
 568       p2 = SDATA (string);
 569       s2 = SBYTES (string);
 570       re_match_object = string;
 571       multibyte = STRING_MULTIBYTE (string);
 572     }
 573   else
 574     {
 575       if (pos_byte < 0)
 576         pos_byte = CHAR_TO_BYTE (pos);
 577       if (limit_byte < 0)
 578         limit_byte = CHAR_TO_BYTE (limit);
 579       pos_byte -= BEGV_BYTE;
 580       limit_byte -= BEGV_BYTE;
 581       p1 = BEGV_ADDR;
 582       s1 = GPT_BYTE - BEGV_BYTE;
 583       p2 = GAP_END_ADDR;
 584       s2 = ZV_BYTE - GPT_BYTE;
 585       if (s1 < 0)
 586         {
 587           p2 = p1;
 588           s2 = ZV_BYTE - BEGV_BYTE;
 589           s1 = 0;
 590         }
 591       if (s2 < 0)
 592         {
 593           s1 = ZV_BYTE - BEGV_BYTE;
 594           s2 = 0;
 595         }
 596       re_match_object = Qnil;
 597       multibyte = ! NILP (current_buffer->enable_multibyte_characters);
 598     }
 599
 600   buf = compile_pattern (regexp, 0, Qnil, 0, multibyte);
 601   immediate_quit = 1;
 602   len = re_match_2 (buf, (char *) p1, s1, (char *) p2, s2,
 603                     pos_byte, NULL, limit_byte);
 604   immediate_quit = 0;
 605
 606   return len;
 607 }
 608
 609 \f
 610 /* The newline cache: remembering which sections of text have no newlines.  */
 611
 612 /* If the user has requested newline caching, make sure it's on.
 613    Otherwise, make sure it's off.
 614    This is our cheezy way of associating an action with the change of
 615    state of a buffer-local variable.  */
 616 static void
 617 newline_cache_on_off (struct buffer *buf)
 618 {
 619   if (NILP (buf->cache_long_line_scans))
 620     {
 621       /* It should be off.  */
 622       if (buf->newline_cache)
 623         {
 624           free_region_cache (buf->newline_cache);
 625           buf->newline_cache = 0;
 626         }
 627     }
 628   else
 629     {
 630       /* It should be on.  */
 631       if (buf->newline_cache == 0)
 632         buf->newline_cache = new_region_cache ();
 633     }
 634 }
 635
 636 \f
/* Search for COUNT instances of the character TARGET between START and END.

   If COUNT is positive, search forwards; END must be >= START.
   If COUNT is negative, search backwards for the -COUNTth instance;
      END must be <= START.
   If COUNT is zero, do anything you please; run rogue, for all I care.
   (As written, a zero COUNT takes the backward branch.)

   If END is zero, use BEGV or ZV instead, as appropriate for the
   direction indicated by COUNT.

   If we find COUNT instances, set *SHORTAGE to zero, and return the
   position past the COUNTth match.  Note that for reverse motion
   this is not the same as the usual convention for Emacs motion commands.

   If we don't find COUNT instances before reaching END, set *SHORTAGE
   to the number of TARGETs left unfound, and return END.

   SHORTAGE may be a null pointer, in which case nothing is stored.

   If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
   except when inside redisplay.  immediate_quit is always zero again
   when this function returns.

   START and END are character positions; the scanning itself is done
   on byte positions.  When TARGET is a newline and the current buffer
   has a newline cache, the cache is consulted to skip regions known
   to contain no newline, and updated with what each scan learns.  */

EMACS_INT
scan_buffer (register int target, EMACS_INT start, EMACS_INT end,
             EMACS_INT count, int *shortage, int allow_quit)
{
  struct region_cache *newline_cache;
  int direction;

  /* Pick the direction, and the default END for that direction.  */
  if (count > 0)
    {
      direction = 1;
      if (! end) end = ZV;
    }
  else
    {
      direction = -1;
      if (! end) end = BEGV;
    }

  /* Create or discard the newline cache as the buffer's setting
     requires, then fetch it (null if caching is off).  */
  newline_cache_on_off (current_buffer);
  newline_cache = current_buffer->newline_cache;

  if (shortage != 0)
    *shortage = 0;

  immediate_quit = allow_quit;

  if (count > 0)
    while (start != end)
      {
        /* Our innermost scanning loop is very simple; it doesn't know
           about gaps, buffer ends, or the newline cache.  ceiling is
           the position of the last character before the next such
           obstacle --- the last character the dumb search loop should
           examine.  */
        EMACS_INT ceiling_byte = CHAR_TO_BYTE (end) - 1;
        EMACS_INT start_byte = CHAR_TO_BYTE (start);
        EMACS_INT tem;

        /* If we're looking for a newline, consult the newline cache
           to see where we can avoid some scanning.  */
        if (target == '\n' && newline_cache)
          {
            EMACS_INT next_change;
            /* immediate_quit is switched off around the cache lookups
               and restored right after.  */
            immediate_quit = 0;
            while (region_cache_forward
                   (current_buffer, newline_cache, start_byte, &next_change))
              start_byte = next_change;
            immediate_quit = allow_quit;

            /* START should never be after END.  */
            if (start_byte > ceiling_byte)
              start_byte = ceiling_byte;

            /* Now the text after start is an unknown region, and
               next_change is the position of the next known region. */
            ceiling_byte = min (next_change - 1, ceiling_byte);
          }

        /* The dumb loop can only scan text stored in contiguous
           bytes. BUFFER_CEILING_OF returns the last character
           position that is contiguous, so the ceiling is the
           position after that.  */
        tem = BUFFER_CEILING_OF (start_byte);
        ceiling_byte = min (tem, ceiling_byte);

        {
          /* The termination address of the dumb loop.  */
          register unsigned char *ceiling_addr
            = BYTE_POS_ADDR (ceiling_byte) + 1;
          register unsigned char *cursor
            = BYTE_POS_ADDR (start_byte);
          /* Address corresponding to start_byte; cursor - base is the
             number of bytes scanned so far in this stretch.  */
          unsigned char *base = cursor;

          while (cursor < ceiling_addr)
            {
              unsigned char *scan_start = cursor;

              /* The dumb loop.  */
              while (*cursor != target && ++cursor < ceiling_addr)
                ;

              /* If we're looking for newlines, cache the fact that
                 the region from start to cursor is free of them. */
              if (target == '\n' && newline_cache)
                know_region_cache (current_buffer, newline_cache,
                                   start_byte + scan_start - base,
                                   start_byte + cursor - base);

              /* Did we find the target character?  */
              if (cursor < ceiling_addr)
                {
                  if (--count == 0)
                    {
                      immediate_quit = 0;
                      /* The + 1 yields the position just past the
                         match.  */
                      return BYTE_TO_CHAR (start_byte + cursor - base + 1);
                    }
                  cursor++;
                }
            }

          /* This stretch is exhausted; resume the outer loop from
             where the cursor stopped.  */
          start = BYTE_TO_CHAR (start_byte + cursor - base);
        }
      }
  else
    while (start > end)
      {
        /* The last character to check before the next obstacle.  */
        EMACS_INT ceiling_byte = CHAR_TO_BYTE (end);
        EMACS_INT start_byte = CHAR_TO_BYTE (start);
        EMACS_INT tem;

        /* Consult the newline cache, if appropriate.  */
        if (target == '\n' && newline_cache)
          {
            EMACS_INT next_change;
            /* As in the forward case, immediate_quit is off during
               the cache lookups.  */
            immediate_quit = 0;
            while (region_cache_backward
                   (current_buffer, newline_cache, start_byte, &next_change))
              start_byte = next_change;
            immediate_quit = allow_quit;

            /* Start should never be at or before end.  */
            if (start_byte <= ceiling_byte)
              start_byte = ceiling_byte + 1;

            /* Now the text before start is an unknown region, and
               next_change is the position of the next known region. */
            ceiling_byte = max (next_change, ceiling_byte);
          }

        /* Stop scanning before the gap.  */
        tem = BUFFER_FLOOR_OF (start_byte - 1);
        ceiling_byte = max (tem, ceiling_byte);

        {
          /* The termination address of the dumb loop.  */
          register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
          /* The backward scan starts with the byte just before
             start_byte.  */
          register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
          /* Address of that first byte; start_byte + cursor - base is
             therefore the position just after the byte at cursor.  */
          unsigned char *base = cursor;

          while (cursor >= ceiling_addr)
            {
              unsigned char *scan_start = cursor;

              /* The dumb loop, running backwards.  */
              while (*cursor != target && --cursor >= ceiling_addr)
                ;

              /* If we're looking for newlines, cache the fact that
                 the region from after the cursor to start is free of them.  */
              if (target == '\n' && newline_cache)
                know_region_cache (current_buffer, newline_cache,
                                   start_byte + cursor - base,
                                   start_byte + scan_start - base);

              /* Did we find the target character?  */
              if (cursor >= ceiling_addr)
                {
                  if (++count >= 0)
                    {
                      immediate_quit = 0;
                      /* Again the position just past the match.  */
                      return BYTE_TO_CHAR (start_byte + cursor - base);
                    }
                  cursor--;
                }
            }

          /* This stretch is exhausted; resume the outer loop from
             where the cursor stopped.  */
          start = BYTE_TO_CHAR (start_byte + cursor - base);
        }
      }

  /* We reached END without finding enough instances.  */
  immediate_quit = 0;
  if (shortage != 0)
    /* count * direction is the number of instances still unfound,
       as a non-negative number.  */
    *shortage = count * direction;
  return start;
}
 832 \f
 833 /* Search for COUNT instances of a line boundary, which means either a
 834    newline or (if selective display enabled) a carriage return.
 835    Start at START.  If COUNT is negative, search backwards.
 836
 837    We report the resulting position by calling TEMP_SET_PT_BOTH.
 838
 839    If we find COUNT instances. we position after (always after,
 840    even if scanning backwards) the COUNTth match, and return 0.
 841
 842    If we don't find COUNT instances before reaching the end of the
 843    buffer (or the beginning, if scanning backwards), we return
 844    the number of line boundaries left unfound, and position at
 845    the limit we bumped up against.
 846
 847    If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
 848    except in special cases.  */
 849
 850 EMACS_INT
 851 scan_newline (EMACS_INT start, EMACS_INT start_byte,
 852               EMACS_INT limit, EMACS_INT limit_byte,
 853               register EMACS_INT count, int allow_quit)
 854 {
 855   int direction = ((count > 0) ? 1 : -1);
 856
 857   register unsigned char *cursor;
 858   unsigned char *base;
 859
 860   EMACS_INT ceiling;
 861   register unsigned char *ceiling_addr;
 862
 863   int old_immediate_quit = immediate_quit;
 864
 865   /* The code that follows is like scan_buffer
 866      but checks for either newline or carriage return.  */
 867
 868   if (allow_quit)
 869     immediate_quit++;
 870
 871   start_byte = CHAR_TO_BYTE (start);
 872
 873   if (count > 0)
 874     {
 875       while (start_byte < limit_byte)
 876         {
 877           ceiling =  BUFFER_CEILING_OF (start_byte);
 878           ceiling = min (limit_byte - 1, ceiling);
 879           ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
 880           base = (cursor = BYTE_POS_ADDR (start_byte));
 881           while (1)
 882             {
 883               while (*cursor != '\n' && ++cursor != ceiling_addr)
 884                 ;
 885
 886               if (cursor != ceiling_addr)
 887                 {
 888                   if (--count == 0)
 889                     {
 890                       immediate_quit = old_immediate_quit;
 891                       start_byte = start_byte + cursor - base + 1;
 892                       start = BYTE_TO_CHAR (start_byte);
 893                       TEMP_SET_PT_BOTH (start, start_byte);
 894                       return 0;
 895                     }
 896                   else
 897                     if (++cursor == ceiling_addr)
 898                       break;
 899                 }
 900               else
 901                 break;
 902             }
 903           start_byte += cursor - base;
 904         }
 905     }
 906   else
 907     {
 908       while (start_byte > limit_byte)
 909         {
 910           ceiling = BUFFER_FLOOR_OF (start_byte - 1);
 911           ceiling = max (limit_byte, ceiling);
 912           ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
 913           base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
 914           while (1)
 915             {
 916               while (--cursor != ceiling_addr && *cursor != '\n')
 917                 ;
 918
 919               if (cursor != ceiling_addr)
 920                 {
 921                   if (++count == 0)
 922                     {
 923                       immediate_quit = old_immediate_quit;
 924                       /* Return the position AFTER the match we found.  */
 925                       start_byte = start_byte + cursor - base + 1;
 926                       start = BYTE_TO_CHAR (start_byte);
 927                       TEMP_SET_PT_BOTH (start, start_byte);
 928                       return 0;
 929                     }
 930                 }
 931               else
 932                 break;
 933             }
 934           /* Here we add 1 to compensate for the last decrement
 935              of CURSOR, which took it past the valid range.  */
 936           start_byte += cursor - base + 1;
 937         }
 938     }
 939
 940   TEMP_SET_PT_BOTH (limit, limit_byte);
 941   immediate_quit = old_immediate_quit;
 942
 943   return count * direction;
 944 }
 945
 946 EMACS_INT
 947 find_next_newline_no_quit (EMACS_INT from, EMACS_INT cnt)
 948 {
 949   return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0);
 950 }
 951
 952 /* Like find_next_newline, but returns position before the newline,
 953    not after, and only search up to TO.  This isn't just
 954    find_next_newline (...)-1, because you might hit TO.  */
 955
 956 EMACS_INT
 957 find_before_next_newline (EMACS_INT from, EMACS_INT to, EMACS_INT cnt)
 958 {
 959   int shortage;
 960   EMACS_INT pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);
 961
 962   if (shortage == 0)
 963     pos--;
 964
 965   return pos;
 966 }
 967 \f
 968 /* Subroutines of Lisp buffer search functions. */
 969
 970 static Lisp_Object
 971 search_command (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror,
 972                 Lisp_Object count, int direction, int RE, int posix)
 973 {
 974   register int np;
 975   EMACS_INT lim, lim_byte;
 976   int n = direction;
 977
 978   if (!NILP (count))
 979     {
 980       CHECK_NUMBER (count);
 981       n *= XINT (count);
 982     }
 983
 984   CHECK_STRING (string);
 985   if (NILP (bound))
 986     {
 987       if (n > 0)
 988         lim = ZV, lim_byte = ZV_BYTE;
 989       else
 990         lim = BEGV, lim_byte = BEGV_BYTE;
 991     }
 992   else
 993     {
 994       CHECK_NUMBER_COERCE_MARKER (bound);
 995       lim = XINT (bound);
 996       if (n > 0 ? lim < PT : lim > PT)
 997         error ("Invalid search bound (wrong side of point)");
 998       if (lim > ZV)
 999         lim = ZV, lim_byte = ZV_BYTE;
1000       else if (lim < BEGV)
1001         lim = BEGV, lim_byte = BEGV_BYTE;
1002       else
1003         lim_byte = CHAR_TO_BYTE (lim);
1004     }
1005
1006   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
1007   XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
1008     = current_buffer->case_eqv_table;
1009
1010   np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
1011                       (!NILP (current_buffer->case_fold_search)
1012                        ? current_buffer->case_canon_table
1013                        : Qnil),
1014                       (!NILP (current_buffer->case_fold_search)
1015                        ? current_buffer->case_eqv_table
1016                        : Qnil),
1017                       posix);
1018   if (np <= 0)
1019     {
1020       if (NILP (noerror))
1021         xsignal1 (Qsearch_failed, string);
1022
1023       if (!EQ (noerror, Qt))
1024         {
1025           if (lim < BEGV || lim > ZV)
1026             abort ();
1027           SET_PT_BOTH (lim, lim_byte);
1028           return Qnil;
1029 #if 0 /* This would be clean, but maybe programs depend on
1030          a value of nil here.  */
1031           np = lim;
1032 #endif
1033         }
1034       else
1035         return Qnil;
1036     }
1037
1038   if (np < BEGV || np > ZV)
1039     abort ();
1040
1041   SET_PT (np);
1042
1043   return make_number (np);
1044 }
1045 \f
1046 /* Return 1 if REGEXP it matches just one constant string.  */
1047
1048 static int
1049 trivial_regexp_p (Lisp_Object regexp)
1050 {
1051   EMACS_INT len = SBYTES (regexp);
1052   unsigned char *s = SDATA (regexp);
1053   while (--len >= 0)
1054     {
1055       switch (*s++)
1056         {
1057         case '.': case '*': case '+': case '?': case '[': case '^': case '$':
1058           return 0;
1059         case '\\':
1060           if (--len < 0)
1061             return 0;
1062           switch (*s++)
1063             {
1064             case '|': case '(': case ')': case '`': case '\'': case 'b':
1065             case 'B': case '<': case '>': case 'w': case 'W': case 's':
1066             case 'S': case '=': case '{': case '}': case '_':
1067             case 'c': case 'C': /* for categoryspec and notcategoryspec */
1068             case '1': case '2': case '3': case '4': case '5':
1069             case '6': case '7': case '8': case '9':
1070               return 0;
1071             }
1072         }
1073     }
1074   return 1;
1075 }
1076
1077 /* Search for the n'th occurrence of STRING in the current buffer,
1078    starting at position POS and stopping at position LIM,
1079    treating STRING as a literal string if RE is false or as
1080    a regular expression if RE is true.
1081
1082    If N is positive, searching is forward and LIM must be greater than POS.
1083    If N is negative, searching is backward and LIM must be less than POS.
1084
1085    Returns -x if x occurrences remain to be found (x > 0),
1086    or else the position at the beginning of the Nth occurrence
1087    (if searching backward) or the end (if searching forward).
1088
1089    POSIX is nonzero if we want full backtracking (POSIX style)
1090    for this pattern.  0 means backtrack only enough to get a valid match.  */
1091
1092 #define TRANSLATE(out, trt, d)                  \
1093 do                                              \
1094   {                                             \
1095     if (! NILP (trt))                           \
1096       {                                         \
1097         Lisp_Object temp;                       \
1098         temp = Faref (trt, make_number (d));    \
1099         if (INTEGERP (temp))                    \
1100           out = XINT (temp);                    \
1101         else                                    \
1102           out = d;                              \
1103       }                                         \
1104     else                                        \
1105       out = d;                                  \
1106   }                                             \
1107 while (0)
1108
1109 /* Only used in search_buffer, to record the end position of the match
1110    when searching regexps and SEARCH_REGS should not be changed
1111    (i.e. Vinhibit_changing_match_data is non-nil).  */
1112 static struct re_registers search_regs_1;
1113
     /* Search the current buffer for STRING, N times, starting at
        POS/POS_BYTE and bounded by LIM/LIM_BYTE.

        If STRING is empty or N is zero, record an empty match at POS_BYTE
        and return POS.

        If RE is nonzero and STRING is not a trivial regexp (or
        Vsearch_spaces_regexp is non-nil), STRING is compiled with
        compile_pattern and matched with re_search_2.  Otherwise STRING is
        converted to the buffer's multibyteness, translated through TRT
        into a scratch pattern, and handed to boyer_moore or, when the
        translation rules out Boyer-Moore, to simple_search.

        TRT and INVERSE_TRT are the translation table and its inverse
        (nil for no translation).  POSIX is passed to compile_pattern.

        On the regexp path the return value is the new position on
        success, or N (backward) / -N (forward) for the count still
        unfound at the point of failure.  On the literal path it is
        whatever boyer_moore or simple_search returns.  */
1114 static EMACS_INT
1115 search_buffer (Lisp_Object string, EMACS_INT pos, EMACS_INT pos_byte,
1116                EMACS_INT lim, EMACS_INT lim_byte, EMACS_INT n,
1117                int RE, Lisp_Object trt, Lisp_Object inverse_trt, int posix)
1118 {
1119   EMACS_INT len = SCHARS (string);
1120   EMACS_INT len_byte = SBYTES (string);
1121   register int i;
1122
1123   if (running_asynch_code)
1124     save_search_regs ();
1125
1126   /* Searching 0 times means don't move.  */
1127   /* Null string is found at starting position.  */
1128   if (len == 0 || n == 0)
1129     {
1130       set_search_regs (pos_byte, 0);
1131       return pos;
1132     }
1133
       /* Use the regexp matcher unless the regexp is trivial AND no
          whitespace regexp is in effect; in that remaining case the
          pattern is handled by the literal-string code below.  */
1134   if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp)))
1135     {
1136       unsigned char *p1, *p2;
1137       EMACS_INT s1, s2;
1138       struct re_pattern_buffer *bufp;
1139
           /* Registers go to search_regs normally, or to the private
              search_regs_1 when match data must not be changed.  */
1140       bufp = compile_pattern (string,
1141                               (NILP (Vinhibit_changing_match_data)
1142                                ? &search_regs : &search_regs_1),
1143                               trt, posix,
1144                               !NILP (current_buffer->enable_multibyte_characters));
1145
1146       immediate_quit = 1;       /* Quit immediately if user types ^G,
1147                                    because letting this function finish
1148                                    can take too long. */
1149       QUIT;                     /* Do a pending quit right away,
1150                                    to avoid paradoxical behavior */
1151       /* Get pointers and sizes of the two strings
1152          that make up the visible portion of the buffer. */
1153
1154       p1 = BEGV_ADDR;
1155       s1 = GPT_BYTE - BEGV_BYTE;
1156       p2 = GAP_END_ADDR;
1157       s2 = ZV_BYTE - GPT_BYTE;
           /* GPT_BYTE is below BEGV_BYTE: the whole visible text is
              passed as the second string, and the first is empty.  */
1158       if (s1 < 0)
1159         {
1160           p2 = p1;
1161           s2 = ZV_BYTE - BEGV_BYTE;
1162           s1 = 0;
1163         }
           /* GPT_BYTE is above ZV_BYTE: the whole visible text is
              passed as the first string, and the second is empty.  */
1164       if (s2 < 0)
1165         {
1166           s1 = ZV_BYTE - BEGV_BYTE;
1167           s2 = 0;
1168         }
1169       re_match_object = Qnil;
1170
           /* Backward search: one re_search_2 call per occurrence.  The
              range argument is lim_byte - pos_byte.  All offsets given
              to re_search_2 are relative to BEGV_BYTE.  */
1171       while (n < 0)
1172         {
1173           EMACS_INT val;
1174           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1175                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1176                              (NILP (Vinhibit_changing_match_data)
1177                               ? &search_regs : &search_regs_1),
1178                              /* Don't allow match past current point */
1179                              pos_byte - BEGV_BYTE);
1180           if (val == -2)
1181             {
1182               matcher_overflow ();
1183             }
1184           if (val >= 0)
1185             {
1186               if (NILP (Vinhibit_changing_match_data))
1187                 {
                       /* Take the byte position before the registers are
                          rewritten below as character positions.  */
1188                   pos_byte = search_regs.start[0] + BEGV_BYTE;
1189                   for (i = 0; i < search_regs.num_regs; i++)
1190                     if (search_regs.start[i] >= 0)
1191                       {
1192                         search_regs.start[i]
1193                           = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1194                         search_regs.end[i]
1195                           = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1196                       }
1197                   XSETBUFFER (last_thing_searched, current_buffer);
1198                   /* Set pos to the new position. */
1199                   pos = search_regs.start[0];
1200                 }
1201               else
1202                 {
1203                   pos_byte = search_regs_1.start[0] + BEGV_BYTE;
1204                   /* Set pos to the new position.  */
1205                   pos = BYTE_TO_CHAR (search_regs_1.start[0] + BEGV_BYTE);
1206                 }
1207             }
1208           else
1209             {
                   /* No match: N (still negative) is returned as is.  */
1210               immediate_quit = 0;
1211               return (n);
1212             }
1213           n++;
1214         }
           /* Forward search: same scheme, but the match may extend up to
              LIM_BYTE and the new position is the END of the match.  */
1215       while (n > 0)
1216         {
1217           EMACS_INT val;
1218           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1219                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1220                              (NILP (Vinhibit_changing_match_data)
1221                               ? &search_regs : &search_regs_1),
1222                              lim_byte - BEGV_BYTE);
1223           if (val == -2)
1224             {
1225               matcher_overflow ();
1226             }
1227           if (val >= 0)
1228             {
1229               if (NILP (Vinhibit_changing_match_data))
1230                 {
1231                   pos_byte = search_regs.end[0] + BEGV_BYTE;
1232                   for (i = 0; i < search_regs.num_regs; i++)
1233                     if (search_regs.start[i] >= 0)
1234                       {
1235                         search_regs.start[i]
1236                           = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1237                         search_regs.end[i]
1238                           = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1239                       }
1240                   XSETBUFFER (last_thing_searched, current_buffer);
1241                   pos = search_regs.end[0];
1242                 }
1243               else
1244                 {
1245                   pos_byte = search_regs_1.end[0] + BEGV_BYTE;
1246                   pos = BYTE_TO_CHAR (search_regs_1.end[0] + BEGV_BYTE);
1247                 }
1248             }
1249           else
1250             {
                   /* No match: return minus the count still unfound.  */
1251               immediate_quit = 0;
1252               return (0 - n);
1253             }
1254           n--;
1255         }
1256       immediate_quit = 0;
1257       return (pos);
1258     }
1259   else                          /* non-RE case */
1260     {
1261       unsigned char *raw_pattern, *pat;
1262       EMACS_INT raw_pattern_size;
1263       EMACS_INT raw_pattern_size_byte;
1264       unsigned char *patbuf;
1265       int multibyte = !NILP (current_buffer->enable_multibyte_characters);
1266       unsigned char *base_pat;
1267       /* Set to positive if we find a non-ASCII char that needs
1268          translation.  Otherwise set to zero later.  */
1269       int char_base = -1;
1270       int boyer_moore_ok = 1;
1271
1272       /* MULTIBYTE says whether the text to be searched is multibyte.
1273          We must convert PATTERN to match that, or we will not really
1274          find things right.  */
1275
1276       if (multibyte == STRING_MULTIBYTE (string))
1277         {
               /* Same representation as the buffer: use STRING's bytes
                  directly, without copying.  */
1278           raw_pattern = (unsigned char *) SDATA (string);
1279           raw_pattern_size = SCHARS (string);
1280           raw_pattern_size_byte = SBYTES (string);
1281         }
1282       else if (multibyte)
1283         {
               /* Multibyte buffer, unibyte STRING: convert a stack copy
                  of the pattern.  */
1284           raw_pattern_size = SCHARS (string);
1285           raw_pattern_size_byte
1286             = count_size_as_multibyte (SDATA (string),
1287                                        raw_pattern_size);
1288           raw_pattern = (unsigned char *) alloca (raw_pattern_size_byte + 1);
1289           copy_text (SDATA (string), raw_pattern,
1290                      SCHARS (string), 0, 1);
1291         }
1292       else
1293         {
1294           /* Converting multibyte to single-byte.
1295
1296              ??? Perhaps this conversion should be done in a special way
1297              by subtracting nonascii-insert-offset from each non-ASCII char,
1298              so that only the multibyte chars which really correspond to
1299              the chosen single-byte character set can possibly match.  */
1300           raw_pattern_size = SCHARS (string);
1301           raw_pattern_size_byte = SCHARS (string);
1302           raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
1303           copy_text (SDATA (string), raw_pattern,
1304                      SBYTES (string), 1, 0);
1305         }
1306
1307       /* Copy and optionally translate the pattern.  */
1308       len = raw_pattern_size;
1309       len_byte = raw_pattern_size_byte;
1310       patbuf = (unsigned char *) alloca (len * MAX_MULTIBYTE_LENGTH);
1311       pat = patbuf;
1312       base_pat = raw_pattern;
1313       if (multibyte)
1314         {
1315           /* Fill patbuf by translated characters in STRING while
1316              checking if we can use boyer-moore search.  If TRT is
1317              non-nil, we can use boyer-moore search only if TRT can be
1318              represented by the byte array of 256 elements.  For that,
1319              all non-ASCII case-equivalents of all case-sensitive
1320              characters in STRING must belong to the same charset and
1321              row.  */
1322
1323           while (--len >= 0)
1324             {
1325               unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str;
1326               int c, translated, inverse;
1327               int in_charlen, charlen;
1328
1329               /* If we got here and the RE flag is set, it's because we're
1330                  dealing with a regexp known to be trivial, so the backslash
1331                  just quotes the next character.  */
1332               if (RE && *base_pat == '\\')
1333                 {
                       /* Drop the backslash: it counts neither as a
                          character nor as a byte of the pattern.  */
1334                   len--;
1335                   raw_pattern_size--;
1336                   len_byte--;
1337                   base_pat++;
1338                 }
1339
1340               c = STRING_CHAR_AND_LENGTH (base_pat, in_charlen);
1341
1342               if (NILP (trt))
1343                 {
                       /* No translation: copy the character's bytes
                          straight from the raw pattern.  */
1344                   str = base_pat;
1345                   charlen = in_charlen;
1346                 }
1347               else
1348                 {
1349                   /* Translate the character.  */
1350                   TRANSLATE (translated, trt, c);
1351                   charlen = CHAR_STRING (translated, str_base);
1352                   str = str_base;
1353
1354                   /* Check if C has any other case-equivalents.  */
1355                   TRANSLATE (inverse, inverse_trt, c);
1356                   /* If so, check if we can use boyer-moore.  */
1357                   if (c != inverse && boyer_moore_ok)
1358                     {
1359                       /* Check if all equivalents belong to the same
1360                          group of characters.  Note that the check of C
1361                          itself is done by the last iteration.  */
1362                       int this_char_base = -1;
1363
                           /* Follow the chain of equivalents through
                              INVERSE_TRT until it comes back to C.  */
1364                       while (boyer_moore_ok)
1365                         {
1366                           if (ASCII_BYTE_P (inverse))
1367                             {
                                   /* An ASCII equivalent cannot be mixed
                                      with non-ASCII ones seen earlier.  */
1368                               if (this_char_base > 0)
1369                                 boyer_moore_ok = 0;
1370                               else
1371                                 this_char_base = 0;
1372                             }
1373                           else if (CHAR_BYTE8_P (inverse))
1374                             /* Boyer-moore search can't handle a
1375                                translation of an eight-bit
1376                                character.  */
1377                             boyer_moore_ok = 0;
1378                           else if (this_char_base < 0)
1379                             {
                                   /* First non-ASCII equivalent: its
                                      value with the low six bits cleared
                                      becomes the base that every other
                                      equivalent must share.  */
1380                               this_char_base = inverse & ~0x3F;
1381                               if (char_base < 0)
1382                                 char_base = this_char_base;
1383                               else if (this_char_base != char_base)
1384                                 boyer_moore_ok = 0;
1385                             }
1386                           else if ((inverse & ~0x3F) != this_char_base)
1387                             boyer_moore_ok = 0;
1388                           if (c == inverse)
1389                             break;
1390                           TRANSLATE (inverse, inverse_trt, inverse);
1391                         }
1392                     }
1393                 }
1394
1395               /* Store this character into the translated pattern.  */
1396               memcpy (pat, str, charlen);
1397               pat += charlen;
1398               base_pat += in_charlen;
1399               len_byte -= in_charlen;
1400             }
1401
1402           /* If char_base is still negative we didn't find any translated
1403              non-ASCII characters.  */
1404           if (char_base < 0)
1405             char_base = 0;
1406         }
1407       else
1408         {
1409           /* Unibyte buffer.  */
1410           char_base = 0;
1411           while (--len >= 0)
1412             {
1413               int c, translated;
1414
1415               /* If we got here and the RE flag is set, it's because we're
1416                  dealing with a regexp known to be trivial, so the backslash
1417                  just quotes the next character.  */
1418               if (RE && *base_pat == '\\')
1419                 {
1420                   len--;
1421                   raw_pattern_size--;
1422                   base_pat++;
1423                 }
1424               c = *base_pat++;
1425               TRANSLATE (translated, trt, c);
1426               *pat++ = translated;
1427             }
1428         }
1429
           /* From here on the lengths describe the translated pattern in
              patbuf: bytes actually written, and the character count with
              every dropped quoting backslash already subtracted.  */
1430       len_byte = pat - patbuf;
1431       len = raw_pattern_size;
1432       pat = base_pat = patbuf;
1433
1434       if (boyer_moore_ok)
1435         return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
1436                             pos, pos_byte, lim, lim_byte,
1437                             char_base);
1438       else
1439         return simple_search (n, pat, len, len_byte, trt,
1440                               pos, pos_byte, lim, lim_byte);
1441     }
1442 }
1443 \f
1444 /* Do a simple string search N times for the string PAT,
1445    whose length is LEN/LEN_BYTE,
1446    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1447    TRT is the translation table.
1448
1449    Return the character position where the match is found.
1450    Otherwise, if M matches remained to be found, return -M.
1451
1452    This kind of search works regardless of what is in PAT and
1453    regardless of what is in TRT.  It is used in cases where
1454    boyer_moore cannot work.  */
1455
1456 static EMACS_INT
1457 simple_search (EMACS_INT n, unsigned char *pat,
1458                EMACS_INT len, EMACS_INT len_byte, Lisp_Object trt,
1459                EMACS_INT pos, EMACS_INT pos_byte,
1460                EMACS_INT lim, EMACS_INT lim_byte)
1461 {
1462   int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1463   int forward = n > 0;
1464   /* Number of buffer bytes matched.  Note that this may be different
1465      from len_byte in a multibyte buffer.  */
1466   EMACS_INT match_byte;
1467
1468   if (lim > pos && multibyte)
1469     while (n > 0)
1470       {
1471         while (1)
1472           {
1473             /* Try matching at position POS.  */
1474             EMACS_INT this_pos = pos;
1475             EMACS_INT this_pos_byte = pos_byte;
1476             EMACS_INT this_len = len;
1477             unsigned char *p = pat;
1478             if (pos + len > lim || pos_byte + len_byte > lim_byte)
1479               goto stop;
1480
1481             while (this_len > 0)
1482               {
1483                 int charlen, buf_charlen;
1484                 int pat_ch, buf_ch;
1485
1486                 pat_ch = STRING_CHAR_AND_LENGTH (p, charlen);
1487                 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1488                                                  buf_charlen);
1489                 TRANSLATE (buf_ch, trt, buf_ch);
1490
1491                 if (buf_ch != pat_ch)
1492                   break;
1493
1494                 this_len--;
1495                 p += charlen;
1496
1497                 this_pos_byte += buf_charlen;
1498                 this_pos++;
1499               }
1500
1501             if (this_len == 0)
1502               {
1503                 match_byte = this_pos_byte - pos_byte;
1504                 pos += len;
1505                 pos_byte += match_byte;
1506                 break;
1507               }
1508
1509             INC_BOTH (pos, pos_byte);
1510           }
1511
1512         n--;
1513       }
1514   else if (lim > pos)
1515     while (n > 0)
1516       {
1517         while (1)
1518           {
1519             /* Try matching at position POS.  */
1520             EMACS_INT this_pos = pos;
1521             EMACS_INT this_len = len;
1522             unsigned char *p = pat;
1523
1524             if (pos + len > lim)
1525               goto stop;
1526
1527             while (this_len > 0)
1528               {
1529                 int pat_ch = *p++;
1530                 int buf_ch = FETCH_BYTE (this_pos);
1531                 TRANSLATE (buf_ch, trt, buf_ch);
1532
1533                 if (buf_ch != pat_ch)
1534                   break;
1535
1536                 this_len--;
1537                 this_pos++;
1538               }
1539
1540             if (this_len == 0)
1541               {
1542                 match_byte = len;
1543                 pos += len;
1544                 break;
1545               }
1546
1547             pos++;
1548           }
1549
1550         n--;
1551       }
1552   /* Backwards search.  */
1553   else if (lim < pos && multibyte)
1554     while (n < 0)
1555       {
1556         while (1)
1557           {
1558             /* Try matching at position POS.  */
1559             EMACS_INT this_pos = pos;
1560             EMACS_INT this_pos_byte = pos_byte;
1561             EMACS_INT this_len = len;
1562             const unsigned char *p = pat + len_byte;
1563
1564             if (this_pos - len < lim || (pos_byte - len_byte) < lim_byte)
1565               goto stop;
1566
1567             while (this_len > 0)
1568               {
1569                 int charlen;
1570                 int pat_ch, buf_ch;
1571
1572                 DEC_BOTH (this_pos, this_pos_byte);
1573                 PREV_CHAR_BOUNDARY (p, pat);
1574                 pat_ch = STRING_CHAR (p);
1575                 buf_ch = STRING_CHAR (BYTE_POS_ADDR (this_pos_byte));
1576                 TRANSLATE (buf_ch, trt, buf_ch);
1577
1578                 if (buf_ch != pat_ch)
1579                   break;
1580
1581                 this_len--;
1582               }
1583
1584             if (this_len == 0)
1585               {
1586                 match_byte = pos_byte - this_pos_byte;
1587                 pos = this_pos;
1588                 pos_byte = this_pos_byte;
1589                 break;
1590               }
1591
1592             DEC_BOTH (pos, pos_byte);
1593           }
1594
1595         n++;
1596       }
1597   else if (lim < pos)
1598     while (n < 0)
1599       {
1600         while (1)
1601           {
1602             /* Try matching at position POS.  */
1603             EMACS_INT this_pos = pos - len;
1604             EMACS_INT this_len = len;
1605             unsigned char *p = pat;
1606
1607             if (this_pos < lim)
1608               goto stop;
1609
1610             while (this_len > 0)
1611               {
1612                 int pat_ch = *p++;
1613                 int buf_ch = FETCH_BYTE (this_pos);
1614                 TRANSLATE (buf_ch, trt, buf_ch);
1615
1616                 if (buf_ch != pat_ch)
1617                   break;
1618                 this_len--;
1619                 this_pos++;
1620               }
1621
1622             if (this_len == 0)
1623               {
1624                 match_byte = len;
1625                 pos -= len;
1626                 break;
1627               }
1628
1629             pos--;
1630           }
1631
1632         n++;
1633       }
1634
1635  stop:
1636   if (n == 0)
1637     {
1638       if (forward)
1639         set_search_regs ((multibyte ? pos_byte : pos) - match_byte, match_byte);
1640       else
1641         set_search_regs (multibyte ? pos_byte : pos, match_byte);
1642
1643       return pos;
1644     }
1645   else if (n > 0)
1646     return -n;
1647   else
1648     return n;
1649 }
1650 \f
1651 /* Do Boyer-Moore search N times for the string BASE_PAT,
1652    whose length is LEN/LEN_BYTE,
1653    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1654    DIRECTION says which direction we search in.
1655    TRT and INVERSE_TRT are translation tables.
1656    Characters in PAT are already translated by TRT.
1657
1658    This kind of search works if all the characters in BASE_PAT that
1659    have nontrivial translation are the same aside from the last byte.
1660    This makes it possible to translate just the last byte of a
1661    character, and do so after just a simple test of the context.
1662    CHAR_BASE is nonzero if there is such a non-ASCII character.
1663
1664    If that criterion is not satisfied, do not call this function.  */
1665
1666 static EMACS_INT
1667 boyer_moore (EMACS_INT n, unsigned char *base_pat,
1668              EMACS_INT len, EMACS_INT len_byte,
1669              Lisp_Object trt, Lisp_Object inverse_trt,
1670              EMACS_INT pos, EMACS_INT pos_byte,
1671              EMACS_INT lim, EMACS_INT lim_byte, int char_base)
1672 {
1673   int direction = ((n > 0) ? 1 : -1);
1674   register EMACS_INT dirlen;
1675   EMACS_INT limit;
1676   int stride_for_teases = 0;
1677   int BM_tab[0400];
1678   register unsigned char *cursor, *p_limit;
1679   register EMACS_INT i;
1680   register int j;
1681   unsigned char *pat, *pat_end;
1682   int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1683
1684   unsigned char simple_translate[0400];
1685   /* These are set to the preceding bytes of a byte to be translated
1686      if char_base is nonzero.  As the maximum byte length of a
1687      multibyte character is 5, we have to check at most four previous
1688      bytes.  */
1689   int translate_prev_byte1 = 0;
1690   int translate_prev_byte2 = 0;
1691   int translate_prev_byte3 = 0;
1692   int translate_prev_byte4 = 0;
1693
1694   /* The general approach is that we are going to maintain that we know
1695      the first (closest to the present position, in whatever direction
1696      we're searching) character that could possibly be the last
1697      (furthest from present position) character of a valid match.  We
1698      advance the state of our knowledge by looking at that character
1699      and seeing whether it indeed matches the last character of the
1700      pattern.  If it does, we take a closer look.  If it does not, we
1701      move our pointer (to putative last characters) as far as is
1702      logically possible.  This amount of movement, which I call a
1703      stride, will be the length of the pattern if the actual character
1704      appears nowhere in the pattern, otherwise it will be the distance
1705      from the last occurrence of that character to the end of the
1706      pattern.  If the amount is zero we have a possible match.  */
1707
1708   /* Here we make a "mickey mouse" BM table.  The stride of the search
1709      is determined only by the last character of the putative match.
1710      If that character does not match, we will stride the proper
1711      distance to propose a match that superimposes it on the last
1712      instance of a character that matches it (per trt), or misses
1713      it entirely if there is none. */
1714
1715   dirlen = len_byte * direction;
1716
1717   /* Record position after the end of the pattern.  */
1718   pat_end = base_pat + len_byte;
1719   /* BASE_PAT points to a character that we start scanning from.
1720      It is the first character in a forward search,
1721      the last character in a backward search.  */
1722   if (direction < 0)
1723     base_pat = pat_end - 1;
1724
1725   /* A character that does not appear in the pattern induces a
1726      stride equal to the pattern length.  */
1727   for (i = 0; i < 0400; i++)
1728     BM_tab[i] = dirlen;
1729
1730   /* We use this for translation, instead of TRT itself.
1731      We fill this in to handle the characters that actually
1732      occur in the pattern.  Others don't matter anyway!  */
1733   for (i = 0; i < 0400; i++)
1734     simple_translate[i] = i;
1735
1736   if (char_base)
1737     {
1738       /* Setup translate_prev_byte1/2/3/4 from CHAR_BASE.  Only a
1739          byte following them are the target of translation.  */
1740       unsigned char str[MAX_MULTIBYTE_LENGTH];
1741       int len = CHAR_STRING (char_base, str);
1742
1743       translate_prev_byte1 = str[len - 2];
1744       if (len > 2)
1745         {
1746           translate_prev_byte2 = str[len - 3];
1747           if (len > 3)
1748             {
1749               translate_prev_byte3 = str[len - 4];
1750               if (len > 4)
1751                 translate_prev_byte4 = str[len - 5];
1752             }
1753         }
1754     }
1755
1756   i = 0;
1757   while (i != dirlen)
1758     {
1759       unsigned char *ptr = base_pat + i;
1760       i += direction;
1761       if (! NILP (trt))
1762         {
1763           /* If the byte currently looking at is the last of a
1764              character to check case-equivalents, set CH to that
1765              character.  An ASCII character and a non-ASCII character
1766              matching with CHAR_BASE are to be checked.  */
1767           int ch = -1;
1768
1769           if (ASCII_BYTE_P (*ptr) || ! multibyte)
1770             ch = *ptr;
1771           else if (char_base
1772                    && ((pat_end - ptr) == 1 || CHAR_HEAD_P (ptr[1])))
1773             {
1774               unsigned char *charstart = ptr - 1;
1775
1776               while (! (CHAR_HEAD_P (*charstart)))
1777                 charstart--;
1778               ch = STRING_CHAR (charstart);
1779               if (char_base != (ch & ~0x3F))
1780                 ch = -1;
1781             }
1782
1783           if (ch >= 0200)
1784             j = (ch & 0x3F) | 0200;
1785           else
1786             j = *ptr;
1787
1788           if (i == dirlen)
1789             stride_for_teases = BM_tab[j];
1790
1791           BM_tab[j] = dirlen - i;
1792           /* A translation table is accompanied by its inverse -- see */
1793           /* comment following downcase_table for details */
1794           if (ch >= 0)
1795             {
1796               int starting_ch = ch;
1797               int starting_j = j;
1798
1799               while (1)
1800                 {
1801                   TRANSLATE (ch, inverse_trt, ch);
1802                   if (ch >= 0200)
1803                     j = (ch & 0x3F) | 0200;
1804                   else
1805                     j = ch;
1806
1807                   /* For all the characters that map into CH,
1808                      set up simple_translate to map the last byte
1809                      into STARTING_J.  */
1810                   simple_translate[j] = starting_j;
1811                   if (ch == starting_ch)
1812                     break;
1813                   BM_tab[j] = dirlen - i;
1814                 }
1815             }
1816         }
1817       else
1818         {
1819           j = *ptr;
1820
1821           if (i == dirlen)
1822             stride_for_teases = BM_tab[j];
1823           BM_tab[j] = dirlen - i;
1824         }
1825       /* stride_for_teases tells how much to stride if we get a
1826          match on the far character but are subsequently
1827          disappointed, by recording what the stride would have been
1828          for that character if the last character had been
1829          different.  */
1830     }
1831   pos_byte += dirlen - ((direction > 0) ? direction : 0);
1832   /* loop invariant - POS_BYTE points at where last char (first
1833      char if reverse) of pattern would align in a possible match.  */
1834   while (n != 0)
1835     {
1836       EMACS_INT tail_end;
1837       unsigned char *tail_end_ptr;
1838
1839       /* It's been reported that some (broken) compiler thinks that
1840          Boolean expressions in an arithmetic context are unsigned.
1841          Using an explicit ?1:0 prevents this.  */
1842       if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1843           < 0)
1844         return (n * (0 - direction));
1845       /* First we do the part we can by pointers (maybe nothing) */
1846       QUIT;
1847       pat = base_pat;
1848       limit = pos_byte - dirlen + direction;
1849       if (direction > 0)
1850         {
1851           limit = BUFFER_CEILING_OF (limit);
1852           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1853              can take on without hitting edge of buffer or the gap.  */
1854           limit = min (limit, pos_byte + 20000);
1855           limit = min (limit, lim_byte - 1);
1856         }
1857       else
1858         {
1859           limit = BUFFER_FLOOR_OF (limit);
1860           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1861              can take on without hitting edge of buffer or the gap.  */
1862           limit = max (limit, pos_byte - 20000);
1863           limit = max (limit, lim_byte);
1864         }
1865       tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1866       tail_end_ptr = BYTE_POS_ADDR (tail_end);
1867
1868       if ((limit - pos_byte) * direction > 20)
1869         {
1870           unsigned char *p2;
1871
1872           p_limit = BYTE_POS_ADDR (limit);
1873           p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1874           /* In this loop, pos + cursor - p2 is the surrogate for pos.  */
1875           while (1)             /* use one cursor setting as long as i can */
1876             {
1877               if (direction > 0) /* worth duplicating */
1878                 {
1879                   while (cursor <= p_limit)
1880                     {
1881                       if (BM_tab[*cursor] == 0)
1882                         goto hit;
1883                       cursor += BM_tab[*cursor];
1884                     }
1885                 }
1886               else
1887                 {
1888                   while (cursor >= p_limit)
1889                     {
1890                       if (BM_tab[*cursor] == 0)
1891                         goto hit;
1892                       cursor += BM_tab[*cursor];
1893                     }
1894                 }
1895               /* If you are here, cursor is beyond the end of the
1896                  searched region.  You fail to match within the
1897                  permitted region and would otherwise try a character
1898                  beyond that region.  */
1899               break;
1900
1901             hit:
1902               i = dirlen - direction;
1903               if (! NILP (trt))
1904                 {
1905                   while ((i -= direction) + direction != 0)
1906                     {
1907                       int ch;
1908                       cursor -= direction;
1909                       /* Translate only the last byte of a character.  */
1910                       if (! multibyte
1911                           || ((cursor == tail_end_ptr
1912                                || CHAR_HEAD_P (cursor[1]))
1913                               && (CHAR_HEAD_P (cursor[0])
1914                                   /* Check if this is the last byte of
1915                                      a translable character.  */
1916                                   || (translate_prev_byte1 == cursor[-1]
1917                                       && (CHAR_HEAD_P (translate_prev_byte1)
1918                                           || (translate_prev_byte2 == cursor[-2]
1919                                               && (CHAR_HEAD_P (translate_prev_byte2)
1920                                                   || (translate_prev_byte3 == cursor[-3]))))))))
1921                         ch = simple_translate[*cursor];
1922                       else
1923                         ch = *cursor;
1924                       if (pat[i] != ch)
1925                         break;
1926                     }
1927                 }
1928               else
1929                 {
1930                   while ((i -= direction) + direction != 0)
1931                     {
1932                       cursor -= direction;
1933                       if (pat[i] != *cursor)
1934                         break;
1935                     }
1936                 }
1937               cursor += dirlen - i - direction; /* fix cursor */
1938               if (i + direction == 0)
1939                 {
1940                   EMACS_INT position, start, end;
1941
1942                   cursor -= direction;
1943
1944                   position = pos_byte + cursor - p2 + ((direction > 0)
1945                                                        ? 1 - len_byte : 0);
1946                   set_search_regs (position, len_byte);
1947
1948                   if (NILP (Vinhibit_changing_match_data))
1949                     {
1950                       start = search_regs.start[0];
1951                       end = search_regs.end[0];
1952                     }
1953                   else
1954                     /* If Vinhibit_changing_match_data is non-nil,
1955                        search_regs will not be changed.  So let's
1956                        compute start and end here.  */
1957                     {
1958                       start = BYTE_TO_CHAR (position);
1959                       end = BYTE_TO_CHAR (position + len_byte);
1960                     }
1961
1962                   if ((n -= direction) != 0)
1963                     cursor += dirlen; /* to resume search */
1964                   else
1965                     return direction > 0 ? end : start;
1966                 }
1967               else
1968                 cursor += stride_for_teases; /* <sigh> we lose -  */
1969             }
1970           pos_byte += cursor - p2;
1971         }
1972       else
1973         /* Now we'll pick up a clump that has to be done the hard
1974            way because it covers a discontinuity.  */
1975         {
1976           limit = ((direction > 0)
1977                    ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
1978                    : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
1979           limit = ((direction > 0)
1980                    ? min (limit + len_byte, lim_byte - 1)
1981                    : max (limit - len_byte, lim_byte));
1982           /* LIMIT is now the last value POS_BYTE can have
1983              and still be valid for a possible match.  */
1984           while (1)
1985             {
1986               /* This loop can be coded for space rather than
1987                  speed because it will usually run only once.
1988                  (the reach is at most len + 21, and typically
1989                  does not exceed len).  */
1990               while ((limit - pos_byte) * direction >= 0)
1991                 {
1992                   int ch = FETCH_BYTE (pos_byte);
1993                   if (BM_tab[ch] == 0)
1994                     goto hit2;
1995                   pos_byte += BM_tab[ch];
1996                 }
1997               break;    /* ran off the end */
1998
1999             hit2:
2000               /* Found what might be a match.  */
2001               i = dirlen - direction;
2002               while ((i -= direction) + direction != 0)
2003                 {
2004                   int ch;
2005                   unsigned char *ptr;
2006                   pos_byte -= direction;
2007                   ptr = BYTE_POS_ADDR (pos_byte);
2008                   /* Translate only the last byte of a character.  */
2009                   if (! multibyte
2010                       || ((ptr == tail_end_ptr
2011                            || CHAR_HEAD_P (ptr[1]))
2012                           && (CHAR_HEAD_P (ptr[0])
2013                               /* Check if this is the last byte of a
2014                                  translable character.  */
2015                               || (translate_prev_byte1 == ptr[-1]
2016                                   && (CHAR_HEAD_P (translate_prev_byte1)
2017                                       || (translate_prev_byte2 == ptr[-2]
2018                                           && (CHAR_HEAD_P (translate_prev_byte2)
2019                                               || translate_prev_byte3 == ptr[-3])))))))
2020                     ch = simple_translate[*ptr];
2021                   else
2022                     ch = *ptr;
2023                   if (pat[i] != ch)
2024                     break;
2025                 }
2026               /* Above loop has moved POS_BYTE part or all the way
2027                  back to the first pos (last pos if reverse).
2028                  Set it once again at the last (first if reverse) char.  */
2029               pos_byte += dirlen - i - direction;
2030               if (i + direction == 0)
2031                 {
2032                   EMACS_INT position, start, end;
2033                   pos_byte -= direction;
2034
2035                   position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
2036                   set_search_regs (position, len_byte);
2037
2038                   if (NILP (Vinhibit_changing_match_data))
2039                     {
2040                       start = search_regs.start[0];
2041                       end = search_regs.end[0];
2042                     }
2043                   else
2044                     /* If Vinhibit_changing_match_data is non-nil,
2045                        search_regs will not be changed.  So let's
2046                        compute start and end here.  */
2047                     {
2048                       start = BYTE_TO_CHAR (position);
2049                       end = BYTE_TO_CHAR (position + len_byte);
2050                     }
2051
2052                   if ((n -= direction) != 0)
2053                     pos_byte += dirlen; /* to resume search */
2054                   else
2055                     return direction > 0 ? end : start;
2056                 }
2057               else
2058                 pos_byte += stride_for_teases;
2059             }
2060           }
2061       /* We have done one clump.  Can we continue? */
2062       if ((lim_byte - pos_byte) * direction < 0)
2063         return ((0 - n) * direction);
2064     }
2065   return BYTE_TO_CHAR (pos_byte);
2066 }
2067
2068 /* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
2069    for the overall match just found in the current buffer.
2070    Also clear out the match data for registers 1 and up.  */
2071
2072 static void
2073 set_search_regs (EMACS_INT beg_byte, EMACS_INT nbytes)
2074 {
2075   int i;
2076
2077   if (!NILP (Vinhibit_changing_match_data))
2078     return;
2079
2080   /* Make sure we have registers in which to store
2081      the match position.  */
2082   if (search_regs.num_regs == 0)
2083     {
2084       search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
2085       search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
2086       search_regs.num_regs = 2;
2087     }
2088
2089   /* Clear out the other registers.  */
2090   for (i = 1; i < search_regs.num_regs; i++)
2091     {
2092       search_regs.start[i] = -1;
2093       search_regs.end[i] = -1;
2094     }
2095
2096   search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
2097   search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
2098   XSETBUFFER (last_thing_searched, current_buffer);
2099 }
2100 \f
2101 /* Given STRING, a string of words separated by word delimiters,
2102    compute a regexp that matches those exact words separated by
2103    arbitrary punctuation.  If LAX is nonzero, the end of the string
2104    need not match a word boundary unless it ends in whitespace.  */
2105
2106 static Lisp_Object
2107 wordify (Lisp_Object string, int lax)
2108 {
2109   register unsigned char *p, *o;
2110   register EMACS_INT i, i_byte, len, punct_count = 0, word_count = 0;
2111   Lisp_Object val;
2112   int prev_c = 0;
2113   EMACS_INT adjust;
2114   int whitespace_at_end;
2115
2116   CHECK_STRING (string);
2117   p = SDATA (string);
2118   len = SCHARS (string);
2119
2120   for (i = 0, i_byte = 0; i < len; )
2121     {
2122       int c;
2123
2124       FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, i, i_byte);
2125
2126       if (SYNTAX (c) != Sword)
2127         {
2128           punct_count++;
2129           if (i > 0 && SYNTAX (prev_c) == Sword)
2130             word_count++;
2131         }
2132
2133       prev_c = c;
2134     }
2135
2136   if (SYNTAX (prev_c) == Sword)
2137     {
2138       word_count++;
2139       whitespace_at_end = 0;
2140     }
2141   else
2142     whitespace_at_end = 1;
2143
2144   if (!word_count)
2145     return empty_unibyte_string;
2146
2147   adjust = - punct_count + 5 * (word_count - 1)
2148     + ((lax && !whitespace_at_end) ? 2 : 4);
2149   if (STRING_MULTIBYTE (string))
2150     val = make_uninit_multibyte_string (len + adjust,
2151                                         SBYTES (string)
2152                                         + adjust);
2153   else
2154     val = make_uninit_string (len + adjust);
2155
2156   o = SDATA (val);
2157   *o++ = '\\';
2158   *o++ = 'b';
2159   prev_c = 0;
2160
2161   for (i = 0, i_byte = 0; i < len; )
2162     {
2163       int c;
2164       EMACS_INT i_byte_orig = i_byte;
2165
2166       FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, i, i_byte);
2167
2168       if (SYNTAX (c) == Sword)
2169         {
2170           memcpy (o, SDATA (string) + i_byte_orig, i_byte - i_byte_orig);
2171           o += i_byte - i_byte_orig;
2172         }
2173       else if (i > 0 && SYNTAX (prev_c) == Sword && --word_count)
2174         {
2175           *o++ = '\\';
2176           *o++ = 'W';
2177           *o++ = '\\';
2178           *o++ = 'W';
2179           *o++ = '*';
2180         }
2181
2182       prev_c = c;
2183     }
2184
2185   if (!lax || whitespace_at_end)
2186     {
2187       *o++ = '\\';
2188       *o++ = 'b';
2189     }
2190
2191   return val;
2192 }
2193 \f
2194 DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
2195        "MSearch backward: ",
2196        doc: /* Search backward from point for STRING.
2197 Set point to the beginning of the occurrence found, and return point.
2198 An optional second argument bounds the search; it is a buffer position.
2199 The match found must not extend before that position.
2200 Optional third argument, if t, means if fail just return nil (no error).
2201  If not nil and not t, position at limit of search and return nil.
2202 Optional fourth argument is repeat count--search for successive occurrences.
2203
2204 Search case-sensitivity is determined by the value of the variable
2205 `case-fold-search', which see.
2206
2207 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2208   (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2209 {
2210   return search_command (string, bound, noerror, count, -1, 0, 0);
2211 }
2212
2213 DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
2214        doc: /* Search forward from point for STRING.
2215 Set point to the end of the occurrence found, and return point.
2216 An optional second argument bounds the search; it is a buffer position.
2217 The match found must not extend after that position.  A value of nil is
2218   equivalent to (point-max).
2219 Optional third argument, if t, means if fail just return nil (no error).
2220   If not nil and not t, move to limit of search and return nil.
2221 Optional fourth argument is repeat count--search for successive occurrences.
2222
2223 Search case-sensitivity is determined by the value of the variable
2224 `case-fold-search', which see.
2225
2226 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2227   (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2228 {
2229   return search_command (string, bound, noerror, count, 1, 0, 0);
2230 }
2231
2232 DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
2233        "sWord search backward: ",
2234        doc: /* Search backward from point for STRING, ignoring differences in punctuation.
2235 Set point to the beginning of the occurrence found, and return point.
2236 An optional second argument bounds the search; it is a buffer position.
2237 The match found must not extend before that position.
2238 Optional third argument, if t, means if fail just return nil (no error).
2239   If not nil and not t, move to limit of search and return nil.
2240 Optional fourth argument is repeat count--search for successive occurrences.  */)
2241   (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2242 {
2243   return search_command (wordify (string, 0), bound, noerror, count, -1, 1, 0);
2244 }
2245
2246 DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
2247        "sWord search: ",
2248        doc: /* Search forward from point for STRING, ignoring differences in punctuation.
2249 Set point to the end of the occurrence found, and return point.
2250 An optional second argument bounds the search; it is a buffer position.
2251 The match found must not extend after that position.
2252 Optional third argument, if t, means if fail just return nil (no error).
2253   If not nil and not t, move to limit of search and return nil.
2254 Optional fourth argument is repeat count--search for successive occurrences.  */)
2255   (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2256 {
2257   return search_command (wordify (string, 0), bound, noerror, count, 1, 1, 0);
2258 }
2259
2260 DEFUN ("word-search-backward-lax", Fword_search_backward_lax, Sword_search_backward_lax, 1, 4,
2261        "sWord search backward: ",
2262        doc: /* Search backward from point for STRING, ignoring differences in punctuation.
2263 Set point to the beginning of the occurrence found, and return point.
2264
2265 Unlike `word-search-backward', the end of STRING need not match a word
2266 boundary unless it ends in whitespace.
2267
2268 An optional second argument bounds the search; it is a buffer position.
2269 The match found must not extend before that position.
2270 Optional third argument, if t, means if fail just return nil (no error).
2271   If not nil and not t, move to limit of search and return nil.
2272 Optional fourth argument is repeat count--search for successive occurrences.  */)
2273   (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2274 {
2275   return search_command (wordify (string, 1), bound, noerror, count, -1, 1, 0);
2276 }
2277
2278 DEFUN ("word-search-forward-lax", Fword_search_forward_lax, Sword_search_forward_lax, 1, 4,
2279        "sWord search: ",
2280        doc: /* Search forward from point for STRING, ignoring differences in punctuation.
2281 Set point to the end of the occurrence found, and return point.
2282
2283 Unlike `word-search-forward', the end of STRING need not match a word
2284 boundary unless it ends in whitespace.
2285
2286 An optional second argument bounds the search; it is a buffer position.
2287 The match found must not extend after that position.
2288 Optional third argument, if t, means if fail just return nil (no error).
2289   If not nil and not t, move to limit of search and return nil.
2290 Optional fourth argument is repeat count--search for successive occurrences.  */)
2291   (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2292 {
2293   return search_command (wordify (string, 1), bound, noerror, count, 1, 1, 0);
2294 }
2295
2296 DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
2297        "sRE search backward: ",
2298        doc: /* Search backward from point for match for regular expression REGEXP.
2299 Set point to the beginning of the match, and return point.
2300 The match found is the one starting last in the buffer
2301 and yet ending before the origin of the search.
2302 An optional second argument bounds the search; it is a buffer position.
2303 The match found must start at or after that position.
2304 Optional third argument, if t, means if fail just return nil (no error).
2305   If not nil and not t, move to limit of search and return nil.
2306 Optional fourth argument is repeat count--search for successive occurrences.
2307 See also the functions `match-beginning', `match-end', `match-string',
2308 and `replace-match'.  */)
2309   (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2310 {
2311   return search_command (regexp, bound, noerror, count, -1, 1, 0);
2312 }
2313
2314 DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
2315        "sRE search: ",
2316        doc: /* Search forward from point for regular expression REGEXP.
2317 Set point to the end of the occurrence found, and return point.
2318 An optional second argument bounds the search; it is a buffer position.
2319 The match found must not extend after that position.
2320 Optional third argument, if t, means if fail just return nil (no error).
2321   If not nil and not t, move to limit of search and return nil.
2322 Optional fourth argument is repeat count--search for successive occurrences.
2323 See also the functions `match-beginning', `match-end', `match-string',
2324 and `replace-match'.  */)
2325   (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2326 {
2327   return search_command (regexp, bound, noerror, count, 1, 1, 0);
2328 }
2329
2330 DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
2331        "sPosix search backward: ",
2332        doc: /* Search backward from point for match for regular expression REGEXP.
2333 Find the longest match in accord with Posix regular expression rules.
2334 Set point to the beginning of the match, and return point.
2335 The match found is the one starting last in the buffer
2336 and yet ending before the origin of the search.
2337 An optional second argument bounds the search; it is a buffer position.
2338 The match found must start at or after that position.
2339 Optional third argument, if t, means if fail just return nil (no error).
2340   If not nil and not t, move to limit of search and return nil.
2341 Optional fourth argument is repeat count--search for successive occurrences.
2342 See also the functions `match-beginning', `match-end', `match-string',
2343 and `replace-match'.  */)
2344   (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2345 {
2346   return search_command (regexp, bound, noerror, count, -1, 1, 1);
2347 }
2348
2349 DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
2350        "sPosix search: ",
2351        doc: /* Search forward from point for regular expression REGEXP.
2352 Find the longest match in accord with Posix regular expression rules.
2353 Set point to the end of the occurrence found, and return point.
2354 An optional second argument bounds the search; it is a buffer position.
2355 The match found must not extend after that position.
2356 Optional third argument, if t, means if fail just return nil (no error).
2357   If not nil and not t, move to limit of search and return nil.
2358 Optional fourth argument is repeat count--search for successive occurrences.
2359 See also the functions `match-beginning', `match-end', `match-string',
2360 and `replace-match'.  */)
2361   (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2362 {
2363   return search_command (regexp, bound, noerror, count, 1, 1, 1);
2364 }
2365 \f
2366 DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
2367        doc: /* Replace text matched by last search with NEWTEXT.
2368 Leave point at the end of the replacement text.
2369
2370 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
2371 Otherwise maybe capitalize the whole text, or maybe just word initials,
2372 based on the replaced text.
2373 If the replaced text has only capital letters
2374 and has at least one multiletter word, convert NEWTEXT to all caps.
2375 Otherwise if all words are capitalized in the replaced text,
2376 capitalize each word in NEWTEXT.
2377
2378 If third arg LITERAL is non-nil, insert NEWTEXT literally.
2379 Otherwise treat `\\' as special:
2380   `\\&' in NEWTEXT means substitute original matched text.
2381   `\\N' means substitute what matched the Nth `\\(...\\)'.
2382        If Nth parens didn't match, substitute nothing.
2383   `\\\\' means insert one `\\'.
2384 Case conversion does not apply to these substitutions.
2385
2386 FIXEDCASE and LITERAL are optional arguments.
2387
2388 The optional fourth argument STRING can be a string to modify.
2389 This is meaningful when the previous match was done against STRING,
2390 using `string-match'.  When used this way, `replace-match'
2391 creates and returns a new string made by copying STRING and replacing
2392 the part of STRING that was matched.
2393
2394 The optional fifth argument SUBEXP specifies a subexpression;
2395 it says to replace just that subexpression with NEWTEXT,
2396 rather than replacing the entire matched text.
2397 This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
2398 `\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
2399 NEWTEXT in place of subexp N.
2400 This is useful only after a regular expression search or match,
2401 since only regular expressions have distinguished subexpressions.  */)
2402   (Lisp_Object newtext, Lisp_Object fixedcase, Lisp_Object literal, Lisp_Object string, Lisp_Object subexp)
2403 {
2404   enum { nochange, all_caps, cap_initial } case_action;
2405   register EMACS_INT pos, pos_byte;
2406   int some_multiletter_word;
2407   int some_lowercase;
2408   int some_uppercase;
2409   int some_nonuppercase_initial;
2410   register int c, prevc;
2411   int sub;
2412   EMACS_INT opoint, newpoint;
2413
2414   CHECK_STRING (newtext);
2415
2416   if (! NILP (string))
2417     CHECK_STRING (string);
2418
2419   case_action = nochange;       /* We tried an initialization */
2420                                 /* but some C compilers blew it */
2421
2422   if (search_regs.num_regs <= 0)
2423     error ("`replace-match' called before any match found");
2424
2425   if (NILP (subexp))
2426     sub = 0;
2427   else
2428     {
2429       CHECK_NUMBER (subexp);
2430       sub = XINT (subexp);
2431       if (sub < 0 || sub >= search_regs.num_regs)
2432         args_out_of_range (subexp, make_number (search_regs.num_regs));
2433     }
2434
2435   if (NILP (string))
2436     {
2437       if (search_regs.start[sub] < BEGV
2438           || search_regs.start[sub] > search_regs.end[sub]
2439           || search_regs.end[sub] > ZV)
2440         args_out_of_range (make_number (search_regs.start[sub]),
2441                            make_number (search_regs.end[sub]));
2442     }
2443   else
2444     {
2445       if (search_regs.start[sub] < 0
2446           || search_regs.start[sub] > search_regs.end[sub]
2447           || search_regs.end[sub] > SCHARS (string))
2448         args_out_of_range (make_number (search_regs.start[sub]),
2449                            make_number (search_regs.end[sub]));
2450     }
2451
2452   if (NILP (fixedcase))
2453     {
2454       /* Decide how to casify by examining the matched text. */
2455       EMACS_INT last;
2456
2457       pos = search_regs.start[sub];
2458       last = search_regs.end[sub];
2459
2460       if (NILP (string))
2461         pos_byte = CHAR_TO_BYTE (pos);
2462       else
2463         pos_byte = string_char_to_byte (string, pos);
2464
2465       prevc = '\n';
2466       case_action = all_caps;
2467
2468       /* some_multiletter_word is set nonzero if any original word
2469          is more than one letter long. */
2470       some_multiletter_word = 0;
2471       some_lowercase = 0;
2472       some_nonuppercase_initial = 0;
2473       some_uppercase = 0;
2474
2475       while (pos < last)
2476         {
2477           if (NILP (string))
2478             {
2479               c = FETCH_CHAR_AS_MULTIBYTE (pos_byte);
2480               INC_BOTH (pos, pos_byte);
2481             }
2482           else
2483             FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, pos, pos_byte);
2484
2485           if (LOWERCASEP (c))
2486             {
2487               /* Cannot be all caps if any original char is lower case */
2488
2489               some_lowercase = 1;
2490               if (SYNTAX (prevc) != Sword)
2491                 some_nonuppercase_initial = 1;
2492               else
2493                 some_multiletter_word = 1;
2494             }
2495           else if (UPPERCASEP (c))
2496             {
2497               some_uppercase = 1;
2498               if (SYNTAX (prevc) != Sword)
2499                 ;
2500               else
2501                 some_multiletter_word = 1;
2502             }
2503           else
2504             {
2505               /* If the initial is a caseless word constituent,
2506                  treat that like a lowercase initial.  */
2507               if (SYNTAX (prevc) != Sword)
2508                 some_nonuppercase_initial = 1;
2509             }
2510
2511           prevc = c;
2512         }
2513
2514       /* Convert to all caps if the old text is all caps
2515          and has at least one multiletter word.  */
2516       if (! some_lowercase && some_multiletter_word)
2517         case_action = all_caps;
2518       /* Capitalize each word, if the old text has all capitalized words.  */
2519       else if (!some_nonuppercase_initial && some_multiletter_word)
2520         case_action = cap_initial;
2521       else if (!some_nonuppercase_initial && some_uppercase)
2522         /* Should x -> yz, operating on X, give Yz or YZ?
2523            We'll assume the latter.  */
2524         case_action = all_caps;
2525       else
2526         case_action = nochange;
2527     }
2528
2529   /* Do replacement in a string.  */
2530   if (!NILP (string))
2531     {
2532       Lisp_Object before, after;
2533
2534       before = Fsubstring (string, make_number (0),
2535                            make_number (search_regs.start[sub]));
2536       after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
2537
2538       /* Substitute parts of the match into NEWTEXT
2539          if desired.  */
2540       if (NILP (literal))
2541         {
2542           EMACS_INT lastpos = 0;
2543           EMACS_INT lastpos_byte = 0;
2544           /* We build up the substituted string in ACCUM.  */
2545           Lisp_Object accum;
2546           Lisp_Object middle;
2547           int length = SBYTES (newtext);
2548
2549           accum = Qnil;
2550
2551           for (pos_byte = 0, pos = 0; pos_byte < length;)
2552             {
2553               EMACS_INT substart = -1;
2554               EMACS_INT subend = 0;
2555               int delbackslash = 0;
2556
2557               FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2558
2559               if (c == '\\')
2560                 {
2561                   FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2562
2563                   if (c == '&')
2564                     {
2565                       substart = search_regs.start[sub];
2566                       subend = search_regs.end[sub];
2567                     }
2568                   else if (c >= '1' && c <= '9')
2569                     {
2570                       if (search_regs.start[c - '0'] >= 0
2571                           && c <= search_regs.num_regs + '0')
2572                         {
2573                           substart = search_regs.start[c - '0'];
2574                           subend = search_regs.end[c - '0'];
2575                         }
2576                       else
2577                         {
2578                           /* If that subexp did not match,
2579                              replace \\N with nothing.  */
2580                           substart = 0;
2581                           subend = 0;
2582                         }
2583                     }
2584                   else if (c == '\\')
2585                     delbackslash = 1;
2586                   else
2587                     error ("Invalid use of `\\' in replacement text");
2588                 }
2589               if (substart >= 0)
2590                 {
2591                   if (pos - 2 != lastpos)
2592                     middle = substring_both (newtext, lastpos,
2593                                              lastpos_byte,
2594                                              pos - 2, pos_byte - 2);
2595                   else
2596                     middle = Qnil;
2597                   accum = concat3 (accum, middle,
2598                                    Fsubstring (string,
2599                                                make_number (substart),
2600                                                make_number (subend)));
2601                   lastpos = pos;
2602                   lastpos_byte = pos_byte;
2603                 }
2604               else if (delbackslash)
2605                 {
2606                   middle = substring_both (newtext, lastpos,
2607                                            lastpos_byte,
2608                                            pos - 1, pos_byte - 1);
2609
2610                   accum = concat2 (accum, middle);
2611                   lastpos = pos;
2612                   lastpos_byte = pos_byte;
2613                 }
2614             }
2615
2616           if (pos != lastpos)
2617             middle = substring_both (newtext, lastpos,
2618                                      lastpos_byte,
2619                                      pos, pos_byte);
2620           else
2621             middle = Qnil;
2622
2623           newtext = concat2 (accum, middle);
2624         }
2625
2626       /* Do case substitution in NEWTEXT if desired.  */
2627       if (case_action == all_caps)
2628         newtext = Fupcase (newtext);
2629       else if (case_action == cap_initial)
2630         newtext = Fupcase_initials (newtext);
2631
2632       return concat3 (before, newtext, after);
2633     }
2634
2635   /* Record point, then move (quietly) to the start of the match.  */
2636   if (PT >= search_regs.end[sub])
2637     opoint = PT - ZV;
2638   else if (PT > search_regs.start[sub])
2639     opoint = search_regs.end[sub] - ZV;
2640   else
2641     opoint = PT;
2642
2643   /* If we want non-literal replacement,
2644      perform substitution on the replacement string.  */
2645   if (NILP (literal))
2646     {
2647       EMACS_INT length = SBYTES (newtext);
2648       unsigned char *substed;
2649       EMACS_INT substed_alloc_size, substed_len;
2650       int buf_multibyte = !NILP (current_buffer->enable_multibyte_characters);
2651       int str_multibyte = STRING_MULTIBYTE (newtext);
2652       Lisp_Object rev_tbl;
2653       int really_changed = 0;
2654
2655       rev_tbl = Qnil;
2656
2657       substed_alloc_size = length * 2 + 100;
2658       substed = (unsigned char *) xmalloc (substed_alloc_size + 1);
2659       substed_len = 0;
2660
2661       /* Go thru NEWTEXT, producing the actual text to insert in
2662          SUBSTED while adjusting multibyteness to that of the current
2663          buffer.  */
2664
2665       for (pos_byte = 0, pos = 0; pos_byte < length;)
2666         {
2667           unsigned char str[MAX_MULTIBYTE_LENGTH];
2668           const unsigned char *add_stuff = NULL;
2669           EMACS_INT add_len = 0;
2670           int idx = -1;
2671
2672           if (str_multibyte)
2673             {
2674               FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte);
2675               if (!buf_multibyte)
2676                 c = multibyte_char_to_unibyte (c, rev_tbl);
2677             }
2678           else
2679             {
2680               /* Note that we don't have to increment POS.  */
2681               c = SREF (newtext, pos_byte++);
2682               if (buf_multibyte)
2683                 MAKE_CHAR_MULTIBYTE (c);
2684             }
2685
2686           /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2687              or set IDX to a match index, which means put that part
2688              of the buffer text into SUBSTED.  */
2689
2690           if (c == '\\')
2691             {
2692               really_changed = 1;
2693
2694               if (str_multibyte)
2695                 {
2696                   FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext,
2697                                                       pos, pos_byte);
2698                   if (!buf_multibyte && !ASCII_CHAR_P (c))
2699                     c = multibyte_char_to_unibyte (c, rev_tbl);
2700                 }
2701               else
2702                 {
2703                   c = SREF (newtext, pos_byte++);
2704                   if (buf_multibyte)
2705                     MAKE_CHAR_MULTIBYTE (c);
2706                 }
2707
2708               if (c == '&')
2709                 idx = sub;
2710               else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
2711                 {
2712                   if (search_regs.start[c - '0'] >= 1)
2713                     idx = c - '0';
2714                 }
2715               else if (c == '\\')
2716                 add_len = 1, add_stuff = "\\";
2717               else
2718                 {
2719                   xfree (substed);
2720                   error ("Invalid use of `\\' in replacement text");
2721                 }
2722             }
2723           else
2724             {
2725               add_len = CHAR_STRING (c, str);
2726               add_stuff = str;
2727             }
2728
2729           /* If we want to copy part of a previous match,
2730              set up ADD_STUFF and ADD_LEN to point to it.  */
2731           if (idx >= 0)
2732             {
2733               EMACS_INT begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
2734               add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
2735               if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
2736                 move_gap (search_regs.start[idx]);
2737               add_stuff = BYTE_POS_ADDR (begbyte);
2738             }
2739
2740           /* Now the stuff we want to add to SUBSTED
2741              is invariably ADD_LEN bytes starting at ADD_STUFF.  */
2742
2743           /* Make sure SUBSTED is big enough.  */
2744           if (substed_len + add_len >= substed_alloc_size)
2745             {
2746               substed_alloc_size = substed_len + add_len + 500;
2747               substed = (unsigned char *) xrealloc (substed,
2748                                                     substed_alloc_size + 1);
2749             }
2750
2751           /* Now add to the end of SUBSTED.  */
2752           if (add_stuff)
2753             {
2754               memcpy (substed + substed_len, add_stuff, add_len);
2755               substed_len += add_len;
2756             }
2757         }
2758
2759       if (really_changed)
2760         {
2761           if (buf_multibyte)
2762             {
2763               EMACS_INT nchars =
2764                 multibyte_chars_in_text (substed, substed_len);
2765
2766               newtext = make_multibyte_string (substed, nchars, substed_len);
2767             }
2768           else
2769             newtext = make_unibyte_string (substed, substed_len);
2770         }
2771       xfree (substed);
2772     }
2773
2774   /* Replace the old text with the new in the cleanest possible way.  */
2775   replace_range (search_regs.start[sub], search_regs.end[sub],
2776                  newtext, 1, 0, 1);
2777   newpoint = search_regs.start[sub] + SCHARS (newtext);
2778
2779   if (case_action == all_caps)
2780     Fupcase_region (make_number (search_regs.start[sub]),
2781                     make_number (newpoint));
2782   else if (case_action == cap_initial)
2783     Fupcase_initials_region (make_number (search_regs.start[sub]),
2784                              make_number (newpoint));
2785
2786   /* Adjust search data for this change.  */
2787   {
2788     EMACS_INT oldend = search_regs.end[sub];
2789     EMACS_INT oldstart = search_regs.start[sub];
2790     EMACS_INT change = newpoint - search_regs.end[sub];
2791     int i;
2792
2793     for (i = 0; i < search_regs.num_regs; i++)
2794       {
2795         if (search_regs.start[i] >= oldend)
2796           search_regs.start[i] += change;
2797         else if (search_regs.start[i] > oldstart)
2798           search_regs.start[i] = oldstart;
2799         if (search_regs.end[i] >= oldend)
2800           search_regs.end[i] += change;
2801         else if (search_regs.end[i] > oldstart)
2802           search_regs.end[i] = oldstart;
2803       }
2804   }
2805
2806   /* Put point back where it was in the text.  */
2807   if (opoint <= 0)
2808     TEMP_SET_PT (opoint + ZV);
2809   else
2810     TEMP_SET_PT (opoint);
2811
2812   /* Now move point "officially" to the start of the inserted replacement.  */
2813   move_if_not_intangible (newpoint);
2814
2815   return Qnil;
2816 }
2817 \f
2818 static Lisp_Object
2819 match_limit (Lisp_Object num, int beginningp)
2820 {
2821   register int n;
2822
2823   CHECK_NUMBER (num);
2824   n = XINT (num);
2825   if (n < 0)
2826     args_out_of_range (num, make_number (0));
2827   if (search_regs.num_regs <= 0)
2828     error ("No match data, because no search succeeded");
2829   if (n >= search_regs.num_regs
2830       || search_regs.start[n] < 0)
2831     return Qnil;
2832   return (make_number ((beginningp) ? search_regs.start[n]
2833                                     : search_regs.end[n]));
2834 }
2835
2836 DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
2837        doc: /* Return position of start of text matched by last search.
2838 SUBEXP, a number, specifies which parenthesized expression in the last
2839   regexp.
2840 Value is nil if SUBEXPth pair didn't match, or there were less than
2841   SUBEXP pairs.
2842 Zero means the entire text matched by the whole regexp or whole string.  */)
2843   (Lisp_Object subexp)
2844 {
2845   return match_limit (subexp, 1);
2846 }
2847
2848 DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
2849        doc: /* Return position of end of text matched by last search.
2850 SUBEXP, a number, specifies which parenthesized expression in the last
2851   regexp.
2852 Value is nil if SUBEXPth pair didn't match, or there were less than
2853   SUBEXP pairs.
2854 Zero means the entire text matched by the whole regexp or whole string.  */)
2855   (Lisp_Object subexp)
2856 {
2857   return match_limit (subexp, 0);
2858 }
2859
2860 DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 3, 0,
2861        doc: /* Return a list containing all info on what the last search matched.
2862 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2863 All the elements are markers or nil (nil if the Nth pair didn't match)
2864 if the last match was on a buffer; integers or nil if a string was matched.
2865 Use `set-match-data' to reinstate the data in this list.
2866
2867 If INTEGERS (the optional first argument) is non-nil, always use
2868 integers \(rather than markers) to represent buffer positions.  In
2869 this case, and if the last match was in a buffer, the buffer will get
2870 stored as one additional element at the end of the list.
2871
2872 If REUSE is a list, reuse it as part of the value.  If REUSE is long
2873 enough to hold all the values, and if INTEGERS is non-nil, no consing
2874 is done.
2875
2876 If optional third arg RESEAT is non-nil, any previous markers on the
2877 REUSE list will be modified to point to nowhere.
2878
2879 Return value is undefined if the last search failed.  */)
2880   (Lisp_Object integers, Lisp_Object reuse, Lisp_Object reseat)
2881 {
2882   Lisp_Object tail, prev;
2883   Lisp_Object *data;
2884   int i, len;
2885
2886   if (!NILP (reseat))
2887     for (tail = reuse; CONSP (tail); tail = XCDR (tail))
2888       if (MARKERP (XCAR (tail)))
2889         {
2890           unchain_marker (XMARKER (XCAR (tail)));
2891           XSETCAR (tail, Qnil);
2892         }
2893
2894   if (NILP (last_thing_searched))
2895     return Qnil;
2896
2897   prev = Qnil;
2898
2899   data = (Lisp_Object *) alloca ((2 * search_regs.num_regs + 1)
2900                                  * sizeof (Lisp_Object));
2901
2902   len = 0;
2903   for (i = 0; i < search_regs.num_regs; i++)
2904     {
2905       int start = search_regs.start[i];
2906       if (start >= 0)
2907         {
2908           if (EQ (last_thing_searched, Qt)
2909               || ! NILP (integers))
2910             {
2911               XSETFASTINT (data[2 * i], start);
2912               XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
2913             }
2914           else if (BUFFERP (last_thing_searched))
2915             {
2916               data[2 * i] = Fmake_marker ();
2917               Fset_marker (data[2 * i],
2918                            make_number (start),
2919                            last_thing_searched);
2920               data[2 * i + 1] = Fmake_marker ();
2921               Fset_marker (data[2 * i + 1],
2922                            make_number (search_regs.end[i]),
2923                            last_thing_searched);
2924             }
2925           else
2926             /* last_thing_searched must always be Qt, a buffer, or Qnil.  */
2927             abort ();
2928
2929           len = 2 * i + 2;
2930         }
2931       else
2932         data[2 * i] = data[2 * i + 1] = Qnil;
2933     }
2934
2935   if (BUFFERP (last_thing_searched) && !NILP (integers))
2936     {
2937       data[len] = last_thing_searched;
2938       len++;
2939     }
2940
2941   /* If REUSE is not usable, cons up the values and return them.  */
2942   if (! CONSP (reuse))
2943     return Flist (len, data);
2944
2945   /* If REUSE is a list, store as many value elements as will fit
2946      into the elements of REUSE.  */
2947   for (i = 0, tail = reuse; CONSP (tail);
2948        i++, tail = XCDR (tail))
2949     {
2950       if (i < len)
2951         XSETCAR (tail, data[i]);
2952       else
2953         XSETCAR (tail, Qnil);
2954       prev = tail;
2955     }
2956
2957   /* If we couldn't fit all value elements into REUSE,
2958      cons up the rest of them and add them to the end of REUSE.  */
2959   if (i < len)
2960     XSETCDR (prev, Flist (len - i, data + i));
2961
2962   return reuse;
2963 }
2964
2965 /* We used to have an internal use variant of `reseat' described as:
2966
2967       If RESEAT is `evaporate', put the markers back on the free list
2968       immediately.  No other references to the markers must exist in this
2969       case, so it is used only internally on the unwind stack and
2970       save-match-data from Lisp.
2971
2972    But it was ill-conceived: those supposedly-internal markers get exposed via
2973    the undo-list, so freeing them here is unsafe.  */
2974
2975 DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 2, 0,
2976        doc: /* Set internal data on last search match from elements of LIST.
2977 LIST should have been created by calling `match-data' previously.
2978
2979 If optional arg RESEAT is non-nil, make markers on LIST point nowhere.  */)
2980   (register Lisp_Object list, Lisp_Object reseat)
2981 {
2982   register int i;
2983   register Lisp_Object marker;
2984
2985   if (running_asynch_code)
2986     save_search_regs ();
2987
2988   CHECK_LIST (list);
2989
2990   /* Unless we find a marker with a buffer or an explicit buffer
2991      in LIST, assume that this match data came from a string.  */
2992   last_thing_searched = Qt;
2993
2994   /* Allocate registers if they don't already exist.  */
2995   {
2996     int length = XFASTINT (Flength (list)) / 2;
2997
2998     if (length > search_regs.num_regs)
2999       {
3000         if (search_regs.num_regs == 0)
3001           {
3002             search_regs.start
3003               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
3004             search_regs.end
3005               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
3006           }
3007         else
3008           {
3009             search_regs.start
3010               = (regoff_t *) xrealloc (search_regs.start,
3011                                        length * sizeof (regoff_t));
3012             search_regs.end
3013               = (regoff_t *) xrealloc (search_regs.end,
3014                                        length * sizeof (regoff_t));
3015           }
3016
3017         for (i = search_regs.num_regs; i < length; i++)
3018           search_regs.start[i] = -1;
3019
3020         search_regs.num_regs = length;
3021       }
3022
3023     for (i = 0; CONSP (list); i++)
3024       {
3025         marker = XCAR (list);
3026         if (BUFFERP (marker))
3027           {
3028             last_thing_searched = marker;
3029             break;
3030           }
3031         if (i >= length)
3032           break;
3033         if (NILP (marker))
3034           {
3035             search_regs.start[i] = -1;
3036             list = XCDR (list);
3037           }
3038         else
3039           {
3040             EMACS_INT from;
3041             Lisp_Object m;
3042
3043             m = marker;
3044             if (MARKERP (marker))
3045               {
3046                 if (XMARKER (marker)->buffer == 0)
3047                   XSETFASTINT (marker, 0);
3048                 else
3049                   XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
3050               }
3051
3052             CHECK_NUMBER_COERCE_MARKER (marker);
3053             from = XINT (marker);
3054
3055             if (!NILP (reseat) && MARKERP (m))
3056               {
3057                 unchain_marker (XMARKER (m));
3058                 XSETCAR (list, Qnil);
3059               }
3060
3061             if ((list = XCDR (list), !CONSP (list)))
3062               break;
3063
3064             m = marker = XCAR (list);
3065
3066             if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
3067               XSETFASTINT (marker, 0);
3068
3069             CHECK_NUMBER_COERCE_MARKER (marker);
3070             search_regs.start[i] = from;
3071             search_regs.end[i] = XINT (marker);
3072
3073             if (!NILP (reseat) && MARKERP (m))
3074               {
3075                 unchain_marker (XMARKER (m));
3076                 XSETCAR (list, Qnil);
3077               }
3078           }
3079         list = XCDR (list);
3080       }
3081
3082     for (; i < search_regs.num_regs; i++)
3083       search_regs.start[i] = -1;
3084   }
3085
3086   return Qnil;
3087 }
3088
/* If non-zero, the match data have been saved in saved_search_regs
   during the execution of a sentinel or filter.  Set by
   save_search_regs and cleared by restore_search_regs.  */
static int search_regs_saved;
/* Holds the num_regs, start and end fields of search_regs as they
   were when save_search_regs ran; restore_search_regs copies them
   back.  */
static struct re_registers saved_search_regs;
/* Value of last_thing_searched at the time of the save; reset to
   Qnil once it has been restored.  */
static Lisp_Object saved_last_thing_searched;
3094
3095 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
3096    if asynchronous code (filter or sentinel) is running. */
3097 static void
3098 save_search_regs (void)
3099 {
3100   if (!search_regs_saved)
3101     {
3102       saved_search_regs.num_regs = search_regs.num_regs;
3103       saved_search_regs.start = search_regs.start;
3104       saved_search_regs.end = search_regs.end;
3105       saved_last_thing_searched = last_thing_searched;
3106       last_thing_searched = Qnil;
3107       search_regs.num_regs = 0;
3108       search_regs.start = 0;
3109       search_regs.end = 0;
3110
3111       search_regs_saved = 1;
3112     }
3113 }
3114
3115 /* Called upon exit from filters and sentinels. */
3116 void
3117 restore_search_regs (void)
3118 {
3119   if (search_regs_saved)
3120     {
3121       if (search_regs.num_regs > 0)
3122         {
3123           xfree (search_regs.start);
3124           xfree (search_regs.end);
3125         }
3126       search_regs.num_regs = saved_search_regs.num_regs;
3127       search_regs.start = saved_search_regs.start;
3128       search_regs.end = saved_search_regs.end;
3129       last_thing_searched = saved_last_thing_searched;
3130       saved_last_thing_searched = Qnil;
3131       search_regs_saved = 0;
3132     }
3133 }
3134
3135 static Lisp_Object
3136 unwind_set_match_data (Lisp_Object list)
3137 {
3138   /* It is NOT ALWAYS safe to free (evaporate) the markers immediately.  */
3139   return Fset_match_data (list, Qt);
3140 }
3141
3142 /* Called to unwind protect the match data.  */
3143 void
3144 record_unwind_save_match_data (void)
3145 {
3146   record_unwind_protect (unwind_set_match_data,
3147                          Fmatch_data (Qnil, Qnil, Qnil));
3148 }
3149
3150 /* Quote a string to inactivate reg-expr chars */
3151
3152 DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
3153        doc: /* Return a regexp string which matches exactly STRING and nothing else.  */)
3154   (Lisp_Object string)
3155 {
3156   register unsigned char *in, *out, *end;
3157   register unsigned char *temp;
3158   int backslashes_added = 0;
3159
3160   CHECK_STRING (string);
3161
3162   temp = (unsigned char *) alloca (SBYTES (string) * 2);
3163
3164   /* Now copy the data into the new string, inserting escapes. */
3165
3166   in = SDATA (string);
3167   end = in + SBYTES (string);
3168   out = temp;
3169
3170   for (; in != end; in++)
3171     {
3172       if (*in == '['
3173           || *in == '*' || *in == '.' || *in == '\\'
3174           || *in == '?' || *in == '+'
3175           || *in == '^' || *in == '$')
3176         *out++ = '\\', backslashes_added++;
3177       *out++ = *in;
3178     }
3179
3180   return make_specified_string (temp,
3181                                 SCHARS (string) + backslashes_added,
3182                                 out - temp,
3183                                 STRING_MULTIBYTE (string));
3184 }
3185 \f
3186 void
3187 syms_of_search (void)
3188 {
3189   register int i;
3190
3191   for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
3192     {
3193       searchbufs[i].buf.allocated = 100;
3194       searchbufs[i].buf.buffer = (unsigned char *) xmalloc (100);
3195       searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
3196       searchbufs[i].regexp = Qnil;
3197       searchbufs[i].whitespace_regexp = Qnil;
3198       searchbufs[i].syntax_table = Qnil;
3199       staticpro (&searchbufs[i].regexp);
3200       staticpro (&searchbufs[i].whitespace_regexp);
3201       staticpro (&searchbufs[i].syntax_table);
3202       searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
3203     }
3204   searchbuf_head = &searchbufs[0];
3205
3206   Qsearch_failed = intern_c_string ("search-failed");
3207   staticpro (&Qsearch_failed);
3208   Qinvalid_regexp = intern_c_string ("invalid-regexp");
3209   staticpro (&Qinvalid_regexp);
3210
3211   Fput (Qsearch_failed, Qerror_conditions,
3212         pure_cons (Qsearch_failed, pure_cons (Qerror, Qnil)));
3213   Fput (Qsearch_failed, Qerror_message,
3214         make_pure_c_string ("Search failed"));
3215
3216   Fput (Qinvalid_regexp, Qerror_conditions,
3217         pure_cons (Qinvalid_regexp, pure_cons (Qerror, Qnil)));
3218   Fput (Qinvalid_regexp, Qerror_message,
3219         make_pure_c_string ("Invalid regexp"));
3220
3221   last_thing_searched = Qnil;
3222   staticpro (&last_thing_searched);
3223
3224   saved_last_thing_searched = Qnil;
3225   staticpro (&saved_last_thing_searched);
3226
3227   DEFVAR_LISP ("search-spaces-regexp", &Vsearch_spaces_regexp,
3228       doc: /* Regexp to substitute for bunches of spaces in regexp search.
3229 Some commands use this for user-specified regexps.
3230 Spaces that occur inside character classes or repetition operators
3231 or other such regexp constructs are not replaced with this.
3232 A value of nil (which is the normal value) means treat spaces literally.  */);
3233   Vsearch_spaces_regexp = Qnil;
3234
3235   DEFVAR_LISP ("inhibit-changing-match-data", &Vinhibit_changing_match_data,
3236       doc: /* Internal use only.
3237 If non-nil, the primitive searching and matching functions
3238 such as `looking-at', `string-match', `re-search-forward', etc.,
3239 do not set the match data.  The proper way to use this variable
3240 is to bind it with `let' around a small expression.  */);
3241   Vinhibit_changing_match_data = Qnil;
3242
3243   defsubr (&Slooking_at);
3244   defsubr (&Sposix_looking_at);
3245   defsubr (&Sstring_match);
3246   defsubr (&Sposix_string_match);
3247   defsubr (&Ssearch_forward);
3248   defsubr (&Ssearch_backward);
3249   defsubr (&Sword_search_forward);
3250   defsubr (&Sword_search_backward);
3251   defsubr (&Sword_search_forward_lax);
3252   defsubr (&Sword_search_backward_lax);
3253   defsubr (&Sre_search_forward);
3254   defsubr (&Sre_search_backward);
3255   defsubr (&Sposix_search_forward);
3256   defsubr (&Sposix_search_backward);
3257   defsubr (&Sreplace_match);
3258   defsubr (&Smatch_beginning);
3259   defsubr (&Smatch_end);
3260   defsubr (&Smatch_data);
3261   defsubr (&Sset_match_data);
3262   defsubr (&Sregexp_quote);
3263 }
3264
3265 /* arch-tag: a6059d79-0552-4f14-a2cb-d379a4e3c78f
3266    (do not change this comment) */