src/search.c

   1 /* String search routines for GNU Emacs.
   2    Copyright (C) 1985, 1986, 1987, 1993, 1994, 1997, 1998, 1999, 2001, 2002,
   3                  2003, 2004, 2005, 2006, 2007, 2008, 2009
   4                  Free Software Foundation, Inc.
   5
   6 This file is part of GNU Emacs.
   7
   8 GNU Emacs is free software: you can redistribute it and/or modify
   9 it under the terms of the GNU General Public License as published by
  10 the Free Software Foundation, either version 3 of the License, or
  11 (at your option) any later version.
  12
  13 GNU Emacs is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
  20
  21
  22 #include <config.h>
  23 #include "lisp.h"
  24 #include "syntax.h"
  25 #include "category.h"
  26 #include "buffer.h"
  27 #include "character.h"
  28 #include "charset.h"
  29 #include "region-cache.h"
  30 #include "commands.h"
  31 #include "blockinput.h"
  32 #include "intervals.h"
  33
  34 #include <sys/types.h>
  35 #include "regex.h"
  36
  37 #define REGEXP_CACHE_SIZE 20
  38
  39 /* If the regexp is non-nil, then the buffer contains the compiled form
  40    of that regexp, suitable for searching.  */
  41 struct regexp_cache
  42 {
  43   struct regexp_cache *next;
  44   Lisp_Object regexp, whitespace_regexp;
  45   /* Syntax table for which the regexp applies.  We need this because
  46      of character classes.  If this is t, then the compiled pattern is valid
  47      for any syntax-table.  */
  48   Lisp_Object syntax_table;
  49   struct re_pattern_buffer buf;
  50   char fastmap[0400];
  51   /* Nonzero means regexp was compiled to do full POSIX backtracking.  */
  52   char posix;
  53 };
  54
  55 /* The instances of that struct.  */
  56 struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
  57
  58 /* The head of the linked list; points to the most recently used buffer.  */
  59 struct regexp_cache *searchbuf_head;
  60
  61
  62 /* Every call to re_match, etc., must pass &search_regs as the regs
  63    argument unless you can show it is unnecessary (i.e., if re_match
  64    is certainly going to be called again before region-around-match
  65    can be called).
  66
  67    Since the registers are now dynamically allocated, we need to make
  68    sure not to refer to the Nth register before checking that it has
  69    been allocated by checking search_regs.num_regs.
  70
  71    The regex code keeps track of whether it has allocated the search
  72    buffer using bits in the re_pattern_buffer.  This means that whenever
  73    you compile a new pattern, it completely forgets whether it has
  74    allocated any registers, and will allocate new registers the next
  75    time you call a searching or matching function.  Therefore, we need
  76    to call re_set_registers after compiling a new pattern or after
  77    setting the match registers, so that the regex functions will be
  78    able to free or re-allocate it properly.  */
  79 static struct re_registers search_regs;
  80
  81 /* The buffer in which the last search was performed, or
  82    Qt if the last search was done in a string;
  83    Qnil if no searching has been done yet.  */
  84 static Lisp_Object last_thing_searched;
  85
  86 /* error condition signaled when regexp compile_pattern fails */
  87
  88 Lisp_Object Qinvalid_regexp;
  89
  90 /* Error condition used for failing searches */
  91 Lisp_Object Qsearch_failed;
  92
  93 Lisp_Object Vsearch_spaces_regexp;
  94
  95 /* If non-nil, the match data will not be changed during call to
  96    searching or matching functions.  This variable is for internal use
  97    only.  */
  98 Lisp_Object Vinhibit_changing_match_data;
  99
 100 static void set_search_regs P_ ((EMACS_INT, EMACS_INT));
 101 static void save_search_regs P_ ((void));
 102 static EMACS_INT simple_search P_ ((int, unsigned char *, int, int,
 103                                     Lisp_Object, EMACS_INT, EMACS_INT,
 104                                     EMACS_INT, EMACS_INT));
 105 static EMACS_INT boyer_moore P_ ((int, unsigned char *, int, int,
 106                                   Lisp_Object, Lisp_Object,
 107                                   EMACS_INT, EMACS_INT,
 108                                   EMACS_INT, EMACS_INT, int));
 109 static EMACS_INT search_buffer P_ ((Lisp_Object, EMACS_INT, EMACS_INT,
 110                                     EMACS_INT, EMACS_INT, int, int,
 111                                     Lisp_Object, Lisp_Object, int));
 112 static void matcher_overflow () NO_RETURN;
 113
 114 static void
 115 matcher_overflow ()
 116 {
 117   error ("Stack overflow in regexp matcher");
 118 }
 119
 120 /* Compile a regexp and signal a Lisp error if anything goes wrong.
 121    PATTERN is the pattern to compile.
 122    CP is the place to put the result.
 123    TRANSLATE is a translation table for ignoring case, or nil for none.
 124    REGP is the structure that says where to store the "register"
 125    values that will result from matching this pattern.
 126    If it is 0, we should compile the pattern not to record any
 127    subexpression bounds.
 128    POSIX is nonzero if we want full backtracking (POSIX style)
 129    for this pattern.  0 means backtrack only enough to get a valid match.
 130
 131    The behavior also depends on Vsearch_spaces_regexp.  */
 132
 133 static void
 134 compile_pattern_1 (cp, pattern, translate, regp, posix)
 135      struct regexp_cache *cp;
 136      Lisp_Object pattern;
 137      Lisp_Object translate;
 138      struct re_registers *regp;
 139      int posix;
 140 {
 141   char *val;
 142   reg_syntax_t old;
 143
 144   cp->regexp = Qnil;
 145   cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
 146   cp->posix = posix;
 147   cp->buf.multibyte = STRING_MULTIBYTE (pattern);
 148   cp->buf.charset_unibyte = charset_unibyte;
 149   if (STRINGP (Vsearch_spaces_regexp))
 150     cp->whitespace_regexp = Vsearch_spaces_regexp;
 151   else
 152     cp->whitespace_regexp = Qnil;
 153
 154   /* rms: I think BLOCK_INPUT is not needed here any more,
 155      because regex.c defines malloc to call xmalloc.
 156      Using BLOCK_INPUT here means the debugger won't run if an error occurs.
 157      So let's turn it off.  */
 158   /*  BLOCK_INPUT;  */
 159   old = re_set_syntax (RE_SYNTAX_EMACS
 160                        | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
 161
 162   if (STRINGP (Vsearch_spaces_regexp))
 163     re_set_whitespace_regexp (SDATA (Vsearch_spaces_regexp));
 164   else
 165     re_set_whitespace_regexp (NULL);
 166
 167   val = (char *) re_compile_pattern ((char *) SDATA (pattern),
 168                                      SBYTES (pattern), &cp->buf);
 169
 170   /* If the compiled pattern hard codes some of the contents of the
 171      syntax-table, it can only be reused with *this* syntax table.  */
 172   cp->syntax_table = cp->buf.used_syntax ? current_buffer->syntax_table : Qt;
 173
 174   re_set_whitespace_regexp (NULL);
 175
 176   re_set_syntax (old);
 177   /* UNBLOCK_INPUT;  */
 178   if (val)
 179     xsignal1 (Qinvalid_regexp, build_string (val));
 180
 181   cp->regexp = Fcopy_sequence (pattern);
 182 }
 183
 184 /* Shrink each compiled regexp buffer in the cache
 185    to the size actually used right now.
 186    This is called from garbage collection.  */
 187
 188 void
 189 shrink_regexp_cache ()
 190 {
 191   struct regexp_cache *cp;
 192
 193   for (cp = searchbuf_head; cp != 0; cp = cp->next)
 194     {
 195       cp->buf.allocated = cp->buf.used;
 196       cp->buf.buffer
 197         = (unsigned char *) xrealloc (cp->buf.buffer, cp->buf.used);
 198     }
 199 }
 200
 201 /* Clear the regexp cache w.r.t. a particular syntax table,
 202    because it was changed.
 203    There is no danger of memory leak here because re_compile_pattern
 204    automagically manages the memory in each re_pattern_buffer struct,
 205    based on its `allocated' and `buffer' values.  */
 206 void
 207 clear_regexp_cache ()
 208 {
 209   int i;
 210
 211   for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
 212     /* It's tempting to compare with the syntax-table we've actually changd,
 213        but it's not sufficient because char-table inheritance mewans that
 214        modifying one syntax-table can change others at the same time.  */
 215     if (!EQ (searchbufs[i].syntax_table, Qt))
 216       searchbufs[i].regexp = Qnil;
 217 }
 218
 219 /* Compile a regexp if necessary, but first check to see if there's one in
 220    the cache.
 221    PATTERN is the pattern to compile.
 222    TRANSLATE is a translation table for ignoring case, or nil for none.
 223    REGP is the structure that says where to store the "register"
 224    values that will result from matching this pattern.
 225    If it is 0, we should compile the pattern not to record any
 226    subexpression bounds.
 227    POSIX is nonzero if we want full backtracking (POSIX style)
 228    for this pattern.  0 means backtrack only enough to get a valid match.  */
 229
 230 struct re_pattern_buffer *
 231 compile_pattern (pattern, regp, translate, posix, multibyte)
 232      Lisp_Object pattern;
 233      struct re_registers *regp;
 234      Lisp_Object translate;
 235      int posix, multibyte;
 236 {
 237   struct regexp_cache *cp, **cpp;
 238
 239   for (cpp = &searchbuf_head; ; cpp = &cp->next)
 240     {
 241       cp = *cpp;
 242       /* Entries are initialized to nil, and may be set to nil by
 243          compile_pattern_1 if the pattern isn't valid.  Don't apply
 244          string accessors in those cases.  However, compile_pattern_1
 245          is only applied to the cache entry we pick here to reuse.  So
 246          nil should never appear before a non-nil entry.  */
 247       if (NILP (cp->regexp))
 248         goto compile_it;
 249       if (SCHARS (cp->regexp) == SCHARS (pattern)
 250           && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
 251           && !NILP (Fstring_equal (cp->regexp, pattern))
 252           && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
 253           && cp->posix == posix
 254           && (EQ (cp->syntax_table, Qt)
 255               || EQ (cp->syntax_table, current_buffer->syntax_table))
 256           && !NILP (Fequal (cp->whitespace_regexp, Vsearch_spaces_regexp))
 257           && cp->buf.charset_unibyte == charset_unibyte)
 258         break;
 259
 260       /* If we're at the end of the cache, compile into the nil cell
 261          we found, or the last (least recently used) cell with a
 262          string value.  */
 263       if (cp->next == 0)
 264         {
 265         compile_it:
 266           compile_pattern_1 (cp, pattern, translate, regp, posix);
 267           break;
 268         }
 269     }
 270
 271   /* When we get here, cp (aka *cpp) contains the compiled pattern,
 272      either because we found it in the cache or because we just compiled it.
 273      Move it to the front of the queue to mark it as most recently used.  */
 274   *cpp = cp->next;
 275   cp->next = searchbuf_head;
 276   searchbuf_head = cp;
 277
 278   /* Advise the searching functions about the space we have allocated
 279      for register data.  */
 280   if (regp)
 281     re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
 282
 283   /* The compiled pattern can be used both for mulitbyte and unibyte
 284      target.  But, we have to tell which the pattern is used for. */
 285   cp->buf.target_multibyte = multibyte;
 286
 287   return &cp->buf;
 288 }
 289
 290 \f
 291 static Lisp_Object
 292 looking_at_1 (string, posix)
 293      Lisp_Object string;
 294      int posix;
 295 {
 296   Lisp_Object val;
 297   unsigned char *p1, *p2;
 298   EMACS_INT s1, s2;
 299   register int i;
 300   struct re_pattern_buffer *bufp;
 301
 302   if (running_asynch_code)
 303     save_search_regs ();
 304
 305   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
 306   XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
 307     = current_buffer->case_eqv_table;
 308
 309   CHECK_STRING (string);
 310   bufp = compile_pattern (string,
 311                           (NILP (Vinhibit_changing_match_data)
 312                            ? &search_regs : NULL),
 313                           (!NILP (current_buffer->case_fold_search)
 314                            ? current_buffer->case_canon_table : Qnil),
 315                           posix,
 316                           !NILP (current_buffer->enable_multibyte_characters));
 317
 318   immediate_quit = 1;
 319   QUIT;                 /* Do a pending quit right away, to avoid paradoxical behavior */
 320
 321   /* Get pointers and sizes of the two strings
 322      that make up the visible portion of the buffer. */
 323
 324   p1 = BEGV_ADDR;
 325   s1 = GPT_BYTE - BEGV_BYTE;
 326   p2 = GAP_END_ADDR;
 327   s2 = ZV_BYTE - GPT_BYTE;
 328   if (s1 < 0)
 329     {
 330       p2 = p1;
 331       s2 = ZV_BYTE - BEGV_BYTE;
 332       s1 = 0;
 333     }
 334   if (s2 < 0)
 335     {
 336       s1 = ZV_BYTE - BEGV_BYTE;
 337       s2 = 0;
 338     }
 339
 340   re_match_object = Qnil;
 341
 342   i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
 343                   PT_BYTE - BEGV_BYTE,
 344                   (NILP (Vinhibit_changing_match_data)
 345                    ? &search_regs : NULL),
 346                   ZV_BYTE - BEGV_BYTE);
 347   immediate_quit = 0;
 348
 349   if (i == -2)
 350     matcher_overflow ();
 351
 352   val = (0 <= i ? Qt : Qnil);
 353   if (NILP (Vinhibit_changing_match_data) && i >= 0)
 354     for (i = 0; i < search_regs.num_regs; i++)
 355       if (search_regs.start[i] >= 0)
 356         {
 357           search_regs.start[i]
 358             = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
 359           search_regs.end[i]
 360             = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
 361         }
 362
 363   /* Set last_thing_searched only when match data is changed.  */
 364   if (NILP (Vinhibit_changing_match_data))
 365     XSETBUFFER (last_thing_searched, current_buffer);
 366
 367   return val;
 368 }
 369
 370 DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
 371        doc: /* Return t if text after point matches regular expression REGEXP.
 372 This function modifies the match data that `match-beginning',
 373 `match-end' and `match-data' access; save and restore the match
 374 data if you want to preserve them.  */)
 375      (regexp)
 376      Lisp_Object regexp;
 377 {
 378   return looking_at_1 (regexp, 0);
 379 }
 380
 381 DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
 382        doc: /* Return t if text after point matches regular expression REGEXP.
 383 Find the longest match, in accord with Posix regular expression rules.
 384 This function modifies the match data that `match-beginning',
 385 `match-end' and `match-data' access; save and restore the match
 386 data if you want to preserve them.  */)
 387      (regexp)
 388      Lisp_Object regexp;
 389 {
 390   return looking_at_1 (regexp, 1);
 391 }
 392 \f
 393 static Lisp_Object
 394 string_match_1 (regexp, string, start, posix)
 395      Lisp_Object regexp, string, start;
 396      int posix;
 397 {
 398   int val;
 399   struct re_pattern_buffer *bufp;
 400   EMACS_INT pos, pos_byte;
 401   int i;
 402
 403   if (running_asynch_code)
 404     save_search_regs ();
 405
 406   CHECK_STRING (regexp);
 407   CHECK_STRING (string);
 408
 409   if (NILP (start))
 410     pos = 0, pos_byte = 0;
 411   else
 412     {
 413       int len = SCHARS (string);
 414
 415       CHECK_NUMBER (start);
 416       pos = XINT (start);
 417       if (pos < 0 && -pos <= len)
 418         pos = len + pos;
 419       else if (0 > pos || pos > len)
 420         args_out_of_range (string, start);
 421       pos_byte = string_char_to_byte (string, pos);
 422     }
 423
 424   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
 425   XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
 426     = current_buffer->case_eqv_table;
 427
 428   bufp = compile_pattern (regexp,
 429                           (NILP (Vinhibit_changing_match_data)
 430                            ? &search_regs : NULL),
 431                           (!NILP (current_buffer->case_fold_search)
 432                            ? current_buffer->case_canon_table : Qnil),
 433                           posix,
 434                           STRING_MULTIBYTE (string));
 435   immediate_quit = 1;
 436   re_match_object = string;
 437
 438   val = re_search (bufp, (char *) SDATA (string),
 439                    SBYTES (string), pos_byte,
 440                    SBYTES (string) - pos_byte,
 441                    (NILP (Vinhibit_changing_match_data)
 442                     ? &search_regs : NULL));
 443   immediate_quit = 0;
 444
 445   /* Set last_thing_searched only when match data is changed.  */
 446   if (NILP (Vinhibit_changing_match_data))
 447     last_thing_searched = Qt;
 448
 449   if (val == -2)
 450     matcher_overflow ();
 451   if (val < 0) return Qnil;
 452
 453   if (NILP (Vinhibit_changing_match_data))
 454     for (i = 0; i < search_regs.num_regs; i++)
 455       if (search_regs.start[i] >= 0)
 456         {
 457           search_regs.start[i]
 458             = string_byte_to_char (string, search_regs.start[i]);
 459           search_regs.end[i]
 460             = string_byte_to_char (string, search_regs.end[i]);
 461         }
 462
 463   return make_number (string_byte_to_char (string, val));
 464 }
 465
 466 DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
 467        doc: /* Return index of start of first match for REGEXP in STRING, or nil.
 468 Matching ignores case if `case-fold-search' is non-nil.
 469 If third arg START is non-nil, start search at that index in STRING.
 470 For index of first char beyond the match, do (match-end 0).
 471 `match-end' and `match-beginning' also give indices of substrings
 472 matched by parenthesis constructs in the pattern.
 473
 474 You can use the function `match-string' to extract the substrings
 475 matched by the parenthesis constructions in REGEXP. */)
 476      (regexp, string, start)
 477      Lisp_Object regexp, string, start;
 478 {
 479   return string_match_1 (regexp, string, start, 0);
 480 }
 481
 482 DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
 483        doc: /* Return index of start of first match for REGEXP in STRING, or nil.
 484 Find the longest match, in accord with Posix regular expression rules.
 485 Case is ignored if `case-fold-search' is non-nil in the current buffer.
 486 If third arg START is non-nil, start search at that index in STRING.
 487 For index of first char beyond the match, do (match-end 0).
 488 `match-end' and `match-beginning' also give indices of substrings
 489 matched by parenthesis constructs in the pattern.  */)
 490      (regexp, string, start)
 491      Lisp_Object regexp, string, start;
 492 {
 493   return string_match_1 (regexp, string, start, 1);
 494 }
 495
 496 /* Match REGEXP against STRING, searching all of STRING,
 497    and return the index of the match, or negative on failure.
 498    This does not clobber the match data.  */
 499
 500 int
 501 fast_string_match (regexp, string)
 502      Lisp_Object regexp, string;
 503 {
 504   int val;
 505   struct re_pattern_buffer *bufp;
 506
 507   bufp = compile_pattern (regexp, 0, Qnil,
 508                           0, STRING_MULTIBYTE (string));
 509   immediate_quit = 1;
 510   re_match_object = string;
 511
 512   val = re_search (bufp, (char *) SDATA (string),
 513                    SBYTES (string), 0,
 514                    SBYTES (string), 0);
 515   immediate_quit = 0;
 516   return val;
 517 }
 518
 519 /* Match REGEXP against STRING, searching all of STRING ignoring case,
 520    and return the index of the match, or negative on failure.
 521    This does not clobber the match data.
 522    We assume that STRING contains single-byte characters.  */
 523
 524 extern Lisp_Object Vascii_downcase_table;
 525
 526 int
 527 fast_c_string_match_ignore_case (regexp, string)
 528      Lisp_Object regexp;
 529      const char *string;
 530 {
 531   int val;
 532   struct re_pattern_buffer *bufp;
 533   int len = strlen (string);
 534
 535   regexp = string_make_unibyte (regexp);
 536   re_match_object = Qt;
 537   bufp = compile_pattern (regexp, 0,
 538                           Vascii_canon_table, 0,
 539                           0);
 540   immediate_quit = 1;
 541   val = re_search (bufp, string, len, 0, len, 0);
 542   immediate_quit = 0;
 543   return val;
 544 }
 545
 546 /* Like fast_string_match but ignore case.  */
 547
 548 int
 549 fast_string_match_ignore_case (regexp, string)
 550      Lisp_Object regexp, string;
 551 {
 552   int val;
 553   struct re_pattern_buffer *bufp;
 554
 555   bufp = compile_pattern (regexp, 0, Vascii_canon_table,
 556                           0, STRING_MULTIBYTE (string));
 557   immediate_quit = 1;
 558   re_match_object = string;
 559
 560   val = re_search (bufp, (char *) SDATA (string),
 561                    SBYTES (string), 0,
 562                    SBYTES (string), 0);
 563   immediate_quit = 0;
 564   return val;
 565 }
 566 \f
 567 /* Match REGEXP atainst the characters after POS to LIMIT, and return
 568    the number of matched characters.  If STRING is non-nil, match
 569    against the characters in it.  In that case, POS and LIMIT are
 570    indices into the string.  This function doesn't modify the match
 571    data.  */
 572
 573 EMACS_INT
 574 fast_looking_at (regexp, pos, pos_byte, limit, limit_byte, string)
 575      Lisp_Object regexp;
 576      EMACS_INT pos, pos_byte, limit, limit_byte;
 577      Lisp_Object string;
 578 {
 579   int multibyte;
 580   struct re_pattern_buffer *buf;
 581   unsigned char *p1, *p2;
 582   EMACS_INT s1, s2;
 583   EMACS_INT len;
 584
 585   if (STRINGP (string))
 586     {
 587       if (pos_byte < 0)
 588         pos_byte = string_char_to_byte (string, pos);
 589       if (limit_byte < 0)
 590         limit_byte = string_char_to_byte (string, limit);
 591       p1 = NULL;
 592       s1 = 0;
 593       p2 = SDATA (string);
 594       s2 = SBYTES (string);
 595       re_match_object = string;
 596       multibyte = STRING_MULTIBYTE (string);
 597     }
 598   else
 599     {
 600       if (pos_byte < 0)
 601         pos_byte = CHAR_TO_BYTE (pos);
 602       if (limit_byte < 0)
 603         limit_byte = CHAR_TO_BYTE (limit);
 604       pos_byte -= BEGV_BYTE;
 605       limit_byte -= BEGV_BYTE;
 606       p1 = BEGV_ADDR;
 607       s1 = GPT_BYTE - BEGV_BYTE;
 608       p2 = GAP_END_ADDR;
 609       s2 = ZV_BYTE - GPT_BYTE;
 610       if (s1 < 0)
 611         {
 612           p2 = p1;
 613           s2 = ZV_BYTE - BEGV_BYTE;
 614           s1 = 0;
 615         }
 616       if (s2 < 0)
 617         {
 618           s1 = ZV_BYTE - BEGV_BYTE;
 619           s2 = 0;
 620         }
 621       re_match_object = Qnil;
 622       multibyte = ! NILP (current_buffer->enable_multibyte_characters);
 623     }
 624
 625   buf = compile_pattern (regexp, 0, Qnil, 0, multibyte);
 626   immediate_quit = 1;
 627   len = re_match_2 (buf, (char *) p1, s1, (char *) p2, s2,
 628                     pos_byte, NULL, limit_byte);
 629   immediate_quit = 0;
 630
 631   return len;
 632 }
 633
 634 \f
 635 /* The newline cache: remembering which sections of text have no newlines.  */
 636
 637 /* If the user has requested newline caching, make sure it's on.
 638    Otherwise, make sure it's off.
 639    This is our cheezy way of associating an action with the change of
 640    state of a buffer-local variable.  */
 641 static void
 642 newline_cache_on_off (buf)
 643      struct buffer *buf;
 644 {
 645   if (NILP (buf->cache_long_line_scans))
 646     {
 647       /* It should be off.  */
 648       if (buf->newline_cache)
 649         {
 650           free_region_cache (buf->newline_cache);
 651           buf->newline_cache = 0;
 652         }
 653     }
 654   else
 655     {
 656       /* It should be on.  */
 657       if (buf->newline_cache == 0)
 658         buf->newline_cache = new_region_cache ();
 659     }
 660 }
 661
 662 \f
 663 /* Search for COUNT instances of the character TARGET between START and END.
 664
 665    If COUNT is positive, search forwards; END must be >= START.
 666    If COUNT is negative, search backwards for the -COUNTth instance;
 667       END must be <= START.
 668    If COUNT is zero, do anything you please; run rogue, for all I care.
 669
 670    If END is zero, use BEGV or ZV instead, as appropriate for the
 671    direction indicated by COUNT.
 672
 673    If we find COUNT instances, set *SHORTAGE to zero, and return the
 674    position past the COUNTth match.  Note that for reverse motion
 675    this is not the same as the usual convention for Emacs motion commands.
 676
 677    If we don't find COUNT instances before reaching END, set *SHORTAGE
 678    to the number of TARGETs left unfound, and return END.
 679
 680    If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
 681    except when inside redisplay.  */
 682
 683 int
 684 scan_buffer (target, start, end, count, shortage, allow_quit)
 685      register int target;
 686      EMACS_INT start, end;
 687      int count;
 688      int *shortage;
 689      int allow_quit;
 690 {
 691   struct region_cache *newline_cache;
 692   int direction;
 693
 694   if (count > 0)
 695     {
 696       direction = 1;
 697       if (! end) end = ZV;
 698     }
 699   else
 700     {
 701       direction = -1;
 702       if (! end) end = BEGV;
 703     }
 704
 705   newline_cache_on_off (current_buffer);
 706   newline_cache = current_buffer->newline_cache;
 707
 708   if (shortage != 0)
 709     *shortage = 0;
 710
 711   immediate_quit = allow_quit;
 712
 713   if (count > 0)
 714     while (start != end)
 715       {
 716         /* Our innermost scanning loop is very simple; it doesn't know
 717            about gaps, buffer ends, or the newline cache.  ceiling is
 718            the position of the last character before the next such
 719            obstacle --- the last character the dumb search loop should
 720            examine.  */
 721         EMACS_INT ceiling_byte = CHAR_TO_BYTE (end) - 1;
 722         EMACS_INT start_byte = CHAR_TO_BYTE (start);
 723         EMACS_INT tem;
 724
 725         /* If we're looking for a newline, consult the newline cache
 726            to see where we can avoid some scanning.  */
 727         if (target == '\n' && newline_cache)
 728           {
 729             int next_change;
 730             immediate_quit = 0;
 731             while (region_cache_forward
 732                    (current_buffer, newline_cache, start_byte, &next_change))
 733               start_byte = next_change;
 734             immediate_quit = allow_quit;
 735
 736             /* START should never be after END.  */
 737             if (start_byte > ceiling_byte)
 738               start_byte = ceiling_byte;
 739
 740             /* Now the text after start is an unknown region, and
 741                next_change is the position of the next known region. */
 742             ceiling_byte = min (next_change - 1, ceiling_byte);
 743           }
 744
 745         /* The dumb loop can only scan text stored in contiguous
 746            bytes. BUFFER_CEILING_OF returns the last character
 747            position that is contiguous, so the ceiling is the
 748            position after that.  */
 749         tem = BUFFER_CEILING_OF (start_byte);
 750         ceiling_byte = min (tem, ceiling_byte);
 751
 752         {
 753           /* The termination address of the dumb loop.  */
 754           register unsigned char *ceiling_addr
 755             = BYTE_POS_ADDR (ceiling_byte) + 1;
 756           register unsigned char *cursor
 757             = BYTE_POS_ADDR (start_byte);
 758           unsigned char *base = cursor;
 759
 760           while (cursor < ceiling_addr)
 761             {
 762               unsigned char *scan_start = cursor;
 763
 764               /* The dumb loop.  */
 765               while (*cursor != target && ++cursor < ceiling_addr)
 766                 ;
 767
 768               /* If we're looking for newlines, cache the fact that
 769                  the region from start to cursor is free of them. */
 770               if (target == '\n' && newline_cache)
 771                 know_region_cache (current_buffer, newline_cache,
 772                                    start_byte + scan_start - base,
 773                                    start_byte + cursor - base);
 774
 775               /* Did we find the target character?  */
 776               if (cursor < ceiling_addr)
 777                 {
 778                   if (--count == 0)
 779                     {
 780                       immediate_quit = 0;
 781                       return BYTE_TO_CHAR (start_byte + cursor - base + 1);
 782                     }
 783                   cursor++;
 784                 }
 785             }
 786
 787           start = BYTE_TO_CHAR (start_byte + cursor - base);
 788         }
 789       }
 790   else
 791     while (start > end)
 792       {
 793         /* The last character to check before the next obstacle.  */
 794         EMACS_INT ceiling_byte = CHAR_TO_BYTE (end);
 795         EMACS_INT start_byte = CHAR_TO_BYTE (start);
 796         EMACS_INT tem;
 797
 798         /* Consult the newline cache, if appropriate.  */
 799         if (target == '\n' && newline_cache)
 800           {
 801             int next_change;
 802             immediate_quit = 0;
 803             while (region_cache_backward
 804                    (current_buffer, newline_cache, start_byte, &next_change))
 805               start_byte = next_change;
 806             immediate_quit = allow_quit;
 807
 808             /* Start should never be at or before end.  */
 809             if (start_byte <= ceiling_byte)
 810               start_byte = ceiling_byte + 1;
 811
 812             /* Now the text before start is an unknown region, and
 813                next_change is the position of the next known region. */
 814             ceiling_byte = max (next_change, ceiling_byte);
 815           }
 816
 817         /* Stop scanning before the gap.  */
 818         tem = BUFFER_FLOOR_OF (start_byte - 1);
 819         ceiling_byte = max (tem, ceiling_byte);
 820
 821         {
 822           /* The termination address of the dumb loop.  */
 823           register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
 824           register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
 825           unsigned char *base = cursor;
 826
 827           while (cursor >= ceiling_addr)
 828             {
 829               unsigned char *scan_start = cursor;
 830
 831               while (*cursor != target && --cursor >= ceiling_addr)
 832                 ;
 833
 834               /* If we're looking for newlines, cache the fact that
 835                  the region from after the cursor to start is free of them.  */
 836               if (target == '\n' && newline_cache)
 837                 know_region_cache (current_buffer, newline_cache,
 838                                    start_byte + cursor - base,
 839                                    start_byte + scan_start - base);
 840
 841               /* Did we find the target character?  */
 842               if (cursor >= ceiling_addr)
 843                 {
 844                   if (++count >= 0)
 845                     {
 846                       immediate_quit = 0;
 847                       return BYTE_TO_CHAR (start_byte + cursor - base);
 848                     }
 849                   cursor--;
 850                 }
 851             }
 852
 853           start = BYTE_TO_CHAR (start_byte + cursor - base);
 854         }
 855       }
 856
 857   immediate_quit = 0;
 858   if (shortage != 0)
 859     *shortage = count * direction;
 860   return start;
 861 }
 862 \f
 863 /* Search for COUNT instances of a line boundary, which means either a
 864    newline or (if selective display enabled) a carriage return.
 865    Start at START.  If COUNT is negative, search backwards.
 866
 867    We report the resulting position by calling TEMP_SET_PT_BOTH.
 868
 869    If we find COUNT instances. we position after (always after,
 870    even if scanning backwards) the COUNTth match, and return 0.
 871
 872    If we don't find COUNT instances before reaching the end of the
 873    buffer (or the beginning, if scanning backwards), we return
 874    the number of line boundaries left unfound, and position at
 875    the limit we bumped up against.
 876
 877    If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
 878    except in special cases.  */
 879
 880 int
 881 scan_newline (start, start_byte, limit, limit_byte, count, allow_quit)
 882      EMACS_INT start, start_byte;
 883      EMACS_INT limit, limit_byte;
 884      register int count;
 885      int allow_quit;
 886 {
 887   int direction = ((count > 0) ? 1 : -1);
 888
 889   register unsigned char *cursor;
 890   unsigned char *base;
 891
 892   EMACS_INT ceiling;
 893   register unsigned char *ceiling_addr;
 894
 895   int old_immediate_quit = immediate_quit;
 896
 897   /* The code that follows is like scan_buffer
 898      but checks for either newline or carriage return.  */
 899
 900   if (allow_quit)
 901     immediate_quit++;
 902
 903   start_byte = CHAR_TO_BYTE (start);
 904
 905   if (count > 0)
 906     {
 907       while (start_byte < limit_byte)
 908         {
 909           ceiling =  BUFFER_CEILING_OF (start_byte);
 910           ceiling = min (limit_byte - 1, ceiling);
 911           ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
 912           base = (cursor = BYTE_POS_ADDR (start_byte));
 913           while (1)
 914             {
 915               while (*cursor != '\n' && ++cursor != ceiling_addr)
 916                 ;
 917
 918               if (cursor != ceiling_addr)
 919                 {
 920                   if (--count == 0)
 921                     {
 922                       immediate_quit = old_immediate_quit;
 923                       start_byte = start_byte + cursor - base + 1;
 924                       start = BYTE_TO_CHAR (start_byte);
 925                       TEMP_SET_PT_BOTH (start, start_byte);
 926                       return 0;
 927                     }
 928                   else
 929                     if (++cursor == ceiling_addr)
 930                       break;
 931                 }
 932               else
 933                 break;
 934             }
 935           start_byte += cursor - base;
 936         }
 937     }
 938   else
 939     {
 940       while (start_byte > limit_byte)
 941         {
 942           ceiling = BUFFER_FLOOR_OF (start_byte - 1);
 943           ceiling = max (limit_byte, ceiling);
 944           ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
 945           base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
 946           while (1)
 947             {
 948               while (--cursor != ceiling_addr && *cursor != '\n')
 949                 ;
 950
 951               if (cursor != ceiling_addr)
 952                 {
 953                   if (++count == 0)
 954                     {
 955                       immediate_quit = old_immediate_quit;
 956                       /* Return the position AFTER the match we found.  */
 957                       start_byte = start_byte + cursor - base + 1;
 958                       start = BYTE_TO_CHAR (start_byte);
 959                       TEMP_SET_PT_BOTH (start, start_byte);
 960                       return 0;
 961                     }
 962                 }
 963               else
 964                 break;
 965             }
 966           /* Here we add 1 to compensate for the last decrement
 967              of CURSOR, which took it past the valid range.  */
 968           start_byte += cursor - base + 1;
 969         }
 970     }
 971
 972   TEMP_SET_PT_BOTH (limit, limit_byte);
 973   immediate_quit = old_immediate_quit;
 974
 975   return count * direction;
 976 }
 977
 978 int
 979 find_next_newline_no_quit (from, cnt)
 980      EMACS_INT from;
 981      int cnt;
 982 {
 983   return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0);
 984 }
 985
 986 /* Like find_next_newline, but returns position before the newline,
 987    not after, and only search up to TO.  This isn't just
 988    find_next_newline (...)-1, because you might hit TO.  */
 989
 990 int
 991 find_before_next_newline (from, to, cnt)
 992      EMACS_INT from, to;
 993      int cnt;
 994 {
 995   int shortage;
 996   int pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);
 997
 998   if (shortage == 0)
 999     pos--;
1000
1001   return pos;
1002 }
1003 \f
1004 /* Subroutines of Lisp buffer search functions. */
1005
1006 static Lisp_Object
1007 search_command (string, bound, noerror, count, direction, RE, posix)
1008      Lisp_Object string, bound, noerror, count;
1009      int direction;
1010      int RE;
1011      int posix;
1012 {
1013   register int np;
1014   int lim, lim_byte;
1015   int n = direction;
1016
1017   if (!NILP (count))
1018     {
1019       CHECK_NUMBER (count);
1020       n *= XINT (count);
1021     }
1022
1023   CHECK_STRING (string);
1024   if (NILP (bound))
1025     {
1026       if (n > 0)
1027         lim = ZV, lim_byte = ZV_BYTE;
1028       else
1029         lim = BEGV, lim_byte = BEGV_BYTE;
1030     }
1031   else
1032     {
1033       CHECK_NUMBER_COERCE_MARKER (bound);
1034       lim = XINT (bound);
1035       if (n > 0 ? lim < PT : lim > PT)
1036         error ("Invalid search bound (wrong side of point)");
1037       if (lim > ZV)
1038         lim = ZV, lim_byte = ZV_BYTE;
1039       else if (lim < BEGV)
1040         lim = BEGV, lim_byte = BEGV_BYTE;
1041       else
1042         lim_byte = CHAR_TO_BYTE (lim);
1043     }
1044
1045   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
1046   XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
1047     = current_buffer->case_eqv_table;
1048
1049   np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
1050                       (!NILP (current_buffer->case_fold_search)
1051                        ? current_buffer->case_canon_table
1052                        : Qnil),
1053                       (!NILP (current_buffer->case_fold_search)
1054                        ? current_buffer->case_eqv_table
1055                        : Qnil),
1056                       posix);
1057   if (np <= 0)
1058     {
1059       if (NILP (noerror))
1060         xsignal1 (Qsearch_failed, string);
1061
1062       if (!EQ (noerror, Qt))
1063         {
1064           if (lim < BEGV || lim > ZV)
1065             abort ();
1066           SET_PT_BOTH (lim, lim_byte);
1067           return Qnil;
1068 #if 0 /* This would be clean, but maybe programs depend on
1069          a value of nil here.  */
1070           np = lim;
1071 #endif
1072         }
1073       else
1074         return Qnil;
1075     }
1076
1077   if (np < BEGV || np > ZV)
1078     abort ();
1079
1080   SET_PT (np);
1081
1082   return make_number (np);
1083 }
1084 \f
1085 /* Return 1 if REGEXP it matches just one constant string.  */
1086
1087 static int
1088 trivial_regexp_p (regexp)
1089      Lisp_Object regexp;
1090 {
1091   int len = SBYTES (regexp);
1092   unsigned char *s = SDATA (regexp);
1093   while (--len >= 0)
1094     {
1095       switch (*s++)
1096         {
1097         case '.': case '*': case '+': case '?': case '[': case '^': case '$':
1098           return 0;
1099         case '\\':
1100           if (--len < 0)
1101             return 0;
1102           switch (*s++)
1103             {
1104             case '|': case '(': case ')': case '`': case '\'': case 'b':
1105             case 'B': case '<': case '>': case 'w': case 'W': case 's':
1106             case 'S': case '=': case '{': case '}': case '_':
1107             case 'c': case 'C': /* for categoryspec and notcategoryspec */
1108             case '1': case '2': case '3': case '4': case '5':
1109             case '6': case '7': case '8': case '9':
1110               return 0;
1111             }
1112         }
1113     }
1114   return 1;
1115 }
1116
1117 /* Search for the n'th occurrence of STRING in the current buffer,
1118    starting at position POS and stopping at position LIM,
1119    treating STRING as a literal string if RE is false or as
1120    a regular expression if RE is true.
1121
1122    If N is positive, searching is forward and LIM must be greater than POS.
1123    If N is negative, searching is backward and LIM must be less than POS.
1124
1125    Returns -x if x occurrences remain to be found (x > 0),
1126    or else the position at the beginning of the Nth occurrence
1127    (if searching backward) or the end (if searching forward).
1128
1129    POSIX is nonzero if we want full backtracking (POSIX style)
1130    for this pattern.  0 means backtrack only enough to get a valid match.  */
1131
1132 #define TRANSLATE(out, trt, d)                  \
1133 do                                              \
1134   {                                             \
1135     if (! NILP (trt))                           \
1136       {                                         \
1137         Lisp_Object temp;                       \
1138         temp = Faref (trt, make_number (d));    \
1139         if (INTEGERP (temp))                    \
1140           out = XINT (temp);                    \
1141         else                                    \
1142           out = d;                              \
1143       }                                         \
1144     else                                        \
1145       out = d;                                  \
1146   }                                             \
1147 while (0)
1148
1149 /* Only used in search_buffer, to record the end position of the match
1150    when searching regexps and SEARCH_REGS should not be changed
1151    (i.e. Vinhibit_changing_match_data is non-nil).  */
1152 static struct re_registers search_regs_1;
1153
1154 static EMACS_INT
1155 search_buffer (string, pos, pos_byte, lim, lim_byte, n,
1156                RE, trt, inverse_trt, posix)
1157      Lisp_Object string;
1158      EMACS_INT pos;
1159      EMACS_INT pos_byte;
1160      EMACS_INT lim;
1161      EMACS_INT lim_byte;
1162      int n;
1163      int RE;
1164      Lisp_Object trt;
1165      Lisp_Object inverse_trt;
1166      int posix;
1167 {
1168   int len = SCHARS (string);
1169   int len_byte = SBYTES (string);
1170   register int i;
1171
1172   if (running_asynch_code)
1173     save_search_regs ();
1174
1175   /* Searching 0 times means don't move.  */
1176   /* Null string is found at starting position.  */
1177   if (len == 0 || n == 0)
1178     {
1179       set_search_regs (pos_byte, 0);
1180       return pos;
1181     }
1182
1183   if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp)))
1184     {
1185       unsigned char *p1, *p2;
1186       int s1, s2;
1187       struct re_pattern_buffer *bufp;
1188
1189       bufp = compile_pattern (string,
1190                               (NILP (Vinhibit_changing_match_data)
1191                                ? &search_regs : &search_regs_1),
1192                               trt, posix,
1193                               !NILP (current_buffer->enable_multibyte_characters));
1194
1195       immediate_quit = 1;       /* Quit immediately if user types ^G,
1196                                    because letting this function finish
1197                                    can take too long. */
1198       QUIT;                     /* Do a pending quit right away,
1199                                    to avoid paradoxical behavior */
1200       /* Get pointers and sizes of the two strings
1201          that make up the visible portion of the buffer. */
1202
1203       p1 = BEGV_ADDR;
1204       s1 = GPT_BYTE - BEGV_BYTE;
1205       p2 = GAP_END_ADDR;
1206       s2 = ZV_BYTE - GPT_BYTE;
1207       if (s1 < 0)
1208         {
1209           p2 = p1;
1210           s2 = ZV_BYTE - BEGV_BYTE;
1211           s1 = 0;
1212         }
1213       if (s2 < 0)
1214         {
1215           s1 = ZV_BYTE - BEGV_BYTE;
1216           s2 = 0;
1217         }
1218       re_match_object = Qnil;
1219
1220       while (n < 0)
1221         {
1222           int val;
1223           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1224                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1225                              (NILP (Vinhibit_changing_match_data)
1226                               ? &search_regs : &search_regs_1),
1227                              /* Don't allow match past current point */
1228                              pos_byte - BEGV_BYTE);
1229           if (val == -2)
1230             {
1231               matcher_overflow ();
1232             }
1233           if (val >= 0)
1234             {
1235               if (NILP (Vinhibit_changing_match_data))
1236                 {
1237                   pos_byte = search_regs.start[0] + BEGV_BYTE;
1238                   for (i = 0; i < search_regs.num_regs; i++)
1239                     if (search_regs.start[i] >= 0)
1240                       {
1241                         search_regs.start[i]
1242                           = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1243                         search_regs.end[i]
1244                           = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1245                       }
1246                   XSETBUFFER (last_thing_searched, current_buffer);
1247                   /* Set pos to the new position. */
1248                   pos = search_regs.start[0];
1249                 }
1250               else
1251                 {
1252                   pos_byte = search_regs_1.start[0] + BEGV_BYTE;
1253                   /* Set pos to the new position.  */
1254                   pos = BYTE_TO_CHAR (search_regs_1.start[0] + BEGV_BYTE);
1255                 }
1256             }
1257           else
1258             {
1259               immediate_quit = 0;
1260               return (n);
1261             }
1262           n++;
1263         }
1264       while (n > 0)
1265         {
1266           int val;
1267           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1268                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1269                              (NILP (Vinhibit_changing_match_data)
1270                               ? &search_regs : &search_regs_1),
1271                              lim_byte - BEGV_BYTE);
1272           if (val == -2)
1273             {
1274               matcher_overflow ();
1275             }
1276           if (val >= 0)
1277             {
1278               if (NILP (Vinhibit_changing_match_data))
1279                 {
1280                   pos_byte = search_regs.end[0] + BEGV_BYTE;
1281                   for (i = 0; i < search_regs.num_regs; i++)
1282                     if (search_regs.start[i] >= 0)
1283                       {
1284                         search_regs.start[i]
1285                           = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1286                         search_regs.end[i]
1287                           = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1288                       }
1289                   XSETBUFFER (last_thing_searched, current_buffer);
1290                   pos = search_regs.end[0];
1291                 }
1292               else
1293                 {
1294                   pos_byte = search_regs_1.end[0] + BEGV_BYTE;
1295                   pos = BYTE_TO_CHAR (search_regs_1.end[0] + BEGV_BYTE);
1296                 }
1297             }
1298           else
1299             {
1300               immediate_quit = 0;
1301               return (0 - n);
1302             }
1303           n--;
1304         }
1305       immediate_quit = 0;
1306       return (pos);
1307     }
1308   else                          /* non-RE case */
1309     {
1310       unsigned char *raw_pattern, *pat;
1311       int raw_pattern_size;
1312       int raw_pattern_size_byte;
1313       unsigned char *patbuf;
1314       int multibyte = !NILP (current_buffer->enable_multibyte_characters);
1315       unsigned char *base_pat;
1316       /* Set to positive if we find a non-ASCII char that need
1317          translation.  Otherwise set to zero later.  */
1318       int char_base = -1;
1319       int boyer_moore_ok = 1;
1320
1321       /* MULTIBYTE says whether the text to be searched is multibyte.
1322          We must convert PATTERN to match that, or we will not really
1323          find things right.  */
1324
1325       if (multibyte == STRING_MULTIBYTE (string))
1326         {
1327           raw_pattern = (unsigned char *) SDATA (string);
1328           raw_pattern_size = SCHARS (string);
1329           raw_pattern_size_byte = SBYTES (string);
1330         }
1331       else if (multibyte)
1332         {
1333           raw_pattern_size = SCHARS (string);
1334           raw_pattern_size_byte
1335             = count_size_as_multibyte (SDATA (string),
1336                                        raw_pattern_size);
1337           raw_pattern = (unsigned char *) alloca (raw_pattern_size_byte + 1);
1338           copy_text (SDATA (string), raw_pattern,
1339                      SCHARS (string), 0, 1);
1340         }
1341       else
1342         {
1343           /* Converting multibyte to single-byte.
1344
1345              ??? Perhaps this conversion should be done in a special way
1346              by subtracting nonascii-insert-offset from each non-ASCII char,
1347              so that only the multibyte chars which really correspond to
1348              the chosen single-byte character set can possibly match.  */
1349           raw_pattern_size = SCHARS (string);
1350           raw_pattern_size_byte = SCHARS (string);
1351           raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
1352           copy_text (SDATA (string), raw_pattern,
1353                      SBYTES (string), 1, 0);
1354         }
1355
1356       /* Copy and optionally translate the pattern.  */
1357       len = raw_pattern_size;
1358       len_byte = raw_pattern_size_byte;
1359       patbuf = (unsigned char *) alloca (len * MAX_MULTIBYTE_LENGTH);
1360       pat = patbuf;
1361       base_pat = raw_pattern;
1362       if (multibyte)
1363         {
1364           /* Fill patbuf by translated characters in STRING while
1365              checking if we can use boyer-moore search.  If TRT is
1366              non-nil, we can use boyer-moore search only if TRT can be
1367              represented by the byte array of 256 elements.  For that,
1368              all non-ASCII case-equivalents of all case-senstive
1369              characters in STRING must belong to the same charset and
1370              row.  */
1371
1372           while (--len >= 0)
1373             {
1374               unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str;
1375               int c, translated, inverse;
1376               int in_charlen, charlen;
1377
1378               /* If we got here and the RE flag is set, it's because we're
1379                  dealing with a regexp known to be trivial, so the backslash
1380                  just quotes the next character.  */
1381               if (RE && *base_pat == '\\')
1382                 {
1383                   len--;
1384                   raw_pattern_size--;
1385                   len_byte--;
1386                   base_pat++;
1387                 }
1388
1389               c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen);
1390
1391               if (NILP (trt))
1392                 {
1393                   str = base_pat;
1394                   charlen = in_charlen;
1395                 }
1396               else
1397                 {
1398                   /* Translate the character.  */
1399                   TRANSLATE (translated, trt, c);
1400                   charlen = CHAR_STRING (translated, str_base);
1401                   str = str_base;
1402
1403                   /* Check if C has any other case-equivalents.  */
1404                   TRANSLATE (inverse, inverse_trt, c);
1405                   /* If so, check if we can use boyer-moore.  */
1406                   if (c != inverse && boyer_moore_ok)
1407                     {
1408                       /* Check if all equivalents belong to the same
1409                          group of characters.  Note that the check of C
1410                          itself is done by the last iteration.  */
1411                       int this_char_base = -1;
1412
1413                       while (boyer_moore_ok)
1414                         {
1415                           if (ASCII_BYTE_P (inverse))
1416                             {
1417                               if (this_char_base > 0)
1418                                 boyer_moore_ok = 0;
1419                               else
1420                                 this_char_base = 0;
1421                             }
1422                           else if (CHAR_BYTE8_P (inverse))
1423                             /* Boyer-moore search can't handle a
1424                                translation of an eight-bit
1425                                character.  */
1426                             boyer_moore_ok = 0;
1427                           else if (this_char_base < 0)
1428                             {
1429                               this_char_base = inverse & ~0x3F;
1430                               if (char_base < 0)
1431                                 char_base = this_char_base;
1432                               else if (this_char_base != char_base)
1433                                 boyer_moore_ok = 0;
1434                             }
1435                           else if ((inverse & ~0x3F) != this_char_base)
1436                             boyer_moore_ok = 0;
1437                           if (c == inverse)
1438                             break;
1439                           TRANSLATE (inverse, inverse_trt, inverse);
1440                         }
1441                     }
1442                 }
1443
1444               /* Store this character into the translated pattern.  */
1445               bcopy (str, pat, charlen);
1446               pat += charlen;
1447               base_pat += in_charlen;
1448               len_byte -= in_charlen;
1449             }
1450
1451           /* If char_base is still negative we didn't find any translated
1452              non-ASCII characters.  */
1453           if (char_base < 0)
1454             char_base = 0;
1455         }
1456       else
1457         {
1458           /* Unibyte buffer.  */
1459           char_base = 0;
1460           while (--len >= 0)
1461             {
1462               int c, translated;
1463
1464               /* If we got here and the RE flag is set, it's because we're
1465                  dealing with a regexp known to be trivial, so the backslash
1466                  just quotes the next character.  */
1467               if (RE && *base_pat == '\\')
1468                 {
1469                   len--;
1470                   raw_pattern_size--;
1471                   base_pat++;
1472                 }
1473               c = *base_pat++;
1474               TRANSLATE (translated, trt, c);
1475               *pat++ = translated;
1476             }
1477         }
1478
1479       len_byte = pat - patbuf;
1480       len = raw_pattern_size;
1481       pat = base_pat = patbuf;
1482
1483       if (boyer_moore_ok)
1484         return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
1485                             pos, pos_byte, lim, lim_byte,
1486                             char_base);
1487       else
1488         return simple_search (n, pat, len, len_byte, trt,
1489                               pos, pos_byte, lim, lim_byte);
1490     }
1491 }
1492 \f
1493 /* Do a simple string search N times for the string PAT,
1494    whose length is LEN/LEN_BYTE,
1495    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1496    TRT is the translation table.
1497
1498    Return the character position where the match is found.
1499    Otherwise, if M matches remained to be found, return -M.
1500
1501    This kind of search works regardless of what is in PAT and
1502    regardless of what is in TRT.  It is used in cases where
1503    boyer_moore cannot work.  */
1504
1505 static EMACS_INT
1506 simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
1507      int n;
1508      unsigned char *pat;
1509      int len, len_byte;
1510      Lisp_Object trt;
1511      EMACS_INT pos, pos_byte;
1512      EMACS_INT lim, lim_byte;
1513 {
1514   int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1515   int forward = n > 0;
1516   /* Number of buffer bytes matched.  Note that this may be different
1517      from len_byte in a multibyte buffer.  */
1518   int match_byte;
1519
1520   if (lim > pos && multibyte)
1521     while (n > 0)
1522       {
1523         while (1)
1524           {
1525             /* Try matching at position POS.  */
1526             EMACS_INT this_pos = pos;
1527             EMACS_INT this_pos_byte = pos_byte;
1528             int this_len = len;
1529             int this_len_byte = len_byte;
1530             unsigned char *p = pat;
1531             if (pos + len > lim || pos_byte + len_byte > lim_byte)
1532               goto stop;
1533
1534             while (this_len > 0)
1535               {
1536                 int charlen, buf_charlen;
1537                 int pat_ch, buf_ch;
1538
1539                 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1540                 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1541                                                  ZV_BYTE - this_pos_byte,
1542                                                  buf_charlen);
1543                 TRANSLATE (buf_ch, trt, buf_ch);
1544
1545                 if (buf_ch != pat_ch)
1546                   break;
1547
1548                 this_len_byte -= charlen;
1549                 this_len--;
1550                 p += charlen;
1551
1552                 this_pos_byte += buf_charlen;
1553                 this_pos++;
1554               }
1555
1556             if (this_len == 0)
1557               {
1558                 match_byte = this_pos_byte - pos_byte;
1559                 pos += len;
1560                 pos_byte += match_byte;
1561                 break;
1562               }
1563
1564             INC_BOTH (pos, pos_byte);
1565           }
1566
1567         n--;
1568       }
1569   else if (lim > pos)
1570     while (n > 0)
1571       {
1572         while (1)
1573           {
1574             /* Try matching at position POS.  */
1575             EMACS_INT this_pos = pos;
1576             int this_len = len;
1577             unsigned char *p = pat;
1578
1579             if (pos + len > lim)
1580               goto stop;
1581
1582             while (this_len > 0)
1583               {
1584                 int pat_ch = *p++;
1585                 int buf_ch = FETCH_BYTE (this_pos);
1586                 TRANSLATE (buf_ch, trt, buf_ch);
1587
1588                 if (buf_ch != pat_ch)
1589                   break;
1590
1591                 this_len--;
1592                 this_pos++;
1593               }
1594
1595             if (this_len == 0)
1596               {
1597                 match_byte = len;
1598                 pos += len;
1599                 break;
1600               }
1601
1602             pos++;
1603           }
1604
1605         n--;
1606       }
1607   /* Backwards search.  */
1608   else if (lim < pos && multibyte)
1609     while (n < 0)
1610       {
1611         while (1)
1612           {
1613             /* Try matching at position POS.  */
1614             EMACS_INT this_pos = pos - len;
1615             EMACS_INT this_pos_byte;
1616             int this_len = len;
1617             int this_len_byte = len_byte;
1618             unsigned char *p = pat;
1619
1620             if (this_pos < lim || (pos_byte - len_byte) < lim_byte)
1621               goto stop;
1622             this_pos_byte = CHAR_TO_BYTE (this_pos);
1623             match_byte = pos_byte - this_pos_byte;
1624
1625             while (this_len > 0)
1626               {
1627                 int charlen, buf_charlen;
1628                 int pat_ch, buf_ch;
1629
1630                 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1631                 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1632                                                  ZV_BYTE - this_pos_byte,
1633                                                  buf_charlen);
1634                 TRANSLATE (buf_ch, trt, buf_ch);
1635
1636                 if (buf_ch != pat_ch)
1637                   break;
1638
1639                 this_len_byte -= charlen;
1640                 this_len--;
1641                 p += charlen;
1642                 this_pos_byte += buf_charlen;
1643                 this_pos++;
1644               }
1645
1646             if (this_len == 0)
1647               {
1648                 pos -= len;
1649                 pos_byte -= match_byte;
1650                 break;
1651               }
1652
1653             DEC_BOTH (pos, pos_byte);
1654           }
1655
1656         n++;
1657       }
1658   else if (lim < pos)
1659     while (n < 0)
1660       {
1661         while (1)
1662           {
1663             /* Try matching at position POS.  */
1664             EMACS_INT this_pos = pos - len;
1665             int this_len = len;
1666             unsigned char *p = pat;
1667
1668             if (this_pos < lim)
1669               goto stop;
1670
1671             while (this_len > 0)
1672               {
1673                 int pat_ch = *p++;
1674                 int buf_ch = FETCH_BYTE (this_pos);
1675                 TRANSLATE (buf_ch, trt, buf_ch);
1676
1677                 if (buf_ch != pat_ch)
1678                   break;
1679                 this_len--;
1680                 this_pos++;
1681               }
1682
1683             if (this_len == 0)
1684               {
1685                 match_byte = len;
1686                 pos -= len;
1687                 break;
1688               }
1689
1690             pos--;
1691           }
1692
1693         n++;
1694       }
1695
1696  stop:
1697   if (n == 0)
1698     {
1699       if (forward)
1700         set_search_regs ((multibyte ? pos_byte : pos) - match_byte, match_byte);
1701       else
1702         set_search_regs (multibyte ? pos_byte : pos, match_byte);
1703
1704       return pos;
1705     }
1706   else if (n > 0)
1707     return -n;
1708   else
1709     return n;
1710 }
1711 \f
1712 /* Do Boyer-Moore search N times for the string BASE_PAT,
1713    whose length is LEN/LEN_BYTE,
1714    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1715    DIRECTION says which direction we search in.
1716    TRT and INVERSE_TRT are translation tables.
1717    Characters in PAT are already translated by TRT.
1718
1719    This kind of search works if all the characters in BASE_PAT that
1720    have nontrivial translation are the same aside from the last byte.
1721    This makes it possible to translate just the last byte of a
1722    character, and do so after just a simple test of the context.
1723    CHAR_BASE is nonzero if there is such a non-ASCII character.
1724
1725    If that criterion is not satisfied, do not call this function.  */
1726
1727 static EMACS_INT
1728 boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
1729              pos, pos_byte, lim, lim_byte, char_base)
1730      int n;
1731      unsigned char *base_pat;
1732      int len, len_byte;
1733      Lisp_Object trt;
1734      Lisp_Object inverse_trt;
1735      EMACS_INT pos, pos_byte;
1736      EMACS_INT lim, lim_byte;
1737      int char_base;
1738 {
1739   int direction = ((n > 0) ? 1 : -1);
1740   register int dirlen;
1741   EMACS_INT limit;
1742   int stride_for_teases = 0;
1743   int BM_tab[0400];
1744   register unsigned char *cursor, *p_limit;
1745   register int i, j;
1746   unsigned char *pat, *pat_end;
1747   int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1748
1749   unsigned char simple_translate[0400];
1750   /* These are set to the preceding bytes of a byte to be translated
1751      if char_base is nonzero.  As the maximum byte length of a
1752      multibyte character is 5, we have to check at most four previous
1753      bytes.  */
1754   int translate_prev_byte1 = 0;
1755   int translate_prev_byte2 = 0;
1756   int translate_prev_byte3 = 0;
1757   int translate_prev_byte4 = 0;
1758
1759   /* The general approach is that we are going to maintain that we know
1760      the first (closest to the present position, in whatever direction
1761      we're searching) character that could possibly be the last
1762      (furthest from present position) character of a valid match.  We
1763      advance the state of our knowledge by looking at that character
1764      and seeing whether it indeed matches the last character of the
1765      pattern.  If it does, we take a closer look.  If it does not, we
1766      move our pointer (to putative last characters) as far as is
1767      logically possible.  This amount of movement, which I call a
1768      stride, will be the length of the pattern if the actual character
1769      appears nowhere in the pattern, otherwise it will be the distance
1770      from the last occurrence of that character to the end of the
1771      pattern.  If the amount is zero we have a possible match.  */
1772
1773   /* Here we make a "mickey mouse" BM table.  The stride of the search
1774      is determined only by the last character of the putative match.
1775      If that character does not match, we will stride the proper
1776      distance to propose a match that superimposes it on the last
1777      instance of a character that matches it (per trt), or misses
1778      it entirely if there is none. */
1779
1780   dirlen = len_byte * direction;
1781
1782   /* Record position after the end of the pattern.  */
1783   pat_end = base_pat + len_byte;
1784   /* BASE_PAT points to a character that we start scanning from.
1785      It is the first character in a forward search,
1786      the last character in a backward search.  */
1787   if (direction < 0)
1788     base_pat = pat_end - 1;
1789
1790   /* A character that does not appear in the pattern induces a
1791      stride equal to the pattern length.  */
1792   for (i = 0; i < 0400; i++)
1793     BM_tab[i] = dirlen;
1794
1795   /* We use this for translation, instead of TRT itself.
1796      We fill this in to handle the characters that actually
1797      occur in the pattern.  Others don't matter anyway!  */
1798   for (i = 0; i < 0400; i++)
1799     simple_translate[i] = i;
1800
1801   if (char_base)
1802     {
1803       /* Setup translate_prev_byte1/2/3/4 from CHAR_BASE.  Only a
1804          byte following them are the target of translation.  */
1805       unsigned char str[MAX_MULTIBYTE_LENGTH];
1806       int len = CHAR_STRING (char_base, str);
1807
1808       translate_prev_byte1 = str[len - 2];
1809       if (len > 2)
1810         {
1811           translate_prev_byte2 = str[len - 3];
1812           if (len > 3)
1813             {
1814               translate_prev_byte3 = str[len - 4];
1815               if (len > 4)
1816                 translate_prev_byte4 = str[len - 5];
1817             }
1818         }
1819     }
1820
1821   i = 0;
1822   while (i != dirlen)
1823     {
1824       unsigned char *ptr = base_pat + i;
1825       i += direction;
1826       if (! NILP (trt))
1827         {
1828           /* If the byte currently looking at is the last of a
1829              character to check case-equivalents, set CH to that
1830              character.  An ASCII character and a non-ASCII character
1831              matching with CHAR_BASE are to be checked.  */
1832           int ch = -1;
1833
1834           if (ASCII_BYTE_P (*ptr) || ! multibyte)
1835             ch = *ptr;
1836           else if (char_base
1837                    && ((pat_end - ptr) == 1 || CHAR_HEAD_P (ptr[1])))
1838             {
1839               unsigned char *charstart = ptr - 1;
1840
1841               while (! (CHAR_HEAD_P (*charstart)))
1842                 charstart--;
1843               ch = STRING_CHAR (charstart, ptr - charstart + 1);
1844               if (char_base != (ch & ~0x3F))
1845                 ch = -1;
1846             }
1847
1848           if (ch >= 0200)
1849             j = (ch & 0x3F) | 0200;
1850           else
1851             j = *ptr;
1852
1853           if (i == dirlen)
1854             stride_for_teases = BM_tab[j];
1855
1856           BM_tab[j] = dirlen - i;
1857           /* A translation table is accompanied by its inverse -- see */
1858           /* comment following downcase_table for details */
1859           if (ch >= 0)
1860             {
1861               int starting_ch = ch;
1862               int starting_j = j;
1863
1864               while (1)
1865                 {
1866                   TRANSLATE (ch, inverse_trt, ch);
1867                   if (ch >= 0200)
1868                     j = (ch & 0x3F) | 0200;
1869                   else
1870                     j = ch;
1871
1872                   /* For all the characters that map into CH,
1873                      set up simple_translate to map the last byte
1874                      into STARTING_J.  */
1875                   simple_translate[j] = starting_j;
1876                   if (ch == starting_ch)
1877                     break;
1878                   BM_tab[j] = dirlen - i;
1879                 }
1880             }
1881         }
1882       else
1883         {
1884           j = *ptr;
1885
1886           if (i == dirlen)
1887             stride_for_teases = BM_tab[j];
1888           BM_tab[j] = dirlen - i;
1889         }
1890       /* stride_for_teases tells how much to stride if we get a
1891          match on the far character but are subsequently
1892          disappointed, by recording what the stride would have been
1893          for that character if the last character had been
1894          different.  */
1895     }
1896   pos_byte += dirlen - ((direction > 0) ? direction : 0);
1897   /* loop invariant - POS_BYTE points at where last char (first
1898      char if reverse) of pattern would align in a possible match.  */
1899   while (n != 0)
1900     {
1901       EMACS_INT tail_end;
1902       unsigned char *tail_end_ptr;
1903
1904       /* It's been reported that some (broken) compiler thinks that
1905          Boolean expressions in an arithmetic context are unsigned.
1906          Using an explicit ?1:0 prevents this.  */
1907       if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1908           < 0)
1909         return (n * (0 - direction));
1910       /* First we do the part we can by pointers (maybe nothing) */
1911       QUIT;
1912       pat = base_pat;
1913       limit = pos_byte - dirlen + direction;
1914       if (direction > 0)
1915         {
1916           limit = BUFFER_CEILING_OF (limit);
1917           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1918              can take on without hitting edge of buffer or the gap.  */
1919           limit = min (limit, pos_byte + 20000);
1920           limit = min (limit, lim_byte - 1);
1921         }
1922       else
1923         {
1924           limit = BUFFER_FLOOR_OF (limit);
1925           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1926              can take on without hitting edge of buffer or the gap.  */
1927           limit = max (limit, pos_byte - 20000);
1928           limit = max (limit, lim_byte);
1929         }
1930       tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1931       tail_end_ptr = BYTE_POS_ADDR (tail_end);
1932
1933       if ((limit - pos_byte) * direction > 20)
1934         {
1935           unsigned char *p2;
1936
1937           p_limit = BYTE_POS_ADDR (limit);
1938           p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1939           /* In this loop, pos + cursor - p2 is the surrogate for pos.  */
1940           while (1)             /* use one cursor setting as long as i can */
1941             {
1942               if (direction > 0) /* worth duplicating */
1943                 {
1944                   while (cursor <= p_limit)
1945                     {
1946                       if (BM_tab[*cursor] == 0)
1947                         goto hit;
1948                       cursor += BM_tab[*cursor];
1949                     }
1950                 }
1951               else
1952                 {
1953                   while (cursor >= p_limit)
1954                     {
1955                       if (BM_tab[*cursor] == 0)
1956                         goto hit;
1957                       cursor += BM_tab[*cursor];
1958                     }
1959                 }
1960               /* If you are here, cursor is beyond the end of the
1961                  searched region.  You fail to match within the
1962                  permitted region and would otherwise try a character
1963                  beyond that region.  */
1964               break;
1965
1966             hit:
1967               i = dirlen - direction;
1968               if (! NILP (trt))
1969                 {
1970                   while ((i -= direction) + direction != 0)
1971                     {
1972                       int ch;
1973                       cursor -= direction;
1974                       /* Translate only the last byte of a character.  */
1975                       if (! multibyte
1976                           || ((cursor == tail_end_ptr
1977                                || CHAR_HEAD_P (cursor[1]))
1978                               && (CHAR_HEAD_P (cursor[0])
1979                                   /* Check if this is the last byte of
1980                                      a translable character.  */
1981                                   || (translate_prev_byte1 == cursor[-1]
1982                                       && (CHAR_HEAD_P (translate_prev_byte1)
1983                                           || (translate_prev_byte2 == cursor[-2]
1984                                               && (CHAR_HEAD_P (translate_prev_byte2)
1985                                                   || (translate_prev_byte3 == cursor[-3]))))))))
1986                         ch = simple_translate[*cursor];
1987                       else
1988                         ch = *cursor;
1989                       if (pat[i] != ch)
1990                         break;
1991                     }
1992                 }
1993               else
1994                 {
1995                   while ((i -= direction) + direction != 0)
1996                     {
1997                       cursor -= direction;
1998                       if (pat[i] != *cursor)
1999                         break;
2000                     }
2001                 }
2002               cursor += dirlen - i - direction; /* fix cursor */
2003               if (i + direction == 0)
2004                 {
2005                   EMACS_INT position, start, end;
2006
2007                   cursor -= direction;
2008
2009                   position = pos_byte + cursor - p2 + ((direction > 0)
2010                                                        ? 1 - len_byte : 0);
2011                   set_search_regs (position, len_byte);
2012
2013                   if (NILP (Vinhibit_changing_match_data))
2014                     {
2015                       start = search_regs.start[0];
2016                       end = search_regs.end[0];
2017                     }
2018                   else
2019                     /* If Vinhibit_changing_match_data is non-nil,
2020                        search_regs will not be changed.  So let's
2021                        compute start and end here.  */
2022                     {
2023                       start = BYTE_TO_CHAR (position);
2024                       end = BYTE_TO_CHAR (position + len_byte);
2025                     }
2026
2027                   if ((n -= direction) != 0)
2028                     cursor += dirlen; /* to resume search */
2029                   else
2030                     return direction > 0 ? end : start;
2031                 }
2032               else
2033                 cursor += stride_for_teases; /* <sigh> we lose -  */
2034             }
2035           pos_byte += cursor - p2;
2036         }
2037       else
2038         /* Now we'll pick up a clump that has to be done the hard
2039            way because it covers a discontinuity.  */
2040         {
2041           limit = ((direction > 0)
2042                    ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
2043                    : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
2044           limit = ((direction > 0)
2045                    ? min (limit + len_byte, lim_byte - 1)
2046                    : max (limit - len_byte, lim_byte));
2047           /* LIMIT is now the last value POS_BYTE can have
2048              and still be valid for a possible match.  */
2049           while (1)
2050             {
2051               /* This loop can be coded for space rather than
2052                  speed because it will usually run only once.
2053                  (the reach is at most len + 21, and typically
2054                  does not exceed len).  */
2055               while ((limit - pos_byte) * direction >= 0)
2056                 {
2057                   int ch = FETCH_BYTE (pos_byte);
2058                   if (BM_tab[ch] == 0)
2059                     goto hit2;
2060                   pos_byte += BM_tab[ch];
2061                 }
2062               break;    /* ran off the end */
2063
2064             hit2:
2065               /* Found what might be a match.  */
2066               i = dirlen - direction;
2067               while ((i -= direction) + direction != 0)
2068                 {
2069                   int ch;
2070                   unsigned char *ptr;
2071                   pos_byte -= direction;
2072                   ptr = BYTE_POS_ADDR (pos_byte);
2073                   /* Translate only the last byte of a character.  */
2074                   if (! multibyte
2075                       || ((ptr == tail_end_ptr
2076                            || CHAR_HEAD_P (ptr[1]))
2077                           && (CHAR_HEAD_P (ptr[0])
2078                               /* Check if this is the last byte of a
2079                                  translable character.  */
2080                               || (translate_prev_byte1 == ptr[-1]
2081                                   && (CHAR_HEAD_P (translate_prev_byte1)
2082                                       || (translate_prev_byte2 == ptr[-2]
2083                                           && (CHAR_HEAD_P (translate_prev_byte2)
2084                                               || translate_prev_byte3 == ptr[-3])))))))
2085                     ch = simple_translate[*ptr];
2086                   else
2087                     ch = *ptr;
2088                   if (pat[i] != ch)
2089                     break;
2090                 }
2091               /* Above loop has moved POS_BYTE part or all the way
2092                  back to the first pos (last pos if reverse).
2093                  Set it once again at the last (first if reverse) char.  */
2094               pos_byte += dirlen - i - direction;
2095               if (i + direction == 0)
2096                 {
2097                   EMACS_INT position, start, end;
2098                   pos_byte -= direction;
2099
2100                   position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
2101                   set_search_regs (position, len_byte);
2102
2103                   if (NILP (Vinhibit_changing_match_data))
2104                     {
2105                       start = search_regs.start[0];
2106                       end = search_regs.end[0];
2107                     }
2108                   else
2109                     /* If Vinhibit_changing_match_data is non-nil,
2110                        search_regs will not be changed.  So let's
2111                        compute start and end here.  */
2112                     {
2113                       start = BYTE_TO_CHAR (position);
2114                       end = BYTE_TO_CHAR (position + len_byte);
2115                     }
2116
2117                   if ((n -= direction) != 0)
2118                     pos_byte += dirlen; /* to resume search */
2119                   else
2120                     return direction > 0 ? end : start;
2121                 }
2122               else
2123                 pos_byte += stride_for_teases;
2124             }
2125           }
2126       /* We have done one clump.  Can we continue? */
2127       if ((lim_byte - pos_byte) * direction < 0)
2128         return ((0 - n) * direction);
2129     }
2130   return BYTE_TO_CHAR (pos_byte);
2131 }
2132
2133 /* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
2134    for the overall match just found in the current buffer.
2135    Also clear out the match data for registers 1 and up.  */
2136
2137 static void
2138 set_search_regs (beg_byte, nbytes)
2139      EMACS_INT beg_byte, nbytes;
2140 {
2141   int i;
2142
2143   if (!NILP (Vinhibit_changing_match_data))
2144     return;
2145
2146   /* Make sure we have registers in which to store
2147      the match position.  */
2148   if (search_regs.num_regs == 0)
2149     {
2150       search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
2151       search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
2152       search_regs.num_regs = 2;
2153     }
2154
2155   /* Clear out the other registers.  */
2156   for (i = 1; i < search_regs.num_regs; i++)
2157     {
2158       search_regs.start[i] = -1;
2159       search_regs.end[i] = -1;
2160     }
2161
2162   search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
2163   search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
2164   XSETBUFFER (last_thing_searched, current_buffer);
2165 }
2166 \f
2167 /* Given STRING, a string of words separated by word delimiters,
2168    compute a regexp that matches those exact words separated by
2169    arbitrary punctuation.  If LAX is nonzero, the end of the string
2170    need not match a word boundary unless it ends in whitespace.  */
2171
2172 static Lisp_Object
2173 wordify (string, lax)
2174      Lisp_Object string;
2175      int lax;
2176 {
2177   register unsigned char *p, *o;
2178   register int i, i_byte, len, punct_count = 0, word_count = 0;
2179   Lisp_Object val;
2180   int prev_c = 0;
2181   int adjust, whitespace_at_end;
2182
2183   CHECK_STRING (string);
2184   p = SDATA (string);
2185   len = SCHARS (string);
2186
2187   for (i = 0, i_byte = 0; i < len; )
2188     {
2189       int c;
2190
2191       FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, i, i_byte);
2192
2193       if (SYNTAX (c) != Sword)
2194         {
2195           punct_count++;
2196           if (i > 0 && SYNTAX (prev_c) == Sword)
2197             word_count++;
2198         }
2199
2200       prev_c = c;
2201     }
2202
2203   if (SYNTAX (prev_c) == Sword)
2204     {
2205       word_count++;
2206       whitespace_at_end = 0;
2207     }
2208   else
2209     whitespace_at_end = 1;
2210
2211   if (!word_count)
2212     return empty_unibyte_string;
2213
2214   adjust = - punct_count + 5 * (word_count - 1)
2215     + ((lax && !whitespace_at_end) ? 2 : 4);
2216   if (STRING_MULTIBYTE (string))
2217     val = make_uninit_multibyte_string (len + adjust,
2218                                         SBYTES (string)
2219                                         + adjust);
2220   else
2221     val = make_uninit_string (len + adjust);
2222
2223   o = SDATA (val);
2224   *o++ = '\\';
2225   *o++ = 'b';
2226   prev_c = 0;
2227
2228   for (i = 0, i_byte = 0; i < len; )
2229     {
2230       int c;
2231       int i_byte_orig = i_byte;
2232
2233       FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, i, i_byte);
2234
2235       if (SYNTAX (c) == Sword)
2236         {
2237           bcopy (SDATA (string) + i_byte_orig, o,
2238                  i_byte - i_byte_orig);
2239           o += i_byte - i_byte_orig;
2240         }
2241       else if (i > 0 && SYNTAX (prev_c) == Sword && --word_count)
2242         {
2243           *o++ = '\\';
2244           *o++ = 'W';
2245           *o++ = '\\';
2246           *o++ = 'W';
2247           *o++ = '*';
2248         }
2249
2250       prev_c = c;
2251     }
2252
2253   if (!lax || whitespace_at_end)
2254     {
2255       *o++ = '\\';
2256       *o++ = 'b';
2257     }
2258
2259   return val;
2260 }
2261 \f
2262 DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
2263        "MSearch backward: ",
2264        doc: /* Search backward from point for STRING.
2265 Set point to the beginning of the occurrence found, and return point.
2266 An optional second argument bounds the search; it is a buffer position.
2267 The match found must not extend before that position.
2268 Optional third argument, if t, means if fail just return nil (no error).
2269  If not nil and not t, position at limit of search and return nil.
2270 Optional fourth argument is repeat count--search for successive occurrences.
2271
2272 Search case-sensitivity is determined by the value of the variable
2273 `case-fold-search', which see.
2274
2275 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2276      (string, bound, noerror, count)
2277      Lisp_Object string, bound, noerror, count;
2278 {
2279   return search_command (string, bound, noerror, count, -1, 0, 0);
2280 }
2281
2282 DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
2283        doc: /* Search forward from point for STRING.
2284 Set point to the end of the occurrence found, and return point.
2285 An optional second argument bounds the search; it is a buffer position.
2286 The match found must not extend after that position.  A value of nil is
2287   equivalent to (point-max).
2288 Optional third argument, if t, means if fail just return nil (no error).
2289   If not nil and not t, move to limit of search and return nil.
2290 Optional fourth argument is repeat count--search for successive occurrences.
2291
2292 Search case-sensitivity is determined by the value of the variable
2293 `case-fold-search', which see.
2294
2295 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2296      (string, bound, noerror, count)
2297      Lisp_Object string, bound, noerror, count;
2298 {
2299   return search_command (string, bound, noerror, count, 1, 0, 0);
2300 }
2301
2302 DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
2303        "sWord search backward: ",
2304        doc: /* Search backward from point for STRING, ignoring differences in punctuation.
2305 Set point to the beginning of the occurrence found, and return point.
2306 An optional second argument bounds the search; it is a buffer position.
2307 The match found must not extend before that position.
2308 Optional third argument, if t, means if fail just return nil (no error).
2309   If not nil and not t, move to limit of search and return nil.
2310 Optional fourth argument is repeat count--search for successive occurrences.  */)
2311      (string, bound, noerror, count)
2312      Lisp_Object string, bound, noerror, count;
2313 {
2314   return search_command (wordify (string, 0), bound, noerror, count, -1, 1, 0);
2315 }
2316
2317 DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
2318        "sWord search: ",
2319        doc: /* Search forward from point for STRING, ignoring differences in punctuation.
2320 Set point to the end of the occurrence found, and return point.
2321 An optional second argument bounds the search; it is a buffer position.
2322 The match found must not extend after that position.
2323 Optional third argument, if t, means if fail just return nil (no error).
2324   If not nil and not t, move to limit of search and return nil.
2325 Optional fourth argument is repeat count--search for successive occurrences.  */)
2326      (string, bound, noerror, count)
2327      Lisp_Object string, bound, noerror, count;
2328 {
2329   return search_command (wordify (string, 0), bound, noerror, count, 1, 1, 0);
2330 }
2331
2332 DEFUN ("word-search-backward-lax", Fword_search_backward_lax, Sword_search_backward_lax, 1, 4,
2333        "sWord search backward: ",
2334        doc: /* Search backward from point for STRING, ignoring differences in punctuation.
2335 Set point to the beginning of the occurrence found, and return point.
2336
2337 Unlike `word-search-backward', the end of STRING need not match a word
2338 boundary unless it ends in whitespace.
2339
2340 An optional second argument bounds the search; it is a buffer position.
2341 The match found must not extend before that position.
2342 Optional third argument, if t, means if fail just return nil (no error).
2343   If not nil and not t, move to limit of search and return nil.
2344 Optional fourth argument is repeat count--search for successive occurrences.  */)
2345      (string, bound, noerror, count)
2346      Lisp_Object string, bound, noerror, count;
2347 {
2348   return search_command (wordify (string, 1), bound, noerror, count, -1, 1, 0);
2349 }
2350
2351 DEFUN ("word-search-forward-lax", Fword_search_forward_lax, Sword_search_forward_lax, 1, 4,
2352        "sWord search: ",
2353        doc: /* Search forward from point for STRING, ignoring differences in punctuation.
2354 Set point to the end of the occurrence found, and return point.
2355
2356 Unlike `word-search-forward', the end of STRING need not match a word
2357 boundary unless it ends in whitespace.
2358
2359 An optional second argument bounds the search; it is a buffer position.
2360 The match found must not extend after that position.
2361 Optional third argument, if t, means if fail just return nil (no error).
2362   If not nil and not t, move to limit of search and return nil.
2363 Optional fourth argument is repeat count--search for successive occurrences.  */)
2364      (string, bound, noerror, count)
2365      Lisp_Object string, bound, noerror, count;
2366 {
2367   return search_command (wordify (string, 1), bound, noerror, count, 1, 1, 0);
2368 }
2369
2370 DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
2371        "sRE search backward: ",
2372        doc: /* Search backward from point for match for regular expression REGEXP.
2373 Set point to the beginning of the match, and return point.
2374 The match found is the one starting last in the buffer
2375 and yet ending before the origin of the search.
2376 An optional second argument bounds the search; it is a buffer position.
2377 The match found must start at or after that position.
2378 Optional third argument, if t, means if fail just return nil (no error).
2379   If not nil and not t, move to limit of search and return nil.
2380 Optional fourth argument is repeat count--search for successive occurrences.
2381 See also the functions `match-beginning', `match-end', `match-string',
2382 and `replace-match'.  */)
2383      (regexp, bound, noerror, count)
2384      Lisp_Object regexp, bound, noerror, count;
2385 {
2386   return search_command (regexp, bound, noerror, count, -1, 1, 0);
2387 }
2388
2389 DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
2390        "sRE search: ",
2391        doc: /* Search forward from point for regular expression REGEXP.
2392 Set point to the end of the occurrence found, and return point.
2393 An optional second argument bounds the search; it is a buffer position.
2394 The match found must not extend after that position.
2395 Optional third argument, if t, means if fail just return nil (no error).
2396   If not nil and not t, move to limit of search and return nil.
2397 Optional fourth argument is repeat count--search for successive occurrences.
2398 See also the functions `match-beginning', `match-end', `match-string',
2399 and `replace-match'.  */)
2400      (regexp, bound, noerror, count)
2401      Lisp_Object regexp, bound, noerror, count;
2402 {
2403   return search_command (regexp, bound, noerror, count, 1, 1, 0);
2404 }
2405
2406 DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
2407        "sPosix search backward: ",
2408        doc: /* Search backward from point for match for regular expression REGEXP.
2409 Find the longest match in accord with Posix regular expression rules.
2410 Set point to the beginning of the match, and return point.
2411 The match found is the one starting last in the buffer
2412 and yet ending before the origin of the search.
2413 An optional second argument bounds the search; it is a buffer position.
2414 The match found must start at or after that position.
2415 Optional third argument, if t, means if fail just return nil (no error).
2416   If not nil and not t, move to limit of search and return nil.
2417 Optional fourth argument is repeat count--search for successive occurrences.
2418 See also the functions `match-beginning', `match-end', `match-string',
2419 and `replace-match'.  */)
2420      (regexp, bound, noerror, count)
2421      Lisp_Object regexp, bound, noerror, count;
2422 {
2423   return search_command (regexp, bound, noerror, count, -1, 1, 1);
2424 }
2425
2426 DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
2427        "sPosix search: ",
2428        doc: /* Search forward from point for regular expression REGEXP.
2429 Find the longest match in accord with Posix regular expression rules.
2430 Set point to the end of the occurrence found, and return point.
2431 An optional second argument bounds the search; it is a buffer position.
2432 The match found must not extend after that position.
2433 Optional third argument, if t, means if fail just return nil (no error).
2434   If not nil and not t, move to limit of search and return nil.
2435 Optional fourth argument is repeat count--search for successive occurrences.
2436 See also the functions `match-beginning', `match-end', `match-string',
2437 and `replace-match'.  */)
2438      (regexp, bound, noerror, count)
2439      Lisp_Object regexp, bound, noerror, count;
2440 {
2441   return search_command (regexp, bound, noerror, count, 1, 1, 1);
2442 }
2443 \f
2444 DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
2445        doc: /* Replace text matched by last search with NEWTEXT.
2446 Leave point at the end of the replacement text.
2447
2448 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
2449 Otherwise maybe capitalize the whole text, or maybe just word initials,
2450 based on the replaced text.
2451 If the replaced text has only capital letters
2452 and has at least one multiletter word, convert NEWTEXT to all caps.
2453 Otherwise if all words are capitalized in the replaced text,
2454 capitalize each word in NEWTEXT.
2455
2456 If third arg LITERAL is non-nil, insert NEWTEXT literally.
2457 Otherwise treat `\\' as special:
2458   `\\&' in NEWTEXT means substitute original matched text.
2459   `\\N' means substitute what matched the Nth `\\(...\\)'.
2460        If Nth parens didn't match, substitute nothing.
2461   `\\\\' means insert one `\\'.
2462 Case conversion does not apply to these substitutions.
2463
2464 FIXEDCASE and LITERAL are optional arguments.
2465
2466 The optional fourth argument STRING can be a string to modify.
2467 This is meaningful when the previous match was done against STRING,
2468 using `string-match'.  When used this way, `replace-match'
2469 creates and returns a new string made by copying STRING and replacing
2470 the part of STRING that was matched.
2471
2472 The optional fifth argument SUBEXP specifies a subexpression;
2473 it says to replace just that subexpression with NEWTEXT,
2474 rather than replacing the entire matched text.
2475 This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
2476 `\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
2477 NEWTEXT in place of subexp N.
2478 This is useful only after a regular expression search or match,
2479 since only regular expressions have distinguished subexpressions.  */)
2480      (newtext, fixedcase, literal, string, subexp)
2481      Lisp_Object newtext, fixedcase, literal, string, subexp;
2482 {
2483   enum { nochange, all_caps, cap_initial } case_action;
2484   register int pos, pos_byte;
2485   int some_multiletter_word;
2486   int some_lowercase;
2487   int some_uppercase;
2488   int some_nonuppercase_initial;
2489   register int c, prevc;
2490   int sub;
2491   EMACS_INT opoint, newpoint;
2492
2493   CHECK_STRING (newtext);
2494
2495   if (! NILP (string))
2496     CHECK_STRING (string);
2497
2498   case_action = nochange;       /* We tried an initialization */
2499                                 /* but some C compilers blew it */
2500
2501   if (search_regs.num_regs <= 0)
2502     error ("`replace-match' called before any match found");
2503
2504   if (NILP (subexp))
2505     sub = 0;
2506   else
2507     {
2508       CHECK_NUMBER (subexp);
2509       sub = XINT (subexp);
2510       if (sub < 0 || sub >= search_regs.num_regs)
2511         args_out_of_range (subexp, make_number (search_regs.num_regs));
2512     }
2513
2514   if (NILP (string))
2515     {
2516       if (search_regs.start[sub] < BEGV
2517           || search_regs.start[sub] > search_regs.end[sub]
2518           || search_regs.end[sub] > ZV)
2519         args_out_of_range (make_number (search_regs.start[sub]),
2520                            make_number (search_regs.end[sub]));
2521     }
2522   else
2523     {
2524       if (search_regs.start[sub] < 0
2525           || search_regs.start[sub] > search_regs.end[sub]
2526           || search_regs.end[sub] > SCHARS (string))
2527         args_out_of_range (make_number (search_regs.start[sub]),
2528                            make_number (search_regs.end[sub]));
2529     }
2530
2531   if (NILP (fixedcase))
2532     {
2533       /* Decide how to casify by examining the matched text. */
2534       EMACS_INT last;
2535
2536       pos = search_regs.start[sub];
2537       last = search_regs.end[sub];
2538
2539       if (NILP (string))
2540         pos_byte = CHAR_TO_BYTE (pos);
2541       else
2542         pos_byte = string_char_to_byte (string, pos);
2543
2544       prevc = '\n';
2545       case_action = all_caps;
2546
2547       /* some_multiletter_word is set nonzero if any original word
2548          is more than one letter long. */
2549       some_multiletter_word = 0;
2550       some_lowercase = 0;
2551       some_nonuppercase_initial = 0;
2552       some_uppercase = 0;
2553
2554       while (pos < last)
2555         {
2556           if (NILP (string))
2557             {
2558               c = FETCH_CHAR_AS_MULTIBYTE (pos_byte);
2559               INC_BOTH (pos, pos_byte);
2560             }
2561           else
2562             FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, pos, pos_byte);
2563
2564           if (LOWERCASEP (c))
2565             {
2566               /* Cannot be all caps if any original char is lower case */
2567
2568               some_lowercase = 1;
2569               if (SYNTAX (prevc) != Sword)
2570                 some_nonuppercase_initial = 1;
2571               else
2572                 some_multiletter_word = 1;
2573             }
2574           else if (UPPERCASEP (c))
2575             {
2576               some_uppercase = 1;
2577               if (SYNTAX (prevc) != Sword)
2578                 ;
2579               else
2580                 some_multiletter_word = 1;
2581             }
2582           else
2583             {
2584               /* If the initial is a caseless word constituent,
2585                  treat that like a lowercase initial.  */
2586               if (SYNTAX (prevc) != Sword)
2587                 some_nonuppercase_initial = 1;
2588             }
2589
2590           prevc = c;
2591         }
2592
2593       /* Convert to all caps if the old text is all caps
2594          and has at least one multiletter word.  */
2595       if (! some_lowercase && some_multiletter_word)
2596         case_action = all_caps;
2597       /* Capitalize each word, if the old text has all capitalized words.  */
2598       else if (!some_nonuppercase_initial && some_multiletter_word)
2599         case_action = cap_initial;
2600       else if (!some_nonuppercase_initial && some_uppercase)
2601         /* Should x -> yz, operating on X, give Yz or YZ?
2602            We'll assume the latter.  */
2603         case_action = all_caps;
2604       else
2605         case_action = nochange;
2606     }
2607
2608   /* Do replacement in a string.  */
2609   if (!NILP (string))
2610     {
2611       Lisp_Object before, after;
2612
2613       before = Fsubstring (string, make_number (0),
2614                            make_number (search_regs.start[sub]));
2615       after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
2616
2617       /* Substitute parts of the match into NEWTEXT
2618          if desired.  */
2619       if (NILP (literal))
2620         {
2621           EMACS_INT lastpos = 0;
2622           EMACS_INT lastpos_byte = 0;
2623           /* We build up the substituted string in ACCUM.  */
2624           Lisp_Object accum;
2625           Lisp_Object middle;
2626           int length = SBYTES (newtext);
2627
2628           accum = Qnil;
2629
2630           for (pos_byte = 0, pos = 0; pos_byte < length;)
2631             {
2632               int substart = -1;
2633               int subend = 0;
2634               int delbackslash = 0;
2635
2636               FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2637
2638               if (c == '\\')
2639                 {
2640                   FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2641
2642                   if (c == '&')
2643                     {
2644                       substart = search_regs.start[sub];
2645                       subend = search_regs.end[sub];
2646                     }
2647                   else if (c >= '1' && c <= '9')
2648                     {
2649                       if (search_regs.start[c - '0'] >= 0
2650                           && c <= search_regs.num_regs + '0')
2651                         {
2652                           substart = search_regs.start[c - '0'];
2653                           subend = search_regs.end[c - '0'];
2654                         }
2655                       else
2656                         {
2657                           /* If that subexp did not match,
2658                              replace \\N with nothing.  */
2659                           substart = 0;
2660                           subend = 0;
2661                         }
2662                     }
2663                   else if (c == '\\')
2664                     delbackslash = 1;
2665                   else
2666                     error ("Invalid use of `\\' in replacement text");
2667                 }
2668               if (substart >= 0)
2669                 {
2670                   if (pos - 2 != lastpos)
2671                     middle = substring_both (newtext, lastpos,
2672                                              lastpos_byte,
2673                                              pos - 2, pos_byte - 2);
2674                   else
2675                     middle = Qnil;
2676                   accum = concat3 (accum, middle,
2677                                    Fsubstring (string,
2678                                                make_number (substart),
2679                                                make_number (subend)));
2680                   lastpos = pos;
2681                   lastpos_byte = pos_byte;
2682                 }
2683               else if (delbackslash)
2684                 {
2685                   middle = substring_both (newtext, lastpos,
2686                                            lastpos_byte,
2687                                            pos - 1, pos_byte - 1);
2688
2689                   accum = concat2 (accum, middle);
2690                   lastpos = pos;
2691                   lastpos_byte = pos_byte;
2692                 }
2693             }
2694
2695           if (pos != lastpos)
2696             middle = substring_both (newtext, lastpos,
2697                                      lastpos_byte,
2698                                      pos, pos_byte);
2699           else
2700             middle = Qnil;
2701
2702           newtext = concat2 (accum, middle);
2703         }
2704
2705       /* Do case substitution in NEWTEXT if desired.  */
2706       if (case_action == all_caps)
2707         newtext = Fupcase (newtext);
2708       else if (case_action == cap_initial)
2709         newtext = Fupcase_initials (newtext);
2710
2711       return concat3 (before, newtext, after);
2712     }
2713
2714   /* Record point, then move (quietly) to the start of the match.  */
2715   if (PT >= search_regs.end[sub])
2716     opoint = PT - ZV;
2717   else if (PT > search_regs.start[sub])
2718     opoint = search_regs.end[sub] - ZV;
2719   else
2720     opoint = PT;
2721
2722   /* If we want non-literal replacement,
2723      perform substitution on the replacement string.  */
2724   if (NILP (literal))
2725     {
2726       int length = SBYTES (newtext);
2727       unsigned char *substed;
2728       int substed_alloc_size, substed_len;
2729       int buf_multibyte = !NILP (current_buffer->enable_multibyte_characters);
2730       int str_multibyte = STRING_MULTIBYTE (newtext);
2731       Lisp_Object rev_tbl;
2732       int really_changed = 0;
2733
2734       rev_tbl = Qnil;
2735
2736       substed_alloc_size = length * 2 + 100;
2737       substed = (unsigned char *) xmalloc (substed_alloc_size + 1);
2738       substed_len = 0;
2739
2740       /* Go thru NEWTEXT, producing the actual text to insert in
2741          SUBSTED while adjusting multibyteness to that of the current
2742          buffer.  */
2743
2744       for (pos_byte = 0, pos = 0; pos_byte < length;)
2745         {
2746           unsigned char str[MAX_MULTIBYTE_LENGTH];
2747           unsigned char *add_stuff = NULL;
2748           int add_len = 0;
2749           int idx = -1;
2750
2751           if (str_multibyte)
2752             {
2753               FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte);
2754               if (!buf_multibyte)
2755                 c = multibyte_char_to_unibyte (c, rev_tbl);
2756             }
2757           else
2758             {
2759               /* Note that we don't have to increment POS.  */
2760               c = SREF (newtext, pos_byte++);
2761               if (buf_multibyte)
2762                 c = unibyte_char_to_multibyte (c);
2763             }
2764
2765           /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2766              or set IDX to a match index, which means put that part
2767              of the buffer text into SUBSTED.  */
2768
2769           if (c == '\\')
2770             {
2771               really_changed = 1;
2772
2773               if (str_multibyte)
2774                 {
2775                   FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext,
2776                                                       pos, pos_byte);
2777                   if (!buf_multibyte && !ASCII_CHAR_P (c))
2778                     c = multibyte_char_to_unibyte (c, rev_tbl);
2779                 }
2780               else
2781                 {
2782                   c = SREF (newtext, pos_byte++);
2783                   if (buf_multibyte)
2784                     c = unibyte_char_to_multibyte (c);
2785                 }
2786
2787               if (c == '&')
2788                 idx = sub;
2789               else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
2790                 {
2791                   if (search_regs.start[c - '0'] >= 1)
2792                     idx = c - '0';
2793                 }
2794               else if (c == '\\')
2795                 add_len = 1, add_stuff = "\\";
2796               else
2797                 {
2798                   xfree (substed);
2799                   error ("Invalid use of `\\' in replacement text");
2800                 }
2801             }
2802           else
2803             {
2804               add_len = CHAR_STRING (c, str);
2805               add_stuff = str;
2806             }
2807
2808           /* If we want to copy part of a previous match,
2809              set up ADD_STUFF and ADD_LEN to point to it.  */
2810           if (idx >= 0)
2811             {
2812               EMACS_INT begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
2813               add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
2814               if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
2815                 move_gap (search_regs.start[idx]);
2816               add_stuff = BYTE_POS_ADDR (begbyte);
2817             }
2818
2819           /* Now the stuff we want to add to SUBSTED
2820              is invariably ADD_LEN bytes starting at ADD_STUFF.  */
2821
2822           /* Make sure SUBSTED is big enough.  */
2823           if (substed_len + add_len >= substed_alloc_size)
2824             {
2825               substed_alloc_size = substed_len + add_len + 500;
2826               substed = (unsigned char *) xrealloc (substed,
2827                                                     substed_alloc_size + 1);
2828             }
2829
2830           /* Now add to the end of SUBSTED.  */
2831           if (add_stuff)
2832             {
2833               bcopy (add_stuff, substed + substed_len, add_len);
2834               substed_len += add_len;
2835             }
2836         }
2837
2838       if (really_changed)
2839         {
2840           if (buf_multibyte)
2841             {
2842               int nchars = multibyte_chars_in_text (substed, substed_len);
2843
2844               newtext = make_multibyte_string (substed, nchars, substed_len);
2845             }
2846           else
2847             newtext = make_unibyte_string (substed, substed_len);
2848         }
2849       xfree (substed);
2850     }
2851
2852   /* Replace the old text with the new in the cleanest possible way.  */
2853   replace_range (search_regs.start[sub], search_regs.end[sub],
2854                  newtext, 1, 0, 1);
2855   newpoint = search_regs.start[sub] + SCHARS (newtext);
2856
2857   if (case_action == all_caps)
2858     Fupcase_region (make_number (search_regs.start[sub]),
2859                     make_number (newpoint));
2860   else if (case_action == cap_initial)
2861     Fupcase_initials_region (make_number (search_regs.start[sub]),
2862                              make_number (newpoint));
2863
2864   /* Adjust search data for this change.  */
2865   {
2866     EMACS_INT oldend = search_regs.end[sub];
2867     EMACS_INT oldstart = search_regs.start[sub];
2868     EMACS_INT change = newpoint - search_regs.end[sub];
2869     int i;
2870
2871     for (i = 0; i < search_regs.num_regs; i++)
2872       {
2873         if (search_regs.start[i] >= oldend)
2874           search_regs.start[i] += change;
2875         else if (search_regs.start[i] > oldstart)
2876           search_regs.start[i] = oldstart;
2877         if (search_regs.end[i] >= oldend)
2878           search_regs.end[i] += change;
2879         else if (search_regs.end[i] > oldstart)
2880           search_regs.end[i] = oldstart;
2881       }
2882   }
2883
2884   /* Put point back where it was in the text.  */
2885   if (opoint <= 0)
2886     TEMP_SET_PT (opoint + ZV);
2887   else
2888     TEMP_SET_PT (opoint);
2889
2890   /* Now move point "officially" to the start of the inserted replacement.  */
2891   move_if_not_intangible (newpoint);
2892
2893   return Qnil;
2894 }
2895 \f
2896 static Lisp_Object
2897 match_limit (num, beginningp)
2898      Lisp_Object num;
2899      int beginningp;
2900 {
2901   register int n;
2902
2903   CHECK_NUMBER (num);
2904   n = XINT (num);
2905   if (n < 0)
2906     args_out_of_range (num, make_number (0));
2907   if (search_regs.num_regs <= 0)
2908     error ("No match data, because no search succeeded");
2909   if (n >= search_regs.num_regs
2910       || search_regs.start[n] < 0)
2911     return Qnil;
2912   return (make_number ((beginningp) ? search_regs.start[n]
2913                                     : search_regs.end[n]));
2914 }
2915
2916 DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
2917        doc: /* Return position of start of text matched by last search.
2918 SUBEXP, a number, specifies which parenthesized expression in the last
2919   regexp.
2920 Value is nil if SUBEXPth pair didn't match, or there were less than
2921   SUBEXP pairs.
2922 Zero means the entire text matched by the whole regexp or whole string.  */)
2923      (subexp)
2924      Lisp_Object subexp;
2925 {
2926   return match_limit (subexp, 1);
2927 }
2928
2929 DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
2930        doc: /* Return position of end of text matched by last search.
2931 SUBEXP, a number, specifies which parenthesized expression in the last
2932   regexp.
2933 Value is nil if SUBEXPth pair didn't match, or there were less than
2934   SUBEXP pairs.
2935 Zero means the entire text matched by the whole regexp or whole string.  */)
2936      (subexp)
2937      Lisp_Object subexp;
2938 {
2939   return match_limit (subexp, 0);
2940 }
2941
2942 DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 3, 0,
2943        doc: /* Return a list containing all info on what the last search matched.
2944 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2945 All the elements are markers or nil (nil if the Nth pair didn't match)
2946 if the last match was on a buffer; integers or nil if a string was matched.
2947 Use `set-match-data' to reinstate the data in this list.
2948
2949 If INTEGERS (the optional first argument) is non-nil, always use
2950 integers \(rather than markers) to represent buffer positions.  In
2951 this case, and if the last match was in a buffer, the buffer will get
2952 stored as one additional element at the end of the list.
2953
2954 If REUSE is a list, reuse it as part of the value.  If REUSE is long
2955 enough to hold all the values, and if INTEGERS is non-nil, no consing
2956 is done.
2957
2958 If optional third arg RESEAT is non-nil, any previous markers on the
2959 REUSE list will be modified to point to nowhere.
2960
2961 Return value is undefined if the last search failed.  */)
2962   (integers, reuse, reseat)
2963      Lisp_Object integers, reuse, reseat;
2964 {
2965   Lisp_Object tail, prev;
2966   Lisp_Object *data;
2967   int i, len;
2968
2969   if (!NILP (reseat))
2970     for (tail = reuse; CONSP (tail); tail = XCDR (tail))
2971       if (MARKERP (XCAR (tail)))
2972         {
2973           unchain_marker (XMARKER (XCAR (tail)));
2974           XSETCAR (tail, Qnil);
2975         }
2976
2977   if (NILP (last_thing_searched))
2978     return Qnil;
2979
2980   prev = Qnil;
2981
2982   data = (Lisp_Object *) alloca ((2 * search_regs.num_regs + 1)
2983                                  * sizeof (Lisp_Object));
2984
2985   len = 0;
2986   for (i = 0; i < search_regs.num_regs; i++)
2987     {
2988       int start = search_regs.start[i];
2989       if (start >= 0)
2990         {
2991           if (EQ (last_thing_searched, Qt)
2992               || ! NILP (integers))
2993             {
2994               XSETFASTINT (data[2 * i], start);
2995               XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
2996             }
2997           else if (BUFFERP (last_thing_searched))
2998             {
2999               data[2 * i] = Fmake_marker ();
3000               Fset_marker (data[2 * i],
3001                            make_number (start),
3002                            last_thing_searched);
3003               data[2 * i + 1] = Fmake_marker ();
3004               Fset_marker (data[2 * i + 1],
3005                            make_number (search_regs.end[i]),
3006                            last_thing_searched);
3007             }
3008           else
3009             /* last_thing_searched must always be Qt, a buffer, or Qnil.  */
3010             abort ();
3011
3012           len = 2 * i + 2;
3013         }
3014       else
3015         data[2 * i] = data[2 * i + 1] = Qnil;
3016     }
3017
3018   if (BUFFERP (last_thing_searched) && !NILP (integers))
3019     {
3020       data[len] = last_thing_searched;
3021       len++;
3022     }
3023
3024   /* If REUSE is not usable, cons up the values and return them.  */
3025   if (! CONSP (reuse))
3026     return Flist (len, data);
3027
3028   /* If REUSE is a list, store as many value elements as will fit
3029      into the elements of REUSE.  */
3030   for (i = 0, tail = reuse; CONSP (tail);
3031        i++, tail = XCDR (tail))
3032     {
3033       if (i < len)
3034         XSETCAR (tail, data[i]);
3035       else
3036         XSETCAR (tail, Qnil);
3037       prev = tail;
3038     }
3039
3040   /* If we couldn't fit all value elements into REUSE,
3041      cons up the rest of them and add them to the end of REUSE.  */
3042   if (i < len)
3043     XSETCDR (prev, Flist (len - i, data + i));
3044
3045   return reuse;
3046 }
3047
3048 /* We used to have an internal use variant of `reseat' described as:
3049
3050       If RESEAT is `evaporate', put the markers back on the free list
3051       immediately.  No other references to the markers must exist in this
3052       case, so it is used only internally on the unwind stack and
3053       save-match-data from Lisp.
3054
3055    But it was ill-conceived: those supposedly-internal markers get exposed via
3056    the undo-list, so freeing them here is unsafe.  */
3057
3058 DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 2, 0,
3059        doc: /* Set internal data on last search match from elements of LIST.
3060 LIST should have been created by calling `match-data' previously.
3061
3062 If optional arg RESEAT is non-nil, make markers on LIST point nowhere.  */)
3063     (list, reseat)
3064      register Lisp_Object list, reseat;
3065 {
3066   register int i;
3067   register Lisp_Object marker;
3068
3069   if (running_asynch_code)
3070     save_search_regs ();
3071
3072   CHECK_LIST (list);
3073
3074   /* Unless we find a marker with a buffer or an explicit buffer
3075      in LIST, assume that this match data came from a string.  */
3076   last_thing_searched = Qt;
3077
3078   /* Allocate registers if they don't already exist.  */
3079   {
3080     int length = XFASTINT (Flength (list)) / 2;
3081
3082     if (length > search_regs.num_regs)
3083       {
3084         if (search_regs.num_regs == 0)
3085           {
3086             search_regs.start
3087               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
3088             search_regs.end
3089               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
3090           }
3091         else
3092           {
3093             search_regs.start
3094               = (regoff_t *) xrealloc (search_regs.start,
3095                                        length * sizeof (regoff_t));
3096             search_regs.end
3097               = (regoff_t *) xrealloc (search_regs.end,
3098                                        length * sizeof (regoff_t));
3099           }
3100
3101         for (i = search_regs.num_regs; i < length; i++)
3102           search_regs.start[i] = -1;
3103
3104         search_regs.num_regs = length;
3105       }
3106
3107     for (i = 0; CONSP (list); i++)
3108       {
3109         marker = XCAR (list);
3110         if (BUFFERP (marker))
3111           {
3112             last_thing_searched = marker;
3113             break;
3114           }
3115         if (i >= length)
3116           break;
3117         if (NILP (marker))
3118           {
3119             search_regs.start[i] = -1;
3120             list = XCDR (list);
3121           }
3122         else
3123           {
3124             EMACS_INT from;
3125             Lisp_Object m;
3126
3127             m = marker;
3128             if (MARKERP (marker))
3129               {
3130                 if (XMARKER (marker)->buffer == 0)
3131                   XSETFASTINT (marker, 0);
3132                 else
3133                   XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
3134               }
3135
3136             CHECK_NUMBER_COERCE_MARKER (marker);
3137             from = XINT (marker);
3138
3139             if (!NILP (reseat) && MARKERP (m))
3140               {
3141                 unchain_marker (XMARKER (m));
3142                 XSETCAR (list, Qnil);
3143               }
3144
3145             if ((list = XCDR (list), !CONSP (list)))
3146               break;
3147
3148             m = marker = XCAR (list);
3149
3150             if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
3151               XSETFASTINT (marker, 0);
3152
3153             CHECK_NUMBER_COERCE_MARKER (marker);
3154             search_regs.start[i] = from;
3155             search_regs.end[i] = XINT (marker);
3156
3157             if (!NILP (reseat) && MARKERP (m))
3158               {
3159                 unchain_marker (XMARKER (m));
3160                 XSETCAR (list, Qnil);
3161               }
3162           }
3163         list = XCDR (list);
3164       }
3165
3166     for (; i < search_regs.num_regs; i++)
3167       search_regs.start[i] = -1;
3168   }
3169
3170   return Qnil;
3171 }
3172
3173 /* If non-zero the match data have been saved in saved_search_regs
3174    during the execution of a sentinel or filter. */
3175 static int search_regs_saved;
3176 static struct re_registers saved_search_regs;
3177 static Lisp_Object saved_last_thing_searched;
3178
3179 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
3180    if asynchronous code (filter or sentinel) is running. */
3181 static void
3182 save_search_regs ()
3183 {
3184   if (!search_regs_saved)
3185     {
3186       saved_search_regs.num_regs = search_regs.num_regs;
3187       saved_search_regs.start = search_regs.start;
3188       saved_search_regs.end = search_regs.end;
3189       saved_last_thing_searched = last_thing_searched;
3190       last_thing_searched = Qnil;
3191       search_regs.num_regs = 0;
3192       search_regs.start = 0;
3193       search_regs.end = 0;
3194
3195       search_regs_saved = 1;
3196     }
3197 }
3198
3199 /* Called upon exit from filters and sentinels. */
3200 void
3201 restore_search_regs ()
3202 {
3203   if (search_regs_saved)
3204     {
3205       if (search_regs.num_regs > 0)
3206         {
3207           xfree (search_regs.start);
3208           xfree (search_regs.end);
3209         }
3210       search_regs.num_regs = saved_search_regs.num_regs;
3211       search_regs.start = saved_search_regs.start;
3212       search_regs.end = saved_search_regs.end;
3213       last_thing_searched = saved_last_thing_searched;
3214       saved_last_thing_searched = Qnil;
3215       search_regs_saved = 0;
3216     }
3217 }
3218
3219 static Lisp_Object
3220 unwind_set_match_data (list)
3221      Lisp_Object list;
3222 {
3223   /* It is NOT ALWAYS safe to free (evaporate) the markers immediately.  */
3224   return Fset_match_data (list, Qt);
3225 }
3226
3227 /* Called to unwind protect the match data.  */
3228 void
3229 record_unwind_save_match_data ()
3230 {
3231   record_unwind_protect (unwind_set_match_data,
3232                          Fmatch_data (Qnil, Qnil, Qnil));
3233 }
3234
3235 /* Quote a string to inactivate reg-expr chars */
3236
3237 DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
3238        doc: /* Return a regexp string which matches exactly STRING and nothing else.  */)
3239      (string)
3240      Lisp_Object string;
3241 {
3242   register unsigned char *in, *out, *end;
3243   register unsigned char *temp;
3244   int backslashes_added = 0;
3245
3246   CHECK_STRING (string);
3247
3248   temp = (unsigned char *) alloca (SBYTES (string) * 2);
3249
3250   /* Now copy the data into the new string, inserting escapes. */
3251
3252   in = SDATA (string);
3253   end = in + SBYTES (string);
3254   out = temp;
3255
3256   for (; in != end; in++)
3257     {
3258       if (*in == '['
3259           || *in == '*' || *in == '.' || *in == '\\'
3260           || *in == '?' || *in == '+'
3261           || *in == '^' || *in == '$')
3262         *out++ = '\\', backslashes_added++;
3263       *out++ = *in;
3264     }
3265
3266   return make_specified_string (temp,
3267                                 SCHARS (string) + backslashes_added,
3268                                 out - temp,
3269                                 STRING_MULTIBYTE (string));
3270 }
3271 \f
3272 void
3273 syms_of_search ()
3274 {
3275   register int i;
3276
3277   for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
3278     {
3279       searchbufs[i].buf.allocated = 100;
3280       searchbufs[i].buf.buffer = (unsigned char *) xmalloc (100);
3281       searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
3282       searchbufs[i].regexp = Qnil;
3283       searchbufs[i].whitespace_regexp = Qnil;
3284       searchbufs[i].syntax_table = Qnil;
3285       staticpro (&searchbufs[i].regexp);
3286       staticpro (&searchbufs[i].whitespace_regexp);
3287       staticpro (&searchbufs[i].syntax_table);
3288       searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
3289     }
3290   searchbuf_head = &searchbufs[0];
3291
3292   Qsearch_failed = intern ("search-failed");
3293   staticpro (&Qsearch_failed);
3294   Qinvalid_regexp = intern ("invalid-regexp");
3295   staticpro (&Qinvalid_regexp);
3296
3297   Fput (Qsearch_failed, Qerror_conditions,
3298         Fcons (Qsearch_failed, Fcons (Qerror, Qnil)));
3299   Fput (Qsearch_failed, Qerror_message,
3300         build_string ("Search failed"));
3301
3302   Fput (Qinvalid_regexp, Qerror_conditions,
3303         Fcons (Qinvalid_regexp, Fcons (Qerror, Qnil)));
3304   Fput (Qinvalid_regexp, Qerror_message,
3305         build_string ("Invalid regexp"));
3306
3307   last_thing_searched = Qnil;
3308   staticpro (&last_thing_searched);
3309
3310   saved_last_thing_searched = Qnil;
3311   staticpro (&saved_last_thing_searched);
3312
3313   DEFVAR_LISP ("search-spaces-regexp", &Vsearch_spaces_regexp,
3314       doc: /* Regexp to substitute for bunches of spaces in regexp search.
3315 Some commands use this for user-specified regexps.
3316 Spaces that occur inside character classes or repetition operators
3317 or other such regexp constructs are not replaced with this.
3318 A value of nil (which is the normal value) means treat spaces literally.  */);
3319   Vsearch_spaces_regexp = Qnil;
3320
3321   DEFVAR_LISP ("inhibit-changing-match-data", &Vinhibit_changing_match_data,
3322       doc: /* Internal use only.
3323 If non-nil, the primitive searching and matching functions
3324 such as `looking-at', `string-match', `re-search-forward', etc.,
3325 do not set the match data.  The proper way to use this variable
3326 is to bind it with `let' around a small expression.  */);
3327   Vinhibit_changing_match_data = Qnil;
3328
3329   defsubr (&Slooking_at);
3330   defsubr (&Sposix_looking_at);
3331   defsubr (&Sstring_match);
3332   defsubr (&Sposix_string_match);
3333   defsubr (&Ssearch_forward);
3334   defsubr (&Ssearch_backward);
3335   defsubr (&Sword_search_forward);
3336   defsubr (&Sword_search_backward);
3337   defsubr (&Sword_search_forward_lax);
3338   defsubr (&Sword_search_backward_lax);
3339   defsubr (&Sre_search_forward);
3340   defsubr (&Sre_search_backward);
3341   defsubr (&Sposix_search_forward);
3342   defsubr (&Sposix_search_backward);
3343   defsubr (&Sreplace_match);
3344   defsubr (&Smatch_beginning);
3345   defsubr (&Smatch_end);
3346   defsubr (&Smatch_data);
3347   defsubr (&Sset_match_data);
3348   defsubr (&Sregexp_quote);
3349 }
3350
3351 /* arch-tag: a6059d79-0552-4f14-a2cb-d379a4e3c78f
3352    (do not change this comment) */