src/search.c

   1 /* String search routines for GNU Emacs.
   2    Copyright (C) 1985, 1986, 1987, 1993, 1994, 1997, 1998, 1999, 2001, 2002,
   3                  2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   4                  Free Software Foundation, Inc.
   5
   6 This file is part of GNU Emacs.
   7
   8 GNU Emacs is free software: you can redistribute it and/or modify
   9 it under the terms of the GNU General Public License as published by
  10 the Free Software Foundation, either version 3 of the License, or
  11 (at your option) any later version.
  12
  13 GNU Emacs is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
  20
  21
  22 #include <config.h>
  23 #include <setjmp.h>
  24 #include "lisp.h"
  25 #include "syntax.h"
  26 #include "category.h"
  27 #include "buffer.h"
  28 #include "character.h"
  29 #include "charset.h"
  30 #include "region-cache.h"
  31 #include "commands.h"
  32 #include "blockinput.h"
  33 #include "intervals.h"
  34
  35 #include <sys/types.h>
  36 #include "regex.h"
  37
  38 #define REGEXP_CACHE_SIZE 20
  39
  40 /* If the regexp is non-nil, then the buffer contains the compiled form
  41    of that regexp, suitable for searching.  */
  42 struct regexp_cache
  43 {
  44   struct regexp_cache *next;
  45   Lisp_Object regexp, whitespace_regexp;
  46   /* Syntax table for which the regexp applies.  We need this because
  47      of character classes.  If this is t, then the compiled pattern is valid
  48      for any syntax-table.  */
  49   Lisp_Object syntax_table;
  50   struct re_pattern_buffer buf;
  51   char fastmap[0400];
  52   /* Nonzero means regexp was compiled to do full POSIX backtracking.  */
  53   char posix;
  54 };
  55
  56 /* The instances of that struct.  */
  57 struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
  58
  59 /* The head of the linked list; points to the most recently used buffer.  */
  60 struct regexp_cache *searchbuf_head;
  61
  62
  63 /* error condition signaled when regexp compile_pattern fails */
  64
  65 Lisp_Object Qinvalid_regexp;
  66
  67 /* Error condition used for failing searches */
  68 Lisp_Object Qsearch_failed;
  69
  70 Lisp_Object impl_Vsearch_spaces_regexp;
  71
  72 /* If non-nil, the match data will not be changed during call to
  73    searching or matching functions.  This variable is for internal use
  74    only.  */
  75 Lisp_Object impl_Vinhibit_changing_match_data;
  76
  77 static void set_search_regs P_ ((EMACS_INT, EMACS_INT));
  78 static void save_search_regs P_ ((void));
  79 static EMACS_INT simple_search P_ ((int, unsigned char *, int, int,
  80                                     Lisp_Object, EMACS_INT, EMACS_INT,
  81                                     EMACS_INT, EMACS_INT));
  82 static EMACS_INT boyer_moore P_ ((int, unsigned char *, int, int,
  83                                   Lisp_Object, Lisp_Object,
  84                                   EMACS_INT, EMACS_INT,
  85                                   EMACS_INT, EMACS_INT, int));
  86 static EMACS_INT search_buffer P_ ((Lisp_Object, EMACS_INT, EMACS_INT,
  87                                     EMACS_INT, EMACS_INT, int, int,
  88                                     Lisp_Object, Lisp_Object, int));
  89 static void matcher_overflow () NO_RETURN;
  90
  /* Signal a Lisp error reporting that the regexp matcher overflowed
     its stack.  Callers invoke this when a match routine returns -2.  */
  static void
  matcher_overflow ()
  {
    error ("Stack overflow in regexp matcher");
  }
  96
  97 /* Compile a regexp and signal a Lisp error if anything goes wrong.
  98    PATTERN is the pattern to compile.
  99    CP is the place to put the result.
 100    TRANSLATE is a translation table for ignoring case, or nil for none.
 101    REGP is the structure that says where to store the "register"
 102    values that will result from matching this pattern.
 103    If it is 0, we should compile the pattern not to record any
 104    subexpression bounds.
 105    POSIX is nonzero if we want full backtracking (POSIX style)
 106    for this pattern.  0 means backtrack only enough to get a valid match.
 107
 108    The behavior also depends on Vsearch_spaces_regexp.  */
 109
 110 static void
 111 compile_pattern_1 (cp, pattern, translate, regp, posix)
 112      struct regexp_cache *cp;
 113      Lisp_Object pattern;
 114      Lisp_Object translate;
 115      struct re_registers *regp;
 116      int posix;
 117 {
 118   char *val;
 119   reg_syntax_t old;
 120
 121   cp->regexp = Qnil;
 122   cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
 123   cp->posix = posix;
 124   cp->buf.multibyte = STRING_MULTIBYTE (pattern);
 125   cp->buf.charset_unibyte = charset_unibyte;
 126   if (STRINGP (Vsearch_spaces_regexp))
 127     cp->whitespace_regexp = Vsearch_spaces_regexp;
 128   else
 129     cp->whitespace_regexp = Qnil;
 130
 131   /* rms: I think BLOCK_INPUT is not needed here any more,
 132      because regex.c defines malloc to call xmalloc.
 133      Using BLOCK_INPUT here means the debugger won't run if an error occurs.
 134      So let's turn it off.  */
 135   /*  BLOCK_INPUT;  */
 136   old = re_set_syntax (RE_SYNTAX_EMACS
 137                        | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
 138
 139   if (STRINGP (Vsearch_spaces_regexp))
 140     re_set_whitespace_regexp (SDATA (Vsearch_spaces_regexp));
 141   else
 142     re_set_whitespace_regexp (NULL);
 143
 144   val = (char *) re_compile_pattern ((char *) SDATA (pattern),
 145                                      SBYTES (pattern), &cp->buf);
 146
 147   /* If the compiled pattern hard codes some of the contents of the
 148      syntax-table, it can only be reused with *this* syntax table.  */
 149   cp->syntax_table = cp->buf.used_syntax ? BUF_SYNTAX_TABLE (current_buffer) : Qt;
 150
 151   re_set_whitespace_regexp (NULL);
 152
 153   re_set_syntax (old);
 154   /* UNBLOCK_INPUT;  */
 155   if (val)
 156     xsignal1 (Qinvalid_regexp, build_string (val));
 157
 158   cp->regexp = Fcopy_sequence (pattern);
 159 }
 160
 161 /* Shrink each compiled regexp buffer in the cache
 162    to the size actually used right now.
 163    This is called from garbage collection.  */
 164
 165 void
 166 shrink_regexp_cache ()
 167 {
 168   struct regexp_cache *cp;
 169
 170   for (cp = searchbuf_head; cp != 0; cp = cp->next)
 171     {
 172       cp->buf.allocated = cp->buf.used;
 173       cp->buf.buffer
 174         = (unsigned char *) xrealloc (cp->buf.buffer, cp->buf.used);
 175     }
 176 }
 177
 178 /* Clear the regexp cache w.r.t. a particular syntax table,
 179    because it was changed.
 180    There is no danger of memory leak here because re_compile_pattern
 181    automagically manages the memory in each re_pattern_buffer struct,
 182    based on its `allocated' and `buffer' values.  */
 183 void
 184 clear_regexp_cache ()
 185 {
 186   int i;
 187
 188   for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
 189     /* It's tempting to compare with the syntax-table we've actually changed,
 190        but it's not sufficient because char-table inheritance means that
 191        modifying one syntax-table can change others at the same time.  */
 192     if (!EQ (searchbufs[i].syntax_table, Qt))
 193       searchbufs[i].regexp = Qnil;
 194 }
 195
 196 /* Compile a regexp if necessary, but first check to see if there's one in
 197    the cache.
 198    PATTERN is the pattern to compile.
 199    TRANSLATE is a translation table for ignoring case, or nil for none.
 200    REGP is the structure that says where to store the "register"
 201    values that will result from matching this pattern.
 202    If it is 0, we should compile the pattern not to record any
 203    subexpression bounds.
 204    POSIX is nonzero if we want full backtracking (POSIX style)
 205    for this pattern.  0 means backtrack only enough to get a valid match.  */
 206
 207 struct re_pattern_buffer *
 208 compile_pattern (pattern, regp, translate, posix, multibyte)
 209      Lisp_Object pattern;
 210      struct re_registers *regp;
 211      Lisp_Object translate;
 212      int posix, multibyte;
 213 {
 214   struct regexp_cache *cp, **cpp;
 215
 216   for (cpp = &searchbuf_head; ; cpp = &cp->next)
 217     {
 218       cp = *cpp;
 219       /* Entries are initialized to nil, and may be set to nil by
 220          compile_pattern_1 if the pattern isn't valid.  Don't apply
 221          string accessors in those cases.  However, compile_pattern_1
 222          is only applied to the cache entry we pick here to reuse.  So
 223          nil should never appear before a non-nil entry.  */
 224       if (NILP (cp->regexp))
 225         goto compile_it;
 226       if (SCHARS (cp->regexp) == SCHARS (pattern)
 227           && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
 228           && !NILP (Fstring_equal (cp->regexp, pattern))
 229           && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
 230           && cp->posix == posix
 231           && (EQ (cp->syntax_table, Qt)
 232               || EQ (cp->syntax_table, BUF_SYNTAX_TABLE (current_buffer)))
 233           && !NILP (Fequal (cp->whitespace_regexp, Vsearch_spaces_regexp))
 234           && cp->buf.charset_unibyte == charset_unibyte)
 235         break;
 236
 237       /* If we're at the end of the cache, compile into the nil cell
 238          we found, or the last (least recently used) cell with a
 239          string value.  */
 240       if (cp->next == 0)
 241         {
 242         compile_it:
 243           compile_pattern_1 (cp, pattern, translate, regp, posix);
 244           break;
 245         }
 246     }
 247
 248   /* When we get here, cp (aka *cpp) contains the compiled pattern,
 249      either because we found it in the cache or because we just compiled it.
 250      Move it to the front of the queue to mark it as most recently used.  */
 251   *cpp = cp->next;
 252   cp->next = searchbuf_head;
 253   searchbuf_head = cp;
 254
 255   /* Advise the searching functions about the space we have allocated
 256      for register data.  */
 257   if (regp)
 258     re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
 259
 260   /* The compiled pattern can be used both for mulitbyte and unibyte
 261      target.  But, we have to tell which the pattern is used for. */
 262   cp->buf.target_multibyte = multibyte;
 263
 264   return &cp->buf;
 265 }
 266
 267 \f
 268 static Lisp_Object
 269 looking_at_1 (string, posix)
 270      Lisp_Object string;
 271      int posix;
 272 {
 273   Lisp_Object val;
 274   unsigned char *p1, *p2;
 275   EMACS_INT s1, s2;
 276   register int i;
 277   struct re_pattern_buffer *bufp;
 278
 279   if (running_asynch_code)
 280     save_search_regs ();
 281
 282   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
 283   XCHAR_TABLE (BUF_CASE_CANON_TABLE (current_buffer))->extras[2]
 284     = BUF_CASE_EQV_TABLE (current_buffer);
 285
 286   CHECK_STRING (string);
 287   bufp = compile_pattern (string,
 288                           (NILP (Vinhibit_changing_match_data)
 289                            ? &search_regs : NULL),
 290                           (!NILP (BUF_CASE_FOLD_SEARCH (current_buffer))
 291                            ? BUF_CASE_CANON_TABLE (current_buffer) : Qnil),
 292                           posix,
 293                           !NILP (BUF_ENABLE_MULTIBYTE_CHARACTERS (current_buffer)));
 294
 295   immediate_quit = 1;
 296   QUIT;                 /* Do a pending quit right away, to avoid paradoxical behavior */
 297
 298   /* Get pointers and sizes of the two strings
 299      that make up the visible portion of the buffer. */
 300
 301   p1 = BEGV_ADDR;
 302   s1 = GPT_BYTE - BEGV_BYTE;
 303   p2 = GAP_END_ADDR;
 304   s2 = ZV_BYTE - GPT_BYTE;
 305   if (s1 < 0)
 306     {
 307       p2 = p1;
 308       s2 = ZV_BYTE - BEGV_BYTE;
 309       s1 = 0;
 310     }
 311   if (s2 < 0)
 312     {
 313       s1 = ZV_BYTE - BEGV_BYTE;
 314       s2 = 0;
 315     }
 316
 317   re_match_object = Qnil;
 318
 319   i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
 320                   PT_BYTE - BEGV_BYTE,
 321                   (NILP (Vinhibit_changing_match_data)
 322                    ? &search_regs : NULL),
 323                   ZV_BYTE - BEGV_BYTE);
 324   immediate_quit = 0;
 325
 326   if (i == -2)
 327     matcher_overflow ();
 328
 329   val = (0 <= i ? Qt : Qnil);
 330   if (NILP (Vinhibit_changing_match_data) && i >= 0)
 331     for (i = 0; i < search_regs.num_regs; i++)
 332       if (search_regs.start[i] >= 0)
 333         {
 334           search_regs.start[i]
 335             = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
 336           search_regs.end[i]
 337             = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
 338         }
 339
 340   /* Set last_thing_searched only when match data is changed.  */
 341   if (NILP (Vinhibit_changing_match_data))
 342     XSETBUFFER (last_thing_searched, current_buffer);
 343
 344   return val;
 345 }
 346
 347 DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
 348        doc: /* Return t if text after point matches regular expression REGEXP.
 349 This function modifies the match data that `match-beginning',
 350 `match-end' and `match-data' access; save and restore the match
 351 data if you want to preserve them.  */)
 352      (regexp)
 353      Lisp_Object regexp;
 354 {
 355   return looking_at_1 (regexp, 0);
 356 }
 357
 358 DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
 359        doc: /* Return t if text after point matches regular expression REGEXP.
 360 Find the longest match, in accord with Posix regular expression rules.
 361 This function modifies the match data that `match-beginning',
 362 `match-end' and `match-data' access; save and restore the match
 363 data if you want to preserve them.  */)
 364      (regexp)
 365      Lisp_Object regexp;
 366 {
 367   return looking_at_1 (regexp, 1);
 368 }
 369 \f
 370 static Lisp_Object
 371 string_match_1 (regexp, string, start, posix)
 372      Lisp_Object regexp, string, start;
 373      int posix;
 374 {
 375   int val;
 376   struct re_pattern_buffer *bufp;
 377   EMACS_INT pos, pos_byte;
 378   int i;
 379
 380   if (running_asynch_code)
 381     save_search_regs ();
 382
 383   CHECK_STRING (regexp);
 384   CHECK_STRING (string);
 385
 386   if (NILP (start))
 387     pos = 0, pos_byte = 0;
 388   else
 389     {
 390       int len = SCHARS (string);
 391
 392       CHECK_NUMBER (start);
 393       pos = XINT (start);
 394       if (pos < 0 && -pos <= len)
 395         pos = len + pos;
 396       else if (0 > pos || pos > len)
 397         args_out_of_range (string, start);
 398       pos_byte = string_char_to_byte (string, pos);
 399     }
 400
 401   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
 402   XCHAR_TABLE (BUF_CASE_CANON_TABLE (current_buffer))->extras[2]
 403     = BUF_CASE_EQV_TABLE (current_buffer);
 404
 405   bufp = compile_pattern (regexp,
 406                           (NILP (Vinhibit_changing_match_data)
 407                            ? &search_regs : NULL),
 408                           (!NILP (BUF_CASE_FOLD_SEARCH (current_buffer))
 409                            ? BUF_CASE_CANON_TABLE (current_buffer) : Qnil),
 410                           posix,
 411                           STRING_MULTIBYTE (string));
 412   immediate_quit = 1;
 413   re_match_object = string;
 414
 415   val = re_search (bufp, (char *) SDATA (string),
 416                    SBYTES (string), pos_byte,
 417                    SBYTES (string) - pos_byte,
 418                    (NILP (Vinhibit_changing_match_data)
 419                     ? &search_regs : NULL));
 420   immediate_quit = 0;
 421
 422   /* Set last_thing_searched only when match data is changed.  */
 423   if (NILP (Vinhibit_changing_match_data))
 424     last_thing_searched = Qt;
 425
 426   if (val == -2)
 427     matcher_overflow ();
 428   if (val < 0) return Qnil;
 429
 430   if (NILP (Vinhibit_changing_match_data))
 431     for (i = 0; i < search_regs.num_regs; i++)
 432       if (search_regs.start[i] >= 0)
 433         {
 434           search_regs.start[i]
 435             = string_byte_to_char (string, search_regs.start[i]);
 436           search_regs.end[i]
 437             = string_byte_to_char (string, search_regs.end[i]);
 438         }
 439
 440   return make_number (string_byte_to_char (string, val));
 441 }
 442
 443 DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
 444        doc: /* Return index of start of first match for REGEXP in STRING, or nil.
 445 Matching ignores case if `case-fold-search' is non-nil.
 446 If third arg START is non-nil, start search at that index in STRING.
 447 For index of first char beyond the match, do (match-end 0).
 448 `match-end' and `match-beginning' also give indices of substrings
 449 matched by parenthesis constructs in the pattern.
 450
 451 You can use the function `match-string' to extract the substrings
 452 matched by the parenthesis constructions in REGEXP. */)
 453      (regexp, string, start)
 454      Lisp_Object regexp, string, start;
 455 {
 456   return string_match_1 (regexp, string, start, 0);
 457 }
 458
 459 DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
 460        doc: /* Return index of start of first match for REGEXP in STRING, or nil.
 461 Find the longest match, in accord with Posix regular expression rules.
 462 Case is ignored if `case-fold-search' is non-nil in the current buffer.
 463 If third arg START is non-nil, start search at that index in STRING.
 464 For index of first char beyond the match, do (match-end 0).
 465 `match-end' and `match-beginning' also give indices of substrings
 466 matched by parenthesis constructs in the pattern.  */)
 467      (regexp, string, start)
 468      Lisp_Object regexp, string, start;
 469 {
 470   return string_match_1 (regexp, string, start, 1);
 471 }
 472
 473 /* Match REGEXP against STRING, searching all of STRING,
 474    and return the index of the match, or negative on failure.
 475    This does not clobber the match data.  */
 476
 477 int
 478 fast_string_match (regexp, string)
 479      Lisp_Object regexp, string;
 480 {
 481   int val;
 482   struct re_pattern_buffer *bufp;
 483
 484   bufp = compile_pattern (regexp, 0, Qnil,
 485                           0, STRING_MULTIBYTE (string));
 486   immediate_quit = 1;
 487   re_match_object = string;
 488
 489   val = re_search (bufp, (char *) SDATA (string),
 490                    SBYTES (string), 0,
 491                    SBYTES (string), 0);
 492   immediate_quit = 0;
 493   return val;
 494 }
 495
 496 /* Match REGEXP against STRING, searching all of STRING ignoring case,
 497    and return the index of the match, or negative on failure.
 498    This does not clobber the match data.
 499    We assume that STRING contains single-byte characters.  */
 500
 501 extern Lisp_Object Vascii_downcase_table;
 502
 503 int
 504 fast_c_string_match_ignore_case (regexp, string)
 505      Lisp_Object regexp;
 506      const char *string;
 507 {
 508   int val;
 509   struct re_pattern_buffer *bufp;
 510   int len = strlen (string);
 511
 512   regexp = string_make_unibyte (regexp);
 513   re_match_object = Qt;
 514   bufp = compile_pattern (regexp, 0,
 515                           Vascii_canon_table, 0,
 516                           0);
 517   immediate_quit = 1;
 518   val = re_search (bufp, string, len, 0, len, 0);
 519   immediate_quit = 0;
 520   return val;
 521 }
 522
 523 /* Like fast_string_match but ignore case.  */
 524
 525 int
 526 fast_string_match_ignore_case (regexp, string)
 527      Lisp_Object regexp, string;
 528 {
 529   int val;
 530   struct re_pattern_buffer *bufp;
 531
 532   bufp = compile_pattern (regexp, 0, Vascii_canon_table,
 533                           0, STRING_MULTIBYTE (string));
 534   immediate_quit = 1;
 535   re_match_object = string;
 536
 537   val = re_search (bufp, (char *) SDATA (string),
 538                    SBYTES (string), 0,
 539                    SBYTES (string), 0);
 540   immediate_quit = 0;
 541   return val;
 542 }
 543 \f
 544 /* Match REGEXP against the characters after POS to LIMIT, and return
 545    the number of matched characters.  If STRING is non-nil, match
 546    against the characters in it.  In that case, POS and LIMIT are
 547    indices into the string.  This function doesn't modify the match
 548    data.  */
 549
 550 EMACS_INT
 551 fast_looking_at (regexp, pos, pos_byte, limit, limit_byte, string)
 552      Lisp_Object regexp;
 553      EMACS_INT pos, pos_byte, limit, limit_byte;
 554      Lisp_Object string;
 555 {
 556   int multibyte;
 557   struct re_pattern_buffer *buf;
 558   unsigned char *p1, *p2;
 559   EMACS_INT s1, s2;
 560   EMACS_INT len;
 561
 562   if (STRINGP (string))
 563     {
 564       if (pos_byte < 0)
 565         pos_byte = string_char_to_byte (string, pos);
 566       if (limit_byte < 0)
 567         limit_byte = string_char_to_byte (string, limit);
 568       p1 = NULL;
 569       s1 = 0;
 570       p2 = SDATA (string);
 571       s2 = SBYTES (string);
 572       re_match_object = string;
 573       multibyte = STRING_MULTIBYTE (string);
 574     }
 575   else
 576     {
 577       if (pos_byte < 0)
 578         pos_byte = CHAR_TO_BYTE (pos);
 579       if (limit_byte < 0)
 580         limit_byte = CHAR_TO_BYTE (limit);
 581       pos_byte -= BEGV_BYTE;
 582       limit_byte -= BEGV_BYTE;
 583       p1 = BEGV_ADDR;
 584       s1 = GPT_BYTE - BEGV_BYTE;
 585       p2 = GAP_END_ADDR;
 586       s2 = ZV_BYTE - GPT_BYTE;
 587       if (s1 < 0)
 588         {
 589           p2 = p1;
 590           s2 = ZV_BYTE - BEGV_BYTE;
 591           s1 = 0;
 592         }
 593       if (s2 < 0)
 594         {
 595           s1 = ZV_BYTE - BEGV_BYTE;
 596           s2 = 0;
 597         }
 598       re_match_object = Qnil;
 599       multibyte = ! NILP (BUF_ENABLE_MULTIBYTE_CHARACTERS (current_buffer));
 600     }
 601
 602   buf = compile_pattern (regexp, 0, Qnil, 0, multibyte);
 603   immediate_quit = 1;
 604   len = re_match_2 (buf, (char *) p1, s1, (char *) p2, s2,
 605                     pos_byte, NULL, limit_byte);
 606   immediate_quit = 0;
 607
 608   return len;
 609 }
 610
 611 \f
 612 /* The newline cache: remembering which sections of text have no newlines.  */
 613
 614 /* If the user has requested newline caching, make sure it's on.
 615    Otherwise, make sure it's off.
 616    This is our cheezy way of associating an action with the change of
 617    state of a buffer-local variable.  */
 618 static void
 619 newline_cache_on_off (buf)
 620      struct buffer *buf;
 621 {
 622   if (NILP (BUF_CACHE_LONG_LINE_SCANS (buf)))
 623     {
 624       /* It should be off.  */
 625       if (buf->newline_cache)
 626         {
 627           free_region_cache (buf->newline_cache);
 628           buf->newline_cache = 0;
 629         }
 630     }
 631   else
 632     {
 633       /* It should be on.  */
 634       if (buf->newline_cache == 0)
 635         buf->newline_cache = new_region_cache ();
 636     }
 637 }
 638
 639 \f
 640 /* Search for COUNT instances of the character TARGET between START and END.
 641
 642    If COUNT is positive, search forwards; END must be >= START.
 643    If COUNT is negative, search backwards for the -COUNTth instance;
 644       END must be <= START.
 645    If COUNT is zero, do anything you please; run rogue, for all I care.
 646
 647    If END is zero, use BEGV or ZV instead, as appropriate for the
 648    direction indicated by COUNT.
 649
 650    If we find COUNT instances, set *SHORTAGE to zero, and return the
 651    position past the COUNTth match.  Note that for reverse motion
 652    this is not the same as the usual convention for Emacs motion commands.
 653
 654    If we don't find COUNT instances before reaching END, set *SHORTAGE
 655    to the number of TARGETs left unfound, and return END.
 656
 657    If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
 658    except when inside redisplay.  */
 659
 660 int
 661 scan_buffer (target, start, end, count, shortage, allow_quit)
 662      register int target;
 663      EMACS_INT start, end;
 664      int count;
 665      int *shortage;
 666      int allow_quit;
 667 {
 668   struct region_cache *newline_cache;
 669   int direction;
 670
 671   if (count > 0)
 672     {
 673       direction = 1;
 674       if (! end) end = ZV;
 675     }
 676   else
 677     {
 678       direction = -1;
 679       if (! end) end = BEGV;
 680     }
 681
 682   newline_cache_on_off (current_buffer);
 683   newline_cache = current_buffer->newline_cache;
 684
 685   if (shortage != 0)
 686     *shortage = 0;
 687
 688   immediate_quit = allow_quit;
 689
 690   if (count > 0)
 691     while (start != end)
 692       {
 693         /* Our innermost scanning loop is very simple; it doesn't know
 694            about gaps, buffer ends, or the newline cache.  ceiling is
 695            the position of the last character before the next such
 696            obstacle --- the last character the dumb search loop should
 697            examine.  */
 698         EMACS_INT ceiling_byte = CHAR_TO_BYTE (end) - 1;
 699         EMACS_INT start_byte = CHAR_TO_BYTE (start);
 700         EMACS_INT tem;
 701
 702         /* If we're looking for a newline, consult the newline cache
 703            to see where we can avoid some scanning.  */
 704         if (target == '\n' && newline_cache)
 705           {
 706             int next_change;
 707             immediate_quit = 0;
 708             while (region_cache_forward
 709                    (current_buffer, newline_cache, start_byte, &next_change))
 710               start_byte = next_change;
 711             immediate_quit = allow_quit;
 712
 713             /* START should never be after END.  */
 714             if (start_byte > ceiling_byte)
 715               start_byte = ceiling_byte;
 716
 717             /* Now the text after start is an unknown region, and
 718                next_change is the position of the next known region. */
 719             ceiling_byte = min (next_change - 1, ceiling_byte);
 720           }
 721
 722         /* The dumb loop can only scan text stored in contiguous
 723            bytes. BUFFER_CEILING_OF returns the last character
 724            position that is contiguous, so the ceiling is the
 725            position after that.  */
 726         tem = BUFFER_CEILING_OF (start_byte);
 727         ceiling_byte = min (tem, ceiling_byte);
 728
 729         {
 730           /* The termination address of the dumb loop.  */
 731           register unsigned char *ceiling_addr
 732             = BYTE_POS_ADDR (ceiling_byte) + 1;
 733           register unsigned char *cursor
 734             = BYTE_POS_ADDR (start_byte);
 735           unsigned char *base = cursor;
 736
 737           while (cursor < ceiling_addr)
 738             {
 739               unsigned char *scan_start = cursor;
 740
 741               /* The dumb loop.  */
 742               while (*cursor != target && ++cursor < ceiling_addr)
 743                 ;
 744
 745               /* If we're looking for newlines, cache the fact that
 746                  the region from start to cursor is free of them. */
 747               if (target == '\n' && newline_cache)
 748                 know_region_cache (current_buffer, newline_cache,
 749                                    start_byte + scan_start - base,
 750                                    start_byte + cursor - base);
 751
 752               /* Did we find the target character?  */
 753               if (cursor < ceiling_addr)
 754                 {
 755                   if (--count == 0)
 756                     {
 757                       immediate_quit = 0;
 758                       return BYTE_TO_CHAR (start_byte + cursor - base + 1);
 759                     }
 760                   cursor++;
 761                 }
 762             }
 763
 764           start = BYTE_TO_CHAR (start_byte + cursor - base);
 765         }
 766       }
 767   else
 768     while (start > end)
 769       {
 770         /* The last character to check before the next obstacle.  */
 771         EMACS_INT ceiling_byte = CHAR_TO_BYTE (end);
 772         EMACS_INT start_byte = CHAR_TO_BYTE (start);
 773         EMACS_INT tem;
 774
 775         /* Consult the newline cache, if appropriate.  */
 776         if (target == '\n' && newline_cache)
 777           {
 778             int next_change;
 779             immediate_quit = 0;
 780             while (region_cache_backward
 781                    (current_buffer, newline_cache, start_byte, &next_change))
 782               start_byte = next_change;
 783             immediate_quit = allow_quit;
 784
 785             /* Start should never be at or before end.  */
 786             if (start_byte <= ceiling_byte)
 787               start_byte = ceiling_byte + 1;
 788
 789             /* Now the text before start is an unknown region, and
 790                next_change is the position of the next known region. */
 791             ceiling_byte = max (next_change, ceiling_byte);
 792           }
 793
 794         /* Stop scanning before the gap.  */
 795         tem = BUFFER_FLOOR_OF (start_byte - 1);
 796         ceiling_byte = max (tem, ceiling_byte);
 797
 798         {
 799           /* The termination address of the dumb loop.  */
 800           register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
 801           register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
 802           unsigned char *base = cursor;
 803
 804           while (cursor >= ceiling_addr)
 805             {
 806               unsigned char *scan_start = cursor;
 807
 808               while (*cursor != target && --cursor >= ceiling_addr)
 809                 ;
 810
 811               /* If we're looking for newlines, cache the fact that
 812                  the region from after the cursor to start is free of them.  */
 813               if (target == '\n' && newline_cache)
 814                 know_region_cache (current_buffer, newline_cache,
 815                                    start_byte + cursor - base,
 816                                    start_byte + scan_start - base);
 817
 818               /* Did we find the target character?  */
 819               if (cursor >= ceiling_addr)
 820                 {
 821                   if (++count >= 0)
 822                     {
 823                       immediate_quit = 0;
 824                       return BYTE_TO_CHAR (start_byte + cursor - base);
 825                     }
 826                   cursor--;
 827                 }
 828             }
 829
 830           start = BYTE_TO_CHAR (start_byte + cursor - base);
 831         }
 832       }
 833
 834   immediate_quit = 0;
 835   if (shortage != 0)
 836     *shortage = count * direction;
 837   return start;
 838 }
 839 \f
 840 /* Search for COUNT instances of a line boundary, which means either a
 841    newline or (if selective display enabled) a carriage return.
 842    Start at START.  If COUNT is negative, search backwards.
 843
 844    We report the resulting position by calling TEMP_SET_PT_BOTH.
 845
 846    If we find COUNT instances, we position after (always after,
 847    even if scanning backwards) the COUNTth match, and return 0.
 848
 849    If we don't find COUNT instances before reaching the end of the
 850    buffer (or the beginning, if scanning backwards), we return
 851    the number of line boundaries left unfound, and position at
 852    the limit we bumped up against.
 853
 854    If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
 855    except in special cases.  */
 856
 857 int
 858 scan_newline (start, start_byte, limit, limit_byte, count, allow_quit)
 859      EMACS_INT start, start_byte;
 860      EMACS_INT limit, limit_byte;
 861      register int count;
 862      int allow_quit;
 863 {
 864   int direction = ((count > 0) ? 1 : -1);
 865
 866   register unsigned char *cursor;
 867   unsigned char *base;
 868
 869   EMACS_INT ceiling;
 870   register unsigned char *ceiling_addr;
 871
 872   int old_immediate_quit = immediate_quit;
 873
 874   /* The code that follows is like scan_buffer
 875      but checks for either newline or carriage return.  */
 876
 877   if (allow_quit)
 878     immediate_quit++;
 879
 880   start_byte = CHAR_TO_BYTE (start);
 881
 882   if (count > 0)
 883     {
 884       while (start_byte < limit_byte)
 885         {
 886           ceiling =  BUFFER_CEILING_OF (start_byte);
 887           ceiling = min (limit_byte - 1, ceiling);
 888           ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
 889           base = (cursor = BYTE_POS_ADDR (start_byte));
 890           while (1)
 891             {
 892               while (*cursor != '\n' && ++cursor != ceiling_addr)
 893                 ;
 894
 895               if (cursor != ceiling_addr)
 896                 {
 897                   if (--count == 0)
 898                     {
 899                       immediate_quit = old_immediate_quit;
 900                       start_byte = start_byte + cursor - base + 1;
 901                       start = BYTE_TO_CHAR (start_byte);
 902                       TEMP_SET_PT_BOTH (start, start_byte);
 903                       return 0;
 904                     }
 905                   else
 906                     if (++cursor == ceiling_addr)
 907                       break;
 908                 }
 909               else
 910                 break;
 911             }
 912           start_byte += cursor - base;
 913         }
 914     }
 915   else
 916     {
 917       while (start_byte > limit_byte)
 918         {
 919           ceiling = BUFFER_FLOOR_OF (start_byte - 1);
 920           ceiling = max (limit_byte, ceiling);
 921           ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
 922           base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
 923           while (1)
 924             {
 925               while (--cursor != ceiling_addr && *cursor != '\n')
 926                 ;
 927
 928               if (cursor != ceiling_addr)
 929                 {
 930                   if (++count == 0)
 931                     {
 932                       immediate_quit = old_immediate_quit;
 933                       /* Return the position AFTER the match we found.  */
 934                       start_byte = start_byte + cursor - base + 1;
 935                       start = BYTE_TO_CHAR (start_byte);
 936                       TEMP_SET_PT_BOTH (start, start_byte);
 937                       return 0;
 938                     }
 939                 }
 940               else
 941                 break;
 942             }
 943           /* Here we add 1 to compensate for the last decrement
 944              of CURSOR, which took it past the valid range.  */
 945           start_byte += cursor - base + 1;
 946         }
 947     }
 948
 949   TEMP_SET_PT_BOTH (limit, limit_byte);
 950   immediate_quit = old_immediate_quit;
 951
 952   return count * direction;
 953 }
 954
 955 int
 956 find_next_newline_no_quit (from, cnt)
 957      EMACS_INT from;
 958      int cnt;
 959 {
 960   return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0);
 961 }
 962
 963 /* Like find_next_newline, but returns position before the newline,
 964    not after, and only search up to TO.  This isn't just
 965    find_next_newline (...)-1, because you might hit TO.  */
 966
 967 int
 968 find_before_next_newline (from, to, cnt)
 969      EMACS_INT from, to;
 970      int cnt;
 971 {
 972   int shortage;
 973   int pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);
 974
 975   if (shortage == 0)
 976     pos--;
 977
 978   return pos;
 979 }
 980 \f
 981 /* Subroutines of Lisp buffer search functions. */
 982
 983 static Lisp_Object
 984 search_command (string, bound, noerror, count, direction, RE, posix)
 985      Lisp_Object string, bound, noerror, count;
 986      int direction;
 987      int RE;
 988      int posix;
 989 {
 990   register int np;
 991   int lim, lim_byte;
 992   int n = direction;
 993
 994   if (!NILP (count))
 995     {
 996       CHECK_NUMBER (count);
 997       n *= XINT (count);
 998     }
 999
1000   CHECK_STRING (string);
1001   if (NILP (bound))
1002     {
1003       if (n > 0)
1004         lim = ZV, lim_byte = ZV_BYTE;
1005       else
1006         lim = BEGV, lim_byte = BEGV_BYTE;
1007     }
1008   else
1009     {
1010       CHECK_NUMBER_COERCE_MARKER (bound);
1011       lim = XINT (bound);
1012       if (n > 0 ? lim < PT : lim > PT)
1013         error ("Invalid search bound (wrong side of point)");
1014       if (lim > ZV)
1015         lim = ZV, lim_byte = ZV_BYTE;
1016       else if (lim < BEGV)
1017         lim = BEGV, lim_byte = BEGV_BYTE;
1018       else
1019         lim_byte = CHAR_TO_BYTE (lim);
1020     }
1021
1022   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
1023   XCHAR_TABLE (BUF_CASE_CANON_TABLE (current_buffer))->extras[2]
1024     = BUF_CASE_EQV_TABLE (current_buffer);
1025
1026   np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
1027                       (!NILP (BUF_CASE_FOLD_SEARCH (current_buffer))
1028                        ? BUF_CASE_CANON_TABLE (current_buffer)
1029                        : Qnil),
1030                       (!NILP (BUF_CASE_FOLD_SEARCH (current_buffer))
1031                        ? BUF_CASE_EQV_TABLE (current_buffer)
1032                        : Qnil),
1033                       posix);
1034   if (np <= 0)
1035     {
1036       if (NILP (noerror))
1037         xsignal1 (Qsearch_failed, string);
1038
1039       if (!EQ (noerror, Qt))
1040         {
1041           if (lim < BEGV || lim > ZV)
1042             abort ();
1043           SET_PT_BOTH (lim, lim_byte);
1044           return Qnil;
1045 #if 0 /* This would be clean, but maybe programs depend on
1046          a value of nil here.  */
1047           np = lim;
1048 #endif
1049         }
1050       else
1051         return Qnil;
1052     }
1053
1054   if (np < BEGV || np > ZV)
1055     abort ();
1056
1057   SET_PT (np);
1058
1059   return make_number (np);
1060 }
1061 \f
1062 /* Return 1 if REGEXP it matches just one constant string.  */
1063
1064 static int
1065 trivial_regexp_p (regexp)
1066      Lisp_Object regexp;
1067 {
1068   int len = SBYTES (regexp);
1069   unsigned char *s = SDATA (regexp);
1070   while (--len >= 0)
1071     {
1072       switch (*s++)
1073         {
1074         case '.': case '*': case '+': case '?': case '[': case '^': case '$':
1075           return 0;
1076         case '\\':
1077           if (--len < 0)
1078             return 0;
1079           switch (*s++)
1080             {
1081             case '|': case '(': case ')': case '`': case '\'': case 'b':
1082             case 'B': case '<': case '>': case 'w': case 'W': case 's':
1083             case 'S': case '=': case '{': case '}': case '_':
1084             case 'c': case 'C': /* for categoryspec and notcategoryspec */
1085             case '1': case '2': case '3': case '4': case '5':
1086             case '6': case '7': case '8': case '9':
1087               return 0;
1088             }
1089         }
1090     }
1091   return 1;
1092 }
1093
1094 /* Search for the n'th occurrence of STRING in the current buffer,
1095    starting at position POS and stopping at position LIM,
1096    treating STRING as a literal string if RE is false or as
1097    a regular expression if RE is true.
1098
1099    If N is positive, searching is forward and LIM must be greater than POS.
1100    If N is negative, searching is backward and LIM must be less than POS.
1101
1102    Returns -x if x occurrences remain to be found (x > 0),
1103    or else the position at the beginning of the Nth occurrence
1104    (if searching backward) or the end (if searching forward).
1105
1106    POSIX is nonzero if we want full backtracking (POSIX style)
1107    for this pattern.  0 means backtrack only enough to get a valid match.  */
1108
/* Store in OUT the translation of the character code D according to
   the table TRT.  If TRT is nil, or if the entry Faref finds for D is
   not an integer, OUT is set to D itself.

   D is evaluated exactly once, so an argument with side effects is
   safe; OUT and D may be the same variable.  TRT is evaluated more
   than once and should be a plain variable.  */

#define TRANSLATE(out, trt, d)                                          \
do                                                                      \
  {                                                                     \
    int translate_src_ = (d);                                           \
    if (! NILP (trt))                                                   \
      {                                                                 \
        Lisp_Object translate_val_;                                     \
        translate_val_ = Faref ((trt), make_number (translate_src_));   \
        if (INTEGERP (translate_val_))                                  \
          (out) = XINT (translate_val_);                                \
        else                                                            \
          (out) = translate_src_;                                       \
      }                                                                 \
    else                                                                \
      (out) = translate_src_;                                           \
  }                                                                     \
while (0)
1125
1126 /* Scratch match registers, used only in search_buffer.  They are
1127    handed to the regexp routines in place of SEARCH_REGS when
1128    SEARCH_REGS should not be changed (i.e. when
        Vinhibit_changing_match_data is non-nil), so that the position
        of the match -- start[0] for a backward search, end[0] for a
        forward one -- can still be read.  */
1129 static struct re_registers search_regs_1;
1130
     /* Search for the Nth occurrence of STRING in the current buffer,
        from POS/POS_BYTE up to LIM/LIM_BYTE.  N > 0 searches forward,
        N < 0 backward.  STRING is a regexp if RE is nonzero, a literal
        string otherwise.  TRT and INVERSE_TRT are translation tables, or
        nil for no translation.  POSIX is handed to compile_pattern.

        Return the character position of the end of the last match
        (forward) or of its beginning (backward); if x occurrences remain
        unfound, return -x.  If STRING is empty or N is zero, record an
        empty match at POS_BYTE and return POS.

        Regexps are searched with re_search_2.  Literal strings, and
        regexps that trivial_regexp_p accepts while Vsearch_spaces_regexp
        is nil, are translated into a scratch buffer and searched with
        boyer_moore or simple_search.  */
1131 static EMACS_INT
1132 search_buffer (string, pos, pos_byte, lim, lim_byte, n,
1133                RE, trt, inverse_trt, posix)
1134      Lisp_Object string;
1135      EMACS_INT pos;
1136      EMACS_INT pos_byte;
1137      EMACS_INT lim;
1138      EMACS_INT lim_byte;
1139      int n;
1140      int RE;
1141      Lisp_Object trt;
1142      Lisp_Object inverse_trt;
1143      int posix;
1144 {
1145   int len = SCHARS (string);
1146   int len_byte = SBYTES (string);
1147   register int i;
1148
1149   if (running_asynch_code)
1150     save_search_regs ();
1151
1152   /* Searching 0 times means don't move.  */
1153   /* Null string is found at starting position.  */
1154   if (len == 0 || n == 0)
1155     {
1156       set_search_regs (pos_byte, 0);
1157       return pos;
1158     }
1159
       /* Take the regexp path unless STRING is a trivial regexp and no
          whitespace regexp is in effect; those cases are handled by the
          literal search in the else branch.  */
1160   if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp)))
1161     {
1162       unsigned char *p1, *p2;
1163       int s1, s2;
1164       struct re_pattern_buffer *bufp;
1165
1166       bufp = compile_pattern (string,
1167                               (NILP (Vinhibit_changing_match_data)
1168                                ? &search_regs : &search_regs_1),
1169                               trt, posix,
1170                               !NILP (BUF_ENABLE_MULTIBYTE_CHARACTERS (current_buffer)));
1171
1172       immediate_quit = 1;       /* Quit immediately if user types ^G,
1173                                    because letting this function finish
1174                                    can take too long. */
1175       QUIT;                     /* Do a pending quit right away,
1176                                    to avoid paradoxical behavior */
1177       /* Get pointers and sizes of the two strings
1178          that make up the visible portion of the buffer. */
1179
1180       p1 = BEGV_ADDR;
1181       s1 = GPT_BYTE - BEGV_BYTE;
1182       p2 = GAP_END_ADDR;
1183       s2 = ZV_BYTE - GPT_BYTE;
           /* A negative S1 means the whole visible portion is handed
              over as the second string.  */
1184       if (s1 < 0)
1185         {
1186           p2 = p1;
1187           s2 = ZV_BYTE - BEGV_BYTE;
1188           s1 = 0;
1189         }
           /* A negative S2 means the whole visible portion is handed
              over as the first string.  */
1190       if (s2 < 0)
1191         {
1192           s1 = ZV_BYTE - BEGV_BYTE;
1193           s2 = 0;
1194         }
1195       re_match_object = Qnil;
1196
           /* Backward search: one iteration per occurrence still wanted;
              POS/POS_BYTE move to the start of each match.  */
1197       while (n < 0)
1198         {
1199           int val;
1200           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1201                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1202                              (NILP (Vinhibit_changing_match_data)
1203                               ? &search_regs : &search_regs_1),
1204                              /* Don't allow match past current point */
1205                              pos_byte - BEGV_BYTE);
1206           if (val == -2)
1207             {
1208               matcher_overflow ();
1209             }
1210           if (val >= 0)
1211             {
1212               if (NILP (Vinhibit_changing_match_data))
1213                 {
1214                   pos_byte = search_regs.start[0] + BEGV_BYTE;
                       /* Convert the registers that are set (start >= 0)
                          from byte offsets relative to BEGV_BYTE into
                          character positions.  */
1215                   for (i = 0; i < search_regs.num_regs; i++)
1216                     if (search_regs.start[i] >= 0)
1217                       {
1218                         search_regs.start[i]
1219                           = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1220                         search_regs.end[i]
1221                           = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1222                       }
1223                   XSETBUFFER (last_thing_searched, current_buffer);
1224                   /* Set pos to the new position. */
1225                   pos = search_regs.start[0];
1226                 }
1227               else
1228                 {
1229                   pos_byte = search_regs_1.start[0] + BEGV_BYTE;
1230                   /* Set pos to the new position.  */
1231                   pos = BYTE_TO_CHAR (search_regs_1.start[0] + BEGV_BYTE);
1232                 }
1233             }
1234           else
1235             {
                   /* No match: N, still negative, tells how many
                      occurrences remain unfound.  */
1236               immediate_quit = 0;
1237               return (n);
1238             }
1239           n++;
1240         }
           /* Forward search: one iteration per occurrence still wanted;
              POS/POS_BYTE move to the end of each match.  */
1241       while (n > 0)
1242         {
1243           int val;
1244           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1245                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1246                              (NILP (Vinhibit_changing_match_data)
1247                               ? &search_regs : &search_regs_1),
1248                              lim_byte - BEGV_BYTE);
1249           if (val == -2)
1250             {
1251               matcher_overflow ();
1252             }
1253           if (val >= 0)
1254             {
1255               if (NILP (Vinhibit_changing_match_data))
1256                 {
1257                   pos_byte = search_regs.end[0] + BEGV_BYTE;
                       /* Same byte-offset to character-position
                          conversion as in the backward loop.  */
1258                   for (i = 0; i < search_regs.num_regs; i++)
1259                     if (search_regs.start[i] >= 0)
1260                       {
1261                         search_regs.start[i]
1262                           = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1263                         search_regs.end[i]
1264                           = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1265                       }
1266                   XSETBUFFER (last_thing_searched, current_buffer);
1267                   pos = search_regs.end[0];
1268                 }
1269               else
1270                 {
1271                   pos_byte = search_regs_1.end[0] + BEGV_BYTE;
1272                   pos = BYTE_TO_CHAR (search_regs_1.end[0] + BEGV_BYTE);
1273                 }
1274             }
1275           else
1276             {
                   /* No match: report the remaining count, negated.  */
1277               immediate_quit = 0;
1278               return (0 - n);
1279             }
1280           n--;
1281         }
1282       immediate_quit = 0;
1283       return (pos);
1284     }
1285   else                          /* non-RE case */
1286     {
1287       unsigned char *raw_pattern, *pat;
1288       int raw_pattern_size;
1289       int raw_pattern_size_byte;
1290       unsigned char *patbuf;
1291       int multibyte = !NILP (BUF_ENABLE_MULTIBYTE_CHARACTERS (current_buffer));
1292       unsigned char *base_pat;
1293       /* Set to positive if we find a non-ASCII char that needs
1294          translation.  Otherwise set to zero later.  */
1295       int char_base = -1;
1296       int boyer_moore_ok = 1;
1297
1298       /* MULTIBYTE says whether the text to be searched is multibyte.
1299          We must convert PATTERN to match that, or we will not really
1300          find things right.  */
1301
1302       if (multibyte == STRING_MULTIBYTE (string))
1303         {
1304           raw_pattern = (unsigned char *) SDATA (string);
1305           raw_pattern_size = SCHARS (string);
1306           raw_pattern_size_byte = SBYTES (string);
1307         }
1308       else if (multibyte)
1309         {
               /* Converting single-byte to multibyte.  */
1310           raw_pattern_size = SCHARS (string);
1311           raw_pattern_size_byte
1312             = count_size_as_multibyte (SDATA (string),
1313                                        raw_pattern_size);
1314           raw_pattern = (unsigned char *) alloca (raw_pattern_size_byte + 1);
1315           copy_text (SDATA (string), raw_pattern,
1316                      SCHARS (string), 0, 1);
1317         }
1318       else
1319         {
1320           /* Converting multibyte to single-byte.
1321
1322              ??? Perhaps this conversion should be done in a special way
1323              by subtracting nonascii-insert-offset from each non-ASCII char,
1324              so that only the multibyte chars which really correspond to
1325              the chosen single-byte character set can possibly match.  */
1326           raw_pattern_size = SCHARS (string);
1327           raw_pattern_size_byte = SCHARS (string);
1328           raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
1329           copy_text (SDATA (string), raw_pattern,
1330                      SBYTES (string), 1, 0);
1331         }
1332
1333       /* Copy and optionally translate the pattern.  */
1334       len = raw_pattern_size;
1335       len_byte = raw_pattern_size_byte;
1336       patbuf = (unsigned char *) alloca (len * MAX_MULTIBYTE_LENGTH);
1337       pat = patbuf;
1338       base_pat = raw_pattern;
1339       if (multibyte)
1340         {
1341           /* Fill patbuf by translated characters in STRING while
1342              checking if we can use boyer-moore search.  If TRT is
1343              non-nil, we can use boyer-moore search only if TRT can be
1344              represented by the byte array of 256 elements.  For that,
1345              all non-ASCII case-equivalents of all case-sensitive
1346              characters in STRING must belong to the same charset and
1347              row.  */
1348
1349           while (--len >= 0)
1350             {
1351               unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str;
1352               int c, translated, inverse;
1353               int in_charlen, charlen;
1354
1355               /* If we got here and the RE flag is set, it's because we're
1356                  dealing with a regexp known to be trivial, so the backslash
1357                  just quotes the next character.  */
1358               if (RE && *base_pat == '\\')
1359                 {
1360                   len--;
1361                   raw_pattern_size--;
1362                   len_byte--;
1363                   base_pat++;
1364                 }
1365
1366               c = STRING_CHAR_AND_LENGTH (base_pat, in_charlen);
1367
1368               if (NILP (trt))
1369                 {
1370                   str = base_pat;
1371                   charlen = in_charlen;
1372                 }
1373               else
1374                 {
1375                   /* Translate the character.  */
1376                   TRANSLATE (translated, trt, c);
1377                   charlen = CHAR_STRING (translated, str_base);
1378                   str = str_base;
1379
1380                   /* Check if C has any other case-equivalents.  */
1381                   TRANSLATE (inverse, inverse_trt, c);
1382                   /* If so, check if we can use boyer-moore.  */
1383                   if (c != inverse && boyer_moore_ok)
1384                     {
1385                       /* Check if all equivalents belong to the same
1386                          group of characters.  Note that the check of C
1387                          itself is done by the last iteration.  */
1388                       int this_char_base = -1;
1389
1390                       while (boyer_moore_ok)
1391                         {
1392                           if (ASCII_BYTE_P (inverse))
1393                             {
1394                               if (this_char_base > 0)
1395                                 boyer_moore_ok = 0;
1396                               else
1397                                 this_char_base = 0;
1398                             }
1399                           else if (CHAR_BYTE8_P (inverse))
1400                             /* Boyer-moore search can't handle a
1401                                translation of an eight-bit
1402                                character.  */
1403                             boyer_moore_ok = 0;
1404                           else if (this_char_base < 0)
1405                             {
1406                               this_char_base = inverse & ~0x3F;
1407                               if (char_base < 0)
1408                                 char_base = this_char_base;
1409                               else if (this_char_base != char_base)
1410                                 boyer_moore_ok = 0;
1411                             }
1412                           else if ((inverse & ~0x3F) != this_char_base)
1413                             boyer_moore_ok = 0;
1414                           if (c == inverse)
1415                             break;
1416                           TRANSLATE (inverse, inverse_trt, inverse);
1417                         }
1418                     }
1419                 }
1420
1421               /* Store this character into the translated pattern.  */
1422               bcopy (str, pat, charlen);
1423               pat += charlen;
1424               base_pat += in_charlen;
1425               len_byte -= in_charlen;
1426             }
1427
1428           /* If char_base is still negative we didn't find any translated
1429              non-ASCII characters.  */
1430           if (char_base < 0)
1431             char_base = 0;
1432         }
1433       else
1434         {
1435           /* Unibyte buffer.  */
1436           char_base = 0;
1437           while (--len >= 0)
1438             {
1439               int c, translated;
1440
1441               /* If we got here and the RE flag is set, it's because we're
1442                  dealing with a regexp known to be trivial, so the backslash
1443                  just quotes the next character.  */
1444               if (RE && *base_pat == '\\')
1445                 {
1446                   len--;
1447                   raw_pattern_size--;
1448                   base_pat++;
1449                 }
1450               c = *base_pat++;
1451               TRANSLATE (translated, trt, c);
1452               *pat++ = translated;
1453             }
1454         }
1455
           /* From here on LEN_BYTE is the byte length of the translated
              pattern stored in PATBUF, and LEN is its character count
              with the quoting backslashes discounted.  */
1456       len_byte = pat - patbuf;
1457       len = raw_pattern_size;
1458       pat = base_pat = patbuf;
1459
1460       if (boyer_moore_ok)
1461         return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
1462                             pos, pos_byte, lim, lim_byte,
1463                             char_base);
1464       else
1465         return simple_search (n, pat, len, len_byte, trt,
1466                               pos, pos_byte, lim, lim_byte);
1467     }
1468 }
1469 \f
1470 /* Do a simple string search N times for the string PAT,
1471    whose length is LEN/LEN_BYTE,
1472    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1473    TRT is the translation table.
1474
1475    Return the character position where the match is found.
1476    Otherwise, if M matches remained to be found, return -M.
1477
1478    This kind of search works regardless of what is in PAT and
1479    regardless of what is in TRT.  It is used in cases where
1480    boyer_moore cannot work.  */
1481
1482 static EMACS_INT
1483 simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
1484      int n;
1485      unsigned char *pat;
1486      int len, len_byte;
1487      Lisp_Object trt;
1488      EMACS_INT pos, pos_byte;
1489      EMACS_INT lim, lim_byte;
1490 {
1491   int multibyte = ! NILP (BUF_ENABLE_MULTIBYTE_CHARACTERS (current_buffer));
1492   int forward = n > 0;
1493   /* Number of buffer bytes matched.  Note that this may be different
1494      from len_byte in a multibyte buffer.  */
1495   int match_byte;
1496
1497   if (lim > pos && multibyte)
1498     while (n > 0)
1499       {
1500         while (1)
1501           {
1502             /* Try matching at position POS.  */
1503             EMACS_INT this_pos = pos;
1504             EMACS_INT this_pos_byte = pos_byte;
1505             int this_len = len;
1506             unsigned char *p = pat;
1507             if (pos + len > lim || pos_byte + len_byte > lim_byte)
1508               goto stop;
1509
1510             while (this_len > 0)
1511               {
1512                 int charlen, buf_charlen;
1513                 int pat_ch, buf_ch;
1514
1515                 pat_ch = STRING_CHAR_AND_LENGTH (p, charlen);
1516                 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1517                                                  buf_charlen);
1518                 TRANSLATE (buf_ch, trt, buf_ch);
1519
1520                 if (buf_ch != pat_ch)
1521                   break;
1522
1523                 this_len--;
1524                 p += charlen;
1525
1526                 this_pos_byte += buf_charlen;
1527                 this_pos++;
1528               }
1529
1530             if (this_len == 0)
1531               {
1532                 match_byte = this_pos_byte - pos_byte;
1533                 pos += len;
1534                 pos_byte += match_byte;
1535                 break;
1536               }
1537
1538             INC_BOTH (pos, pos_byte);
1539           }
1540
1541         n--;
1542       }
1543   else if (lim > pos)
1544     while (n > 0)
1545       {
1546         while (1)
1547           {
1548             /* Try matching at position POS.  */
1549             EMACS_INT this_pos = pos;
1550             int this_len = len;
1551             unsigned char *p = pat;
1552
1553             if (pos + len > lim)
1554               goto stop;
1555
1556             while (this_len > 0)
1557               {
1558                 int pat_ch = *p++;
1559                 int buf_ch = FETCH_BYTE (this_pos);
1560                 TRANSLATE (buf_ch, trt, buf_ch);
1561
1562                 if (buf_ch != pat_ch)
1563                   break;
1564
1565                 this_len--;
1566                 this_pos++;
1567               }
1568
1569             if (this_len == 0)
1570               {
1571                 match_byte = len;
1572                 pos += len;
1573                 break;
1574               }
1575
1576             pos++;
1577           }
1578
1579         n--;
1580       }
1581   /* Backwards search.  */
1582   else if (lim < pos && multibyte)
1583     while (n < 0)
1584       {
1585         while (1)
1586           {
1587             /* Try matching at position POS.  */
1588             EMACS_INT this_pos = pos;
1589             EMACS_INT this_pos_byte = pos_byte;
1590             int this_len = len;
1591             const unsigned char *p = pat + len_byte;
1592
1593             if (this_pos - len < lim || (pos_byte - len_byte) < lim_byte)
1594               goto stop;
1595
1596             while (this_len > 0)
1597               {
1598                 int charlen;
1599                 int pat_ch, buf_ch;
1600
1601                 DEC_BOTH (this_pos, this_pos_byte);
1602                 PREV_CHAR_BOUNDARY (p, pat);
1603                 pat_ch = STRING_CHAR (p);
1604                 buf_ch = STRING_CHAR (BYTE_POS_ADDR (this_pos_byte));
1605                 TRANSLATE (buf_ch, trt, buf_ch);
1606
1607                 if (buf_ch != pat_ch)
1608                   break;
1609
1610                 this_len--;
1611               }
1612
1613             if (this_len == 0)
1614               {
1615                 match_byte = pos_byte - this_pos_byte;
1616                 pos = this_pos;
1617                 pos_byte = this_pos_byte;
1618                 break;
1619               }
1620
1621             DEC_BOTH (pos, pos_byte);
1622           }
1623
1624         n++;
1625       }
1626   else if (lim < pos)
1627     while (n < 0)
1628       {
1629         while (1)
1630           {
1631             /* Try matching at position POS.  */
1632             EMACS_INT this_pos = pos - len;
1633             int this_len = len;
1634             unsigned char *p = pat;
1635
1636             if (this_pos < lim)
1637               goto stop;
1638
1639             while (this_len > 0)
1640               {
1641                 int pat_ch = *p++;
1642                 int buf_ch = FETCH_BYTE (this_pos);
1643                 TRANSLATE (buf_ch, trt, buf_ch);
1644
1645                 if (buf_ch != pat_ch)
1646                   break;
1647                 this_len--;
1648                 this_pos++;
1649               }
1650
1651             if (this_len == 0)
1652               {
1653                 match_byte = len;
1654                 pos -= len;
1655                 break;
1656               }
1657
1658             pos--;
1659           }
1660
1661         n++;
1662       }
1663
1664  stop:
1665   if (n == 0)
1666     {
1667       if (forward)
1668         set_search_regs ((multibyte ? pos_byte : pos) - match_byte, match_byte);
1669       else
1670         set_search_regs (multibyte ? pos_byte : pos, match_byte);
1671
1672       return pos;
1673     }
1674   else if (n > 0)
1675     return -n;
1676   else
1677     return n;
1678 }
1679 \f
1680 /* Do Boyer-Moore search N times for the string BASE_PAT,
1681    whose length is LEN/LEN_BYTE,
1682    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1683    The sign of N says which direction we search in.
1684    TRT and INVERSE_TRT are translation tables.
1685    Characters in PAT are already translated by TRT.
1686
1687    This kind of search works if all the characters in BASE_PAT that
1688    have nontrivial translation are the same aside from the last byte.
1689    This makes it possible to translate just the last byte of a
1690    character, and do so after just a simple test of the context.
1691    CHAR_BASE is nonzero if there is such a non-ASCII character.
1692
1693    If that criterion is not satisfied, do not call this function.  */
1694
1695 static EMACS_INT
1696 boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
1697              pos, pos_byte, lim, lim_byte, char_base)
1698      int n;                     /* Match count wanted; sign = direction.  */
1699      unsigned char *base_pat;   /* Pattern bytes, LEN_BYTE of them.  */
1700      int len, len_byte;         /* LEN itself is not read below.  */
1701      Lisp_Object trt;
1702      Lisp_Object inverse_trt;
1703      EMACS_INT pos, pos_byte;   /* Only POS_BYTE is read below.  */
1704      EMACS_INT lim, lim_byte;   /* Only LIM_BYTE is read below.  */
1705      int char_base;
1706 {
1707   int direction = ((n > 0) ? 1 : -1);
1708   register int dirlen;          /* LEN_BYTE, negated when searching backward.  */
1709   EMACS_INT limit;
1710   int stride_for_teases = 0;
1711   int BM_tab[0400];             /* Stride to take, indexed by byte value.  */
1712   register unsigned char *cursor, *p_limit;
1713   register int i, j;
1714   unsigned char *pat, *pat_end;
1715   int multibyte = ! NILP (BUF_ENABLE_MULTIBYTE_CHARACTERS (current_buffer));
1716
1717   unsigned char simple_translate[0400];
1718   /* These are set to the preceding bytes of a byte to be translated
1719      if char_base is nonzero.  As the maximum byte length of a
1720      multibyte character is 5, up to four previous bytes are recorded;
1721      note that the matching loops below consult only the first three.  */
1722   int translate_prev_byte1 = 0;
1723   int translate_prev_byte2 = 0;
1724   int translate_prev_byte3 = 0;
1725   int translate_prev_byte4 = 0;
1726
1727   /* The general approach is that we are going to maintain that we know
1728      the first (closest to the present position, in whatever direction
1729      we're searching) character that could possibly be the last
1730      (furthest from present position) character of a valid match.  We
1731      advance the state of our knowledge by looking at that character
1732      and seeing whether it indeed matches the last character of the
1733      pattern.  If it does, we take a closer look.  If it does not, we
1734      move our pointer (to putative last characters) as far as is
1735      logically possible.  This amount of movement, which I call a
1736      stride, will be the length of the pattern if the actual character
1737      appears nowhere in the pattern, otherwise it will be the distance
1738      from the last occurrence of that character to the end of the
1739      pattern.  If the amount is zero we have a possible match.  */
1740
1741   /* Here we make a "mickey mouse" BM table.  The stride of the search
1742      is determined only by the last character of the putative match.
1743      If that character does not match, we will stride the proper
1744      distance to propose a match that superimposes it on the last
1745      instance of a character that matches it (per trt), or misses
1746      it entirely if there is none. */
1747
1748   dirlen = len_byte * direction;
1749
1750   /* Record position after the end of the pattern.  */
1751   pat_end = base_pat + len_byte;
1752   /* BASE_PAT points to a character that we start scanning from.
1753      It is the first character in a forward search,
1754      the last character in a backward search.  */
1755   if (direction < 0)
1756     base_pat = pat_end - 1;
1757
1758   /* A character that does not appear in the pattern induces a
1759      stride equal to the pattern length.  */
1760   for (i = 0; i < 0400; i++)
1761     BM_tab[i] = dirlen;
1762
1763   /* We use this for translation, instead of TRT itself.
1764      We fill this in to handle the characters that actually
1765      occur in the pattern.  Others don't matter anyway!  */
1766   for (i = 0; i < 0400; i++)
1767     simple_translate[i] = i;
1768
1769   if (char_base)
1770     {
1771       /* Set up translate_prev_byte1/2/3/4 from CHAR_BASE.  Only a
1772          byte following them is a target of translation.  */
1773       unsigned char str[MAX_MULTIBYTE_LENGTH];
1774       int len = CHAR_STRING (char_base, str);  /* Shadows parameter LEN.  */
1775
1776       translate_prev_byte1 = str[len - 2];
1777       if (len > 2)
1778         {
1779           translate_prev_byte2 = str[len - 3];
1780           if (len > 3)
1781             {
1782               translate_prev_byte3 = str[len - 4];
1783               if (len > 4)
1784                 translate_prev_byte4 = str[len - 5];
1785             }
1786         }
1787     }
1788   /* Fill BM_tab (and simple_translate) one pattern byte at a time.  */
1789   i = 0;
1790   while (i != dirlen)
1791     {
1792       unsigned char *ptr = base_pat + i;
1793       i += direction;
1794       if (! NILP (trt))
1795         {
1796           /* If the byte we are currently looking at is the last byte
1797              of a character whose case-equivalents must be checked,
1798              set CH to that character.  ASCII characters, and non-ASCII
1799              characters matching CHAR_BASE, are the ones to check.  */
1800           int ch = -1;
1801
1802           if (ASCII_BYTE_P (*ptr) || ! multibyte)
1803             ch = *ptr;
1804           else if (char_base
1805                    && ((pat_end - ptr) == 1 || CHAR_HEAD_P (ptr[1])))
1806             {
1807               unsigned char *charstart = ptr - 1;
1808
1809               while (! (CHAR_HEAD_P (*charstart)))
1810                 charstart--;
1811               ch = STRING_CHAR (charstart);
1812               if (char_base != (ch & ~0x3F))
1813                 ch = -1;
1814             }
1815
1816           if (ch >= 0200)
1817             j = (ch & 0x3F) | 0200;
1818           else
1819             j = *ptr;
1820
1821           if (i == dirlen)
1822             stride_for_teases = BM_tab[j];
1823
1824           BM_tab[j] = dirlen - i;
1825           /* A translation table is accompanied by its inverse -- see */
1826           /* comment following downcase_table for details */
1827           if (ch >= 0)
1828             {
1829               int starting_ch = ch;
1830               int starting_j = j;
1831
1832               while (1)
1833                 {
1834                   TRANSLATE (ch, inverse_trt, ch);
1835                   if (ch >= 0200)
1836                     j = (ch & 0x3F) | 0200;
1837                   else
1838                     j = ch;
1839
1840                   /* For all the characters that map into CH,
1841                      set up simple_translate to map the last byte
1842                      into STARTING_J.  */
1843                   simple_translate[j] = starting_j;
1844                   if (ch == starting_ch)
1845                     break;
1846                   BM_tab[j] = dirlen - i;
1847                 }
1848             }
1849         }
1850       else
1851         {
1852           j = *ptr;
1853
1854           if (i == dirlen)
1855             stride_for_teases = BM_tab[j];
1856           BM_tab[j] = dirlen - i;
1857         }
1858       /* stride_for_teases tells how much to stride if we get a
1859          match on the far character but are subsequently
1860          disappointed, by recording what the stride would have been
1861          for that character if the last character had been
1862          different.  */
1863     }
1864   pos_byte += dirlen - ((direction > 0) ? direction : 0);
1865   /* loop invariant - POS_BYTE points at where last char (first
1866      char if reverse) of pattern would align in a possible match.  */
1867   while (n != 0)
1868     {
1869       EMACS_INT tail_end;
1870       unsigned char *tail_end_ptr;
1871
1872       /* It's been reported that some (broken) compiler thinks that
1873          Boolean expressions in an arithmetic context are unsigned.
1874          Using an explicit ?1:0 prevents this.  */
1875       if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1876           < 0)
1877         return (n * (0 - direction));   /* -|N|: matches still unfound.  */
1878       /* First we do the part we can by pointers (maybe nothing) */
1879       QUIT;
1880       pat = base_pat;
1881       limit = pos_byte - dirlen + direction;
1882       if (direction > 0)
1883         {
1884           limit = BUFFER_CEILING_OF (limit);
1885           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1886              can take on without hitting edge of buffer or the gap.  */
1887           limit = min (limit, pos_byte + 20000);
1888           limit = min (limit, lim_byte - 1);
1889         }
1890       else
1891         {
1892           limit = BUFFER_FLOOR_OF (limit);
1893           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1894              can take on without hitting edge of buffer or the gap.  */
1895           limit = max (limit, pos_byte - 20000);
1896           limit = max (limit, lim_byte);
1897         }
1898       tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1899       tail_end_ptr = BYTE_POS_ADDR (tail_end);
1900       /* Use the pointer loop only if this clump offers over 20 bytes.  */
1901       if ((limit - pos_byte) * direction > 20)
1902         {
1903           unsigned char *p2;
1904
1905           p_limit = BYTE_POS_ADDR (limit);
1906           p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1907           /* In this loop, pos + cursor - p2 is the surrogate for pos.  */
1908           while (1)             /* use one cursor setting as long as i can */
1909             {
1910               if (direction > 0) /* worth duplicating */
1911                 {
1912                   while (cursor <= p_limit)
1913                     {
1914                       if (BM_tab[*cursor] == 0)
1915                         goto hit;
1916                       cursor += BM_tab[*cursor];
1917                     }
1918                 }
1919               else
1920                 {
1921                   while (cursor >= p_limit)
1922                     {
1923                       if (BM_tab[*cursor] == 0)
1924                         goto hit;
1925                       cursor += BM_tab[*cursor];
1926                     }
1927                 }
1928               /* If you are here, cursor is beyond the end of the
1929                  searched region.  You fail to match within the
1930                  permitted region and would otherwise try a character
1931                  beyond that region.  */
1932               break;
1933
1934             hit:
1935               i = dirlen - direction;
1936               if (! NILP (trt))
1937                 {
1938                   while ((i -= direction) + direction != 0)
1939                     {
1940                       int ch;
1941                       cursor -= direction;
1942                       /* Translate only the last byte of a character.  */
1943                       if (! multibyte
1944                           || ((cursor == tail_end_ptr
1945                                || CHAR_HEAD_P (cursor[1]))
1946                               && (CHAR_HEAD_P (cursor[0])
1947                                   /* Check if this is the last byte of
1948                                      a translatable character.  */
1949                                   || (translate_prev_byte1 == cursor[-1]
1950                                       && (CHAR_HEAD_P (translate_prev_byte1)
1951                                           || (translate_prev_byte2 == cursor[-2]
1952                                               && (CHAR_HEAD_P (translate_prev_byte2)
1953                                                   || (translate_prev_byte3 == cursor[-3]))))))))
1954                         ch = simple_translate[*cursor];
1955                       else
1956                         ch = *cursor;
1957                       if (pat[i] != ch)
1958                         break;
1959                     }
1960                 }
1961               else
1962                 {
1963                   while ((i -= direction) + direction != 0)
1964                     {
1965                       cursor -= direction;
1966                       if (pat[i] != *cursor)
1967                         break;
1968                     }
1969                 }
1970               cursor += dirlen - i - direction; /* fix cursor */
1971               if (i + direction == 0)
1972                 {
1973                   EMACS_INT position, start, end;
1974
1975                   cursor -= direction;
1976
1977                   position = pos_byte + cursor - p2 + ((direction > 0)
1978                                                        ? 1 - len_byte : 0);
1979                   set_search_regs (position, len_byte);
1980
1981                   if (NILP (Vinhibit_changing_match_data))
1982                     {
1983                       start = search_regs.start[0];
1984                       end = search_regs.end[0];
1985                     }
1986                   else
1987                     /* If Vinhibit_changing_match_data is non-nil,
1988                        search_regs will not be changed.  So let's
1989                        compute start and end here.  */
1990                     {
1991                       start = BYTE_TO_CHAR (position);
1992                       end = BYTE_TO_CHAR (position + len_byte);
1993                     }
1994
1995                   if ((n -= direction) != 0)
1996                     cursor += dirlen; /* to resume search */
1997                   else
1998                     return direction > 0 ? end : start;
1999                 }
2000               else
2001                 cursor += stride_for_teases; /* <sigh> we lose -  */
2002             }
2003           pos_byte += cursor - p2;
2004         }
2005       else
2006         /* Now we'll pick up a clump that has to be done the hard
2007            way because it covers a discontinuity.  */
2008         {
2009           limit = ((direction > 0)
2010                    ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
2011                    : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
2012           limit = ((direction > 0)
2013                    ? min (limit + len_byte, lim_byte - 1)
2014                    : max (limit - len_byte, lim_byte));
2015           /* LIMIT is now the last value POS_BYTE can have
2016              and still be valid for a possible match.  */
2017           while (1)
2018             {
2019               /* This loop can be coded for space rather than
2020                  speed because it will usually run only once.
2021                  (the reach is at most len + 21, and typically
2022                  does not exceed len).  */
2023               while ((limit - pos_byte) * direction >= 0)
2024                 {
2025                   int ch = FETCH_BYTE (pos_byte);
2026                   if (BM_tab[ch] == 0)
2027                     goto hit2;
2028                   pos_byte += BM_tab[ch];
2029                 }
2030               break;    /* ran off the end */
2031
2032             hit2:
2033               /* Found what might be a match.  */
2034               i = dirlen - direction;
2035               while ((i -= direction) + direction != 0)
2036                 {
2037                   int ch;
2038                   unsigned char *ptr;
2039                   pos_byte -= direction;
2040                   ptr = BYTE_POS_ADDR (pos_byte);
2041                   /* Translate only the last byte of a character.  */
2042                   if (! multibyte
2043                       || ((ptr == tail_end_ptr
2044                            || CHAR_HEAD_P (ptr[1]))
2045                           && (CHAR_HEAD_P (ptr[0])
2046                               /* Check if this is the last byte of a
2047                                  translatable character.  */
2048                               || (translate_prev_byte1 == ptr[-1]
2049                                   && (CHAR_HEAD_P (translate_prev_byte1)
2050                                       || (translate_prev_byte2 == ptr[-2]
2051                                           && (CHAR_HEAD_P (translate_prev_byte2)
2052                                               || translate_prev_byte3 == ptr[-3])))))))
2053                     ch = simple_translate[*ptr];
2054                   else
2055                     ch = *ptr;
2056                   if (pat[i] != ch)
2057                     break;
2058                 }
2059               /* Above loop has moved POS_BYTE part or all the way
2060                  back to the first pos (last pos if reverse).
2061                  Set it once again at the last (first if reverse) char.  */
2062               pos_byte += dirlen - i - direction;
2063               if (i + direction == 0)
2064                 {
2065                   EMACS_INT position, start, end;
2066                   pos_byte -= direction;
2067
2068                   position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
2069                   set_search_regs (position, len_byte);
2070
2071                   if (NILP (Vinhibit_changing_match_data))
2072                     {
2073                       start = search_regs.start[0];
2074                       end = search_regs.end[0];
2075                     }
2076                   else
2077                     /* If Vinhibit_changing_match_data is non-nil,
2078                        search_regs will not be changed.  So let's
2079                        compute start and end here.  */
2080                     {
2081                       start = BYTE_TO_CHAR (position);
2082                       end = BYTE_TO_CHAR (position + len_byte);
2083                     }
2084
2085                   if ((n -= direction) != 0)
2086                     pos_byte += dirlen; /* to resume search */
2087                   else
2088                     return direction > 0 ? end : start;
2089                 }
2090               else
2091                 pos_byte += stride_for_teases;
2092             }
2093           }
2094       /* We have done one clump.  Can we continue? */
2095       if ((lim_byte - pos_byte) * direction < 0)
2096         return ((0 - n) * direction);   /* Likewise -|N|.  */
2097     }
2098   return BYTE_TO_CHAR (pos_byte);       /* Reached only if N was 0 on entry.  */
2099 }
2100
2101 /* Store the overall match just found in the current buffer, which
2102    spans byte positions BEG_BYTE to BEG_BYTE + NBYTES, into register 0
2103    of search_regs, and reset registers 1 and up to -1.  */
2104
2105 static void
2106 set_search_regs (beg_byte, nbytes)
2107      EMACS_INT beg_byte, nbytes;
2108 {
2109   int reg;
2110
2111   /* Do nothing when changing the match data is inhibited.  */
2112   if (NILP (Vinhibit_changing_match_data))
2113     {
2114       if (search_regs.num_regs == 0)
2115         {
2116           /* No registers allocated yet: make room for two.  */
2117           search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
2118           search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
2119           search_regs.num_regs = 2;
2120         }
2121
2122       /* Register 0 holds the whole match, converted with BYTE_TO_CHAR.  */
2123       search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
2124       search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
2125
2126       /* All remaining registers are reset to -1.  */
2127       for (reg = search_regs.num_regs - 1; reg >= 1; reg--)
2128         search_regs.start[reg] = search_regs.end[reg] = -1;
2129
2130       /* Record the current buffer as the last thing searched.  */
2131       XSETBUFFER (last_thing_searched, current_buffer);
2132     }
2133 }
2134 \f
2135 /* Given STRING, words separated by word delimiters, return a regexp
2136    matching those exact words separated by arbitrary punctuation (or
2137    the empty string if STRING has no words).  If LAX is nonzero, the end
2138    need not match a word boundary unless STRING ends in a delimiter.  */
2139
2140 static Lisp_Object
2141 wordify (string, lax)
2142      Lisp_Object string;
2143      int lax;
2144 {
2145   register unsigned char *o;
2146   register int i, i_byte, len, punct_count = 0, word_count = 0;
2147   int punct_bytes = 0;          /* Bytes taken by non-word characters.  */
2148   Lisp_Object val;
2149   int prev_c = 0;
2150   int adjust, whitespace_at_end;
2151
2152   CHECK_STRING (string);
2153   len = SCHARS (string);
2154   /* First pass: count the words and the non-word characters.  */
2155   for (i = 0, i_byte = 0; i < len; )
2156     {
2157       int c;
2158       int i_byte_orig = i_byte;
2159       FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, i, i_byte);
2160
2161       if (SYNTAX (c) != Sword)
2162         {
2163           punct_count++;
2164           punct_bytes += i_byte - i_byte_orig;
2165           if (i > 0 && SYNTAX (prev_c) == Sword)
2166             word_count++;
2167         }
2168       prev_c = c;
2169     }
2170
2171   if (SYNTAX (prev_c) == Sword)
2172     {
2173       word_count++;
2174       whitespace_at_end = 0;
2175     }
2176   else
2177     whitespace_at_end = 1;
2178
2179   if (!word_count)
2180     return empty_unibyte_string;
2181
2182   /* Net growth: `\W\W*' (5) between words, plus `\b' at one or both ends.  */
2183   adjust = 5 * (word_count - 1) + ((lax && !whitespace_at_end) ? 2 : 4);
2184   if (STRING_MULTIBYTE (string))
2185     /* Non-word characters may be multibyte: subtract their real size.  */
2186     val = make_uninit_multibyte_string (len - punct_count + adjust,
2187                                         SBYTES (string) - punct_bytes + adjust);
2188   else
2189     val = make_uninit_string (len - punct_count + adjust);
2190
2191   o = SDATA (val);
2192   *o++ = '\\';
2193   *o++ = 'b';
2194   prev_c = 0;
2195   /* Second pass: copy each word, putting `\W\W*' between words.  */
2196   for (i = 0, i_byte = 0; i < len; )
2197     {
2198       int c;
2199       int i_byte_orig = i_byte;
2200
2201       FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, i, i_byte);
2202
2203       if (SYNTAX (c) == Sword)
2204         {
2205           bcopy (SDATA (string) + i_byte_orig, o,
2206                  i_byte - i_byte_orig);
2207           o += i_byte - i_byte_orig;
2208         }
2209       else if (i > 0 && SYNTAX (prev_c) == Sword && --word_count)
2210         {
2211           *o++ = '\\';
2212           *o++ = 'W';
2213           *o++ = '\\';
2214           *o++ = 'W';
2215           *o++ = '*';
2216         }
2217
2218       prev_c = c;
2219     }
2220
2221   if (!lax || whitespace_at_end)
2222     {
2223       *o++ = '\\';
2224       *o++ = 'b';
2225     }
2226
2227   return val;
2228 }
2229 \f
2230 DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
2231        "MSearch backward: ",
2232        doc: /* Search backward from point for STRING.
2233 Set point to the beginning of the occurrence found, and return point.
2234 An optional second argument bounds the search; it is a buffer position.
2235 The match found must not extend before that position.
2236 Optional third argument, if t, means if fail just return nil (no error).
2237  If not nil and not t, position at limit of search and return nil.
2238 Optional fourth argument is repeat count--search for successive occurrences.
2239
2240 Search case-sensitivity is determined by the value of the variable
2241 `case-fold-search', which see.
2242
2243 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2244      (string, bound, noerror, count)
2245      Lisp_Object string, bound, noerror, count;
2246 {
2247   return search_command (string, bound, noerror, count, -1, 0, 0); /* -1: backward; 0, 0: presumably regexp and POSIX flags off -- TODO confirm.  */
2248 }
2249
2250 DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
2251        doc: /* Search forward from point for STRING.
2252 Set point to the end of the occurrence found, and return point.
2253 An optional second argument bounds the search; it is a buffer position.
2254 The match found must not extend after that position.  A value of nil is
2255   equivalent to (point-max).
2256 Optional third argument, if t, means if fail just return nil (no error).
2257   If not nil and not t, move to limit of search and return nil.
2258 Optional fourth argument is repeat count--search for successive occurrences.
2259
2260 Search case-sensitivity is determined by the value of the variable
2261 `case-fold-search', which see.
2262
2263 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2264      (string, bound, noerror, count)
2265      Lisp_Object string, bound, noerror, count;
2266 {
2267   return search_command (string, bound, noerror, count, 1, 0, 0); /* 1: forward; 0, 0: presumably regexp and POSIX flags off -- TODO confirm.  */
2268 }
2269
2270 DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
2271        "sWord search backward: ",
2272        doc: /* Search backward from point for STRING, ignoring differences in punctuation.
2273 Set point to the beginning of the occurrence found, and return point.
2274 An optional second argument bounds the search; it is a buffer position.
2275 The match found must not extend before that position.
2276 Optional third argument, if t, means if fail just return nil (no error).
2277   If not nil and not t, move to limit of search and return nil.
2278 Optional fourth argument is repeat count--search for successive occurrences.  */)
2279      (string, bound, noerror, count)
2280      Lisp_Object string, bound, noerror, count;
2281 {
2282   return search_command (wordify (string, 0), bound, noerror, count, -1, 1, 0); /* Strict wordify (LAX = 0) yields a regexp, hence presumably the 1 -- TODO confirm.  */
2283 }
2284
2285 DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
2286        "sWord search: ",
2287        doc: /* Search forward from point for STRING, ignoring differences in punctuation.
2288 Set point to the end of the occurrence found, and return point.
2289 An optional second argument bounds the search; it is a buffer position.
2290 The match found must not extend after that position.
2291 Optional third argument, if t, means if fail just return nil (no error).
2292   If not nil and not t, move to limit of search and return nil.
2293 Optional fourth argument is repeat count--search for successive occurrences.  */)
2294      (string, bound, noerror, count)
2295      Lisp_Object string, bound, noerror, count;
2296 {
2297   return search_command (wordify (string, 0), bound, noerror, count, 1, 1, 0); /* Strict wordify (LAX = 0) yields a regexp, hence presumably the second 1 -- TODO confirm.  */
2298 }
2299
2300 DEFUN ("word-search-backward-lax", Fword_search_backward_lax, Sword_search_backward_lax, 1, 4,
2301        "sWord search backward: ",
2302        doc: /* Search backward from point for STRING, ignoring differences in punctuation.
2303 Set point to the beginning of the occurrence found, and return point.
2304
2305 Unlike `word-search-backward', the end of STRING need not match a word
2306 boundary unless it ends in whitespace.
2307
2308 An optional second argument bounds the search; it is a buffer position.
2309 The match found must not extend before that position.
2310 Optional third argument, if t, means if fail just return nil (no error).
2311   If not nil and not t, move to limit of search and return nil.
2312 Optional fourth argument is repeat count--search for successive occurrences.  */)
2313      (string, bound, noerror, count)
2314      Lisp_Object string, bound, noerror, count;
2315 {
2316   return search_command (wordify (string, 1), bound, noerror, count, -1, 1, 0); /* Lax wordify (LAX = 1) yields a regexp, hence presumably the 1 before the final 0 -- TODO confirm.  */
2317 }
2318
2319 DEFUN ("word-search-forward-lax", Fword_search_forward_lax, Sword_search_forward_lax, 1, 4,
2320        "sWord search: ",
2321        doc: /* Search forward from point for STRING, ignoring differences in punctuation.
2322 Set point to the end of the occurrence found, and return point.
2323
2324 Unlike `word-search-forward', the end of STRING need not match a word
2325 boundary unless it ends in whitespace.
2326
2327 An optional second argument bounds the search; it is a buffer position.
2328 The match found must not extend after that position.
2329 Optional third argument, if t, means if fail just return nil (no error).
2330   If not nil and not t, move to limit of search and return nil.
2331 Optional fourth argument is repeat count--search for successive occurrences.  */)
2332      (string, bound, noerror, count)
2333      Lisp_Object string, bound, noerror, count;
2334 {
2335   return search_command (wordify (string, 1), bound, noerror, count, 1, 1, 0); /* Lax wordify (LAX = 1) yields a regexp, hence presumably the 1 before the final 0 -- TODO confirm.  */
2336 }
2337
2338 DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
2339        "sRE search backward: ",
2340        doc: /* Search backward from point for match for regular expression REGEXP.
2341 Set point to the beginning of the match, and return point.
2342 The match found is the one starting last in the buffer
2343 and yet ending before the origin of the search.
2344 An optional second argument bounds the search; it is a buffer position.
2345 The match found must start at or after that position.
2346 Optional third argument, if t, means if fail just return nil (no error).
2347   If not nil and not t, move to limit of search and return nil.
2348 Optional fourth argument is repeat count--search for successive occurrences.
2349 See also the functions `match-beginning', `match-end', `match-string',
2350 and `replace-match'.  */)
2351      (regexp, bound, noerror, count)
2352      Lisp_Object regexp, bound, noerror, count;
2353 {
2354   return search_command (regexp, bound, noerror, count, -1, 1, 0); /* -1: backward; 1, 0: presumably regexp flag on, POSIX off -- TODO confirm.  */
2355 }
2356
2357 DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
2358        "sRE search: ",
2359        doc: /* Search forward from point for regular expression REGEXP.
2360 Set point to the end of the occurrence found, and return point.
2361 An optional second argument bounds the search; it is a buffer position.
2362 The match found must not extend after that position.
2363 Optional third argument, if t, means if fail just return nil (no error).
2364   If not nil and not t, move to limit of search and return nil.
2365 Optional fourth argument is repeat count--search for successive occurrences.
2366 See also the functions `match-beginning', `match-end', `match-string',
2367 and `replace-match'.  */)
2368      (regexp, bound, noerror, count)
2369      Lisp_Object regexp, bound, noerror, count;
2370 {
2371   return search_command (regexp, bound, noerror, count, 1, 1, 0); /* 1: forward; 1, 0: presumably regexp flag on, POSIX off -- TODO confirm.  */
2372 }
2373
2374 DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
2375        "sPosix search backward: ",
2376        doc: /* Search backward from point for match for regular expression REGEXP.
2377 Find the longest match in accord with Posix regular expression rules.
2378 Set point to the beginning of the match, and return point.
2379 The match found is the one starting last in the buffer
2380 and yet ending before the origin of the search.
2381 An optional second argument bounds the search; it is a buffer position.
2382 The match found must start at or after that position.
2383 Optional third argument, if t, means if fail just return nil (no error).
2384   If not nil and not t, move to limit of search and return nil.
2385 Optional fourth argument is repeat count--search for successive occurrences.
2386 See also the functions `match-beginning', `match-end', `match-string',
2387 and `replace-match'.  */)
2388      (regexp, bound, noerror, count)
2389      Lisp_Object regexp, bound, noerror, count;
2390 {
2391   return search_command (regexp, bound, noerror, count, -1, 1, 1); /* -1: backward; 1, 1: presumably regexp and POSIX flags both on -- TODO confirm.  */
2392 }
2393
2394 DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
2395        "sPosix search: ",
2396        doc: /* Search forward from point for regular expression REGEXP.
2397 Find the longest match in accord with Posix regular expression rules.
2398 Set point to the end of the occurrence found, and return point.
2399 An optional second argument bounds the search; it is a buffer position.
2400 The match found must not extend after that position.
2401 Optional third argument, if t, means if fail just return nil (no error).
2402   If not nil and not t, move to limit of search and return nil.
2403 Optional fourth argument is repeat count--search for successive occurrences.
2404 See also the functions `match-beginning', `match-end', `match-string',
2405 and `replace-match'.  */)
2406      (regexp, bound, noerror, count)
2407      Lisp_Object regexp, bound, noerror, count;
2408 {
2409   return search_command (regexp, bound, noerror, count, 1, 1, 1); /* 1: forward; 1, 1: presumably regexp and POSIX flags both on -- TODO confirm.  */
2410 }
2411 \f
2412 DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
2413        doc: /* Replace text matched by last search with NEWTEXT.
2414 Leave point at the end of the replacement text.
2415
2416 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
2417 Otherwise maybe capitalize the whole text, or maybe just word initials,
2418 based on the replaced text.
2419 If the replaced text has only capital letters
2420 and has at least one multiletter word, convert NEWTEXT to all caps.
2421 Otherwise if all words are capitalized in the replaced text,
2422 capitalize each word in NEWTEXT.
2423
2424 If third arg LITERAL is non-nil, insert NEWTEXT literally.
2425 Otherwise treat `\\' as special:
2426   `\\&' in NEWTEXT means substitute original matched text.
2427   `\\N' means substitute what matched the Nth `\\(...\\)'.
2428        If Nth parens didn't match, substitute nothing.
2429   `\\\\' means insert one `\\'.
2430 Case conversion does not apply to these substitutions.
2431
2432 FIXEDCASE and LITERAL are optional arguments.
2433
2434 The optional fourth argument STRING can be a string to modify.
2435 This is meaningful when the previous match was done against STRING,
2436 using `string-match'.  When used this way, `replace-match'
2437 creates and returns a new string made by copying STRING and replacing
2438 the part of STRING that was matched.
2439
2440 The optional fifth argument SUBEXP specifies a subexpression;
2441 it says to replace just that subexpression with NEWTEXT,
2442 rather than replacing the entire matched text.
2443 This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
2444 `\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
2445 NEWTEXT in place of subexp N.
2446 This is useful only after a regular expression search or match,
2447 since only regular expressions have distinguished subexpressions.  */)
2448      (newtext, fixedcase, literal, string, subexp)
2449      Lisp_Object newtext, fixedcase, literal, string, subexp;
2450 {
2451   enum { nochange, all_caps, cap_initial } case_action;
2452   register int pos, pos_byte;
2453   int some_multiletter_word;
2454   int some_lowercase;
2455   int some_uppercase;
2456   int some_nonuppercase_initial;
2457   register int c, prevc;
2458   int sub;
2459   EMACS_INT opoint, newpoint;
2460
2461   CHECK_STRING (newtext);
2462
2463   if (! NILP (string))
2464     CHECK_STRING (string);
2465
2466   case_action = nochange;       /* We tried an initialization */
2467                                 /* but some C compilers blew it */
2468
2469   if (search_regs.num_regs <= 0)
2470     error ("`replace-match' called before any match found");
2471
2472   if (NILP (subexp))
2473     sub = 0;
2474   else
2475     {
2476       CHECK_NUMBER (subexp);
2477       sub = XINT (subexp);
2478       if (sub < 0 || sub >= search_regs.num_regs)
2479         args_out_of_range (subexp, make_number (search_regs.num_regs));
2480     }
2481
2482   if (NILP (string))
2483     {
2484       if (search_regs.start[sub] < BEGV
2485           || search_regs.start[sub] > search_regs.end[sub]
2486           || search_regs.end[sub] > ZV)
2487         args_out_of_range (make_number (search_regs.start[sub]),
2488                            make_number (search_regs.end[sub]));
2489     }
2490   else
2491     {
2492       if (search_regs.start[sub] < 0
2493           || search_regs.start[sub] > search_regs.end[sub]
2494           || search_regs.end[sub] > SCHARS (string))
2495         args_out_of_range (make_number (search_regs.start[sub]),
2496                            make_number (search_regs.end[sub]));
2497     }
2498
2499   if (NILP (fixedcase))
2500     {
2501       /* Decide how to casify by examining the matched text. */
2502       EMACS_INT last;
2503
2504       pos = search_regs.start[sub];
2505       last = search_regs.end[sub];
2506
2507       if (NILP (string))
2508         pos_byte = CHAR_TO_BYTE (pos);
2509       else
2510         pos_byte = string_char_to_byte (string, pos);
2511
2512       prevc = '\n';
2513       case_action = all_caps;
2514
2515       /* some_multiletter_word is set nonzero if any original word
2516          is more than one letter long. */
2517       some_multiletter_word = 0;
2518       some_lowercase = 0;
2519       some_nonuppercase_initial = 0;
2520       some_uppercase = 0;
2521
2522       while (pos < last)
2523         {
2524           if (NILP (string))
2525             {
2526               c = FETCH_CHAR_AS_MULTIBYTE (pos_byte);
2527               INC_BOTH (pos, pos_byte);
2528             }
2529           else
2530             FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, pos, pos_byte);
2531
2532           if (LOWERCASEP (c))
2533             {
2534               /* Cannot be all caps if any original char is lower case */
2535
2536               some_lowercase = 1;
2537               if (SYNTAX (prevc) != Sword)
2538                 some_nonuppercase_initial = 1;
2539               else
2540                 some_multiletter_word = 1;
2541             }
2542           else if (UPPERCASEP (c))
2543             {
2544               some_uppercase = 1;
2545               if (SYNTAX (prevc) != Sword)
2546                 ;
2547               else
2548                 some_multiletter_word = 1;
2549             }
2550           else
2551             {
2552               /* If the initial is a caseless word constituent,
2553                  treat that like a lowercase initial.  */
2554               if (SYNTAX (prevc) != Sword)
2555                 some_nonuppercase_initial = 1;
2556             }
2557
2558           prevc = c;
2559         }
2560
2561       /* Convert to all caps if the old text is all caps
2562          and has at least one multiletter word.  */
2563       if (! some_lowercase && some_multiletter_word)
2564         case_action = all_caps;
2565       /* Capitalize each word, if the old text has all capitalized words.  */
2566       else if (!some_nonuppercase_initial && some_multiletter_word)
2567         case_action = cap_initial;
2568       else if (!some_nonuppercase_initial && some_uppercase)
2569         /* Should x -> yz, operating on X, give Yz or YZ?
2570            We'll assume the latter.  */
2571         case_action = all_caps;
2572       else
2573         case_action = nochange;
2574     }
2575
2576   /* Do replacement in a string.  */
2577   if (!NILP (string))
2578     {
2579       Lisp_Object before, after;
2580
2581       before = Fsubstring (string, make_number (0),
2582                            make_number (search_regs.start[sub]));
2583       after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
2584
2585       /* Substitute parts of the match into NEWTEXT
2586          if desired.  */
2587       if (NILP (literal))
2588         {
2589           EMACS_INT lastpos = 0;
2590           EMACS_INT lastpos_byte = 0;
2591           /* We build up the substituted string in ACCUM.  */
2592           Lisp_Object accum;
2593           Lisp_Object middle;
2594           int length = SBYTES (newtext);
2595
2596           accum = Qnil;
2597
2598           for (pos_byte = 0, pos = 0; pos_byte < length;)
2599             {
2600               int substart = -1;
2601               int subend = 0;
2602               int delbackslash = 0;
2603
2604               FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2605
2606               if (c == '\\')
2607                 {
2608                   FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2609
2610                   if (c == '&')
2611                     {
2612                       substart = search_regs.start[sub];
2613                       subend = search_regs.end[sub];
2614                     }
2615                   else if (c >= '1' && c <= '9')
2616                     {
2617                       if (search_regs.start[c - '0'] >= 0
2618                           && c <= search_regs.num_regs + '0')
2619                         {
2620                           substart = search_regs.start[c - '0'];
2621                           subend = search_regs.end[c - '0'];
2622                         }
2623                       else
2624                         {
2625                           /* If that subexp did not match,
2626                              replace \\N with nothing.  */
2627                           substart = 0;
2628                           subend = 0;
2629                         }
2630                     }
2631                   else if (c == '\\')
2632                     delbackslash = 1;
2633                   else
2634                     error ("Invalid use of `\\' in replacement text");
2635                 }
2636               if (substart >= 0)
2637                 {
2638                   if (pos - 2 != lastpos)
2639                     middle = substring_both (newtext, lastpos,
2640                                              lastpos_byte,
2641                                              pos - 2, pos_byte - 2);
2642                   else
2643                     middle = Qnil;
2644                   accum = concat3 (accum, middle,
2645                                    Fsubstring (string,
2646                                                make_number (substart),
2647                                                make_number (subend)));
2648                   lastpos = pos;
2649                   lastpos_byte = pos_byte;
2650                 }
2651               else if (delbackslash)
2652                 {
2653                   middle = substring_both (newtext, lastpos,
2654                                            lastpos_byte,
2655                                            pos - 1, pos_byte - 1);
2656
2657                   accum = concat2 (accum, middle);
2658                   lastpos = pos;
2659                   lastpos_byte = pos_byte;
2660                 }
2661             }
2662
2663           if (pos != lastpos)
2664             middle = substring_both (newtext, lastpos,
2665                                      lastpos_byte,
2666                                      pos, pos_byte);
2667           else
2668             middle = Qnil;
2669
2670           newtext = concat2 (accum, middle);
2671         }
2672
2673       /* Do case substitution in NEWTEXT if desired.  */
2674       if (case_action == all_caps)
2675         newtext = Fupcase (newtext);
2676       else if (case_action == cap_initial)
2677         newtext = Fupcase_initials (newtext);
2678
2679       return concat3 (before, newtext, after);
2680     }
2681
2682   /* Record point, then move (quietly) to the start of the match.  */
2683   if (PT >= search_regs.end[sub])
2684     opoint = PT - ZV;
2685   else if (PT > search_regs.start[sub])
2686     opoint = search_regs.end[sub] - ZV;
2687   else
2688     opoint = PT;
2689
2690   /* If we want non-literal replacement,
2691      perform substitution on the replacement string.  */
2692   if (NILP (literal))
2693     {
2694       int length = SBYTES (newtext);
2695       unsigned char *substed;
2696       int substed_alloc_size, substed_len;
2697       int buf_multibyte = !NILP (BUF_ENABLE_MULTIBYTE_CHARACTERS (current_buffer));
2698       int str_multibyte = STRING_MULTIBYTE (newtext);
2699       Lisp_Object rev_tbl;
2700       int really_changed = 0;
2701
2702       rev_tbl = Qnil;
2703
2704       substed_alloc_size = length * 2 + 100;
2705       substed = (unsigned char *) xmalloc (substed_alloc_size + 1);
2706       substed_len = 0;
2707
2708       /* Go thru NEWTEXT, producing the actual text to insert in
2709          SUBSTED while adjusting multibyteness to that of the current
2710          buffer.  */
2711
2712       for (pos_byte = 0, pos = 0; pos_byte < length;)
2713         {
2714           unsigned char str[MAX_MULTIBYTE_LENGTH];
2715           unsigned char *add_stuff = NULL;
2716           int add_len = 0;
2717           int idx = -1;
2718
2719           if (str_multibyte)
2720             {
2721               FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte);
2722               if (!buf_multibyte)
2723                 c = multibyte_char_to_unibyte (c, rev_tbl);
2724             }
2725           else
2726             {
2727               /* Note that we don't have to increment POS.  */
2728               c = SREF (newtext, pos_byte++);
2729               if (buf_multibyte)
2730                 MAKE_CHAR_MULTIBYTE (c);
2731             }
2732
2733           /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2734              or set IDX to a match index, which means put that part
2735              of the buffer text into SUBSTED.  */
2736
2737           if (c == '\\')
2738             {
2739               really_changed = 1;
2740
2741               if (str_multibyte)
2742                 {
2743                   FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext,
2744                                                       pos, pos_byte);
2745                   if (!buf_multibyte && !ASCII_CHAR_P (c))
2746                     c = multibyte_char_to_unibyte (c, rev_tbl);
2747                 }
2748               else
2749                 {
2750                   c = SREF (newtext, pos_byte++);
2751                   if (buf_multibyte)
2752                     MAKE_CHAR_MULTIBYTE (c);
2753                 }
2754
2755               if (c == '&')
2756                 idx = sub;
2757               else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
2758                 {
2759                   if (search_regs.start[c - '0'] >= 1)
2760                     idx = c - '0';
2761                 }
2762               else if (c == '\\')
2763                 add_len = 1, add_stuff = "\\";
2764               else
2765                 {
2766                   xfree (substed);
2767                   error ("Invalid use of `\\' in replacement text");
2768                 }
2769             }
2770           else
2771             {
2772               add_len = CHAR_STRING (c, str);
2773               add_stuff = str;
2774             }
2775
2776           /* If we want to copy part of a previous match,
2777              set up ADD_STUFF and ADD_LEN to point to it.  */
2778           if (idx >= 0)
2779             {
2780               EMACS_INT begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
2781               add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
2782               if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
2783                 move_gap (search_regs.start[idx]);
2784               add_stuff = BYTE_POS_ADDR (begbyte);
2785             }
2786
2787           /* Now the stuff we want to add to SUBSTED
2788              is invariably ADD_LEN bytes starting at ADD_STUFF.  */
2789
2790           /* Make sure SUBSTED is big enough.  */
2791           if (substed_len + add_len >= substed_alloc_size)
2792             {
2793               substed_alloc_size = substed_len + add_len + 500;
2794               substed = (unsigned char *) xrealloc (substed,
2795                                                     substed_alloc_size + 1);
2796             }
2797
2798           /* Now add to the end of SUBSTED.  */
2799           if (add_stuff)
2800             {
2801               bcopy (add_stuff, substed + substed_len, add_len);
2802               substed_len += add_len;
2803             }
2804         }
2805
2806       if (really_changed)
2807         {
2808           if (buf_multibyte)
2809             {
2810               int nchars = multibyte_chars_in_text (substed, substed_len);
2811
2812               newtext = make_multibyte_string (substed, nchars, substed_len);
2813             }
2814           else
2815             newtext = make_unibyte_string (substed, substed_len);
2816         }
2817       xfree (substed);
2818     }
2819
2820   /* Replace the old text with the new in the cleanest possible way.  */
2821   replace_range (search_regs.start[sub], search_regs.end[sub],
2822                  newtext, 1, 0, 1);
2823   newpoint = search_regs.start[sub] + SCHARS (newtext);
2824
2825   if (case_action == all_caps)
2826     Fupcase_region (make_number (search_regs.start[sub]),
2827                     make_number (newpoint));
2828   else if (case_action == cap_initial)
2829     Fupcase_initials_region (make_number (search_regs.start[sub]),
2830                              make_number (newpoint));
2831
2832   /* Adjust search data for this change.  */
2833   {
2834     EMACS_INT oldend = search_regs.end[sub];
2835     EMACS_INT oldstart = search_regs.start[sub];
2836     EMACS_INT change = newpoint - search_regs.end[sub];
2837     int i;
2838
2839     for (i = 0; i < search_regs.num_regs; i++)
2840       {
2841         if (search_regs.start[i] >= oldend)
2842           search_regs.start[i] += change;
2843         else if (search_regs.start[i] > oldstart)
2844           search_regs.start[i] = oldstart;
2845         if (search_regs.end[i] >= oldend)
2846           search_regs.end[i] += change;
2847         else if (search_regs.end[i] > oldstart)
2848           search_regs.end[i] = oldstart;
2849       }
2850   }
2851
2852   /* Put point back where it was in the text.  */
2853   if (opoint <= 0)
2854     TEMP_SET_PT (opoint + ZV);
2855   else
2856     TEMP_SET_PT (opoint);
2857
2858   /* Now move point "officially" to the start of the inserted replacement.  */
2859   move_if_not_intangible (newpoint);
2860
2861   return Qnil;
2862 }
2863 \f
2864 static Lisp_Object
2865 match_limit (num, beginningp)
2866      Lisp_Object num;
2867      int beginningp;
2868 {
2869   register int n;
2870
2871   CHECK_NUMBER (num);
2872   n = XINT (num);
2873   if (n < 0)
2874     args_out_of_range (num, make_number (0));
2875   if (search_regs.num_regs <= 0)
2876     error ("No match data, because no search succeeded");
2877   if (n >= search_regs.num_regs
2878       || search_regs.start[n] < 0)
2879     return Qnil;
2880   return (make_number ((beginningp) ? search_regs.start[n]
2881                                     : search_regs.end[n]));
2882 }
2883
2884 DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
2885        doc: /* Return position of start of text matched by last search.
2886 SUBEXP, a number, specifies which parenthesized expression in the last
2887   regexp.
2888 Value is nil if SUBEXPth pair didn't match, or there were less than
2889   SUBEXP pairs.
2890 Zero means the entire text matched by the whole regexp or whole string.  */)
2891      (subexp)
2892      Lisp_Object subexp;
2893 {
2894   return match_limit (subexp, 1);
2895 }
2896
2897 DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
2898        doc: /* Return position of end of text matched by last search.
2899 SUBEXP, a number, specifies which parenthesized expression in the last
2900   regexp.
2901 Value is nil if SUBEXPth pair didn't match, or there were less than
2902   SUBEXP pairs.
2903 Zero means the entire text matched by the whole regexp or whole string.  */)
2904      (subexp)
2905      Lisp_Object subexp;
2906 {
2907   return match_limit (subexp, 0);
2908 }
2909
2910 DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 3, 0,
2911        doc: /* Return a list containing all info on what the last search matched.
2912 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2913 All the elements are markers or nil (nil if the Nth pair didn't match)
2914 if the last match was on a buffer; integers or nil if a string was matched.
2915 Use `set-match-data' to reinstate the data in this list.
2916
2917 If INTEGERS (the optional first argument) is non-nil, always use
2918 integers \(rather than markers) to represent buffer positions.  In
2919 this case, and if the last match was in a buffer, the buffer will get
2920 stored as one additional element at the end of the list.
2921
2922 If REUSE is a list, reuse it as part of the value.  If REUSE is long
2923 enough to hold all the values, and if INTEGERS is non-nil, no consing
2924 is done.
2925
2926 If optional third arg RESEAT is non-nil, any previous markers on the
2927 REUSE list will be modified to point to nowhere.
2928
2929 Return value is undefined if the last search failed.  */)
2930   (integers, reuse, reseat)
2931      Lisp_Object integers, reuse, reseat;
2932 {
2933   Lisp_Object tail, prev;
2934   Lisp_Object *data;
2935   int i, len;
2936
2937   if (!NILP (reseat))
2938     for (tail = reuse; CONSP (tail); tail = XCDR (tail))
2939       if (MARKERP (XCAR (tail)))
2940         {
2941           unchain_marker (XMARKER (XCAR (tail)));
2942           XSETCAR (tail, Qnil);
2943         }
2944
2945   if (NILP (last_thing_searched))
2946     return Qnil;
2947
2948   prev = Qnil;
2949
2950   data = (Lisp_Object *) alloca ((2 * search_regs.num_regs + 1)
2951                                  * sizeof (Lisp_Object));
2952
2953   len = 0;
2954   for (i = 0; i < search_regs.num_regs; i++)
2955     {
2956       int start = search_regs.start[i];
2957       if (start >= 0)
2958         {
2959           if (EQ (last_thing_searched, Qt)
2960               || ! NILP (integers))
2961             {
2962               XSETFASTINT (data[2 * i], start);
2963               XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
2964             }
2965           else if (BUFFERP (last_thing_searched))
2966             {
2967               data[2 * i] = Fmake_marker ();
2968               Fset_marker (data[2 * i],
2969                            make_number (start),
2970                            last_thing_searched);
2971               data[2 * i + 1] = Fmake_marker ();
2972               Fset_marker (data[2 * i + 1],
2973                            make_number (search_regs.end[i]),
2974                            last_thing_searched);
2975             }
2976           else
2977             /* last_thing_searched must always be Qt, a buffer, or Qnil.  */
2978             abort ();
2979
2980           len = 2 * i + 2;
2981         }
2982       else
2983         data[2 * i] = data[2 * i + 1] = Qnil;
2984     }
2985
2986   if (BUFFERP (last_thing_searched) && !NILP (integers))
2987     {
2988       data[len] = last_thing_searched;
2989       len++;
2990     }
2991
2992   /* If REUSE is not usable, cons up the values and return them.  */
2993   if (! CONSP (reuse))
2994     return Flist (len, data);
2995
2996   /* If REUSE is a list, store as many value elements as will fit
2997      into the elements of REUSE.  */
2998   for (i = 0, tail = reuse; CONSP (tail);
2999        i++, tail = XCDR (tail))
3000     {
3001       if (i < len)
3002         XSETCAR (tail, data[i]);
3003       else
3004         XSETCAR (tail, Qnil);
3005       prev = tail;
3006     }
3007
3008   /* If we couldn't fit all value elements into REUSE,
3009      cons up the rest of them and add them to the end of REUSE.  */
3010   if (i < len)
3011     XSETCDR (prev, Flist (len - i, data + i));
3012
3013   return reuse;
3014 }
3015
3016 /* We used to have an internal use variant of `reseat' described as:
3017
3018       If RESEAT is `evaporate', put the markers back on the free list
3019       immediately.  No other references to the markers must exist in this
3020       case, so it is used only internally on the unwind stack and
3021       save-match-data from Lisp.
3022
3023    But it was ill-conceived: those supposedly-internal markers get exposed via
3024    the undo-list, so freeing them here is unsafe.  */
3025
3026 DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 2, 0,
3027        doc: /* Set internal data on last search match from elements of LIST.
3028 LIST should have been created by calling `match-data' previously.
3029
3030 If optional arg RESEAT is non-nil, make markers on LIST point nowhere.  */)
3031     (list, reseat)
3032      register Lisp_Object list, reseat;
3033 {
3034   register int i;
3035   register Lisp_Object marker;
3036
3037   if (running_asynch_code)
3038     save_search_regs ();
3039
3040   CHECK_LIST (list);
3041
3042   /* Unless we find a marker with a buffer or an explicit buffer
3043      in LIST, assume that this match data came from a string.  */
3044   last_thing_searched = Qt;
3045
3046   /* Allocate registers if they don't already exist.  */
3047   {
3048     int length = XFASTINT (Flength (list)) / 2;
3049
3050     if (length > search_regs.num_regs)
3051       {
3052         if (search_regs.num_regs == 0)
3053           {
3054             search_regs.start
3055               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
3056             search_regs.end
3057               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
3058           }
3059         else
3060           {
3061             search_regs.start
3062               = (regoff_t *) xrealloc (search_regs.start,
3063                                        length * sizeof (regoff_t));
3064             search_regs.end
3065               = (regoff_t *) xrealloc (search_regs.end,
3066                                        length * sizeof (regoff_t));
3067           }
3068
3069         for (i = search_regs.num_regs; i < length; i++)
3070           search_regs.start[i] = -1;
3071
3072         search_regs.num_regs = length;
3073       }
3074
3075     for (i = 0; CONSP (list); i++)
3076       {
3077         marker = XCAR (list);
3078         if (BUFFERP (marker))
3079           {
3080             last_thing_searched = marker;
3081             break;
3082           }
3083         if (i >= length)
3084           break;
3085         if (NILP (marker))
3086           {
3087             search_regs.start[i] = -1;
3088             list = XCDR (list);
3089           }
3090         else
3091           {
3092             EMACS_INT from;
3093             Lisp_Object m;
3094
3095             m = marker;
3096             if (MARKERP (marker))
3097               {
3098                 if (XMARKER (marker)->buffer == 0)
3099                   XSETFASTINT (marker, 0);
3100                 else
3101                   XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
3102               }
3103
3104             CHECK_NUMBER_COERCE_MARKER (marker);
3105             from = XINT (marker);
3106
3107             if (!NILP (reseat) && MARKERP (m))
3108               {
3109                 unchain_marker (XMARKER (m));
3110                 XSETCAR (list, Qnil);
3111               }
3112
3113             if ((list = XCDR (list), !CONSP (list)))
3114               break;
3115
3116             m = marker = XCAR (list);
3117
3118             if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
3119               XSETFASTINT (marker, 0);
3120
3121             CHECK_NUMBER_COERCE_MARKER (marker);
3122             search_regs.start[i] = from;
3123             search_regs.end[i] = XINT (marker);
3124
3125             if (!NILP (reseat) && MARKERP (m))
3126               {
3127                 unchain_marker (XMARKER (m));
3128                 XSETCAR (list, Qnil);
3129               }
3130           }
3131         list = XCDR (list);
3132       }
3133
3134     for (; i < search_regs.num_regs; i++)
3135       search_regs.start[i] = -1;
3136   }
3137
3138   return Qnil;
3139 }
3140
3141 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
3142    if asynchronous code (filter or sentinel) is running. */
3143 static void
3144 save_search_regs ()
3145 {
3146   if (!search_regs_saved)
3147     {
3148       saved_search_regs.num_regs = search_regs.num_regs;
3149       saved_search_regs.start = search_regs.start;
3150       saved_search_regs.end = search_regs.end;
3151       saved_last_thing_searched = last_thing_searched;
3152       last_thing_searched = Qnil;
3153       search_regs.num_regs = 0;
3154       search_regs.start = 0;
3155       search_regs.end = 0;
3156
3157       search_regs_saved = 1;
3158     }
3159 }
3160
3161 /* Called upon exit from filters and sentinels. */
3162 void
3163 restore_search_regs ()
3164 {
3165   if (search_regs_saved)
3166     {
3167       if (search_regs.num_regs > 0)
3168         {
3169           xfree (search_regs.start);
3170           xfree (search_regs.end);
3171         }
3172       search_regs.num_regs = saved_search_regs.num_regs;
3173       search_regs.start = saved_search_regs.start;
3174       search_regs.end = saved_search_regs.end;
3175       last_thing_searched = saved_last_thing_searched;
3176       saved_last_thing_searched = Qnil;
3177       search_regs_saved = 0;
3178     }
3179 }
3180
3181 static Lisp_Object
3182 unwind_set_match_data (list)
3183      Lisp_Object list;
3184 {
3185   /* It is NOT ALWAYS safe to free (evaporate) the markers immediately.  */
3186   return Fset_match_data (list, Qt);
3187 }
3188
3189 /* Called to unwind protect the match data.  */
3190 void
3191 record_unwind_save_match_data ()
3192 {
3193   record_unwind_protect (unwind_set_match_data,
3194                          Fmatch_data (Qnil, Qnil, Qnil));
3195 }
3196
3197 /* Quote a string to inactivate reg-expr chars */
3198
3199 DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
3200        doc: /* Return a regexp string which matches exactly STRING and nothing else.  */)
3201      (string)
3202      Lisp_Object string;
3203 {
3204   register unsigned char *in, *out, *end;
3205   register unsigned char *temp;
3206   int backslashes_added = 0;
3207
3208   CHECK_STRING (string);
3209
3210   temp = (unsigned char *) alloca (SBYTES (string) * 2);
3211
3212   /* Now copy the data into the new string, inserting escapes. */
3213
3214   in = SDATA (string);
3215   end = in + SBYTES (string);
3216   out = temp;
3217
3218   for (; in != end; in++)
3219     {
3220       if (*in == '['
3221           || *in == '*' || *in == '.' || *in == '\\'
3222           || *in == '?' || *in == '+'
3223           || *in == '^' || *in == '$')
3224         *out++ = '\\', backslashes_added++;
3225       *out++ = *in;
3226     }
3227
3228   return make_specified_string (temp,
3229                                 SCHARS (string) + backslashes_added,
3230                                 out - temp,
3231                                 STRING_MULTIBYTE (string));
3232 }
3233 \f
3234 void
3235 syms_of_search ()
3236 {
3237   register int i;
3238
3239   for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
3240     {
3241       searchbufs[i].buf.allocated = 100;
3242       searchbufs[i].buf.buffer = (unsigned char *) xmalloc (100);
3243       searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
3244       searchbufs[i].regexp = Qnil;
3245       searchbufs[i].whitespace_regexp = Qnil;
3246       searchbufs[i].syntax_table = Qnil;
3247       staticpro (&searchbufs[i].regexp);
3248       staticpro (&searchbufs[i].whitespace_regexp);
3249       staticpro (&searchbufs[i].syntax_table);
3250       searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
3251     }
3252   searchbuf_head = &searchbufs[0];
3253
3254   Qsearch_failed = intern_c_string ("search-failed");
3255   staticpro (&Qsearch_failed);
3256   Qinvalid_regexp = intern_c_string ("invalid-regexp");
3257   staticpro (&Qinvalid_regexp);
3258
3259   Fput (Qsearch_failed, Qerror_conditions,
3260         pure_cons (Qsearch_failed, pure_cons (Qerror, Qnil)));
3261   Fput (Qsearch_failed, Qerror_message,
3262         make_pure_c_string ("Search failed"));
3263
3264   Fput (Qinvalid_regexp, Qerror_conditions,
3265         pure_cons (Qinvalid_regexp, pure_cons (Qerror, Qnil)));
3266   Fput (Qinvalid_regexp, Qerror_message,
3267         make_pure_c_string ("Invalid regexp"));
3268
3269   last_thing_searched = Qnil;
3270   saved_last_thing_searched = Qnil;
3271
3272   DEFVAR_LISP ("search-spaces-regexp", &Vsearch_spaces_regexp,
3273       doc: /* Regexp to substitute for bunches of spaces in regexp search.
3274 Some commands use this for user-specified regexps.
3275 Spaces that occur inside character classes or repetition operators
3276 or other such regexp constructs are not replaced with this.
3277 A value of nil (which is the normal value) means treat spaces literally.  */);
3278   Vsearch_spaces_regexp = Qnil;
3279
3280   DEFVAR_LISP ("inhibit-changing-match-data", &Vinhibit_changing_match_data,
3281       doc: /* Internal use only.
3282 If non-nil, the primitive searching and matching functions
3283 such as `looking-at', `string-match', `re-search-forward', etc.,
3284 do not set the match data.  The proper way to use this variable
3285 is to bind it with `let' around a small expression.  */);
3286   Vinhibit_changing_match_data = Qnil;
3287
3288   defsubr (&Slooking_at);
3289   defsubr (&Sposix_looking_at);
3290   defsubr (&Sstring_match);
3291   defsubr (&Sposix_string_match);
3292   defsubr (&Ssearch_forward);
3293   defsubr (&Ssearch_backward);
3294   defsubr (&Sword_search_forward);
3295   defsubr (&Sword_search_backward);
3296   defsubr (&Sword_search_forward_lax);
3297   defsubr (&Sword_search_backward_lax);
3298   defsubr (&Sre_search_forward);
3299   defsubr (&Sre_search_backward);
3300   defsubr (&Sposix_search_forward);
3301   defsubr (&Sposix_search_backward);
3302   defsubr (&Sreplace_match);
3303   defsubr (&Smatch_beginning);
3304   defsubr (&Smatch_end);
3305   defsubr (&Smatch_data);
3306   defsubr (&Sset_match_data);
3307   defsubr (&Sregexp_quote);
3308 }
3309
3310 /* arch-tag: a6059d79-0552-4f14-a2cb-d379a4e3c78f
3311    (do not change this comment) */