src/search.c

   1 /* String search routines for GNU Emacs.
   2    Copyright (C) 1985, 1986, 1987, 1993, 1994, 1997, 1998, 1999, 2001, 2002,
   3                  2003, 2004, 2005, 2006, 2007, 2008
   4                  Free Software Foundation, Inc.
   5
   6 This file is part of GNU Emacs.
   7
   8 GNU Emacs is free software; you can redistribute it and/or modify
   9 it under the terms of the GNU General Public License as published by
  10 the Free Software Foundation; either version 3, or (at your option)
  11 any later version.
  12
  13 GNU Emacs is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GNU Emacs; see the file COPYING.  If not, write to
  20 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
  21 Boston, MA 02110-1301, USA.  */
  22
  23
  24 #include <config.h>
  25 #include "lisp.h"
  26 #include "syntax.h"
  27 #include "category.h"
  28 #include "buffer.h"
  29 #include "charset.h"
  30 #include "region-cache.h"
  31 #include "commands.h"
  32 #include "blockinput.h"
  33 #include "intervals.h"
  34
  35 #include <sys/types.h>
  36 #include "regex.h"
  37
  38 #define REGEXP_CACHE_SIZE 20
  39
  40 /* If the regexp is non-nil, then the buffer contains the compiled form
  41    of that regexp, suitable for searching.  */
  42 struct regexp_cache
  43 {
  44   struct regexp_cache *next;
  45   Lisp_Object regexp, whitespace_regexp;
  46   /* Syntax table for which the regexp applies.  We need this because
  47      of character classes.  If this is t, then the compiled pattern is valid
  48      for any syntax-table.  */
  49   Lisp_Object syntax_table;
  50   struct re_pattern_buffer buf;
  51   char fastmap[0400];
  52   /* Nonzero means regexp was compiled to do full POSIX backtracking.  */
  53   char posix;
  54 };
  55
  56 /* The instances of that struct.  */
  57 struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
  58
  59 /* The head of the linked list; points to the most recently used buffer.  */
  60 struct regexp_cache *searchbuf_head;
  61
  62
  63 /* Every call to re_match, etc., must pass &search_regs as the regs
  64    argument unless you can show it is unnecessary (i.e., if re_match
  65    is certainly going to be called again before region-around-match
  66    can be called).
  67
  68    Since the registers are now dynamically allocated, we need to make
  69    sure not to refer to the Nth register before checking that it has
  70    been allocated by checking search_regs.num_regs.
  71
  72    The regex code keeps track of whether it has allocated the search
  73    buffer using bits in the re_pattern_buffer.  This means that whenever
  74    you compile a new pattern, it completely forgets whether it has
  75    allocated any registers, and will allocate new registers the next
  76    time you call a searching or matching function.  Therefore, we need
  77    to call re_set_registers after compiling a new pattern or after
  78    setting the match registers, so that the regex functions will be
  79    able to free or re-allocate it properly.  */
  80 static struct re_registers search_regs;
  81
  82 /* The buffer in which the last search was performed, or
  83    Qt if the last search was done in a string;
  84    Qnil if no searching has been done yet.  */
  85 static Lisp_Object last_thing_searched;
  86
  87 /* error condition signaled when regexp compile_pattern fails */
  88
  89 Lisp_Object Qinvalid_regexp;
  90
  91 /* Error condition used for failing searches */
  92 Lisp_Object Qsearch_failed;
  93
  94 Lisp_Object Vsearch_spaces_regexp;
  95
  96 static void set_search_regs ();
  97 static void save_search_regs ();
  98 static int simple_search ();
  99 static int boyer_moore ();
 100 static int search_buffer ();
 101 static void matcher_overflow () NO_RETURN;
 102
 103 static void
 104 matcher_overflow ()
 105 {
 106   error ("Stack overflow in regexp matcher");
 107 }
 108
 109 /* Compile a regexp and signal a Lisp error if anything goes wrong.
 110    PATTERN is the pattern to compile.
 111    CP is the place to put the result.
 112    TRANSLATE is a translation table for ignoring case, or nil for none.
 113    REGP is the structure that says where to store the "register"
 114    values that will result from matching this pattern.
 115    If it is 0, we should compile the pattern not to record any
 116    subexpression bounds.
 117    POSIX is nonzero if we want full backtracking (POSIX style)
 118    for this pattern.  0 means backtrack only enough to get a valid match.
 119    MULTIBYTE is nonzero if we want to handle multibyte characters in
 120    PATTERN.  0 means all multibyte characters are recognized just as
 121    sequences of binary data.
 122
 123    The behavior also depends on Vsearch_spaces_regexp.  */
 124
 125 static void
 126 compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte)
 127      struct regexp_cache *cp;
 128      Lisp_Object pattern;
 129      Lisp_Object translate;
 130      struct re_registers *regp;
 131      int posix;
 132      int multibyte;
 133 {
 134   unsigned char *raw_pattern;
 135   int raw_pattern_size;
 136   char *val;
 137   reg_syntax_t old;
 138
 139   /* MULTIBYTE says whether the text to be searched is multibyte.
 140      We must convert PATTERN to match that, or we will not really
 141      find things right.  */
 142
 143   if (multibyte == STRING_MULTIBYTE (pattern))
 144     {
 145       raw_pattern = (unsigned char *) SDATA (pattern);
 146       raw_pattern_size = SBYTES (pattern);
 147     }
 148   else if (multibyte)
 149     {
 150       raw_pattern_size = count_size_as_multibyte (SDATA (pattern),
 151                                                   SCHARS (pattern));
 152       raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
 153       copy_text (SDATA (pattern), raw_pattern,
 154                  SCHARS (pattern), 0, 1);
 155     }
 156   else
 157     {
 158       /* Converting multibyte to single-byte.
 159
 160          ??? Perhaps this conversion should be done in a special way
 161          by subtracting nonascii-insert-offset from each non-ASCII char,
 162          so that only the multibyte chars which really correspond to
 163          the chosen single-byte character set can possibly match.  */
 164       raw_pattern_size = SCHARS (pattern);
 165       raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
 166       copy_text (SDATA (pattern), raw_pattern,
 167                  SBYTES (pattern), 1, 0);
 168     }
 169
 170   cp->regexp = Qnil;
 171   cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
 172   cp->posix = posix;
 173   cp->buf.multibyte = multibyte;
 174   if (STRINGP (Vsearch_spaces_regexp))
 175     cp->whitespace_regexp = Vsearch_spaces_regexp;
 176   else
 177     cp->whitespace_regexp = Qnil;
 178
 179   /* rms: I think BLOCK_INPUT is not needed here any more,
 180      because regex.c defines malloc to call xmalloc.
 181      Using BLOCK_INPUT here means the debugger won't run if an error occurs.
 182      So let's turn it off.  */
 183   /*  BLOCK_INPUT;  */
 184   old = re_set_syntax (RE_SYNTAX_EMACS
 185                        | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
 186
 187   if (STRINGP (Vsearch_spaces_regexp))
 188     re_set_whitespace_regexp (SDATA (Vsearch_spaces_regexp));
 189   else
 190     re_set_whitespace_regexp (NULL);
 191
 192   val = (char *) re_compile_pattern ((char *)raw_pattern,
 193                                      raw_pattern_size, &cp->buf);
 194
 195   /* If the compiled pattern hard codes some of the contents of the
 196      syntax-table, it can only be reused with *this* syntax table.  */
 197   cp->syntax_table = cp->buf.used_syntax ? current_buffer->syntax_table : Qt;
 198
 199   re_set_whitespace_regexp (NULL);
 200
 201   re_set_syntax (old);
 202   /* UNBLOCK_INPUT;  */
 203   if (val)
 204     xsignal1 (Qinvalid_regexp, build_string (val));
 205
 206   cp->regexp = Fcopy_sequence (pattern);
 207 }
 208
 209 /* Shrink each compiled regexp buffer in the cache
 210    to the size actually used right now.
 211    This is called from garbage collection.  */
 212
 213 void
 214 shrink_regexp_cache ()
 215 {
 216   struct regexp_cache *cp;
 217
 218   for (cp = searchbuf_head; cp != 0; cp = cp->next)
 219     {
 220       cp->buf.allocated = cp->buf.used;
 221       cp->buf.buffer
 222         = (unsigned char *) xrealloc (cp->buf.buffer, cp->buf.used);
 223     }
 224 }
 225
 226 /* Clear the regexp cache w.r.t. a particular syntax table,
 227    because it was changed.
 228    There is no danger of memory leak here because re_compile_pattern
 229    automagically manages the memory in each re_pattern_buffer struct,
 230    based on its `allocated' and `buffer' values.  */
 231 void
 232 clear_regexp_cache ()
 233 {
 234   int i;
 235
 236   for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
 237     /* It's tempting to compare with the syntax-table we've actually changd,
 238        but it's not sufficient because char-table inheritance mewans that
 239        modifying one syntax-table can change others at the same time.  */
 240     if (!EQ (searchbufs[i].syntax_table, Qt))
 241       searchbufs[i].regexp = Qnil;
 242 }
 243
 244 /* Compile a regexp if necessary, but first check to see if there's one in
 245    the cache.
 246    PATTERN is the pattern to compile.
 247    TRANSLATE is a translation table for ignoring case, or nil for none.
 248    REGP is the structure that says where to store the "register"
 249    values that will result from matching this pattern.
 250    If it is 0, we should compile the pattern not to record any
 251    subexpression bounds.
 252    POSIX is nonzero if we want full backtracking (POSIX style)
 253    for this pattern.  0 means backtrack only enough to get a valid match.  */
 254
 255 struct re_pattern_buffer *
 256 compile_pattern (pattern, regp, translate, posix, multibyte)
 257      Lisp_Object pattern;
 258      struct re_registers *regp;
 259      Lisp_Object translate;
 260      int posix, multibyte;
 261 {
 262   struct regexp_cache *cp, **cpp;
 263
 264   for (cpp = &searchbuf_head; ; cpp = &cp->next)
 265     {
 266       cp = *cpp;
 267       /* Entries are initialized to nil, and may be set to nil by
 268          compile_pattern_1 if the pattern isn't valid.  Don't apply
 269          string accessors in those cases.  However, compile_pattern_1
 270          is only applied to the cache entry we pick here to reuse.  So
 271          nil should never appear before a non-nil entry.  */
 272       if (NILP (cp->regexp))
 273         goto compile_it;
 274       if (SCHARS (cp->regexp) == SCHARS (pattern)
 275           && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
 276           && !NILP (Fstring_equal (cp->regexp, pattern))
 277           && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
 278           && cp->posix == posix
 279           && cp->buf.multibyte == multibyte
 280           && (EQ (cp->syntax_table, Qt)
 281               || EQ (cp->syntax_table, current_buffer->syntax_table))
 282           && !NILP (Fequal (cp->whitespace_regexp, Vsearch_spaces_regexp)))
 283         break;
 284
 285       /* If we're at the end of the cache, compile into the nil cell
 286          we found, or the last (least recently used) cell with a
 287          string value.  */
 288       if (cp->next == 0)
 289         {
 290         compile_it:
 291           compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte);
 292           break;
 293         }
 294     }
 295
 296   /* When we get here, cp (aka *cpp) contains the compiled pattern,
 297      either because we found it in the cache or because we just compiled it.
 298      Move it to the front of the queue to mark it as most recently used.  */
 299   *cpp = cp->next;
 300   cp->next = searchbuf_head;
 301   searchbuf_head = cp;
 302
 303   /* Advise the searching functions about the space we have allocated
 304      for register data.  */
 305   if (regp)
 306     re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
 307
 308   return &cp->buf;
 309 }
 310
 311 \f
 312 static Lisp_Object
 313 looking_at_1 (string, posix)
 314      Lisp_Object string;
 315      int posix;
 316 {
 317   Lisp_Object val;
 318   unsigned char *p1, *p2;
 319   int s1, s2;
 320   register int i;
 321   struct re_pattern_buffer *bufp;
 322
 323   if (running_asynch_code)
 324     save_search_regs ();
 325
 326   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
 327   XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
 328     = current_buffer->case_eqv_table;
 329
 330   CHECK_STRING (string);
 331   bufp = compile_pattern (string, &search_regs,
 332                           (!NILP (current_buffer->case_fold_search)
 333                            ? current_buffer->case_canon_table : Qnil),
 334                           posix,
 335                           !NILP (current_buffer->enable_multibyte_characters));
 336
 337   immediate_quit = 1;
 338   QUIT;                 /* Do a pending quit right away, to avoid paradoxical behavior */
 339
 340   /* Get pointers and sizes of the two strings
 341      that make up the visible portion of the buffer. */
 342
 343   p1 = BEGV_ADDR;
 344   s1 = GPT_BYTE - BEGV_BYTE;
 345   p2 = GAP_END_ADDR;
 346   s2 = ZV_BYTE - GPT_BYTE;
 347   if (s1 < 0)
 348     {
 349       p2 = p1;
 350       s2 = ZV_BYTE - BEGV_BYTE;
 351       s1 = 0;
 352     }
 353   if (s2 < 0)
 354     {
 355       s1 = ZV_BYTE - BEGV_BYTE;
 356       s2 = 0;
 357     }
 358
 359   re_match_object = Qnil;
 360
 361   i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
 362                   PT_BYTE - BEGV_BYTE, &search_regs,
 363                   ZV_BYTE - BEGV_BYTE);
 364   immediate_quit = 0;
 365
 366   if (i == -2)
 367     matcher_overflow ();
 368
 369   val = (0 <= i ? Qt : Qnil);
 370   if (i >= 0)
 371     for (i = 0; i < search_regs.num_regs; i++)
 372       if (search_regs.start[i] >= 0)
 373         {
 374           search_regs.start[i]
 375             = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
 376           search_regs.end[i]
 377             = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
 378         }
 379   XSETBUFFER (last_thing_searched, current_buffer);
 380   return val;
 381 }
 382
 383 DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
 384        doc: /* Return t if text after point matches regular expression REGEXP.
 385 This function modifies the match data that `match-beginning',
 386 `match-end' and `match-data' access; save and restore the match
 387 data if you want to preserve them.  */)
 388      (regexp)
 389      Lisp_Object regexp;
 390 {
 391   return looking_at_1 (regexp, 0);
 392 }
 393
 394 DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
 395        doc: /* Return t if text after point matches regular expression REGEXP.
 396 Find the longest match, in accord with Posix regular expression rules.
 397 This function modifies the match data that `match-beginning',
 398 `match-end' and `match-data' access; save and restore the match
 399 data if you want to preserve them.  */)
 400      (regexp)
 401      Lisp_Object regexp;
 402 {
 403   return looking_at_1 (regexp, 1);
 404 }
 405 \f
 406 static Lisp_Object
 407 string_match_1 (regexp, string, start, posix)
 408      Lisp_Object regexp, string, start;
 409      int posix;
 410 {
 411   int val;
 412   struct re_pattern_buffer *bufp;
 413   int pos, pos_byte;
 414   int i;
 415
 416   if (running_asynch_code)
 417     save_search_regs ();
 418
 419   CHECK_STRING (regexp);
 420   CHECK_STRING (string);
 421
 422   if (NILP (start))
 423     pos = 0, pos_byte = 0;
 424   else
 425     {
 426       int len = SCHARS (string);
 427
 428       CHECK_NUMBER (start);
 429       pos = XINT (start);
 430       if (pos < 0 && -pos <= len)
 431         pos = len + pos;
 432       else if (0 > pos || pos > len)
 433         args_out_of_range (string, start);
 434       pos_byte = string_char_to_byte (string, pos);
 435     }
 436
 437   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
 438   XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
 439     = current_buffer->case_eqv_table;
 440
 441   bufp = compile_pattern (regexp, &search_regs,
 442                           (!NILP (current_buffer->case_fold_search)
 443                            ? current_buffer->case_canon_table : Qnil),
 444                           posix,
 445                           STRING_MULTIBYTE (string));
 446   immediate_quit = 1;
 447   re_match_object = string;
 448
 449   val = re_search (bufp, (char *) SDATA (string),
 450                    SBYTES (string), pos_byte,
 451                    SBYTES (string) - pos_byte,
 452                    &search_regs);
 453   immediate_quit = 0;
 454   last_thing_searched = Qt;
 455   if (val == -2)
 456     matcher_overflow ();
 457   if (val < 0) return Qnil;
 458
 459   for (i = 0; i < search_regs.num_regs; i++)
 460     if (search_regs.start[i] >= 0)
 461       {
 462         search_regs.start[i]
 463           = string_byte_to_char (string, search_regs.start[i]);
 464         search_regs.end[i]
 465           = string_byte_to_char (string, search_regs.end[i]);
 466       }
 467
 468   return make_number (string_byte_to_char (string, val));
 469 }
 470
 471 DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
 472        doc: /* Return index of start of first match for REGEXP in STRING, or nil.
 473 Matching ignores case if `case-fold-search' is non-nil.
 474 If third arg START is non-nil, start search at that index in STRING.
 475 For index of first char beyond the match, do (match-end 0).
 476 `match-end' and `match-beginning' also give indices of substrings
 477 matched by parenthesis constructs in the pattern.
 478
 479 You can use the function `match-string' to extract the substrings
 480 matched by the parenthesis constructions in REGEXP. */)
 481      (regexp, string, start)
 482      Lisp_Object regexp, string, start;
 483 {
 484   return string_match_1 (regexp, string, start, 0);
 485 }
 486
 487 DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
 488        doc: /* Return index of start of first match for REGEXP in STRING, or nil.
 489 Find the longest match, in accord with Posix regular expression rules.
 490 Case is ignored if `case-fold-search' is non-nil in the current buffer.
 491 If third arg START is non-nil, start search at that index in STRING.
 492 For index of first char beyond the match, do (match-end 0).
 493 `match-end' and `match-beginning' also give indices of substrings
 494 matched by parenthesis constructs in the pattern.  */)
 495      (regexp, string, start)
 496      Lisp_Object regexp, string, start;
 497 {
 498   return string_match_1 (regexp, string, start, 1);
 499 }
 500
 501 /* Match REGEXP against STRING, searching all of STRING,
 502    and return the index of the match, or negative on failure.
 503    This does not clobber the match data.  */
 504
 505 int
 506 fast_string_match (regexp, string)
 507      Lisp_Object regexp, string;
 508 {
 509   int val;
 510   struct re_pattern_buffer *bufp;
 511
 512   bufp = compile_pattern (regexp, 0, Qnil,
 513                           0, STRING_MULTIBYTE (string));
 514   immediate_quit = 1;
 515   re_match_object = string;
 516
 517   val = re_search (bufp, (char *) SDATA (string),
 518                    SBYTES (string), 0,
 519                    SBYTES (string), 0);
 520   immediate_quit = 0;
 521   return val;
 522 }
 523
 524 /* Match REGEXP against STRING, searching all of STRING ignoring case,
 525    and return the index of the match, or negative on failure.
 526    This does not clobber the match data.
 527    We assume that STRING contains single-byte characters.  */
 528
 529 extern Lisp_Object Vascii_downcase_table;
 530
 531 int
 532 fast_c_string_match_ignore_case (regexp, string)
 533      Lisp_Object regexp;
 534      const char *string;
 535 {
 536   int val;
 537   struct re_pattern_buffer *bufp;
 538   int len = strlen (string);
 539
 540   regexp = string_make_unibyte (regexp);
 541   re_match_object = Qt;
 542   bufp = compile_pattern (regexp, 0,
 543                           Vascii_canon_table, 0,
 544                           0);
 545   immediate_quit = 1;
 546   val = re_search (bufp, string, len, 0, len, 0);
 547   immediate_quit = 0;
 548   return val;
 549 }
 550
 551 /* Like fast_string_match but ignore case.  */
 552
 553 int
 554 fast_string_match_ignore_case (regexp, string)
 555      Lisp_Object regexp, string;
 556 {
 557   int val;
 558   struct re_pattern_buffer *bufp;
 559
 560   bufp = compile_pattern (regexp, 0, Vascii_canon_table,
 561                           0, STRING_MULTIBYTE (string));
 562   immediate_quit = 1;
 563   re_match_object = string;
 564
 565   val = re_search (bufp, (char *) SDATA (string),
 566                    SBYTES (string), 0,
 567                    SBYTES (string), 0);
 568   immediate_quit = 0;
 569   return val;
 570 }
 571 \f
 572 /* The newline cache: remembering which sections of text have no newlines.  */
 573
 574 /* If the user has requested newline caching, make sure it's on.
 575    Otherwise, make sure it's off.
 576    This is our cheezy way of associating an action with the change of
 577    state of a buffer-local variable.  */
 578 static void
 579 newline_cache_on_off (buf)
 580      struct buffer *buf;
 581 {
 582   if (NILP (buf->cache_long_line_scans))
 583     {
 584       /* It should be off.  */
 585       if (buf->newline_cache)
 586         {
 587           free_region_cache (buf->newline_cache);
 588           buf->newline_cache = 0;
 589         }
 590     }
 591   else
 592     {
 593       /* It should be on.  */
 594       if (buf->newline_cache == 0)
 595         buf->newline_cache = new_region_cache ();
 596     }
 597 }
 598
 599 \f
 600 /* Search for COUNT instances of the character TARGET between START and END.
 601
 602    If COUNT is positive, search forwards; END must be >= START.
 603    If COUNT is negative, search backwards for the -COUNTth instance;
 604       END must be <= START.
 605    If COUNT is zero, do anything you please; run rogue, for all I care.
 606
 607    If END is zero, use BEGV or ZV instead, as appropriate for the
 608    direction indicated by COUNT.
 609
 610    If we find COUNT instances, set *SHORTAGE to zero, and return the
 611    position past the COUNTth match.  Note that for reverse motion
 612    this is not the same as the usual convention for Emacs motion commands.
 613
 614    If we don't find COUNT instances before reaching END, set *SHORTAGE
 615    to the number of TARGETs left unfound, and return END.
 616
 617    If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
 618    except when inside redisplay.  */
 619
 620 int
 621 scan_buffer (target, start, end, count, shortage, allow_quit)
 622      register int target;
 623      int start, end;
 624      int count;
 625      int *shortage;
 626      int allow_quit;
 627 {
 628   struct region_cache *newline_cache;
 629   int direction;
 630
 631   if (count > 0)
 632     {
 633       direction = 1;
 634       if (! end) end = ZV;
 635     }
 636   else
 637     {
 638       direction = -1;
 639       if (! end) end = BEGV;
 640     }
 641
 642   newline_cache_on_off (current_buffer);
 643   newline_cache = current_buffer->newline_cache;
 644
 645   if (shortage != 0)
 646     *shortage = 0;
 647
 648   immediate_quit = allow_quit;
 649
 650   if (count > 0)
 651     while (start != end)
 652       {
 653         /* Our innermost scanning loop is very simple; it doesn't know
 654            about gaps, buffer ends, or the newline cache.  ceiling is
 655            the position of the last character before the next such
 656            obstacle --- the last character the dumb search loop should
 657            examine.  */
 658         int ceiling_byte = CHAR_TO_BYTE (end) - 1;
 659         int start_byte = CHAR_TO_BYTE (start);
 660         int tem;
 661
 662         /* If we're looking for a newline, consult the newline cache
 663            to see where we can avoid some scanning.  */
 664         if (target == '\n' && newline_cache)
 665           {
 666             int next_change;
 667             immediate_quit = 0;
 668             while (region_cache_forward
 669                    (current_buffer, newline_cache, start_byte, &next_change))
 670               start_byte = next_change;
 671             immediate_quit = allow_quit;
 672
 673             /* START should never be after END.  */
 674             if (start_byte > ceiling_byte)
 675               start_byte = ceiling_byte;
 676
 677             /* Now the text after start is an unknown region, and
 678                next_change is the position of the next known region. */
 679             ceiling_byte = min (next_change - 1, ceiling_byte);
 680           }
 681
 682         /* The dumb loop can only scan text stored in contiguous
 683            bytes. BUFFER_CEILING_OF returns the last character
 684            position that is contiguous, so the ceiling is the
 685            position after that.  */
 686         tem = BUFFER_CEILING_OF (start_byte);
 687         ceiling_byte = min (tem, ceiling_byte);
 688
 689         {
 690           /* The termination address of the dumb loop.  */
 691           register unsigned char *ceiling_addr
 692             = BYTE_POS_ADDR (ceiling_byte) + 1;
 693           register unsigned char *cursor
 694             = BYTE_POS_ADDR (start_byte);
 695           unsigned char *base = cursor;
 696
 697           while (cursor < ceiling_addr)
 698             {
 699               unsigned char *scan_start = cursor;
 700
 701               /* The dumb loop.  */
 702               while (*cursor != target && ++cursor < ceiling_addr)
 703                 ;
 704
 705               /* If we're looking for newlines, cache the fact that
 706                  the region from start to cursor is free of them. */
 707               if (target == '\n' && newline_cache)
 708                 know_region_cache (current_buffer, newline_cache,
 709                                    start_byte + scan_start - base,
 710                                    start_byte + cursor - base);
 711
 712               /* Did we find the target character?  */
 713               if (cursor < ceiling_addr)
 714                 {
 715                   if (--count == 0)
 716                     {
 717                       immediate_quit = 0;
 718                       return BYTE_TO_CHAR (start_byte + cursor - base + 1);
 719                     }
 720                   cursor++;
 721                 }
 722             }
 723
 724           start = BYTE_TO_CHAR (start_byte + cursor - base);
 725         }
 726       }
 727   else
 728     while (start > end)
 729       {
 730         /* The last character to check before the next obstacle.  */
 731         int ceiling_byte = CHAR_TO_BYTE (end);
 732         int start_byte = CHAR_TO_BYTE (start);
 733         int tem;
 734
 735         /* Consult the newline cache, if appropriate.  */
 736         if (target == '\n' && newline_cache)
 737           {
 738             int next_change;
 739             immediate_quit = 0;
 740             while (region_cache_backward
 741                    (current_buffer, newline_cache, start_byte, &next_change))
 742               start_byte = next_change;
 743             immediate_quit = allow_quit;
 744
 745             /* Start should never be at or before end.  */
 746             if (start_byte <= ceiling_byte)
 747               start_byte = ceiling_byte + 1;
 748
 749             /* Now the text before start is an unknown region, and
 750                next_change is the position of the next known region. */
 751             ceiling_byte = max (next_change, ceiling_byte);
 752           }
 753
 754         /* Stop scanning before the gap.  */
 755         tem = BUFFER_FLOOR_OF (start_byte - 1);
 756         ceiling_byte = max (tem, ceiling_byte);
 757
 758         {
 759           /* The termination address of the dumb loop.  */
 760           register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
 761           register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
 762           unsigned char *base = cursor;
 763
 764           while (cursor >= ceiling_addr)
 765             {
 766               unsigned char *scan_start = cursor;
 767
 768               while (*cursor != target && --cursor >= ceiling_addr)
 769                 ;
 770
 771               /* If we're looking for newlines, cache the fact that
 772                  the region from after the cursor to start is free of them.  */
 773               if (target == '\n' && newline_cache)
 774                 know_region_cache (current_buffer, newline_cache,
 775                                    start_byte + cursor - base,
 776                                    start_byte + scan_start - base);
 777
 778               /* Did we find the target character?  */
 779               if (cursor >= ceiling_addr)
 780                 {
 781                   if (++count >= 0)
 782                     {
 783                       immediate_quit = 0;
 784                       return BYTE_TO_CHAR (start_byte + cursor - base);
 785                     }
 786                   cursor--;
 787                 }
 788             }
 789
 790           start = BYTE_TO_CHAR (start_byte + cursor - base);
 791         }
 792       }
 793
 794   immediate_quit = 0;
 795   if (shortage != 0)
 796     *shortage = count * direction;
 797   return start;
 798 }
 799 \f
 800 /* Search for COUNT instances of a line boundary, which means either a
 801    newline or (if selective display enabled) a carriage return.
 802    Start at START.  If COUNT is negative, search backwards.
 803
 804    We report the resulting position by calling TEMP_SET_PT_BOTH.
 805
 806    If we find COUNT instances. we position after (always after,
 807    even if scanning backwards) the COUNTth match, and return 0.
 808
 809    If we don't find COUNT instances before reaching the end of the
 810    buffer (or the beginning, if scanning backwards), we return
 811    the number of line boundaries left unfound, and position at
 812    the limit we bumped up against.
 813
 814    If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
 815    except in special cases.  */
 816
 817 int
 818 scan_newline (start, start_byte, limit, limit_byte, count, allow_quit)
 819      int start, start_byte;
 820      int limit, limit_byte;
 821      register int count;
 822      int allow_quit;
 823 {
 824   int direction = ((count > 0) ? 1 : -1);
 825
 826   register unsigned char *cursor;
 827   unsigned char *base;
 828
 829   register int ceiling;
 830   register unsigned char *ceiling_addr;
 831
 832   int old_immediate_quit = immediate_quit;
 833
 834   /* The code that follows is like scan_buffer
 835      but checks for either newline or carriage return.  */
 836
 837   if (allow_quit)
 838     immediate_quit++;
 839
 840   start_byte = CHAR_TO_BYTE (start);
 841
 842   if (count > 0)
 843     {
 844       while (start_byte < limit_byte)
 845         {
 846           ceiling =  BUFFER_CEILING_OF (start_byte);
 847           ceiling = min (limit_byte - 1, ceiling);
 848           ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
 849           base = (cursor = BYTE_POS_ADDR (start_byte));
 850           while (1)
 851             {
 852               while (*cursor != '\n' && ++cursor != ceiling_addr)
 853                 ;
 854
 855               if (cursor != ceiling_addr)
 856                 {
 857                   if (--count == 0)
 858                     {
 859                       immediate_quit = old_immediate_quit;
 860                       start_byte = start_byte + cursor - base + 1;
 861                       start = BYTE_TO_CHAR (start_byte);
 862                       TEMP_SET_PT_BOTH (start, start_byte);
 863                       return 0;
 864                     }
 865                   else
 866                     if (++cursor == ceiling_addr)
 867                       break;
 868                 }
 869               else
 870                 break;
 871             }
 872           start_byte += cursor - base;
 873         }
 874     }
 875   else
 876     {
 877       while (start_byte > limit_byte)
 878         {
 879           ceiling = BUFFER_FLOOR_OF (start_byte - 1);
 880           ceiling = max (limit_byte, ceiling);
 881           ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
 882           base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
 883           while (1)
 884             {
 885               while (--cursor != ceiling_addr && *cursor != '\n')
 886                 ;
 887
 888               if (cursor != ceiling_addr)
 889                 {
 890                   if (++count == 0)
 891                     {
 892                       immediate_quit = old_immediate_quit;
 893                       /* Return the position AFTER the match we found.  */
 894                       start_byte = start_byte + cursor - base + 1;
 895                       start = BYTE_TO_CHAR (start_byte);
 896                       TEMP_SET_PT_BOTH (start, start_byte);
 897                       return 0;
 898                     }
 899                 }
 900               else
 901                 break;
 902             }
 903           /* Here we add 1 to compensate for the last decrement
 904              of CURSOR, which took it past the valid range.  */
 905           start_byte += cursor - base + 1;
 906         }
 907     }
 908
 909   TEMP_SET_PT_BOTH (limit, limit_byte);
 910   immediate_quit = old_immediate_quit;
 911
 912   return count * direction;
 913 }
 914
 915 int
 916 find_next_newline_no_quit (from, cnt)
 917      register int from, cnt;
 918 {
 919   return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0);
 920 }
 921
 922 /* Like find_next_newline, but returns position before the newline,
 923    not after, and only search up to TO.  This isn't just
 924    find_next_newline (...)-1, because you might hit TO.  */
 925
 926 int
 927 find_before_next_newline (from, to, cnt)
 928      int from, to, cnt;
 929 {
 930   int shortage;
 931   int pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);
 932
 933   if (shortage == 0)
 934     pos--;
 935
 936   return pos;
 937 }
 938 \f
 939 /* Subroutines of Lisp buffer search functions. */
 940
 941 static Lisp_Object
 942 search_command (string, bound, noerror, count, direction, RE, posix)
 943      Lisp_Object string, bound, noerror, count;
 944      int direction;
 945      int RE;
 946      int posix;
 947 {
 948   register int np;
 949   int lim, lim_byte;
 950   int n = direction;
 951
 952   if (!NILP (count))
 953     {
 954       CHECK_NUMBER (count);
 955       n *= XINT (count);
 956     }
 957
 958   CHECK_STRING (string);
 959   if (NILP (bound))
 960     {
 961       if (n > 0)
 962         lim = ZV, lim_byte = ZV_BYTE;
 963       else
 964         lim = BEGV, lim_byte = BEGV_BYTE;
 965     }
 966   else
 967     {
 968       CHECK_NUMBER_COERCE_MARKER (bound);
 969       lim = XINT (bound);
 970       if (n > 0 ? lim < PT : lim > PT)
 971         error ("Invalid search bound (wrong side of point)");
 972       if (lim > ZV)
 973         lim = ZV, lim_byte = ZV_BYTE;
 974       else if (lim < BEGV)
 975         lim = BEGV, lim_byte = BEGV_BYTE;
 976       else
 977         lim_byte = CHAR_TO_BYTE (lim);
 978     }
 979
 980   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
 981   XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
 982     = current_buffer->case_eqv_table;
 983
 984   np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
 985                       (!NILP (current_buffer->case_fold_search)
 986                        ? current_buffer->case_canon_table
 987                        : Qnil),
 988                       (!NILP (current_buffer->case_fold_search)
 989                        ? current_buffer->case_eqv_table
 990                        : Qnil),
 991                       posix);
 992   if (np <= 0)
 993     {
 994       if (NILP (noerror))
 995         xsignal1 (Qsearch_failed, string);
 996
 997       if (!EQ (noerror, Qt))
 998         {
 999           if (lim < BEGV || lim > ZV)
1000             abort ();
1001           SET_PT_BOTH (lim, lim_byte);
1002           return Qnil;
1003 #if 0 /* This would be clean, but maybe programs depend on
1004          a value of nil here.  */
1005           np = lim;
1006 #endif
1007         }
1008       else
1009         return Qnil;
1010     }
1011
1012   if (np < BEGV || np > ZV)
1013     abort ();
1014
1015   SET_PT (np);
1016
1017   return make_number (np);
1018 }
1019 \f
1020 /* Return 1 if REGEXP it matches just one constant string.  */
1021
1022 static int
1023 trivial_regexp_p (regexp)
1024      Lisp_Object regexp;
1025 {
1026   int len = SBYTES (regexp);
1027   unsigned char *s = SDATA (regexp);
1028   while (--len >= 0)
1029     {
1030       switch (*s++)
1031         {
1032         case '.': case '*': case '+': case '?': case '[': case '^': case '$':
1033           return 0;
1034         case '\\':
1035           if (--len < 0)
1036             return 0;
1037           switch (*s++)
1038             {
1039             case '|': case '(': case ')': case '`': case '\'': case 'b':
1040             case 'B': case '<': case '>': case 'w': case 'W': case 's':
1041             case 'S': case '=': case '{': case '}': case '_':
1042             case 'c': case 'C': /* for categoryspec and notcategoryspec */
1043             case '1': case '2': case '3': case '4': case '5':
1044             case '6': case '7': case '8': case '9':
1045               return 0;
1046             }
1047         }
1048     }
1049   return 1;
1050 }
1051
1052 /* Search for the n'th occurrence of STRING in the current buffer,
1053    starting at position POS and stopping at position LIM,
1054    treating STRING as a literal string if RE is false or as
1055    a regular expression if RE is true.
1056
1057    If N is positive, searching is forward and LIM must be greater than POS.
1058    If N is negative, searching is backward and LIM must be less than POS.
1059
1060    Returns -x if x occurrences remain to be found (x > 0),
1061    or else the position at the beginning of the Nth occurrence
1062    (if searching backward) or the end (if searching forward).
1063
1064    POSIX is nonzero if we want full backtracking (POSIX style)
1065    for this pattern.  0 means backtrack only enough to get a valid match.  */
1066
1067 #define TRANSLATE(out, trt, d)                  \
1068 do                                              \
1069   {                                             \
1070     if (! NILP (trt))                           \
1071       {                                         \
1072         Lisp_Object temp;                       \
1073         temp = Faref (trt, make_number (d));    \
1074         if (INTEGERP (temp))                    \
1075           out = XINT (temp);                    \
1076         else                                    \
1077           out = d;                              \
1078       }                                         \
1079     else                                        \
1080       out = d;                                  \
1081   }                                             \
1082 while (0)
1083
1084 static int
1085 search_buffer (string, pos, pos_byte, lim, lim_byte, n,
1086                RE, trt, inverse_trt, posix)
1087      Lisp_Object string;
1088      int pos;
1089      int pos_byte;
1090      int lim;
1091      int lim_byte;
1092      int n;
1093      int RE;
1094      Lisp_Object trt;
1095      Lisp_Object inverse_trt;
1096      int posix;
1097 {
1098   int len = SCHARS (string);
1099   int len_byte = SBYTES (string);
1100   register int i;
1101
1102   if (running_asynch_code)
1103     save_search_regs ();
1104
1105   /* Searching 0 times means don't move.  */
1106   /* Null string is found at starting position.  */
1107   if (len == 0 || n == 0)
1108     {
1109       set_search_regs (pos_byte, 0);
1110       return pos;
1111     }
1112
1113   if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp)))
1114     {
1115       unsigned char *p1, *p2;
1116       int s1, s2;
1117       struct re_pattern_buffer *bufp;
1118
1119       bufp = compile_pattern (string, &search_regs, trt, posix,
1120                               !NILP (current_buffer->enable_multibyte_characters));
1121
1122       immediate_quit = 1;       /* Quit immediately if user types ^G,
1123                                    because letting this function finish
1124                                    can take too long. */
1125       QUIT;                     /* Do a pending quit right away,
1126                                    to avoid paradoxical behavior */
1127       /* Get pointers and sizes of the two strings
1128          that make up the visible portion of the buffer. */
1129
1130       p1 = BEGV_ADDR;
1131       s1 = GPT_BYTE - BEGV_BYTE;
1132       p2 = GAP_END_ADDR;
1133       s2 = ZV_BYTE - GPT_BYTE;
1134       if (s1 < 0)
1135         {
1136           p2 = p1;
1137           s2 = ZV_BYTE - BEGV_BYTE;
1138           s1 = 0;
1139         }
1140       if (s2 < 0)
1141         {
1142           s1 = ZV_BYTE - BEGV_BYTE;
1143           s2 = 0;
1144         }
1145       re_match_object = Qnil;
1146
1147       while (n < 0)
1148         {
1149           int val;
1150           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1151                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1152                              &search_regs,
1153                              /* Don't allow match past current point */
1154                              pos_byte - BEGV_BYTE);
1155           if (val == -2)
1156             {
1157               matcher_overflow ();
1158             }
1159           if (val >= 0)
1160             {
1161               pos_byte = search_regs.start[0] + BEGV_BYTE;
1162               for (i = 0; i < search_regs.num_regs; i++)
1163                 if (search_regs.start[i] >= 0)
1164                   {
1165                     search_regs.start[i]
1166                       = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1167                     search_regs.end[i]
1168                       = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1169                   }
1170               XSETBUFFER (last_thing_searched, current_buffer);
1171               /* Set pos to the new position. */
1172               pos = search_regs.start[0];
1173             }
1174           else
1175             {
1176               immediate_quit = 0;
1177               return (n);
1178             }
1179           n++;
1180         }
1181       while (n > 0)
1182         {
1183           int val;
1184           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1185                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1186                              &search_regs,
1187                              lim_byte - BEGV_BYTE);
1188           if (val == -2)
1189             {
1190               matcher_overflow ();
1191             }
1192           if (val >= 0)
1193             {
1194               pos_byte = search_regs.end[0] + BEGV_BYTE;
1195               for (i = 0; i < search_regs.num_regs; i++)
1196                 if (search_regs.start[i] >= 0)
1197                   {
1198                     search_regs.start[i]
1199                       = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1200                     search_regs.end[i]
1201                       = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1202                   }
1203               XSETBUFFER (last_thing_searched, current_buffer);
1204               pos = search_regs.end[0];
1205             }
1206           else
1207             {
1208               immediate_quit = 0;
1209               return (0 - n);
1210             }
1211           n--;
1212         }
1213       immediate_quit = 0;
1214       return (pos);
1215     }
1216   else                          /* non-RE case */
1217     {
1218       unsigned char *raw_pattern, *pat;
1219       int raw_pattern_size;
1220       int raw_pattern_size_byte;
1221       unsigned char *patbuf;
1222       int multibyte = !NILP (current_buffer->enable_multibyte_characters);
1223       unsigned char *base_pat;
1224       /* Set to positive if we find a non-ASCII char that need
1225          translation.  Otherwise set to zero later.  */
1226       int charset_base = -1;
1227       int boyer_moore_ok = 1;
1228
1229       /* MULTIBYTE says whether the text to be searched is multibyte.
1230          We must convert PATTERN to match that, or we will not really
1231          find things right.  */
1232
1233       if (multibyte == STRING_MULTIBYTE (string))
1234         {
1235           raw_pattern = (unsigned char *) SDATA (string);
1236           raw_pattern_size = SCHARS (string);
1237           raw_pattern_size_byte = SBYTES (string);
1238         }
1239       else if (multibyte)
1240         {
1241           raw_pattern_size = SCHARS (string);
1242           raw_pattern_size_byte
1243             = count_size_as_multibyte (SDATA (string),
1244                                        raw_pattern_size);
1245           raw_pattern = (unsigned char *) alloca (raw_pattern_size_byte + 1);
1246           copy_text (SDATA (string), raw_pattern,
1247                      SCHARS (string), 0, 1);
1248         }
1249       else
1250         {
1251           /* Converting multibyte to single-byte.
1252
1253              ??? Perhaps this conversion should be done in a special way
1254              by subtracting nonascii-insert-offset from each non-ASCII char,
1255              so that only the multibyte chars which really correspond to
1256              the chosen single-byte character set can possibly match.  */
1257           raw_pattern_size = SCHARS (string);
1258           raw_pattern_size_byte = SCHARS (string);
1259           raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
1260           copy_text (SDATA (string), raw_pattern,
1261                      SBYTES (string), 1, 0);
1262         }
1263
1264       /* Copy and optionally translate the pattern.  */
1265       len = raw_pattern_size;
1266       len_byte = raw_pattern_size_byte;
1267       patbuf = (unsigned char *) alloca (len_byte);
1268       pat = patbuf;
1269       base_pat = raw_pattern;
1270       if (multibyte)
1271         {
1272           /* Fill patbuf by translated characters in STRING while
1273              checking if we can use boyer-moore search.  If TRT is
1274              non-nil, we can use boyer-moore search only if TRT can be
1275              represented by the byte array of 256 elements.  For that,
1276              all non-ASCII case-equivalents of all case-senstive
1277              characters in STRING must belong to the same charset and
1278              row.  */
1279
1280           while (--len >= 0)
1281             {
1282               unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str;
1283               int c, translated, inverse;
1284               int in_charlen, charlen;
1285
1286               /* If we got here and the RE flag is set, it's because we're
1287                  dealing with a regexp known to be trivial, so the backslash
1288                  just quotes the next character.  */
1289               if (RE && *base_pat == '\\')
1290                 {
1291                   len--;
1292                   raw_pattern_size--;
1293                   len_byte--;
1294                   base_pat++;
1295                 }
1296
1297               c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen);
1298
1299               if (NILP (trt))
1300                 {
1301                   str = base_pat;
1302                   charlen = in_charlen;
1303                 }
1304               else
1305                 {
1306                   /* Translate the character.  */
1307                   TRANSLATE (translated, trt, c);
1308                   charlen = CHAR_STRING (translated, str_base);
1309                   str = str_base;
1310
1311                   /* Check if C has any other case-equivalents.  */
1312                   TRANSLATE (inverse, inverse_trt, c);
1313                   /* If so, check if we can use boyer-moore.  */
1314                   if (c != inverse && boyer_moore_ok)
1315                     {
1316                       /* Check if all equivalents belong to the same
1317                          charset & row.  Note that the check of C
1318                          itself is done by the last iteration.  Note
1319                          also that we don't have to check ASCII
1320                          characters because boyer-moore search can
1321                          always handle their translation.  */
1322                       while (1)
1323                         {
1324                           if (ASCII_BYTE_P (inverse))
1325                             {
1326                               if (charset_base > 0)
1327                                 {
1328                                   boyer_moore_ok = 0;
1329                                   break;
1330                                 }
1331                               charset_base = 0;
1332                             }
1333                           else if (SINGLE_BYTE_CHAR_P (inverse))
1334                             {
1335                               /* Boyer-moore search can't handle a
1336                                  translation of an eight-bit
1337                                  character.  */
1338                               boyer_moore_ok = 0;
1339                               break;
1340                             }
1341                           else if (charset_base < 0)
1342                             charset_base = inverse & ~CHAR_FIELD3_MASK;
1343                           else if ((inverse & ~CHAR_FIELD3_MASK)
1344                                    != charset_base)
1345                             {
1346                               boyer_moore_ok = 0;
1347                               break;
1348                             }
1349                           if (c == inverse)
1350                             break;
1351                           TRANSLATE (inverse, inverse_trt, inverse);
1352                         }
1353                     }
1354                 }
1355               if (charset_base < 0)
1356                 charset_base = 0;
1357
1358               /* Store this character into the translated pattern.  */
1359               bcopy (str, pat, charlen);
1360               pat += charlen;
1361               base_pat += in_charlen;
1362               len_byte -= in_charlen;
1363             }
1364         }
1365       else
1366         {
1367           /* Unibyte buffer.  */
1368           charset_base = 0;
1369           while (--len >= 0)
1370             {
1371               int c, translated;
1372
1373               /* If we got here and the RE flag is set, it's because we're
1374                  dealing with a regexp known to be trivial, so the backslash
1375                  just quotes the next character.  */
1376               if (RE && *base_pat == '\\')
1377                 {
1378                   len--;
1379                   raw_pattern_size--;
1380                   base_pat++;
1381                 }
1382               c = *base_pat++;
1383               TRANSLATE (translated, trt, c);
1384               *pat++ = translated;
1385             }
1386         }
1387
1388       len_byte = pat - patbuf;
1389       len = raw_pattern_size;
1390       pat = base_pat = patbuf;
1391
1392       if (boyer_moore_ok)
1393         return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
1394                             pos, pos_byte, lim, lim_byte,
1395                             charset_base);
1396       else
1397         return simple_search (n, pat, len, len_byte, trt,
1398                               pos, pos_byte, lim, lim_byte);
1399     }
1400 }
1401 \f
1402 /* Do a simple string search N times for the string PAT,
1403    whose length is LEN/LEN_BYTE,
1404    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1405    TRT is the translation table.
1406
1407    Return the character position where the match is found.
1408    Otherwise, if M matches remained to be found, return -M.
1409
1410    This kind of search works regardless of what is in PAT and
1411    regardless of what is in TRT.  It is used in cases where
1412    boyer_moore cannot work.  */
1413
1414 static int
1415 simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
1416      int n;
1417      unsigned char *pat;
1418      int len, len_byte;
1419      Lisp_Object trt;
1420      int pos, pos_byte;
1421      int lim, lim_byte;
1422 {
1423   int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1424   int forward = n > 0;
1425
1426   if (lim > pos && multibyte)
1427     while (n > 0)
1428       {
1429         while (1)
1430           {
1431             /* Try matching at position POS.  */
1432             int this_pos = pos;
1433             int this_pos_byte = pos_byte;
1434             int this_len = len;
1435             int this_len_byte = len_byte;
1436             unsigned char *p = pat;
1437             if (pos + len > lim)
1438               goto stop;
1439
1440             while (this_len > 0)
1441               {
1442                 int charlen, buf_charlen;
1443                 int pat_ch, buf_ch;
1444
1445                 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1446                 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1447                                                  ZV_BYTE - this_pos_byte,
1448                                                  buf_charlen);
1449                 TRANSLATE (buf_ch, trt, buf_ch);
1450
1451                 if (buf_ch != pat_ch)
1452                   break;
1453
1454                 this_len_byte -= charlen;
1455                 this_len--;
1456                 p += charlen;
1457
1458                 this_pos_byte += buf_charlen;
1459                 this_pos++;
1460               }
1461
1462             if (this_len == 0)
1463               {
1464                 pos += len;
1465                 pos_byte += len_byte;
1466                 break;
1467               }
1468
1469             INC_BOTH (pos, pos_byte);
1470           }
1471
1472         n--;
1473       }
1474   else if (lim > pos)
1475     while (n > 0)
1476       {
1477         while (1)
1478           {
1479             /* Try matching at position POS.  */
1480             int this_pos = pos;
1481             int this_len = len;
1482             unsigned char *p = pat;
1483
1484             if (pos + len > lim)
1485               goto stop;
1486
1487             while (this_len > 0)
1488               {
1489                 int pat_ch = *p++;
1490                 int buf_ch = FETCH_BYTE (this_pos);
1491                 TRANSLATE (buf_ch, trt, buf_ch);
1492
1493                 if (buf_ch != pat_ch)
1494                   break;
1495
1496                 this_len--;
1497                 this_pos++;
1498               }
1499
1500             if (this_len == 0)
1501               {
1502                 pos += len;
1503                 break;
1504               }
1505
1506             pos++;
1507           }
1508
1509         n--;
1510       }
1511   /* Backwards search.  */
1512   else if (lim < pos && multibyte)
1513     while (n < 0)
1514       {
1515         while (1)
1516           {
1517             /* Try matching at position POS.  */
1518             int this_pos = pos - len;
1519             int this_pos_byte = pos_byte - len_byte;
1520             int this_len = len;
1521             int this_len_byte = len_byte;
1522             unsigned char *p = pat;
1523
1524             if (this_pos < lim || this_pos_byte < lim_byte)
1525               goto stop;
1526
1527             while (this_len > 0)
1528               {
1529                 int charlen, buf_charlen;
1530                 int pat_ch, buf_ch;
1531
1532                 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1533                 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1534                                                  ZV_BYTE - this_pos_byte,
1535                                                  buf_charlen);
1536                 TRANSLATE (buf_ch, trt, buf_ch);
1537
1538                 if (buf_ch != pat_ch)
1539                   break;
1540
1541                 this_len_byte -= charlen;
1542                 this_len--;
1543                 p += charlen;
1544                 this_pos_byte += buf_charlen;
1545                 this_pos++;
1546               }
1547
1548             if (this_len == 0)
1549               {
1550                 pos -= len;
1551                 pos_byte -= len_byte;
1552                 break;
1553               }
1554
1555             DEC_BOTH (pos, pos_byte);
1556           }
1557
1558         n++;
1559       }
1560   else if (lim < pos)
1561     while (n < 0)
1562       {
1563         while (1)
1564           {
1565             /* Try matching at position POS.  */
1566             int this_pos = pos - len;
1567             int this_len = len;
1568             unsigned char *p = pat;
1569
1570             if (pos - len < lim)
1571               goto stop;
1572
1573             while (this_len > 0)
1574               {
1575                 int pat_ch = *p++;
1576                 int buf_ch = FETCH_BYTE (this_pos);
1577                 TRANSLATE (buf_ch, trt, buf_ch);
1578
1579                 if (buf_ch != pat_ch)
1580                   break;
1581                 this_len--;
1582                 this_pos++;
1583               }
1584
1585             if (this_len == 0)
1586               {
1587                 pos -= len;
1588                 break;
1589               }
1590
1591             pos--;
1592           }
1593
1594         n++;
1595       }
1596
1597  stop:
1598   if (n == 0)
1599     {
1600       if (forward)
1601         set_search_regs ((multibyte ? pos_byte : pos) - len_byte, len_byte);
1602       else
1603         set_search_regs (multibyte ? pos_byte : pos, len_byte);
1604
1605       return pos;
1606     }
1607   else if (n > 0)
1608     return -n;
1609   else
1610     return n;
1611 }
1612 \f
1613 /* Do Boyer-Moore search N times for the string BASE_PAT,
1614    whose length is LEN/LEN_BYTE,
1615    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1616    DIRECTION says which direction we search in.
1617    TRT and INVERSE_TRT are translation tables.
1618    Characters in PAT are already translated by TRT.
1619
1620    This kind of search works if all the characters in BASE_PAT that
1621    have nontrivial translation are the same aside from the last byte.
1622    This makes it possible to translate just the last byte of a
1623    character, and do so after just a simple test of the context.
1624    CHARSET_BASE is nonzero if there is such a non-ASCII character.
1625
1626    If that criterion is not satisfied, do not call this function.  */
1627
1628 static int
1629 boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
1630              pos, pos_byte, lim, lim_byte, charset_base)
1631      int n;
1632      unsigned char *base_pat;
1633      int len, len_byte;
1634      Lisp_Object trt;
1635      Lisp_Object inverse_trt;
1636      int pos, pos_byte;
1637      int lim, lim_byte;
1638      int charset_base;
1639 {
1640   int direction = ((n > 0) ? 1 : -1);
1641   register int dirlen;
1642   int infinity, limit, stride_for_teases = 0;
1643   register int *BM_tab;
1644   int *BM_tab_base;
1645   register unsigned char *cursor, *p_limit;
1646   register int i, j;
1647   unsigned char *pat, *pat_end;
1648   int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1649
1650   unsigned char simple_translate[0400];
1651   /* These are set to the preceding bytes of a byte to be translated
1652      if charset_base is nonzero.  As the maximum byte length of a
1653      multibyte character is 4, we have to check at most three previous
1654      bytes.  */
1655   int translate_prev_byte1 = 0;
1656   int translate_prev_byte2 = 0;
1657   int translate_prev_byte3 = 0;
1658
1659 #ifdef C_ALLOCA
1660   int BM_tab_space[0400];
1661   BM_tab = &BM_tab_space[0];
1662 #else
1663   BM_tab = (int *) alloca (0400 * sizeof (int));
1664 #endif
1665   /* The general approach is that we are going to maintain that we know */
1666   /* the first (closest to the present position, in whatever direction */
1667   /* we're searching) character that could possibly be the last */
1668   /* (furthest from present position) character of a valid match.  We */
1669   /* advance the state of our knowledge by looking at that character */
1670   /* and seeing whether it indeed matches the last character of the */
1671   /* pattern.  If it does, we take a closer look.  If it does not, we */
1672   /* move our pointer (to putative last characters) as far as is */
1673   /* logically possible.  This amount of movement, which I call a */
1674   /* stride, will be the length of the pattern if the actual character */
1675   /* appears nowhere in the pattern, otherwise it will be the distance */
1676   /* from the last occurrence of that character to the end of the */
1677   /* pattern. */
1678   /* As a coding trick, an enormous stride is coded into the table for */
1679   /* characters that match the last character.  This allows use of only */
1680   /* a single test, a test for having gone past the end of the */
1681   /* permissible match region, to test for both possible matches (when */
1682   /* the stride goes past the end immediately) and failure to */
1683   /* match (where you get nudged past the end one stride at a time). */
1684
1685   /* Here we make a "mickey mouse" BM table.  The stride of the search */
1686   /* is determined only by the last character of the putative match. */
1687   /* If that character does not match, we will stride the proper */
1688   /* distance to propose a match that superimposes it on the last */
1689   /* instance of a character that matches it (per trt), or misses */
1690   /* it entirely if there is none. */
1691
1692   dirlen = len_byte * direction;
1693   infinity = dirlen - (lim_byte + pos_byte + len_byte + len_byte) * direction;
1694
1695   /* Record position after the end of the pattern.  */
1696   pat_end = base_pat + len_byte;
1697   /* BASE_PAT points to a character that we start scanning from.
1698      It is the first character in a forward search,
1699      the last character in a backward search.  */
1700   if (direction < 0)
1701     base_pat = pat_end - 1;
1702
1703   BM_tab_base = BM_tab;
1704   BM_tab += 0400;
1705   j = dirlen;           /* to get it in a register */
1706   /* A character that does not appear in the pattern induces a */
1707   /* stride equal to the pattern length. */
1708   while (BM_tab_base != BM_tab)
1709     {
1710       *--BM_tab = j;
1711       *--BM_tab = j;
1712       *--BM_tab = j;
1713       *--BM_tab = j;
1714     }
1715
1716   /* We use this for translation, instead of TRT itself.
1717      We fill this in to handle the characters that actually
1718      occur in the pattern.  Others don't matter anyway!  */
1719   bzero (simple_translate, sizeof simple_translate);
1720   for (i = 0; i < 0400; i++)
1721     simple_translate[i] = i;
1722
1723   if (charset_base)
1724     {
1725       /* Setup translate_prev_byte1/2/3 from CHARSET_BASE.  Only a
1726          byte following them are the target of translation.  */
1727       int sample_char = charset_base | 0x20;
1728       unsigned char str[MAX_MULTIBYTE_LENGTH];
1729       int len = CHAR_STRING (sample_char, str);
1730
1731       translate_prev_byte1 = str[len - 2];
1732       if (len > 2)
1733         {
1734           translate_prev_byte2 = str[len - 3];
1735           if (len > 3)
1736             translate_prev_byte3 = str[len - 4];
1737         }
1738     }
1739
1740   i = 0;
1741   while (i != infinity)
1742     {
1743       unsigned char *ptr = base_pat + i;
1744       i += direction;
1745       if (i == dirlen)
1746         i = infinity;
1747       if (! NILP (trt))
1748         {
1749           /* If the byte currently looking at is the last of a
1750              character to check case-equivalents, set CH to that
1751              character.  An ASCII character and a non-ASCII character
1752              matching with CHARSET_BASE are to be checked.  */
1753           int ch = -1;
1754
1755           if (ASCII_BYTE_P (*ptr) || ! multibyte)
1756             ch = *ptr;
1757           else if (charset_base
1758                    && ((pat_end - ptr) == 1 || CHAR_HEAD_P (ptr[1])))
1759             {
1760               unsigned char *charstart = ptr - 1;
1761
1762               while (! (CHAR_HEAD_P (*charstart)))
1763                 charstart--;
1764               ch = STRING_CHAR (charstart, ptr - charstart + 1);
1765               if (charset_base != (ch & ~CHAR_FIELD3_MASK))
1766                 ch = -1;
1767             }
1768
1769           if (ch >= 0400)
1770             j = ((unsigned char) ch) | 0200;
1771           else
1772             j = *ptr;
1773
1774           if (i == infinity)
1775             stride_for_teases = BM_tab[j];
1776
1777           BM_tab[j] = dirlen - i;
1778           /* A translation table is accompanied by its inverse -- see */
1779           /* comment following downcase_table for details */
1780           if (ch >= 0)
1781             {
1782               int starting_ch = ch;
1783               int starting_j = j;
1784
1785               while (1)
1786                 {
1787                   TRANSLATE (ch, inverse_trt, ch);
1788                   if (ch >= 0400)
1789                     j = ((unsigned char) ch) | 0200;
1790                   else
1791                     j = (unsigned char) ch;
1792
1793                   /* For all the characters that map into CH,
1794                      set up simple_translate to map the last byte
1795                      into STARTING_J.  */
1796                   simple_translate[j] = starting_j;
1797                   if (ch == starting_ch)
1798                     break;
1799                   BM_tab[j] = dirlen - i;
1800                 }
1801             }
1802         }
1803       else
1804         {
1805           j = *ptr;
1806
1807           if (i == infinity)
1808             stride_for_teases = BM_tab[j];
1809           BM_tab[j] = dirlen - i;
1810         }
1811       /* stride_for_teases tells how much to stride if we get a */
1812       /* match on the far character but are subsequently */
1813       /* disappointed, by recording what the stride would have been */
1814       /* for that character if the last character had been */
1815       /* different. */
1816     }
1817   infinity = dirlen - infinity;
1818   pos_byte += dirlen - ((direction > 0) ? direction : 0);
1819   /* loop invariant - POS_BYTE points at where last char (first
1820      char if reverse) of pattern would align in a possible match.  */
1821   while (n != 0)
1822     {
1823       int tail_end;
1824       unsigned char *tail_end_ptr;
1825
1826       /* It's been reported that some (broken) compiler thinks that
1827          Boolean expressions in an arithmetic context are unsigned.
1828          Using an explicit ?1:0 prevents this.  */
1829       if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1830           < 0)
1831         return (n * (0 - direction));
1832       /* First we do the part we can by pointers (maybe nothing) */
1833       QUIT;
1834       pat = base_pat;
1835       limit = pos_byte - dirlen + direction;
1836       if (direction > 0)
1837         {
1838           limit = BUFFER_CEILING_OF (limit);
1839           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1840              can take on without hitting edge of buffer or the gap.  */
1841           limit = min (limit, pos_byte + 20000);
1842           limit = min (limit, lim_byte - 1);
1843         }
1844       else
1845         {
1846           limit = BUFFER_FLOOR_OF (limit);
1847           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1848              can take on without hitting edge of buffer or the gap.  */
1849           limit = max (limit, pos_byte - 20000);
1850           limit = max (limit, lim_byte);
1851         }
1852       tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1853       tail_end_ptr = BYTE_POS_ADDR (tail_end);
1854
1855       if ((limit - pos_byte) * direction > 20)
1856         {
1857           unsigned char *p2;
1858
1859           p_limit = BYTE_POS_ADDR (limit);
1860           p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1861           /* In this loop, pos + cursor - p2 is the surrogate for pos */
1862           while (1)             /* use one cursor setting as long as i can */
1863             {
1864               if (direction > 0) /* worth duplicating */
1865                 {
1866                   /* Use signed comparison if appropriate
1867                      to make cursor+infinity sure to be > p_limit.
1868                      Assuming that the buffer lies in a range of addresses
1869                      that are all "positive" (as ints) or all "negative",
1870                      either kind of comparison will work as long
1871                      as we don't step by infinity.  So pick the kind
1872                      that works when we do step by infinity.  */
1873                   if ((EMACS_INT) (p_limit + infinity) > (EMACS_INT) p_limit)
1874                     while ((EMACS_INT) cursor <= (EMACS_INT) p_limit)
1875                       cursor += BM_tab[*cursor];
1876                   else
1877                     while ((EMACS_UINT) cursor <= (EMACS_UINT) p_limit)
1878                       cursor += BM_tab[*cursor];
1879                 }
1880               else
1881                 {
1882                   if ((EMACS_INT) (p_limit + infinity) < (EMACS_INT) p_limit)
1883                     while ((EMACS_INT) cursor >= (EMACS_INT) p_limit)
1884                       cursor += BM_tab[*cursor];
1885                   else
1886                     while ((EMACS_UINT) cursor >= (EMACS_UINT) p_limit)
1887                       cursor += BM_tab[*cursor];
1888                 }
1889 /* If you are here, cursor is beyond the end of the searched region. */
1890 /* This can happen if you match on the far character of the pattern, */
1891 /* because the "stride" of that character is infinity, a number able */
1892 /* to throw you well beyond the end of the search.  It can also */
1893 /* happen if you fail to match within the permitted region and would */
1894 /* otherwise try a character beyond that region */
1895               if ((cursor - p_limit) * direction <= len_byte)
1896                 break;  /* a small overrun is genuine */
1897               cursor -= infinity; /* large overrun = hit */
1898               i = dirlen - direction;
1899               if (! NILP (trt))
1900                 {
1901                   while ((i -= direction) + direction != 0)
1902                     {
1903                       int ch;
1904                       cursor -= direction;
1905                       /* Translate only the last byte of a character.  */
1906                       if (! multibyte
1907                           || ((cursor == tail_end_ptr
1908                                || CHAR_HEAD_P (cursor[1]))
1909                               && (CHAR_HEAD_P (cursor[0])
1910                                   /* Check if this is the last byte of
1911                                      a translable character.  */
1912                                   || (translate_prev_byte1 == cursor[-1]
1913                                       && (CHAR_HEAD_P (translate_prev_byte1)
1914                                           || (translate_prev_byte2 == cursor[-2]
1915                                               && (CHAR_HEAD_P (translate_prev_byte2)
1916                                                   || (translate_prev_byte3 == cursor[-3]))))))))
1917                         ch = simple_translate[*cursor];
1918                       else
1919                         ch = *cursor;
1920                       if (pat[i] != ch)
1921                         break;
1922                     }
1923                 }
1924               else
1925                 {
1926                   while ((i -= direction) + direction != 0)
1927                     {
1928                       cursor -= direction;
1929                       if (pat[i] != *cursor)
1930                         break;
1931                     }
1932                 }
1933               cursor += dirlen - i - direction; /* fix cursor */
1934               if (i + direction == 0)
1935                 {
1936                   int position;
1937
1938                   cursor -= direction;
1939
1940                   position = pos_byte + cursor - p2 + ((direction > 0)
1941                                                        ? 1 - len_byte : 0);
1942                   set_search_regs (position, len_byte);
1943
1944                   if ((n -= direction) != 0)
1945                     cursor += dirlen; /* to resume search */
1946                   else
1947                     return ((direction > 0)
1948                             ? search_regs.end[0] : search_regs.start[0]);
1949                 }
1950               else
1951                 cursor += stride_for_teases; /* <sigh> we lose -  */
1952             }
1953           pos_byte += cursor - p2;
1954         }
1955       else
1956         /* Now we'll pick up a clump that has to be done the hard */
1957         /* way because it covers a discontinuity */
1958         {
1959           limit = ((direction > 0)
1960                    ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
1961                    : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
1962           limit = ((direction > 0)
1963                    ? min (limit + len_byte, lim_byte - 1)
1964                    : max (limit - len_byte, lim_byte));
1965           /* LIMIT is now the last value POS_BYTE can have
1966              and still be valid for a possible match.  */
1967           while (1)
1968             {
1969               /* This loop can be coded for space rather than */
1970               /* speed because it will usually run only once. */
1971               /* (the reach is at most len + 21, and typically */
1972               /* does not exceed len) */
1973               while ((limit - pos_byte) * direction >= 0)
1974                 pos_byte += BM_tab[FETCH_BYTE (pos_byte)];
1975               /* now run the same tests to distinguish going off the */
1976               /* end, a match or a phony match. */
1977               if ((pos_byte - limit) * direction <= len_byte)
1978                 break;  /* ran off the end */
1979               /* Found what might be a match.
1980                  Set POS_BYTE back to last (first if reverse) pos.  */
1981               pos_byte -= infinity;
1982               i = dirlen - direction;
1983               while ((i -= direction) + direction != 0)
1984                 {
1985                   int ch;
1986                   unsigned char *ptr;
1987                   pos_byte -= direction;
1988                   ptr = BYTE_POS_ADDR (pos_byte);
1989                   /* Translate only the last byte of a character.  */
1990                   if (! multibyte
1991                       || ((ptr == tail_end_ptr
1992                            || CHAR_HEAD_P (ptr[1]))
1993                           && (CHAR_HEAD_P (ptr[0])
1994                               /* Check if this is the last byte of a
1995                                  translable character.  */
1996                               || (translate_prev_byte1 == ptr[-1]
1997                                   && (CHAR_HEAD_P (translate_prev_byte1)
1998                                       || (translate_prev_byte2 == ptr[-2]
1999                                           && (CHAR_HEAD_P (translate_prev_byte2)
2000                                               || translate_prev_byte3 == ptr[-3])))))))
2001                     ch = simple_translate[*ptr];
2002                   else
2003                     ch = *ptr;
2004                   if (pat[i] != ch)
2005                     break;
2006                 }
2007               /* Above loop has moved POS_BYTE part or all the way
2008                  back to the first pos (last pos if reverse).
2009                  Set it once again at the last (first if reverse) char.  */
2010               pos_byte += dirlen - i- direction;
2011               if (i + direction == 0)
2012                 {
2013                   int position;
2014                   pos_byte -= direction;
2015
2016                   position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
2017
2018                   set_search_regs (position, len_byte);
2019
2020                   if ((n -= direction) != 0)
2021                     pos_byte += dirlen; /* to resume search */
2022                   else
2023                     return ((direction > 0)
2024                             ? search_regs.end[0] : search_regs.start[0]);
2025                 }
2026               else
2027                 pos_byte += stride_for_teases;
2028             }
2029           }
2030       /* We have done one clump.  Can we continue? */
2031       if ((lim_byte - pos_byte) * direction < 0)
2032         return ((0 - n) * direction);
2033     }
2034   return BYTE_TO_CHAR (pos_byte);
2035 }
2036
2037 /* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
2038    for the overall match just found in the current buffer.
2039    Also clear out the match data for registers 1 and up.  */
2040
2041 static void
2042 set_search_regs (beg_byte, nbytes)
2043      int beg_byte, nbytes;
2044 {
2045   int i;
2046
2047   /* Make sure we have registers in which to store
2048      the match position.  */
2049   if (search_regs.num_regs == 0)
2050     {
2051       search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
2052       search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
2053       search_regs.num_regs = 2;
2054     }
2055
2056   /* Clear out the other registers.  */
2057   for (i = 1; i < search_regs.num_regs; i++)
2058     {
2059       search_regs.start[i] = -1;
2060       search_regs.end[i] = -1;
2061     }
2062
2063   search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
2064   search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
2065   XSETBUFFER (last_thing_searched, current_buffer);
2066 }
2067 \f
2068 /* Given a string of words separated by word delimiters,
2069   compute a regexp that matches those exact words
2070   separated by arbitrary punctuation.  */
2071
2072 static Lisp_Object
2073 wordify (string)
2074      Lisp_Object string;
2075 {
2076   register unsigned char *p, *o;
2077   register int i, i_byte, len, punct_count = 0, word_count = 0;
2078   Lisp_Object val;
2079   int prev_c = 0;
2080   int adjust;
2081
2082   CHECK_STRING (string);
2083   p = SDATA (string);
2084   len = SCHARS (string);
2085
2086   for (i = 0, i_byte = 0; i < len; )
2087     {
2088       int c;
2089
2090       FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
2091
2092       if (SYNTAX (c) != Sword)
2093         {
2094           punct_count++;
2095           if (i > 0 && SYNTAX (prev_c) == Sword)
2096             word_count++;
2097         }
2098
2099       prev_c = c;
2100     }
2101
2102   if (SYNTAX (prev_c) == Sword)
2103     word_count++;
2104   if (!word_count)
2105     return empty_string;
2106
2107   adjust = - punct_count + 5 * (word_count - 1) + 4;
2108   if (STRING_MULTIBYTE (string))
2109     val = make_uninit_multibyte_string (len + adjust,
2110                                         SBYTES (string)
2111                                         + adjust);
2112   else
2113     val = make_uninit_string (len + adjust);
2114
2115   o = SDATA (val);
2116   *o++ = '\\';
2117   *o++ = 'b';
2118   prev_c = 0;
2119
2120   for (i = 0, i_byte = 0; i < len; )
2121     {
2122       int c;
2123       int i_byte_orig = i_byte;
2124
2125       FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
2126
2127       if (SYNTAX (c) == Sword)
2128         {
2129           bcopy (SDATA (string) + i_byte_orig, o,
2130                  i_byte - i_byte_orig);
2131           o += i_byte - i_byte_orig;
2132         }
2133       else if (i > 0 && SYNTAX (prev_c) == Sword && --word_count)
2134         {
2135           *o++ = '\\';
2136           *o++ = 'W';
2137           *o++ = '\\';
2138           *o++ = 'W';
2139           *o++ = '*';
2140         }
2141
2142       prev_c = c;
2143     }
2144
2145   *o++ = '\\';
2146   *o++ = 'b';
2147
2148   return val;
2149 }
2150 \f
2151 DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
2152        "MSearch backward: ",
2153        doc: /* Search backward from point for STRING.
2154 Set point to the beginning of the occurrence found, and return point.
2155 An optional second argument bounds the search; it is a buffer position.
2156 The match found must not extend before that position.
2157 Optional third argument, if t, means if fail just return nil (no error).
2158  If not nil and not t, position at limit of search and return nil.
2159 Optional fourth argument is repeat count--search for successive occurrences.
2160
2161 Search case-sensitivity is determined by the value of the variable
2162 `case-fold-search', which see.
2163
2164 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2165      (string, bound, noerror, count)
2166      Lisp_Object string, bound, noerror, count;
2167 {
2168   return search_command (string, bound, noerror, count, -1, 0, 0);
2169 }
2170
2171 DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
2172        doc: /* Search forward from point for STRING.
2173 Set point to the end of the occurrence found, and return point.
2174 An optional second argument bounds the search; it is a buffer position.
2175 The match found must not extend after that position.  A value of nil is
2176   equivalent to (point-max).
2177 Optional third argument, if t, means if fail just return nil (no error).
2178   If not nil and not t, move to limit of search and return nil.
2179 Optional fourth argument is repeat count--search for successive occurrences.
2180
2181 Search case-sensitivity is determined by the value of the variable
2182 `case-fold-search', which see.
2183
2184 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2185      (string, bound, noerror, count)
2186      Lisp_Object string, bound, noerror, count;
2187 {
2188   return search_command (string, bound, noerror, count, 1, 0, 0);
2189 }
2190
2191 DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
2192        "sWord search backward: ",
2193        doc: /* Search backward from point for STRING, ignoring differences in punctuation.
2194 Set point to the beginning of the occurrence found, and return point.
2195 An optional second argument bounds the search; it is a buffer position.
2196 The match found must not extend before that position.
2197 Optional third argument, if t, means if fail just return nil (no error).
2198   If not nil and not t, move to limit of search and return nil.
2199 Optional fourth argument is repeat count--search for successive occurrences.  */)
2200      (string, bound, noerror, count)
2201      Lisp_Object string, bound, noerror, count;
2202 {
2203   return search_command (wordify (string), bound, noerror, count, -1, 1, 0);
2204 }
2205
2206 DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
2207        "sWord search: ",
2208        doc: /* Search forward from point for STRING, ignoring differences in punctuation.
2209 Set point to the end of the occurrence found, and return point.
2210 An optional second argument bounds the search; it is a buffer position.
2211 The match found must not extend after that position.
2212 Optional third argument, if t, means if fail just return nil (no error).
2213   If not nil and not t, move to limit of search and return nil.
2214 Optional fourth argument is repeat count--search for successive occurrences.  */)
2215      (string, bound, noerror, count)
2216      Lisp_Object string, bound, noerror, count;
2217 {
2218   return search_command (wordify (string), bound, noerror, count, 1, 1, 0);
2219 }
2220
2221 DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
2222        "sRE search backward: ",
2223        doc: /* Search backward from point for match for regular expression REGEXP.
2224 Set point to the beginning of the match, and return point.
2225 The match found is the one starting last in the buffer
2226 and yet ending before the origin of the search.
2227 An optional second argument bounds the search; it is a buffer position.
2228 The match found must start at or after that position.
2229 Optional third argument, if t, means if fail just return nil (no error).
2230   If not nil and not t, move to limit of search and return nil.
2231 Optional fourth argument is repeat count--search for successive occurrences.
2232 See also the functions `match-beginning', `match-end', `match-string',
2233 and `replace-match'.  */)
2234      (regexp, bound, noerror, count)
2235      Lisp_Object regexp, bound, noerror, count;
2236 {
2237   return search_command (regexp, bound, noerror, count, -1, 1, 0);
2238 }
2239
2240 DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
2241        "sRE search: ",
2242        doc: /* Search forward from point for regular expression REGEXP.
2243 Set point to the end of the occurrence found, and return point.
2244 An optional second argument bounds the search; it is a buffer position.
2245 The match found must not extend after that position.
2246 Optional third argument, if t, means if fail just return nil (no error).
2247   If not nil and not t, move to limit of search and return nil.
2248 Optional fourth argument is repeat count--search for successive occurrences.
2249 See also the functions `match-beginning', `match-end', `match-string',
2250 and `replace-match'.  */)
2251      (regexp, bound, noerror, count)
2252      Lisp_Object regexp, bound, noerror, count;
2253 {
2254   return search_command (regexp, bound, noerror, count, 1, 1, 0);
2255 }
2256
2257 DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
2258        "sPosix search backward: ",
2259        doc: /* Search backward from point for match for regular expression REGEXP.
2260 Find the longest match in accord with Posix regular expression rules.
2261 Set point to the beginning of the match, and return point.
2262 The match found is the one starting last in the buffer
2263 and yet ending before the origin of the search.
2264 An optional second argument bounds the search; it is a buffer position.
2265 The match found must start at or after that position.
2266 Optional third argument, if t, means if fail just return nil (no error).
2267   If not nil and not t, move to limit of search and return nil.
2268 Optional fourth argument is repeat count--search for successive occurrences.
2269 See also the functions `match-beginning', `match-end', `match-string',
2270 and `replace-match'.  */)
2271      (regexp, bound, noerror, count)
2272      Lisp_Object regexp, bound, noerror, count;
2273 {
2274   return search_command (regexp, bound, noerror, count, -1, 1, 1);
2275 }
2276
2277 DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
2278        "sPosix search: ",
2279        doc: /* Search forward from point for regular expression REGEXP.
2280 Find the longest match in accord with Posix regular expression rules.
2281 Set point to the end of the occurrence found, and return point.
2282 An optional second argument bounds the search; it is a buffer position.
2283 The match found must not extend after that position.
2284 Optional third argument, if t, means if fail just return nil (no error).
2285   If not nil and not t, move to limit of search and return nil.
2286 Optional fourth argument is repeat count--search for successive occurrences.
2287 See also the functions `match-beginning', `match-end', `match-string',
2288 and `replace-match'.  */)
2289      (regexp, bound, noerror, count)
2290      Lisp_Object regexp, bound, noerror, count;
2291 {
2292   return search_command (regexp, bound, noerror, count, 1, 1, 1);
2293 }
2294 \f
2295 DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
2296        doc: /* Replace text matched by last search with NEWTEXT.
2297 Leave point at the end of the replacement text.
2298
2299 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
2300 Otherwise maybe capitalize the whole text, or maybe just word initials,
2301 based on the replaced text.
2302 If the replaced text has only capital letters
2303 and has at least one multiletter word, convert NEWTEXT to all caps.
2304 Otherwise if all words are capitalized in the replaced text,
2305 capitalize each word in NEWTEXT.
2306
2307 If third arg LITERAL is non-nil, insert NEWTEXT literally.
2308 Otherwise treat `\\' as special:
2309   `\\&' in NEWTEXT means substitute original matched text.
2310   `\\N' means substitute what matched the Nth `\\(...\\)'.
2311        If Nth parens didn't match, substitute nothing.
2312   `\\\\' means insert one `\\'.
2313 Case conversion does not apply to these substitutions.
2314
2315 FIXEDCASE and LITERAL are optional arguments.
2316
2317 The optional fourth argument STRING can be a string to modify.
2318 This is meaningful when the previous match was done against STRING,
2319 using `string-match'.  When used this way, `replace-match'
2320 creates and returns a new string made by copying STRING and replacing
2321 the part of STRING that was matched.
2322
2323 The optional fifth argument SUBEXP specifies a subexpression;
2324 it says to replace just that subexpression with NEWTEXT,
2325 rather than replacing the entire matched text.
2326 This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
2327 `\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
2328 NEWTEXT in place of subexp N.
2329 This is useful only after a regular expression search or match,
2330 since only regular expressions have distinguished subexpressions.  */)
2331      (newtext, fixedcase, literal, string, subexp)
2332      Lisp_Object newtext, fixedcase, literal, string, subexp;
2333 {
2334   enum { nochange, all_caps, cap_initial } case_action;
2335   register int pos, pos_byte;
2336   int some_multiletter_word;
2337   int some_lowercase;
2338   int some_uppercase;
2339   int some_nonuppercase_initial;
2340   register int c, prevc;
2341   int sub;
2342   int opoint, newpoint;
2343
2344   CHECK_STRING (newtext);
2345
2346   if (! NILP (string))
2347     CHECK_STRING (string);
2348
2349   case_action = nochange;       /* We tried an initialization */
2350                                 /* but some C compilers blew it */
2351
2352   if (search_regs.num_regs <= 0)
2353     error ("`replace-match' called before any match found");
2354
2355   if (NILP (subexp))
2356     sub = 0;
2357   else
2358     {
2359       CHECK_NUMBER (subexp);
2360       sub = XINT (subexp);
2361       if (sub < 0 || sub >= search_regs.num_regs)
2362         args_out_of_range (subexp, make_number (search_regs.num_regs));
2363     }
2364
2365   if (NILP (string))
2366     {
2367       if (search_regs.start[sub] < BEGV
2368           || search_regs.start[sub] > search_regs.end[sub]
2369           || search_regs.end[sub] > ZV)
2370         args_out_of_range (make_number (search_regs.start[sub]),
2371                            make_number (search_regs.end[sub]));
2372     }
2373   else
2374     {
2375       if (search_regs.start[sub] < 0
2376           || search_regs.start[sub] > search_regs.end[sub]
2377           || search_regs.end[sub] > SCHARS (string))
2378         args_out_of_range (make_number (search_regs.start[sub]),
2379                            make_number (search_regs.end[sub]));
2380     }
2381
2382   if (NILP (fixedcase))
2383     {
2384       /* Decide how to casify by examining the matched text. */
2385       int last;
2386
2387       pos = search_regs.start[sub];
2388       last = search_regs.end[sub];
2389
2390       if (NILP (string))
2391         pos_byte = CHAR_TO_BYTE (pos);
2392       else
2393         pos_byte = string_char_to_byte (string, pos);
2394
2395       prevc = '\n';
2396       case_action = all_caps;
2397
2398       /* some_multiletter_word is set nonzero if any original word
2399          is more than one letter long. */
2400       some_multiletter_word = 0;
2401       some_lowercase = 0;
2402       some_nonuppercase_initial = 0;
2403       some_uppercase = 0;
2404
2405       while (pos < last)
2406         {
2407           if (NILP (string))
2408             {
2409               c = FETCH_CHAR (pos_byte);
2410               INC_BOTH (pos, pos_byte);
2411             }
2412           else
2413             FETCH_STRING_CHAR_ADVANCE (c, string, pos, pos_byte);
2414
2415           if (LOWERCASEP (c))
2416             {
2417               /* Cannot be all caps if any original char is lower case */
2418
2419               some_lowercase = 1;
2420               if (SYNTAX (prevc) != Sword)
2421                 some_nonuppercase_initial = 1;
2422               else
2423                 some_multiletter_word = 1;
2424             }
2425           else if (UPPERCASEP (c))
2426             {
2427               some_uppercase = 1;
2428               if (SYNTAX (prevc) != Sword)
2429                 ;
2430               else
2431                 some_multiletter_word = 1;
2432             }
2433           else
2434             {
2435               /* If the initial is a caseless word constituent,
2436                  treat that like a lowercase initial.  */
2437               if (SYNTAX (prevc) != Sword)
2438                 some_nonuppercase_initial = 1;
2439             }
2440
2441           prevc = c;
2442         }
2443
2444       /* Convert to all caps if the old text is all caps
2445          and has at least one multiletter word.  */
2446       if (! some_lowercase && some_multiletter_word)
2447         case_action = all_caps;
2448       /* Capitalize each word, if the old text has all capitalized words.  */
2449       else if (!some_nonuppercase_initial && some_multiletter_word)
2450         case_action = cap_initial;
2451       else if (!some_nonuppercase_initial && some_uppercase)
2452         /* Should x -> yz, operating on X, give Yz or YZ?
2453            We'll assume the latter.  */
2454         case_action = all_caps;
2455       else
2456         case_action = nochange;
2457     }
2458
2459   /* Do replacement in a string.  */
2460   if (!NILP (string))
2461     {
2462       Lisp_Object before, after;
2463
2464       before = Fsubstring (string, make_number (0),
2465                            make_number (search_regs.start[sub]));
2466       after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
2467
2468       /* Substitute parts of the match into NEWTEXT
2469          if desired.  */
2470       if (NILP (literal))
2471         {
2472           int lastpos = 0;
2473           int lastpos_byte = 0;
2474           /* We build up the substituted string in ACCUM.  */
2475           Lisp_Object accum;
2476           Lisp_Object middle;
2477           int length = SBYTES (newtext);
2478
2479           accum = Qnil;
2480
2481           for (pos_byte = 0, pos = 0; pos_byte < length;)
2482             {
2483               int substart = -1;
2484               int subend = 0;
2485               int delbackslash = 0;
2486
2487               FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2488
2489               if (c == '\\')
2490                 {
2491                   FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2492
2493                   if (c == '&')
2494                     {
2495                       substart = search_regs.start[sub];
2496                       subend = search_regs.end[sub];
2497                     }
2498                   else if (c >= '1' && c <= '9')
2499                     {
2500                       if (search_regs.start[c - '0'] >= 0
2501                           && c <= search_regs.num_regs + '0')
2502                         {
2503                           substart = search_regs.start[c - '0'];
2504                           subend = search_regs.end[c - '0'];
2505                         }
2506                       else
2507                         {
2508                           /* If that subexp did not match,
2509                              replace \\N with nothing.  */
2510                           substart = 0;
2511                           subend = 0;
2512                         }
2513                     }
2514                   else if (c == '\\')
2515                     delbackslash = 1;
2516                   else
2517                     error ("Invalid use of `\\' in replacement text");
2518                 }
2519               if (substart >= 0)
2520                 {
2521                   if (pos - 2 != lastpos)
2522                     middle = substring_both (newtext, lastpos,
2523                                              lastpos_byte,
2524                                              pos - 2, pos_byte - 2);
2525                   else
2526                     middle = Qnil;
2527                   accum = concat3 (accum, middle,
2528                                    Fsubstring (string,
2529                                                make_number (substart),
2530                                                make_number (subend)));
2531                   lastpos = pos;
2532                   lastpos_byte = pos_byte;
2533                 }
2534               else if (delbackslash)
2535                 {
2536                   middle = substring_both (newtext, lastpos,
2537                                            lastpos_byte,
2538                                            pos - 1, pos_byte - 1);
2539
2540                   accum = concat2 (accum, middle);
2541                   lastpos = pos;
2542                   lastpos_byte = pos_byte;
2543                 }
2544             }
2545
2546           if (pos != lastpos)
2547             middle = substring_both (newtext, lastpos,
2548                                      lastpos_byte,
2549                                      pos, pos_byte);
2550           else
2551             middle = Qnil;
2552
2553           newtext = concat2 (accum, middle);
2554         }
2555
2556       /* Do case substitution in NEWTEXT if desired.  */
2557       if (case_action == all_caps)
2558         newtext = Fupcase (newtext);
2559       else if (case_action == cap_initial)
2560         newtext = Fupcase_initials (newtext);
2561
2562       return concat3 (before, newtext, after);
2563     }
2564
2565   /* Record point, then move (quietly) to the start of the match.  */
2566   if (PT >= search_regs.end[sub])
2567     opoint = PT - ZV;
2568   else if (PT > search_regs.start[sub])
2569     opoint = search_regs.end[sub] - ZV;
2570   else
2571     opoint = PT;
2572
2573   /* If we want non-literal replacement,
2574      perform substitution on the replacement string.  */
2575   if (NILP (literal))
2576     {
2577       int length = SBYTES (newtext);
2578       unsigned char *substed;
2579       int substed_alloc_size, substed_len;
2580       int buf_multibyte = !NILP (current_buffer->enable_multibyte_characters);
2581       int str_multibyte = STRING_MULTIBYTE (newtext);
2582       Lisp_Object rev_tbl;
2583       int really_changed = 0;
2584
2585       rev_tbl= (!buf_multibyte && CHAR_TABLE_P (Vnonascii_translation_table)
2586                 ? Fchar_table_extra_slot (Vnonascii_translation_table,
2587                                           make_number (0))
2588                 : Qnil);
2589
2590       substed_alloc_size = length * 2 + 100;
2591       substed = (unsigned char *) xmalloc (substed_alloc_size + 1);
2592       substed_len = 0;
2593
2594       /* Go thru NEWTEXT, producing the actual text to insert in
2595          SUBSTED while adjusting multibyteness to that of the current
2596          buffer.  */
2597
2598       for (pos_byte = 0, pos = 0; pos_byte < length;)
2599         {
2600           unsigned char str[MAX_MULTIBYTE_LENGTH];
2601           unsigned char *add_stuff = NULL;
2602           int add_len = 0;
2603           int idx = -1;
2604
2605           if (str_multibyte)
2606             {
2607               FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte);
2608               if (!buf_multibyte)
2609                 c = multibyte_char_to_unibyte (c, rev_tbl);
2610             }
2611           else
2612             {
2613               /* Note that we don't have to increment POS.  */
2614               c = SREF (newtext, pos_byte++);
2615               if (buf_multibyte)
2616                 c = unibyte_char_to_multibyte (c);
2617             }
2618
2619           /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2620              or set IDX to a match index, which means put that part
2621              of the buffer text into SUBSTED.  */
2622
2623           if (c == '\\')
2624             {
2625               really_changed = 1;
2626
2627               if (str_multibyte)
2628                 {
2629                   FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext,
2630                                                       pos, pos_byte);
2631                   if (!buf_multibyte && !SINGLE_BYTE_CHAR_P (c))
2632                     c = multibyte_char_to_unibyte (c, rev_tbl);
2633                 }
2634               else
2635                 {
2636                   c = SREF (newtext, pos_byte++);
2637                   if (buf_multibyte)
2638                     c = unibyte_char_to_multibyte (c);
2639                 }
2640
2641               if (c == '&')
2642                 idx = sub;
2643               else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
2644                 {
2645                   if (search_regs.start[c - '0'] >= 1)
2646                     idx = c - '0';
2647                 }
2648               else if (c == '\\')
2649                 add_len = 1, add_stuff = "\\";
2650               else
2651                 {
2652                   xfree (substed);
2653                   error ("Invalid use of `\\' in replacement text");
2654                 }
2655             }
2656           else
2657             {
2658               add_len = CHAR_STRING (c, str);
2659               add_stuff = str;
2660             }
2661
2662           /* If we want to copy part of a previous match,
2663              set up ADD_STUFF and ADD_LEN to point to it.  */
2664           if (idx >= 0)
2665             {
2666               int begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
2667               add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
2668               if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
2669                 move_gap (search_regs.start[idx]);
2670               add_stuff = BYTE_POS_ADDR (begbyte);
2671             }
2672
2673           /* Now the stuff we want to add to SUBSTED
2674              is invariably ADD_LEN bytes starting at ADD_STUFF.  */
2675
2676           /* Make sure SUBSTED is big enough.  */
2677           if (substed_len + add_len >= substed_alloc_size)
2678             {
2679               substed_alloc_size = substed_len + add_len + 500;
2680               substed = (unsigned char *) xrealloc (substed,
2681                                                     substed_alloc_size + 1);
2682             }
2683
2684           /* Now add to the end of SUBSTED.  */
2685           if (add_stuff)
2686             {
2687               bcopy (add_stuff, substed + substed_len, add_len);
2688               substed_len += add_len;
2689             }
2690         }
2691
2692       if (really_changed)
2693         {
2694           if (buf_multibyte)
2695             {
2696               int nchars = multibyte_chars_in_text (substed, substed_len);
2697
2698               newtext = make_multibyte_string (substed, nchars, substed_len);
2699             }
2700           else
2701             newtext = make_unibyte_string (substed, substed_len);
2702         }
2703       xfree (substed);
2704     }
2705
2706   /* Replace the old text with the new in the cleanest possible way.  */
2707   replace_range (search_regs.start[sub], search_regs.end[sub],
2708                  newtext, 1, 0, 1);
2709   newpoint = search_regs.start[sub] + SCHARS (newtext);
2710
2711   if (case_action == all_caps)
2712     Fupcase_region (make_number (search_regs.start[sub]),
2713                     make_number (newpoint));
2714   else if (case_action == cap_initial)
2715     Fupcase_initials_region (make_number (search_regs.start[sub]),
2716                              make_number (newpoint));
2717
2718   /* Adjust search data for this change.  */
2719   {
2720     int oldend = search_regs.end[sub];
2721     int oldstart = search_regs.start[sub];
2722     int change = newpoint - search_regs.end[sub];
2723     int i;
2724
2725     for (i = 0; i < search_regs.num_regs; i++)
2726       {
2727         if (search_regs.start[i] >= oldend)
2728           search_regs.start[i] += change;
2729         else if (search_regs.start[i] > oldstart)
2730           search_regs.start[i] = oldstart;
2731         if (search_regs.end[i] >= oldend)
2732           search_regs.end[i] += change;
2733         else if (search_regs.end[i] > oldstart)
2734           search_regs.end[i] = oldstart;
2735       }
2736   }
2737
2738   /* Put point back where it was in the text.  */
2739   if (opoint <= 0)
2740     TEMP_SET_PT (opoint + ZV);
2741   else
2742     TEMP_SET_PT (opoint);
2743
2744   /* Now move point "officially" to the start of the inserted replacement.  */
2745   move_if_not_intangible (newpoint);
2746
2747   return Qnil;
2748 }
2749 \f
2750 static Lisp_Object
2751 match_limit (num, beginningp)
2752      Lisp_Object num;
2753      int beginningp;
2754 {
2755   register int n;
2756
2757   CHECK_NUMBER (num);
2758   n = XINT (num);
2759   if (n < 0)
2760     args_out_of_range (num, make_number (0));
2761   if (search_regs.num_regs <= 0)
2762     error ("No match data, because no search succeeded");
2763   if (n >= search_regs.num_regs
2764       || search_regs.start[n] < 0)
2765     return Qnil;
2766   return (make_number ((beginningp) ? search_regs.start[n]
2767                                     : search_regs.end[n]));
2768 }
2769
2770 DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
2771        doc: /* Return position of start of text matched by last search.
2772 SUBEXP, a number, specifies which parenthesized expression in the last
2773   regexp.
2774 Value is nil if SUBEXPth pair didn't match, or there were less than
2775   SUBEXP pairs.
2776 Zero means the entire text matched by the whole regexp or whole string.  */)
2777      (subexp)
2778      Lisp_Object subexp;
2779 {
2780   return match_limit (subexp, 1);
2781 }
2782
2783 DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
2784        doc: /* Return position of end of text matched by last search.
2785 SUBEXP, a number, specifies which parenthesized expression in the last
2786   regexp.
2787 Value is nil if SUBEXPth pair didn't match, or there were less than
2788   SUBEXP pairs.
2789 Zero means the entire text matched by the whole regexp or whole string.  */)
2790      (subexp)
2791      Lisp_Object subexp;
2792 {
2793   return match_limit (subexp, 0);
2794 }
2795
2796 DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 3, 0,
2797        doc: /* Return a list containing all info on what the last search matched.
2798 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2799 All the elements are markers or nil (nil if the Nth pair didn't match)
2800 if the last match was on a buffer; integers or nil if a string was matched.
2801 Use `store-match-data' to reinstate the data in this list.
2802
2803 If INTEGERS (the optional first argument) is non-nil, always use
2804 integers \(rather than markers) to represent buffer positions.  In
2805 this case, and if the last match was in a buffer, the buffer will get
2806 stored as one additional element at the end of the list.
2807
2808 If REUSE is a list, reuse it as part of the value.  If REUSE is long
2809 enough to hold all the values, and if INTEGERS is non-nil, no consing
2810 is done.
2811
2812 If optional third arg RESEAT is non-nil, any previous markers on the
2813 REUSE list will be modified to point to nowhere.
2814
2815 Return value is undefined if the last search failed.  */)
2816   (integers, reuse, reseat)
2817      Lisp_Object integers, reuse, reseat;
2818 {
2819   Lisp_Object tail, prev;
2820   Lisp_Object *data;
2821   int i, len;
2822
2823   if (!NILP (reseat))
2824     for (tail = reuse; CONSP (tail); tail = XCDR (tail))
2825       if (MARKERP (XCAR (tail)))
2826         {
2827           unchain_marker (XMARKER (XCAR (tail)));
2828           XSETCAR (tail, Qnil);
2829         }
2830
2831   if (NILP (last_thing_searched))
2832     return Qnil;
2833
2834   prev = Qnil;
2835
2836   data = (Lisp_Object *) alloca ((2 * search_regs.num_regs + 1)
2837                                  * sizeof (Lisp_Object));
2838
2839   len = 0;
2840   for (i = 0; i < search_regs.num_regs; i++)
2841     {
2842       int start = search_regs.start[i];
2843       if (start >= 0)
2844         {
2845           if (EQ (last_thing_searched, Qt)
2846               || ! NILP (integers))
2847             {
2848               XSETFASTINT (data[2 * i], start);
2849               XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
2850             }
2851           else if (BUFFERP (last_thing_searched))
2852             {
2853               data[2 * i] = Fmake_marker ();
2854               Fset_marker (data[2 * i],
2855                            make_number (start),
2856                            last_thing_searched);
2857               data[2 * i + 1] = Fmake_marker ();
2858               Fset_marker (data[2 * i + 1],
2859                            make_number (search_regs.end[i]),
2860                            last_thing_searched);
2861             }
2862           else
2863             /* last_thing_searched must always be Qt, a buffer, or Qnil.  */
2864             abort ();
2865
2866           len = 2 * i + 2;
2867         }
2868       else
2869         data[2 * i] = data[2 * i + 1] = Qnil;
2870     }
2871
2872   if (BUFFERP (last_thing_searched) && !NILP (integers))
2873     {
2874       data[len] = last_thing_searched;
2875       len++;
2876     }
2877
2878   /* If REUSE is not usable, cons up the values and return them.  */
2879   if (! CONSP (reuse))
2880     return Flist (len, data);
2881
2882   /* If REUSE is a list, store as many value elements as will fit
2883      into the elements of REUSE.  */
2884   for (i = 0, tail = reuse; CONSP (tail);
2885        i++, tail = XCDR (tail))
2886     {
2887       if (i < len)
2888         XSETCAR (tail, data[i]);
2889       else
2890         XSETCAR (tail, Qnil);
2891       prev = tail;
2892     }
2893
2894   /* If we couldn't fit all value elements into REUSE,
2895      cons up the rest of them and add them to the end of REUSE.  */
2896   if (i < len)
2897     XSETCDR (prev, Flist (len - i, data + i));
2898
2899   return reuse;
2900 }
2901
2902 /* We used to have an internal use variant of `reseat' described as:
2903
2904       If RESEAT is `evaporate', put the markers back on the free list
2905       immediately.  No other references to the markers must exist in this
2906       case, so it is used only internally on the unwind stack and
2907       save-match-data from Lisp.
2908
2909    But it was ill-conceived: those supposedly-internal markers get exposed via
2910    the undo-list, so freeing them here is unsafe.  */
2911
2912 DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 2, 0,
2913        doc: /* Set internal data on last search match from elements of LIST.
2914 LIST should have been created by calling `match-data' previously.
2915
2916 If optional arg RESEAT is non-nil, make markers on LIST point nowhere.  */)
2917     (list, reseat)
2918      register Lisp_Object list, reseat;
2919 {
2920   register int i;
2921   register Lisp_Object marker;
2922
2923   if (running_asynch_code)
2924     save_search_regs ();
2925
2926   CHECK_LIST (list);
2927
2928   /* Unless we find a marker with a buffer or an explicit buffer
2929      in LIST, assume that this match data came from a string.  */
2930   last_thing_searched = Qt;
2931
2932   /* Allocate registers if they don't already exist.  */
2933   {
2934     int length = XFASTINT (Flength (list)) / 2;
2935
2936     if (length > search_regs.num_regs)
2937       {
2938         if (search_regs.num_regs == 0)
2939           {
2940             search_regs.start
2941               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2942             search_regs.end
2943               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2944           }
2945         else
2946           {
2947             search_regs.start
2948               = (regoff_t *) xrealloc (search_regs.start,
2949                                        length * sizeof (regoff_t));
2950             search_regs.end
2951               = (regoff_t *) xrealloc (search_regs.end,
2952                                        length * sizeof (regoff_t));
2953           }
2954
2955         for (i = search_regs.num_regs; i < length; i++)
2956           search_regs.start[i] = -1;
2957
2958         search_regs.num_regs = length;
2959       }
2960
2961     for (i = 0; CONSP (list); i++)
2962       {
2963         marker = XCAR (list);
2964         if (BUFFERP (marker))
2965           {
2966             last_thing_searched = marker;
2967             break;
2968           }
2969         if (i >= length)
2970           break;
2971         if (NILP (marker))
2972           {
2973             search_regs.start[i] = -1;
2974             list = XCDR (list);
2975           }
2976         else
2977           {
2978             int from;
2979             Lisp_Object m;
2980
2981             m = marker;
2982             if (MARKERP (marker))
2983               {
2984                 if (XMARKER (marker)->buffer == 0)
2985                   XSETFASTINT (marker, 0);
2986                 else
2987                   XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
2988               }
2989
2990             CHECK_NUMBER_COERCE_MARKER (marker);
2991             from = XINT (marker);
2992
2993             if (!NILP (reseat) && MARKERP (m))
2994               {
2995                 unchain_marker (XMARKER (m));
2996                 XSETCAR (list, Qnil);
2997               }
2998
2999             if ((list = XCDR (list), !CONSP (list)))
3000               break;
3001
3002             m = marker = XCAR (list);
3003
3004             if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
3005               XSETFASTINT (marker, 0);
3006
3007             CHECK_NUMBER_COERCE_MARKER (marker);
3008             search_regs.start[i] = from;
3009             search_regs.end[i] = XINT (marker);
3010
3011             if (!NILP (reseat) && MARKERP (m))
3012               {
3013                 unchain_marker (XMARKER (m));
3014                 XSETCAR (list, Qnil);
3015               }
3016           }
3017         list = XCDR (list);
3018       }
3019
3020     for (; i < search_regs.num_regs; i++)
3021       search_regs.start[i] = -1;
3022   }
3023
3024   return Qnil;
3025 }
3026
3027 /* If non-zero the match data have been saved in saved_search_regs
3028    during the execution of a sentinel or filter. */
3029 static int search_regs_saved;
3030 static struct re_registers saved_search_regs;
3031 static Lisp_Object saved_last_thing_searched;
3032
3033 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
3034    if asynchronous code (filter or sentinel) is running. */
3035 static void
3036 save_search_regs ()
3037 {
3038   if (!search_regs_saved)
3039     {
3040       saved_search_regs.num_regs = search_regs.num_regs;
3041       saved_search_regs.start = search_regs.start;
3042       saved_search_regs.end = search_regs.end;
3043       saved_last_thing_searched = last_thing_searched;
3044       last_thing_searched = Qnil;
3045       search_regs.num_regs = 0;
3046       search_regs.start = 0;
3047       search_regs.end = 0;
3048
3049       search_regs_saved = 1;
3050     }
3051 }
3052
3053 /* Called upon exit from filters and sentinels. */
3054 void
3055 restore_search_regs ()
3056 {
3057   if (search_regs_saved)
3058     {
3059       if (search_regs.num_regs > 0)
3060         {
3061           xfree (search_regs.start);
3062           xfree (search_regs.end);
3063         }
3064       search_regs.num_regs = saved_search_regs.num_regs;
3065       search_regs.start = saved_search_regs.start;
3066       search_regs.end = saved_search_regs.end;
3067       last_thing_searched = saved_last_thing_searched;
3068       saved_last_thing_searched = Qnil;
3069       search_regs_saved = 0;
3070     }
3071 }
3072
3073 static Lisp_Object
3074 unwind_set_match_data (list)
3075      Lisp_Object list;
3076 {
3077   /* It is NOT ALWAYS safe to free (evaporate) the markers immediately.  */
3078   return Fset_match_data (list, Qt);
3079 }
3080
3081 /* Called to unwind protect the match data.  */
3082 void
3083 record_unwind_save_match_data ()
3084 {
3085   record_unwind_protect (unwind_set_match_data,
3086                          Fmatch_data (Qnil, Qnil, Qnil));
3087 }
3088
3089 /* Quote a string to inactivate reg-expr chars */
3090
3091 DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
3092        doc: /* Return a regexp string which matches exactly STRING and nothing else.  */)
3093      (string)
3094      Lisp_Object string;
3095 {
3096   register unsigned char *in, *out, *end;
3097   register unsigned char *temp;
3098   int backslashes_added = 0;
3099
3100   CHECK_STRING (string);
3101
3102   temp = (unsigned char *) alloca (SBYTES (string) * 2);
3103
3104   /* Now copy the data into the new string, inserting escapes. */
3105
3106   in = SDATA (string);
3107   end = in + SBYTES (string);
3108   out = temp;
3109
3110   for (; in != end; in++)
3111     {
3112       if (*in == '['
3113           || *in == '*' || *in == '.' || *in == '\\'
3114           || *in == '?' || *in == '+'
3115           || *in == '^' || *in == '$')
3116         *out++ = '\\', backslashes_added++;
3117       *out++ = *in;
3118     }
3119
3120   return make_specified_string (temp,
3121                                 SCHARS (string) + backslashes_added,
3122                                 out - temp,
3123                                 STRING_MULTIBYTE (string));
3124 }
3125 \f
3126 void
3127 syms_of_search ()
3128 {
3129   register int i;
3130
3131   for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
3132     {
3133       searchbufs[i].buf.allocated = 100;
3134       searchbufs[i].buf.buffer = (unsigned char *) xmalloc (100);
3135       searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
3136       searchbufs[i].regexp = Qnil;
3137       searchbufs[i].whitespace_regexp = Qnil;
3138       searchbufs[i].syntax_table = Qnil;
3139       staticpro (&searchbufs[i].regexp);
3140       staticpro (&searchbufs[i].whitespace_regexp);
3141       staticpro (&searchbufs[i].syntax_table);
3142       searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
3143     }
3144   searchbuf_head = &searchbufs[0];
3145
3146   Qsearch_failed = intern ("search-failed");
3147   staticpro (&Qsearch_failed);
3148   Qinvalid_regexp = intern ("invalid-regexp");
3149   staticpro (&Qinvalid_regexp);
3150
3151   Fput (Qsearch_failed, Qerror_conditions,
3152         Fcons (Qsearch_failed, Fcons (Qerror, Qnil)));
3153   Fput (Qsearch_failed, Qerror_message,
3154         build_string ("Search failed"));
3155
3156   Fput (Qinvalid_regexp, Qerror_conditions,
3157         Fcons (Qinvalid_regexp, Fcons (Qerror, Qnil)));
3158   Fput (Qinvalid_regexp, Qerror_message,
3159         build_string ("Invalid regexp"));
3160
3161   last_thing_searched = Qnil;
3162   staticpro (&last_thing_searched);
3163
3164   saved_last_thing_searched = Qnil;
3165   staticpro (&saved_last_thing_searched);
3166
3167   DEFVAR_LISP ("search-spaces-regexp", &Vsearch_spaces_regexp,
3168       doc: /* Regexp to substitute for bunches of spaces in regexp search.
3169 Some commands use this for user-specified regexps.
3170 Spaces that occur inside character classes or repetition operators
3171 or other such regexp constructs are not replaced with this.
3172 A value of nil (which is the normal value) means treat spaces literally.  */);
3173   Vsearch_spaces_regexp = Qnil;
3174
3175   defsubr (&Slooking_at);
3176   defsubr (&Sposix_looking_at);
3177   defsubr (&Sstring_match);
3178   defsubr (&Sposix_string_match);
3179   defsubr (&Ssearch_forward);
3180   defsubr (&Ssearch_backward);
3181   defsubr (&Sword_search_forward);
3182   defsubr (&Sword_search_backward);
3183   defsubr (&Sre_search_forward);
3184   defsubr (&Sre_search_backward);
3185   defsubr (&Sposix_search_forward);
3186   defsubr (&Sposix_search_backward);
3187   defsubr (&Sreplace_match);
3188   defsubr (&Smatch_beginning);
3189   defsubr (&Smatch_end);
3190   defsubr (&Smatch_data);
3191   defsubr (&Sset_match_data);
3192   defsubr (&Sregexp_quote);
3193 }
3194
3195 /* arch-tag: a6059d79-0552-4f14-a2cb-d379a4e3c78f
3196    (do not change this comment) */