src/search.c

   1 /* String search routines for GNU Emacs.
   2    Copyright (C) 1985, 86,87,93,94,97,98, 1999 Free Software Foundation, Inc.
   3
   4 This file is part of GNU Emacs.
   5
   6 GNU Emacs is free software; you can redistribute it and/or modify
   7 it under the terms of the GNU General Public License as published by
   8 the Free Software Foundation; either version 2, or (at your option)
   9 any later version.
  10
  11 GNU Emacs is distributed in the hope that it will be useful,
  12 but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 GNU General Public License for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GNU Emacs; see the file COPYING.  If not, write to
  18 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  19 Boston, MA 02111-1307, USA.  */
  20
  21
  22 #include <config.h>
  23 #include "lisp.h"
  24 #include "syntax.h"
  25 #include "category.h"
  26 #include "buffer.h"
  27 #include "charset.h"
  28 #include "region-cache.h"
  29 #include "commands.h"
  30 #include "blockinput.h"
  31 #include "intervals.h"
  32
  33 #include <sys/types.h>
  34 #include "regex.h"
  35
  36 #define REGEXP_CACHE_SIZE 20
  37
  38 /* If the regexp is non-nil, then the buffer contains the compiled form
  39    of that regexp, suitable for searching.  */
  40 struct regexp_cache
  41 {
       /* Next entry in the cache's linked list (headed by searchbuf_head);
          0 marks the end of the list.  */
  42   struct regexp_cache *next;
       /* A copy of the pattern string this entry was compiled from, or nil
          if the entry holds no valid compiled pattern.  */
  43   Lisp_Object regexp;
       /* The compiled pattern, filled in by re_compile_pattern.  */
  44   struct re_pattern_buffer buf;
       /* 0400 is octal for 256 bytes.  Presumably installed as buf.fastmap
          by initialization code not visible here -- TODO confirm.  */
  45   char fastmap[0400];
  46   /* Nonzero means regexp was compiled to do full POSIX backtracking.  */
  47   char posix;
  48 };
  49
  50 /* The instances of that struct.  */
  51 struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
  52
  53 /* The head of the linked list; points to the most recently used buffer.  */
  54 struct regexp_cache *searchbuf_head;
  55
  56
  57 /* Every call to re_match, etc., must pass &search_regs as the regs
  58    argument unless you can show it is unnecessary (i.e., if re_match
  59    is certainly going to be called again before region-around-match
  60    can be called).
  61
  62    Since the registers are now dynamically allocated, we need to make
  63    sure not to refer to the Nth register before checking that it has
  64    been allocated by checking search_regs.num_regs.
  65
  66    The regex code keeps track of whether it has allocated the search
  67    buffer using bits in the re_pattern_buffer.  This means that whenever
  68    you compile a new pattern, it completely forgets whether it has
  69    allocated any registers, and will allocate new registers the next
  70    time you call a searching or matching function.  Therefore, we need
  71    to call re_set_registers after compiling a new pattern or after
  72    setting the match registers, so that the regex functions will be
  73    able to free or re-allocate it properly.  */
  74 static struct re_registers search_regs;
  75
  76 /* The buffer in which the last search was performed, or
  77    Qt if the last search was done in a string;
  78    Qnil if no searching has been done yet.  */
  79 static Lisp_Object last_thing_searched;
  80
  81 /* error condition signaled when regexp compile_pattern fails */
  82
  83 Lisp_Object Qinvalid_regexp;
  84
  85 static void set_search_regs ();
  86 static void save_search_regs ();
  87 static int simple_search ();
  88 static int boyer_moore ();
  89 static int search_buffer ();
  90
  91 static void
  92 matcher_overflow ()
  93 {
       /* Signal a Lisp error for the matcher's overflow condition.  The
          callers in this file invoke this when re_match_2 or re_search
          returns -2.  */
  94   error ("Stack overflow in regexp matcher");
  95 }
  96
  97 /* Compile a regexp and signal a Lisp error if anything goes wrong.
  98    PATTERN is the pattern to compile.
  99    CP is the place to put the result.
 100    TRANSLATE is a translation table for ignoring case, or nil for none.
 101    REGP is the structure that says where to store the "register"
 102    values that will result from matching this pattern.
 103    If it is 0, we should compile the pattern not to record any
 104    subexpression bounds.
 105    POSIX is nonzero if we want full backtracking (POSIX style)
 106    for this pattern.  0 means backtrack only enough to get a valid match.
 107    MULTIBYTE is nonzero if we want to handle multibyte characters in
 108    PATTERN.  0 means all multibyte characters are recognized just as
 109    sequences of binary data.  */
 110
 111 static void
 112 compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte)
 113      struct regexp_cache *cp;
 114      Lisp_Object pattern;
 115      Lisp_Object translate;
 116      struct re_registers *regp;
 117      int posix;
 118      int multibyte;
 119 {
       /* NOTE: REGP is accepted but never referenced in this function.  */
 120   unsigned char *raw_pattern;
 121   int raw_pattern_size;
 122   char *val;
 123   reg_syntax_t old;
 124
 125   /* MULTIBYTE says whether the text to be searched is multibyte.
 126      We must convert PATTERN to match that, or we will not really
 127      find things right.  */
 128
 129   if (multibyte == STRING_MULTIBYTE (pattern))
 130     {
           /* Representations already agree: use PATTERN's bytes directly.  */
 131       raw_pattern = (unsigned char *) SDATA (pattern);
 132       raw_pattern_size = SBYTES (pattern);
 133     }
 134   else if (multibyte)
 135     {
           /* Unibyte pattern, multibyte text: convert into a stack buffer
              whose size is computed by count_size_as_multibyte.  */
 136       raw_pattern_size = count_size_as_multibyte (SDATA (pattern),
 137                                                   SCHARS (pattern));
 138       raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
 139       copy_text (SDATA (pattern), raw_pattern,
 140                  SCHARS (pattern), 0, 1);
 141     }
 142   else
 143     {
 144       /* Converting multibyte to single-byte.
 145
 146          ??? Perhaps this conversion should be done in a special way
 147          by subtracting nonascii-insert-offset from each non-ASCII char,
 148          so that only the multibyte chars which really correspond to
 149          the chosen single-byte character set can possibly match.  */
 150       raw_pattern_size = SCHARS (pattern);
 151       raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
 152       copy_text (SDATA (pattern), raw_pattern,
 153                  SBYTES (pattern), 1, 0);
 154     }
 155
       /* Mark the entry invalid first; it stays nil if the compilation
          below signals an error.  */
 156   cp->regexp = Qnil;
       /* make_number (0) is stored when there is no translation table.  */
 157   cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
 158   cp->posix = posix;
 159   cp->buf.multibyte = multibyte;
 160   BLOCK_INPUT;
       /* Select the Emacs regexp syntax for this compilation only; the
          previous syntax is put back right after compiling.  */
 161   old = re_set_syntax (RE_SYNTAX_EMACS
 162                        | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
 163   val = (char *) re_compile_pattern ((char *)raw_pattern,
 164                                      raw_pattern_size, &cp->buf);
 165   re_set_syntax (old);
 166   UNBLOCK_INPUT;
       /* A non-null VAL is used as the error message of the signal.  */
 167   if (val)
 168     Fsignal (Qinvalid_regexp, Fcons (build_string (val), Qnil));
 169
       /* Remember a copy of PATTERN; compile_pattern compares it against
          later patterns to detect cache hits.  */
 170   cp->regexp = Fcopy_sequence (pattern);
 171 }
 172
 173 /* Shrink each compiled regexp buffer in the cache
 174    to the size actually used right now.
 175    This is called from garbage collection.  */
 176
 177 void
 178 shrink_regexp_cache ()
 179 {
 180   struct regexp_cache *cp;
 181
 182   for (cp = searchbuf_head; cp != 0; cp = cp->next)
 183     {
           unsigned char *shrunk;

           /* Leave entries with nothing in use alone: the result of
              realloc with a size of zero is implementation-defined.  */
           if (cp->buf.used == 0)
             continue;

           /* Shrinking is only an optimization.  If realloc fails the old
              block is still valid, so keep it (and its recorded size)
              rather than storing a null pointer and leaking the block.  */
           shrunk = (unsigned char *) realloc (cp->buf.buffer, cp->buf.used);
           if (shrunk == 0)
             continue;

 184       cp->buf.allocated = cp->buf.used;
 185       cp->buf.buffer = shrunk;
 187     }
 188 }
 189
 190 /* Compile a regexp if necessary, but first check to see if there's one in
 191    the cache.
 192    PATTERN is the pattern to compile.
 193    TRANSLATE is a translation table for ignoring case, or nil for none.
 194    REGP is the structure that says where to store the "register"
 195    values that will result from matching this pattern.
 196    If it is 0, we should compile the pattern not to record any
 197    subexpression bounds.
 198    POSIX is nonzero if we want full backtracking (POSIX style)
 199    for this pattern.  0 means backtrack only enough to get a valid match.  */
 200
 201 struct re_pattern_buffer *
 202 compile_pattern (pattern, regp, translate, posix, multibyte)
 203      Lisp_Object pattern;
 204      struct re_registers *regp;
 205      Lisp_Object translate;
 206      int posix, multibyte;
 207 {
       /* MULTIBYTE is passed through to compile_pattern_1 and is also part
          of the cache key.  CPP always points at the link that points to
          CP, so that CP can be unlinked from the list further down.  */
 208   struct regexp_cache *cp, **cpp;
 209
 210   for (cpp = &searchbuf_head; ; cpp = &cp->next)
 211     {
 212       cp = *cpp;
 213       /* Entries are initialized to nil, and may be set to nil by
 214          compile_pattern_1 if the pattern isn't valid.  Don't apply
 215          string accessors in those cases.  However, compile_pattern_1
 216          is only applied to the cache entry we pick here to reuse.  So
 217          nil should never appear before a non-nil entry.  */
 218       if (NILP (cp->regexp))
 219         goto compile_it;
           /* Cache hit: same pattern text and multibyteness, same
              translation table, same POSIX flag and same target
              multibyteness.  */
 220       if (SCHARS (cp->regexp) == SCHARS (pattern)
 221           && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
 222           && !NILP (Fstring_equal (cp->regexp, pattern))
 223           && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
 224           && cp->posix == posix
 225           && cp->buf.multibyte == multibyte)
 226         break;
 227
 228       /* If we're at the end of the cache, compile into the nil cell
 229          we found, or the last (least recently used) cell with a
 230          string value.  */
 231       if (cp->next == 0)
 232         {
 233         compile_it:
 234           compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte);
 235           break;
 236         }
 237     }
 238
 239   /* When we get here, cp (aka *cpp) contains the compiled pattern,
 240      either because we found it in the cache or because we just compiled it.
 241      Move it to the front of the queue to mark it as most recently used.  */
 242   *cpp = cp->next;
 243   cp->next = searchbuf_head;
 244   searchbuf_head = cp;
 245
 246   /* Advise the searching functions about the space we have allocated
 247      for register data.  */
 248   if (regp)
 249     re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
 250
 251   return &cp->buf;
 252 }
 253
 254 /* Error condition used for failing searches */
 255 Lisp_Object Qsearch_failed;
 256
 257 Lisp_Object
 258 signal_failure (arg)
 259      Lisp_Object arg;
 260 {
       /* Signal Qsearch_failed with the one-element list (ARG) as data.  */
 261   Fsignal (Qsearch_failed, Fcons (arg, Qnil));
       /* Value handed back to the caller if Fsignal returns.  */
 262   return Qnil;
 263 }
 264 \f
 265 static Lisp_Object
 266 looking_at_1 (string, posix)
 267      Lisp_Object string;
 268      int posix;
 269 {
       /* Shared implementation of `looking-at' and `posix-looking-at'.
          Match the regexp STRING against the current buffer starting at
          point.  POSIX is handed to compile_pattern.  Return Qt if the
          text matches, Qnil otherwise.  Updates search_regs and sets
          last_thing_searched to the current buffer.  */
 270   Lisp_Object val;
 271   unsigned char *p1, *p2;
 272   int s1, s2;
 273   register int i;
 274   struct re_pattern_buffer *bufp;
 275
 276   if (running_asynch_code)
 277     save_search_regs ();
 278
 279   CHECK_STRING (string);
       /* Fold case with DOWNCASE_TABLE when case-fold-search is non-nil;
          the multibyte flag follows the current buffer.  */
 280   bufp = compile_pattern (string, &search_regs,
 281                           (!NILP (current_buffer->case_fold_search)
 282                            ? DOWNCASE_TABLE : Qnil),
 283                           posix,
 284                           !NILP (current_buffer->enable_multibyte_characters));
 285
 286   immediate_quit = 1;
 287   QUIT;                 /* Do a pending quit right away, to avoid paradoxical behavior */
 288
 289   /* Get pointers and sizes of the two strings
 290      that make up the visible portion of the buffer. */
 291
 292   p1 = BEGV_ADDR;
 293   s1 = GPT_BYTE - BEGV_BYTE;
 294   p2 = GAP_END_ADDR;
 295   s2 = ZV_BYTE - GPT_BYTE;
       /* GPT_BYTE is before BEGV_BYTE: all of the visible text is passed
          as the second string and the first one is empty.  */
 296   if (s1 < 0)
 297     {
 298       p2 = p1;
 299       s2 = ZV_BYTE - BEGV_BYTE;
 300       s1 = 0;
 301     }
       /* GPT_BYTE is after ZV_BYTE: all of the visible text is passed as
          the first string and the second one is empty.  */
 302   if (s2 < 0)
 303     {
 304       s1 = ZV_BYTE - BEGV_BYTE;
 305       s2 = 0;
 306     }
 307
 308   re_match_object = Qnil;
 309
       /* Positions given to the matcher are byte offsets from BEGV_BYTE.  */
 310   i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
 311                   PT_BYTE - BEGV_BYTE, &search_regs,
 312                   ZV_BYTE - BEGV_BYTE);
 313   immediate_quit = 0;
 314
 315   if (i == -2)
 316     matcher_overflow ();
 317
 318   val = (0 <= i ? Qt : Qnil);
       /* On a match, turn each recorded register from a byte offset
          relative to BEGV_BYTE into a character position.  */
 319   if (i >= 0)
 320     for (i = 0; i < search_regs.num_regs; i++)
 321       if (search_regs.start[i] >= 0)
 322         {
 323           search_regs.start[i]
 324             = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
 325           search_regs.end[i]
 326             = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
 327         }
 328   XSETBUFFER (last_thing_searched, current_buffer);
 329   return val;
 330 }
 331
 332 DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
 333        doc: /* Return t if text after point matches regular expression REGEXP.
 334 This function modifies the match data that `match-beginning',
 335 `match-end' and `match-data' access; save and restore the match
 336 data if you want to preserve them.  */)
 337      (regexp)
 338      Lisp_Object regexp;
 339 {
       /* The second argument is looking_at_1's POSIX flag; 0 here.  */
 340   return looking_at_1 (regexp, 0);
 341 }
 342
 343 DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
 344        doc: /* Return t if text after point matches regular expression REGEXP.
 345 Find the longest match, in accord with Posix regular expression rules.
 346 This function modifies the match data that `match-beginning',
 347 `match-end' and `match-data' access; save and restore the match
 348 data if you want to preserve them.  */)
 349      (regexp)
 350      Lisp_Object regexp;
 351 {
       /* The second argument is looking_at_1's POSIX flag; 1 here.  */
 352   return looking_at_1 (regexp, 1);
 353 }
 354 \f
 355 static Lisp_Object
 356 string_match_1 (regexp, string, start, posix)
 357      Lisp_Object regexp, string, start;
 358      int posix;
 359 {
       /* Shared implementation of `string-match' and `posix-string-match'.
          Search STRING for REGEXP beginning at character index START
          (nil means 0).  POSIX is handed to compile_pattern.  Return the
          character index of the match as a Lisp number, or Qnil if there
          is no match.  Updates search_regs and sets last_thing_searched
          to Qt.  */
 360   int val;
 361   struct re_pattern_buffer *bufp;
 362   int pos, pos_byte;
 363   int i;
 364
 365   if (running_asynch_code)
 366     save_search_regs ();
 367
 368   CHECK_STRING (regexp);
 369   CHECK_STRING (string);
 370
 371   if (NILP (start))
 372     pos = 0, pos_byte = 0;
 373   else
 374     {
 375       int len = SCHARS (string);
 376
 377       CHECK_NUMBER (start);
 378       pos = XINT (start);
           /* A negative START no smaller than -LEN counts back from the
              end of STRING; anything else outside 0..LEN is an error.  */
 379       if (pos < 0 && -pos <= len)
 380         pos = len + pos;
 381       else if (0 > pos || pos > len)
 382         args_out_of_range (string, start);
 383       pos_byte = string_char_to_byte (string, pos);
 384     }
 385
       /* Fold case with DOWNCASE_TABLE when case-fold-search is non-nil in
          the current buffer; the multibyte flag follows STRING.  */
 386   bufp = compile_pattern (regexp, &search_regs,
 387                           (!NILP (current_buffer->case_fold_search)
 388                            ? DOWNCASE_TABLE : Qnil),
 389                           posix,
 390                           STRING_MULTIBYTE (string));
 391   immediate_quit = 1;
 392   re_match_object = string;
 393
       /* Search from byte POS_BYTE over the rest of STRING.  */
 394   val = re_search (bufp, (char *) SDATA (string),
 395                    SBYTES (string), pos_byte,
 396                    SBYTES (string) - pos_byte,
 397                    &search_regs);
 398   immediate_quit = 0;
       /* This is recorded before the result is examined, so it is set
          whether or not the search succeeded.  */
 399   last_thing_searched = Qt;
 400   if (val == -2)
 401     matcher_overflow ();
 402   if (val < 0) return Qnil;
 403
       /* Turn each recorded register from a byte index into a character
          index within STRING.  */
 404   for (i = 0; i < search_regs.num_regs; i++)
 405     if (search_regs.start[i] >= 0)
 406       {
 407         search_regs.start[i]
 408           = string_byte_to_char (string, search_regs.start[i]);
 409         search_regs.end[i]
 410           = string_byte_to_char (string, search_regs.end[i]);
 411       }
 412
 413   return make_number (string_byte_to_char (string, val));
 414 }
 415
 416 DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
 417        doc: /* Return index of start of first match for REGEXP in STRING, or nil.
 418 Case is ignored if `case-fold-search' is non-nil in the current buffer.
 419 If third arg START is non-nil, start search at that index in STRING.
 420 For index of first char beyond the match, do (match-end 0).
 421 `match-end' and `match-beginning' also give indices of substrings
 422 matched by parenthesis constructs in the pattern.  */)
 423      (regexp, string, start)
 424      Lisp_Object regexp, string, start;
 425 {
       /* The last argument is string_match_1's POSIX flag; 0 here.  */
 426   return string_match_1 (regexp, string, start, 0);
 427 }
 428
 429 DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
 430        doc: /* Return index of start of first match for REGEXP in STRING, or nil.
 431 Find the longest match, in accord with Posix regular expression rules.
 432 Case is ignored if `case-fold-search' is non-nil in the current buffer.
 433 If third arg START is non-nil, start search at that index in STRING.
 434 For index of first char beyond the match, do (match-end 0).
 435 `match-end' and `match-beginning' also give indices of substrings
 436 matched by parenthesis constructs in the pattern.  */)
 437      (regexp, string, start)
 438      Lisp_Object regexp, string, start;
 439 {
       /* The last argument is string_match_1's POSIX flag; 1 here.  */
 440   return string_match_1 (regexp, string, start, 1);
 441 }
 442
 443 /* Match REGEXP against STRING, searching all of STRING,
 444    and return the index of the match, or negative on failure.
 445    This does not clobber the match data.  */
 446
 447 int
 448 fast_string_match (regexp, string)
 449      Lisp_Object regexp, string;
 450 {
 451   struct re_pattern_buffer *compiled;
 452   int nbytes, match_index;
 453
 454   /* No registers, no translation table, POSIX flag off.  */
 455   compiled = compile_pattern (regexp, 0, Qnil, 0, STRING_MULTIBYTE (string));
 456
 457   immediate_quit = 1;
 458   re_match_object = string;
 459   nbytes = SBYTES (string);
 460   match_index = re_search (compiled, (char *) SDATA (string),
 461                            nbytes, 0, nbytes, 0);
 462   immediate_quit = 0;
 463   return match_index;
 464 }
 465
 466 /* Match REGEXP against STRING, searching all of STRING ignoring case,
 467    and return the index of the match, or negative on failure.
 468    This does not clobber the match data.
 469    We assume that STRING contains single-byte characters.  */
 470
 471 extern Lisp_Object Vascii_downcase_table;
 472
 473 int
 474 fast_c_string_match_ignore_case (regexp, string)
 475      Lisp_Object regexp;
 476      const char *string;
 477 {
 478   struct re_pattern_buffer *compiled;
 479   int nbytes = strlen (string);
 480   int match_index;
 481
 482   /* STRING is taken to be single-byte, so use a unibyte pattern.  */
 483   regexp = string_make_unibyte (regexp);
 484   re_match_object = Qt;
 485   /* No registers; translate through the ASCII downcase table.  */
 486   compiled = compile_pattern (regexp, 0, Vascii_downcase_table, 0, 0);
 487   immediate_quit = 1;
 488   match_index = re_search (compiled, string, nbytes, 0, nbytes, 0);
 489   immediate_quit = 0;
 490   return match_index;
 491 }
 492 \f
 493 /* The newline cache: remembering which sections of text have no newlines.  */
 494
 495 /* If the user has requested newline caching, make sure it's on.
 496    Otherwise, make sure it's off.
 497    This is our cheezy way of associating an action with the change of
 498    state of a buffer-local variable.  */
 499 static void
 500 newline_cache_on_off (buf)
 501      struct buffer *buf;
 502 {
 503   /* A non-nil cache_long_line_scans means the cache is wanted.  */
 504   int wanted = ! NILP (buf->cache_long_line_scans);
 505
 506   if (wanted)
 507     {
 508       /* Create the cache unless BUF already has one.  */
 509       if (! buf->newline_cache)
 510         buf->newline_cache = new_region_cache ();
 511       return;
 512     }
 513   /* Not wanted: release whatever cache BUF still carries.  */
 514   if (! buf->newline_cache)
 515     return;
 516   free_region_cache (buf->newline_cache);
 517   buf->newline_cache = 0;
 518 }
 519
 520 \f
 521 /* Search for COUNT instances of the character TARGET between START and END.
 522
 523    If COUNT is positive, search forwards; END must be >= START.
 524    If COUNT is negative, search backwards for the -COUNTth instance;
 525       END must be <= START.
 526    If COUNT is zero, do anything you please; run rogue, for all I care.
 527
 528    If END is zero, use BEGV or ZV instead, as appropriate for the
 529    direction indicated by COUNT.
 530
 531    If we find COUNT instances, set *SHORTAGE to zero, and return the
 532    position after the COUNTth match.  Note that for reverse motion
 533    this is not the same as the usual convention for Emacs motion commands.
 534
 535    If we don't find COUNT instances before reaching END, set *SHORTAGE
 536    to the number of TARGETs left unfound, and return END.
 537
 538    If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
 539    except when inside redisplay.  */
 540
 541 int
 542 scan_buffer (target, start, end, count, shortage, allow_quit)
 543      register int target;
 544      int start, end;
 545      int count;
 546      int *shortage;
 547      int allow_quit;
 548 {
 549   struct region_cache *newline_cache;
 550   int direction;
 551
       /* DIRECTION is 1 for a positive COUNT and -1 otherwise (a zero COUNT
          takes the backward setup).  A zero END is replaced by ZV or BEGV.  */
 552   if (count > 0)
 553     {
 554       direction = 1;
 555       if (! end) end = ZV;
 556     }
 557   else
 558     {
 559       direction = -1;
 560       if (! end) end = BEGV;
 561     }
 562
       /* Create or free the buffer's newline cache as needed, then use
          whatever is there (possibly no cache at all).  */
 563   newline_cache_on_off (current_buffer);
 564   newline_cache = current_buffer->newline_cache;
 565
 566   if (shortage != 0)
 567     *shortage = 0;
 568
 569   immediate_quit = allow_quit;
 570
       /* Forward scan.  */
 571   if (count > 0)
 572     while (start != end)
 573       {
 574         /* Our innermost scanning loop is very simple; it doesn't know
 575            about gaps, buffer ends, or the newline cache.  ceiling is
 576            the position of the last character before the next such
 577            obstacle --- the last character the dumb search loop should
 578            examine.  */
 579         int ceiling_byte = CHAR_TO_BYTE (end) - 1;
 580         int start_byte = CHAR_TO_BYTE (start);
 581         int tem;
 582
 583         /* If we're looking for a newline, consult the newline cache
 584            to see where we can avoid some scanning.  */
 585         if (target == '\n' && newline_cache)
 586           {
 587             int next_change;
                 /* immediate_quit is cleared while the cache is consulted
                    and put back to ALLOW_QUIT afterwards.  */
 588             immediate_quit = 0;
 589             while (region_cache_forward
 590                    (current_buffer, newline_cache, start_byte, &next_change))
 591               start_byte = next_change;
 592             immediate_quit = allow_quit;
 593
 594             /* START should never be after END.  */
 595             if (start_byte > ceiling_byte)
 596               start_byte = ceiling_byte;
 597
 598             /* Now the text after start is an unknown region, and
 599                next_change is the position of the next known region. */
 600             ceiling_byte = min (next_change - 1, ceiling_byte);
 601           }
 602
 603         /* The dumb loop can only scan text stored in contiguous
 604            bytes. BUFFER_CEILING_OF returns the last character
 605            position that is contiguous, so the ceiling is the
 606            position after that.  */
 607         tem = BUFFER_CEILING_OF (start_byte);
 608         ceiling_byte = min (tem, ceiling_byte);
 609
 610         {
 611           /* The termination address of the dumb loop.  */
 612           register unsigned char *ceiling_addr
 613             = BYTE_POS_ADDR (ceiling_byte) + 1;
 614           register unsigned char *cursor
 615             = BYTE_POS_ADDR (start_byte);
               /* BASE stays at the address of START_BYTE, so the byte
                  position of any address P is START_BYTE + P - BASE.  */
 616           unsigned char *base = cursor;
 617
 618           while (cursor < ceiling_addr)
 619             {
 620               unsigned char *scan_start = cursor;
 621
 622               /* The dumb loop.  */
 623               while (*cursor != target && ++cursor < ceiling_addr)
 624                 ;
 625
 626               /* If we're looking for newlines, cache the fact that
 627                  the region from start to cursor is free of them. */
 628               if (target == '\n' && newline_cache)
 629                 know_region_cache (current_buffer, newline_cache,
 630                                    start_byte + scan_start - base,
 631                                    start_byte + cursor - base);
 632
 633               /* Did we find the target character?  */
 634               if (cursor < ceiling_addr)
 635                 {
                       /* That was the last one wanted: return the
                          position just after it.  */
 636                   if (--count == 0)
 637                     {
 638                       immediate_quit = 0;
 639                       return BYTE_TO_CHAR (start_byte + cursor - base + 1);
 640                     }
 641                   cursor++;
 642                 }
 643             }
 644
 645           start = BYTE_TO_CHAR (start_byte + cursor - base);
 646         }
 647       }
       /* Backward scan.  */
 648   else
 649     while (start > end)
 650       {
 651         /* The last character to check before the next obstacle.  */
 652         int ceiling_byte = CHAR_TO_BYTE (end);
 653         int start_byte = CHAR_TO_BYTE (start);
 654         int tem;
 655
 656         /* Consult the newline cache, if appropriate.  */
 657         if (target == '\n' && newline_cache)
 658           {
 659             int next_change;
 660             immediate_quit = 0;
 661             while (region_cache_backward
 662                    (current_buffer, newline_cache, start_byte, &next_change))
 663               start_byte = next_change;
 664             immediate_quit = allow_quit;
 665
 666             /* Start should never be at or before end.  */
 667             if (start_byte <= ceiling_byte)
 668               start_byte = ceiling_byte + 1;
 669
 670             /* Now the text before start is an unknown region, and
 671                next_change is the position of the next known region. */
 672             ceiling_byte = max (next_change, ceiling_byte);
 673           }
 674
 675         /* Stop scanning before the gap.  */
 676         tem = BUFFER_FLOOR_OF (start_byte - 1);
 677         ceiling_byte = max (tem, ceiling_byte);
 678
 679         {
 680           /* The termination address of the dumb loop.  */
 681           register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
 682           register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
 683           unsigned char *base = cursor;
 684
 685           while (cursor >= ceiling_addr)
 686             {
 687               unsigned char *scan_start = cursor;
 688
 689               while (*cursor != target && --cursor >= ceiling_addr)
 690                 ;
 691
 692               /* If we're looking for newlines, cache the fact that
 693                  the region from after the cursor to start is free of them.  */
 694               if (target == '\n' && newline_cache)
 695                 know_region_cache (current_buffer, newline_cache,
 696                                    start_byte + cursor - base,
 697                                    start_byte + scan_start - base);
 698
 699               /* Did we find the target character?  */
 700               if (cursor >= ceiling_addr)
 701                 {
                       /* COUNT is not positive here and is counted up
                          towards zero with each match.  */
 702                   if (++count >= 0)
 703                     {
 704                       immediate_quit = 0;
 705                       return BYTE_TO_CHAR (start_byte + cursor - base);
 706                     }
 707                   cursor--;
 708                 }
 709             }
 710
 711           start = BYTE_TO_CHAR (start_byte + cursor - base);
 712         }
 713       }
 714
       /* The scan stopped without returning from inside the loops above.
          Report what is left of COUNT as a non-negative number.  */
 715   immediate_quit = 0;
 716   if (shortage != 0)
 717     *shortage = count * direction;
 718   return start;
 719 }
 720 \f
 721 /* Search for COUNT instances of a line boundary, which means either a
 722    newline or (if selective display enabled) a carriage return.
 723    Start at START.  If COUNT is negative, search backwards.
 724
 725    We report the resulting position by calling TEMP_SET_PT_BOTH.
 726
 727    If we find COUNT instances, we position after (always after,
 728    even if scanning backwards) the COUNTth match, and return 0.
 729
 730    If we don't find COUNT instances before reaching the end of the
 731    buffer (or the beginning, if scanning backwards), we return
 732    the number of line boundaries left unfound, and position at
 733    the limit we bumped up against.
 734
 735    If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
 736    except in special cases.  */
 737
 738 int
 739 scan_newline (start, start_byte, limit, limit_byte, count, allow_quit)
 740      int start, start_byte;
 741      int limit, limit_byte;
 742      register int count;
 743      int allow_quit;
 744 {
 745   int direction = ((count > 0) ? 1 : -1);
 746
 747   register unsigned char *cursor;
 748   unsigned char *base;
 749
 750   register int ceiling;
 751   register unsigned char *ceiling_addr;
 752
       /* Saved so that the value can be put back on every exit path.  */
 753   int old_immediate_quit = immediate_quit;
 754
 755   /* The code that follows is like scan_buffer
 756      but checks for either newline or carriage return.  */
       /* NOTE(review): the loops below compare against '\n' only; no test
          for carriage return is visible in this function.  */
 757
 758   if (allow_quit)
 759     immediate_quit++;
 760
       /* The START_BYTE argument is overwritten here with the byte
          position computed from START.  */
 761   start_byte = CHAR_TO_BYTE (start);
 762
       /* Forward scan.  */
 763   if (count > 0)
 764     {
 765       while (start_byte < limit_byte)
 766         {
 767           ceiling =  BUFFER_CEILING_OF (start_byte);
 768           ceiling = min (limit_byte - 1, ceiling);
 769           ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
 770           base = (cursor = BYTE_POS_ADDR (start_byte));
 771           while (1)
 772             {
 773               while (*cursor != '\n' && ++cursor != ceiling_addr)
 774                 ;
 775
 776               if (cursor != ceiling_addr)
 777                 {
 778                   if (--count == 0)
 779                     {
                           /* Found the last newline wanted: put point
                              just after it.  */
 780                       immediate_quit = old_immediate_quit;
 781                       start_byte = start_byte + cursor - base + 1;
 782                       start = BYTE_TO_CHAR (start_byte);
 783                       TEMP_SET_PT_BOTH (start, start_byte);
 784                       return 0;
 785                     }
 786                   else
 787                     if (++cursor == ceiling_addr)
 788                       break;
 789                 }
 790               else
 791                 break;
 792             }
 793           start_byte += cursor - base;
 794         }
 795     }
       /* Backward scan.  */
 796   else
 797     {
 798       while (start_byte > limit_byte)
 799         {
 800           ceiling = BUFFER_FLOOR_OF (start_byte - 1);
 801           ceiling = max (limit_byte, ceiling);
 802           ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
 803           base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
 804           while (1)
 805             {
 806               while (--cursor != ceiling_addr && *cursor != '\n')
 807                 ;
 808
 809               if (cursor != ceiling_addr)
 810                 {
 811                   if (++count == 0)
 812                     {
 813                       immediate_quit = old_immediate_quit;
 814                       /* Return the position AFTER the match we found.  */
 815                       start_byte = start_byte + cursor - base + 1;
 816                       start = BYTE_TO_CHAR (start_byte);
 817                       TEMP_SET_PT_BOTH (start, start_byte);
 818                       return 0;
 819                     }
 820                 }
 821               else
 822                 break;
 823             }
 824           /* Here we add 1 to compensate for the last decrement
 825              of CURSOR, which took it past the valid range.  */
 826           start_byte += cursor - base + 1;
 827         }
 828     }
 829
       /* The limit was reached first: position there and return what is
          left of COUNT, with its sign adjusted by DIRECTION.  */
 830   TEMP_SET_PT_BOTH (limit, limit_byte);
 831   immediate_quit = old_immediate_quit;
 832
 833   return count * direction;
 834 }
 835
 836 int
 837 find_next_newline_no_quit (from, cnt)
 838      register int from, cnt;
 839 {
       /* Scan for CNT newlines starting at FROM.  The END argument of 0
          lets scan_buffer pick BEGV or ZV, no shortage is reported, and
          ALLOW_QUIT is 0.  */
 840   return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0);
 841 }
 842
 843 /* Like find_next_newline, but returns position before the newline,
 844    not after, and only search up to TO.  This isn't just
 845    find_next_newline (...)-1, because you might hit TO.  */
 846
 847 int
 848 find_before_next_newline (from, to, cnt)
 849      int from, to, cnt;
 850 {
 851   int unfound;
 852   int after;
 853
 854   after = scan_buffer ('\n', from, to, cnt, &unfound, 1);
 855
 856   /* With no shortage, step back one position from the scan result.  */
 857   return unfound != 0 ? after : after - 1;
 858 }
 859 \f
 860 /* Subroutines of Lisp buffer search functions. */
 861
 862 static Lisp_Object
 863 search_command (string, bound, noerror, count, direction, RE, posix)
 864      Lisp_Object string, bound, noerror, count;
 865      int direction;
 866      int RE;
 867      int posix;
 868 {
 869   int repeat = direction;       /* DIRECTION, times COUNT if given */
 870   int limit, limit_byte;        /* where the search must stop */
 871   int folding;
 872   register int found;
 873
 874   if (!NILP (count))
 875     {
 876       CHECK_NUMBER (count);
 877       repeat *= XINT (count);
 878     }
 879   CHECK_STRING (string);
 880
 881   if (!NILP (bound))
 882     {
 883       CHECK_NUMBER_COERCE_MARKER (bound);
 884       limit = XINT (bound);
 885       if (repeat > 0 ? limit < PT : limit > PT)
 886         error ("Invalid search bound (wrong side of point)");
 887       /* Clamp the bound into the range BEGV..ZV.  */
 888       if (limit > ZV)
 889         {
 890           limit = ZV;
 891           limit_byte = ZV_BYTE;
 892         }
 893       else if (limit < BEGV)
 894         {
 895           limit = BEGV;
 896           limit_byte = BEGV_BYTE;
 897         }
 898       else
 899         limit_byte = CHAR_TO_BYTE (limit);
 900     }
 901   else if (repeat > 0)
 902     {
 903       limit = ZV;
 904       limit_byte = ZV_BYTE;
 905     }
 906   else
 907     {
 908       limit = BEGV;
 909       limit_byte = BEGV_BYTE;
 910     }
 911   /* Hand over the case tables only when case folding is enabled.  */
 912   folding = !NILP (current_buffer->case_fold_search);
 913   found = search_buffer (string, PT, PT_BYTE, limit, limit_byte, repeat, RE,
 914                          folding ? current_buffer->case_canon_table : Qnil,
 915                          folding ? current_buffer->case_eqv_table : Qnil,
 916                          posix);
 917   if (found <= 0)
 918     {
 919       if (NILP (noerror))
 920         return signal_failure (string);
 921       if (EQ (noerror, Qt))
 922         return Qnil;
 923       /* NOERROR is neither nil nor t: move to the limit.  Returning the
 924          limit would be cleaner, but programs may depend on nil.  */
 925       if (limit < BEGV || limit > ZV)
 926         abort ();
 927       SET_PT_BOTH (limit, limit_byte);
 928       return Qnil;
 929     }
 930
 931   if (found < BEGV || found > ZV)
 932     abort ();
 933   SET_PT (found);
       return make_number (found);
 934 }
 935 \f
 936 /* Return 1 if REGEXP it matches just one constant string.  */
 937
 938 static int
 939 trivial_regexp_p (regexp)
 940      Lisp_Object regexp;
 941 {
 942   int len = SBYTES (regexp);
 943   unsigned char *s = SDATA (regexp);
 944   while (--len >= 0)
 945     {
 946       switch (*s++)
 947         {
 948         case '.': case '*': case '+': case '?': case '[': case '^': case '$':
 949           return 0;
 950         case '\\':
 951           if (--len < 0)
 952             return 0;
 953           switch (*s++)
 954             {
 955             case '|': case '(': case ')': case '`': case '\'': case 'b':
 956             case 'B': case '<': case '>': case 'w': case 'W': case 's':
 957             case 'S': case '=': case '{': case '}':
 958             case 'c': case 'C': /* for categoryspec and notcategoryspec */
 959             case '1': case '2': case '3': case '4': case '5':
 960             case '6': case '7': case '8': case '9':
 961               return 0;
 962             }
 963         }
 964     }
 965   return 1;
 966 }
 967
 968 /* Search for the n'th occurrence of STRING in the current buffer,
 969    starting at position POS and stopping at position LIM,
 970    treating STRING as a literal string if RE is false or as
 971    a regular expression if RE is true.
 972
 973    If N is positive, searching is forward and LIM must be greater than POS.
 974    If N is negative, searching is backward and LIM must be less than POS.
 975
 976    Returns -x if x occurrences remain to be found (x > 0),
 977    or else the position at the beginning of the Nth occurrence
 978    (if searching backward) or the end (if searching forward).
 979
 980    POSIX is nonzero if we want full backtracking (POSIX style)
 981    for this pattern.  0 means backtrack only enough to get a valid match.  */
 982
 983 #define TRANSLATE(out, trt, d)                  \
 984 do                                              \
 985   {                                             \
 986     if (! NILP (trt))                           \
 987       {                                         \
 988         Lisp_Object temp;                       \
 989         temp = Faref (trt, make_number (d));    \
 990         if (INTEGERP (temp))                    \
 991           out = XINT (temp);                    \
 992         else                                    \
 993           out = d;                              \
 994       }                                         \
 995     else                                        \
 996       out = d;                                  \
 997   }                                             \
 998 while (0)
 999
1000 static int
1001 search_buffer (string, pos, pos_byte, lim, lim_byte, n,
1002                RE, trt, inverse_trt, posix)
1003      Lisp_Object string;
1004      int pos;
1005      int pos_byte;
1006      int lim;
1007      int lim_byte;
1008      int n;
1009      int RE;
1010      Lisp_Object trt;
1011      Lisp_Object inverse_trt;
1012      int posix;
1013 {
1014   int len = SCHARS (string);
1015   int len_byte = SBYTES (string);
1016   register int i;
1017
1018   if (running_asynch_code)
1019     save_search_regs ();
1020
1021   /* Searching 0 times means don't move.  */
1022   /* Null string is found at starting position.  */
1023   if (len == 0 || n == 0)
1024     {
1025       set_search_regs (pos_byte, 0);
1026       return pos;
1027     }
1028
1029   if (RE && !trivial_regexp_p (string))
1030     {
1031       unsigned char *p1, *p2;
1032       int s1, s2;
1033       struct re_pattern_buffer *bufp;
1034
1035       bufp = compile_pattern (string, &search_regs, trt, posix,
1036                               !NILP (current_buffer->enable_multibyte_characters));
1037
1038       immediate_quit = 1;       /* Quit immediately if user types ^G,
1039                                    because letting this function finish
1040                                    can take too long. */
1041       QUIT;                     /* Do a pending quit right away,
1042                                    to avoid paradoxical behavior */
1043       /* Get pointers and sizes of the two strings
1044          that make up the visible portion of the buffer. */
1045
1046       p1 = BEGV_ADDR;
1047       s1 = GPT_BYTE - BEGV_BYTE;
1048       p2 = GAP_END_ADDR;
1049       s2 = ZV_BYTE - GPT_BYTE;
1050       if (s1 < 0)
1051         {
1052           p2 = p1;
1053           s2 = ZV_BYTE - BEGV_BYTE;
1054           s1 = 0;
1055         }
1056       if (s2 < 0)
1057         {
1058           s1 = ZV_BYTE - BEGV_BYTE;
1059           s2 = 0;
1060         }
1061       re_match_object = Qnil;
1062
1063       while (n < 0)
1064         {
1065           int val;
1066           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1067                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1068                              &search_regs,
1069                              /* Don't allow match past current point */
1070                              pos_byte - BEGV_BYTE);
1071           if (val == -2)
1072             {
1073               matcher_overflow ();
1074             }
1075           if (val >= 0)
1076             {
1077               pos_byte = search_regs.start[0] + BEGV_BYTE;
1078               for (i = 0; i < search_regs.num_regs; i++)
1079                 if (search_regs.start[i] >= 0)
1080                   {
1081                     search_regs.start[i]
1082                       = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1083                     search_regs.end[i]
1084                       = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1085                   }
1086               XSETBUFFER (last_thing_searched, current_buffer);
1087               /* Set pos to the new position. */
1088               pos = search_regs.start[0];
1089             }
1090           else
1091             {
1092               immediate_quit = 0;
1093               return (n);
1094             }
1095           n++;
1096         }
1097       while (n > 0)
1098         {
1099           int val;
1100           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1101                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1102                              &search_regs,
1103                              lim_byte - BEGV_BYTE);
1104           if (val == -2)
1105             {
1106               matcher_overflow ();
1107             }
1108           if (val >= 0)
1109             {
1110               pos_byte = search_regs.end[0] + BEGV_BYTE;
1111               for (i = 0; i < search_regs.num_regs; i++)
1112                 if (search_regs.start[i] >= 0)
1113                   {
1114                     search_regs.start[i]
1115                       = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1116                     search_regs.end[i]
1117                       = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1118                   }
1119               XSETBUFFER (last_thing_searched, current_buffer);
1120               pos = search_regs.end[0];
1121             }
1122           else
1123             {
1124               immediate_quit = 0;
1125               return (0 - n);
1126             }
1127           n--;
1128         }
1129       immediate_quit = 0;
1130       return (pos);
1131     }
1132   else                          /* non-RE case */
1133     {
1134       unsigned char *raw_pattern, *pat;
1135       int raw_pattern_size;
1136       int raw_pattern_size_byte;
1137       unsigned char *patbuf;
1138       int multibyte = !NILP (current_buffer->enable_multibyte_characters);
1139       unsigned char *base_pat = SDATA (string);
1140       int charset_base = -1;
1141       int boyer_moore_ok = 1;
1142
1143       /* MULTIBYTE says whether the text to be searched is multibyte.
1144          We must convert PATTERN to match that, or we will not really
1145          find things right.  */
1146
1147       if (multibyte == STRING_MULTIBYTE (string))
1148         {
1149           raw_pattern = (unsigned char *) SDATA (string);
1150           raw_pattern_size = SCHARS (string);
1151           raw_pattern_size_byte = SBYTES (string);
1152         }
1153       else if (multibyte)
1154         {
1155           raw_pattern_size = SCHARS (string);
1156           raw_pattern_size_byte
1157             = count_size_as_multibyte (SDATA (string),
1158                                        raw_pattern_size);
1159           raw_pattern = (unsigned char *) alloca (raw_pattern_size_byte + 1);
1160           copy_text (SDATA (string), raw_pattern,
1161                      SCHARS (string), 0, 1);
1162         }
1163       else
1164         {
1165           /* Converting multibyte to single-byte.
1166
1167              ??? Perhaps this conversion should be done in a special way
1168              by subtracting nonascii-insert-offset from each non-ASCII char,
1169              so that only the multibyte chars which really correspond to
1170              the chosen single-byte character set can possibly match.  */
1171           raw_pattern_size = SCHARS (string);
1172           raw_pattern_size_byte = SCHARS (string);
1173           raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
1174           copy_text (SDATA (string), raw_pattern,
1175                      SBYTES (string), 1, 0);
1176         }
1177
1178       /* Copy and optionally translate the pattern.  */
1179       len = raw_pattern_size;
1180       len_byte = raw_pattern_size_byte;
1181       patbuf = (unsigned char *) alloca (len_byte);
1182       pat = patbuf;
1183       base_pat = raw_pattern;
1184       if (multibyte)
1185         {
1186           while (--len >= 0)
1187             {
1188               unsigned char str[MAX_MULTIBYTE_LENGTH];
1189               int c, translated, inverse;
1190               int in_charlen, charlen;
1191
1192               /* If we got here and the RE flag is set, it's because we're
1193                  dealing with a regexp known to be trivial, so the backslash
1194                  just quotes the next character.  */
1195               if (RE && *base_pat == '\\')
1196                 {
1197                   len--;
1198                   len_byte--;
1199                   base_pat++;
1200                 }
1201
1202               c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen);
1203
1204               /* Translate the character, if requested.  */
1205               TRANSLATE (translated, trt, c);
1206               /* If translation changed the byte-length, go back
1207                  to the original character.  */
1208               charlen = CHAR_STRING (translated, str);
1209               if (in_charlen != charlen)
1210                 {
1211                   translated = c;
1212                   charlen = CHAR_STRING (c, str);
1213                 }
1214
1215               /* If we are searching for something strange,
1216                  an invalid multibyte code, don't use boyer-moore.  */
1217               if (! ASCII_BYTE_P (translated)
1218                   && (charlen == 1 /* 8bit code */
1219                       || charlen != in_charlen /* invalid multibyte code */
1220                       ))
1221                 boyer_moore_ok = 0;
1222
1223               TRANSLATE (inverse, inverse_trt, c);
1224
1225               /* Did this char actually get translated?
1226                  Would any other char get translated into it?  */
1227               if (translated != c || inverse != c)
1228                 {
1229                   /* Keep track of which character set row
1230                      contains the characters that need translation.  */
1231                   int charset_base_code = c & ~CHAR_FIELD3_MASK;
1232                   int inverse_charset_base = inverse & ~CHAR_FIELD3_MASK;
1233
1234                   if (charset_base_code != inverse_charset_base)
1235                     boyer_moore_ok = 0;
1236                   else if (charset_base == -1)
1237                     charset_base = charset_base_code;
1238                   else if (charset_base != charset_base_code)
1239                     /* If two different rows appear, needing translation,
1240                        then we cannot use boyer_moore search.  */
1241                     boyer_moore_ok = 0;
1242                 }
1243
1244               /* Store this character into the translated pattern.  */
1245               bcopy (str, pat, charlen);
1246               pat += charlen;
1247               base_pat += in_charlen;
1248               len_byte -= in_charlen;
1249             }
1250         }
1251       else
1252         {
1253           /* Unibyte buffer.  */
1254           charset_base = 0;
1255           while (--len >= 0)
1256             {
1257               int c, translated;
1258
1259               /* If we got here and the RE flag is set, it's because we're
1260                  dealing with a regexp known to be trivial, so the backslash
1261                  just quotes the next character.  */
1262               if (RE && *base_pat == '\\')
1263                 {
1264                   len--;
1265                   base_pat++;
1266                 }
1267               c = *base_pat++;
1268               TRANSLATE (translated, trt, c);
1269               *pat++ = translated;
1270             }
1271         }
1272
1273       len_byte = pat - patbuf;
1274       len = raw_pattern_size;
1275       pat = base_pat = patbuf;
1276
1277       if (boyer_moore_ok)
1278         return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
1279                             pos, pos_byte, lim, lim_byte,
1280                             charset_base);
1281       else
1282         return simple_search (n, pat, len, len_byte, trt,
1283                               pos, pos_byte, lim, lim_byte);
1284     }
1285 }
1286 \f
1287 /* Do a simple string search N times for the string PAT,
1288    whose length is LEN/LEN_BYTE,
1289    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1290    TRT is the translation table.
1291
1292    Return the character position where the match is found.
1293    Otherwise, if M matches remained to be found, return -M.
1294
1295    This kind of search works regardless of what is in PAT and
1296    regardless of what is in TRT.  It is used in cases where
1297    boyer_moore cannot work.  */
1298
1299 static int
1300 simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
1301      int n;
1302      unsigned char *pat;
1303      int len, len_byte;
1304      Lisp_Object trt;
1305      int pos, pos_byte;
1306      int lim, lim_byte;
1307 {
1308   int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1309   int forward = n > 0;
1310
1311   if (lim > pos && multibyte)
1312     while (n > 0)
1313       {
1314         while (1)
1315           {
1316             /* Try matching at position POS.  */
1317             int this_pos = pos;
1318             int this_pos_byte = pos_byte;
1319             int this_len = len;
1320             int this_len_byte = len_byte;
1321             unsigned char *p = pat;
1322             if (pos + len > lim)
1323               goto stop;
1324
1325             while (this_len > 0)
1326               {
1327                 int charlen, buf_charlen;
1328                 int pat_ch, buf_ch;
1329
1330                 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1331                 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1332                                                  ZV_BYTE - this_pos_byte,
1333                                                  buf_charlen);
1334                 TRANSLATE (buf_ch, trt, buf_ch);
1335
1336                 if (buf_ch != pat_ch)
1337                   break;
1338
1339                 this_len_byte -= charlen;
1340                 this_len--;
1341                 p += charlen;
1342
1343                 this_pos_byte += buf_charlen;
1344                 this_pos++;
1345               }
1346
1347             if (this_len == 0)
1348               {
1349                 pos += len;
1350                 pos_byte += len_byte;
1351                 break;
1352               }
1353
1354             INC_BOTH (pos, pos_byte);
1355           }
1356
1357         n--;
1358       }
1359   else if (lim > pos)
1360     while (n > 0)
1361       {
1362         while (1)
1363           {
1364             /* Try matching at position POS.  */
1365             int this_pos = pos;
1366             int this_len = len;
1367             unsigned char *p = pat;
1368
1369             if (pos + len > lim)
1370               goto stop;
1371
1372             while (this_len > 0)
1373               {
1374                 int pat_ch = *p++;
1375                 int buf_ch = FETCH_BYTE (this_pos);
1376                 TRANSLATE (buf_ch, trt, buf_ch);
1377
1378                 if (buf_ch != pat_ch)
1379                   break;
1380
1381                 this_len--;
1382                 this_pos++;
1383               }
1384
1385             if (this_len == 0)
1386               {
1387                 pos += len;
1388                 break;
1389               }
1390
1391             pos++;
1392           }
1393
1394         n--;
1395       }
1396   /* Backwards search.  */
1397   else if (lim < pos && multibyte)
1398     while (n < 0)
1399       {
1400         while (1)
1401           {
1402             /* Try matching at position POS.  */
1403             int this_pos = pos - len;
1404             int this_pos_byte = pos_byte - len_byte;
1405             int this_len = len;
1406             int this_len_byte = len_byte;
1407             unsigned char *p = pat;
1408
1409             if (pos - len < lim)
1410               goto stop;
1411
1412             while (this_len > 0)
1413               {
1414                 int charlen, buf_charlen;
1415                 int pat_ch, buf_ch;
1416
1417                 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1418                 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1419                                                  ZV_BYTE - this_pos_byte,
1420                                                  buf_charlen);
1421                 TRANSLATE (buf_ch, trt, buf_ch);
1422
1423                 if (buf_ch != pat_ch)
1424                   break;
1425
1426                 this_len_byte -= charlen;
1427                 this_len--;
1428                 p += charlen;
1429                 this_pos_byte += buf_charlen;
1430                 this_pos++;
1431               }
1432
1433             if (this_len == 0)
1434               {
1435                 pos -= len;
1436                 pos_byte -= len_byte;
1437                 break;
1438               }
1439
1440             DEC_BOTH (pos, pos_byte);
1441           }
1442
1443         n++;
1444       }
1445   else if (lim < pos)
1446     while (n < 0)
1447       {
1448         while (1)
1449           {
1450             /* Try matching at position POS.  */
1451             int this_pos = pos - len;
1452             int this_len = len;
1453             unsigned char *p = pat;
1454
1455             if (pos - len < lim)
1456               goto stop;
1457
1458             while (this_len > 0)
1459               {
1460                 int pat_ch = *p++;
1461                 int buf_ch = FETCH_BYTE (this_pos);
1462                 TRANSLATE (buf_ch, trt, buf_ch);
1463
1464                 if (buf_ch != pat_ch)
1465                   break;
1466                 this_len--;
1467                 this_pos++;
1468               }
1469
1470             if (this_len == 0)
1471               {
1472                 pos -= len;
1473                 break;
1474               }
1475
1476             pos--;
1477           }
1478
1479         n++;
1480       }
1481
1482  stop:
1483   if (n == 0)
1484     {
1485       if (forward)
1486         set_search_regs ((multibyte ? pos_byte : pos) - len_byte, len_byte);
1487       else
1488         set_search_regs (multibyte ? pos_byte : pos, len_byte);
1489
1490       return pos;
1491     }
1492   else if (n > 0)
1493     return -n;
1494   else
1495     return n;
1496 }
1497 \f
1498 /* Do Boyer-Moore search N times for the string PAT,
1499    whose length is LEN/LEN_BYTE,
1500    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1501    DIRECTION says which direction we search in.
1502    TRT and INVERSE_TRT are translation tables.
1503
1504    This kind of search works if all the characters in PAT that have
1505    nontrivial translation are the same aside from the last byte.  This
1506    makes it possible to translate just the last byte of a character,
1507    and do so after just a simple test of the context.
1508
1509    If that criterion is not satisfied, do not call this function.  */
1510
1511 static int
1512 boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
1513              pos, pos_byte, lim, lim_byte, charset_base)
1514      int n;
1515      unsigned char *base_pat;
1516      int len, len_byte;
1517      Lisp_Object trt;
1518      Lisp_Object inverse_trt;
1519      int pos, pos_byte;
1520      int lim, lim_byte;
1521      int charset_base;
1522 {
1523   int direction = ((n > 0) ? 1 : -1);
1524   register int dirlen;
1525   int infinity, limit, stride_for_teases = 0;
1526   register int *BM_tab;
1527   int *BM_tab_base;
1528   register unsigned char *cursor, *p_limit;
1529   register int i, j;
1530   unsigned char *pat, *pat_end;
1531   int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1532
1533   unsigned char simple_translate[0400];
1534   int translate_prev_byte = 0;
1535   int translate_anteprev_byte = 0;
1536
1537 #ifdef C_ALLOCA
1538   int BM_tab_space[0400];
1539   BM_tab = &BM_tab_space[0];
1540 #else
1541   BM_tab = (int *) alloca (0400 * sizeof (int));
1542 #endif
1543   /* The general approach is that we are going to maintain that we know */
1544   /* the first (closest to the present position, in whatever direction */
1545   /* we're searching) character that could possibly be the last */
1546   /* (furthest from present position) character of a valid match.  We */
1547   /* advance the state of our knowledge by looking at that character */
1548   /* and seeing whether it indeed matches the last character of the */
1549   /* pattern.  If it does, we take a closer look.  If it does not, we */
1550   /* move our pointer (to putative last characters) as far as is */
1551   /* logically possible.  This amount of movement, which I call a */
1552   /* stride, will be the length of the pattern if the actual character */
1553   /* appears nowhere in the pattern, otherwise it will be the distance */
1554   /* from the last occurrence of that character to the end of the */
1555   /* pattern. */
1556   /* As a coding trick, an enormous stride is coded into the table for */
1557   /* characters that match the last character.  This allows use of only */
1558   /* a single test, a test for having gone past the end of the */
1559   /* permissible match region, to test for both possible matches (when */
1560   /* the stride goes past the end immediately) and failure to */
1561   /* match (where you get nudged past the end one stride at a time). */
1562
1563   /* Here we make a "mickey mouse" BM table.  The stride of the search */
1564   /* is determined only by the last character of the putative match. */
1565   /* If that character does not match, we will stride the proper */
1566   /* distance to propose a match that superimposes it on the last */
1567   /* instance of a character that matches it (per trt), or misses */
1568   /* it entirely if there is none. */
1569
1570   dirlen = len_byte * direction;
1571   infinity = dirlen - (lim_byte + pos_byte + len_byte + len_byte) * direction;
1572
1573   /* Record position after the end of the pattern.  */
1574   pat_end = base_pat + len_byte;
1575   /* BASE_PAT points to a character that we start scanning from.
1576      It is the first character in a forward search,
1577      the last character in a backward search.  */
1578   if (direction < 0)
1579     base_pat = pat_end - 1;
1580
1581   BM_tab_base = BM_tab;
1582   BM_tab += 0400;
1583   j = dirlen;           /* to get it in a register */
1584   /* A character that does not appear in the pattern induces a */
1585   /* stride equal to the pattern length. */
1586   while (BM_tab_base != BM_tab)
1587     {
1588       *--BM_tab = j;
1589       *--BM_tab = j;
1590       *--BM_tab = j;
1591       *--BM_tab = j;
1592     }
1593
1594   /* We use this for translation, instead of TRT itself.
1595      We fill this in to handle the characters that actually
1596      occur in the pattern.  Others don't matter anyway!  */
1597   bzero (simple_translate, sizeof simple_translate);
1598   for (i = 0; i < 0400; i++)
1599     simple_translate[i] = i;
1600
1601   i = 0;
1602   while (i != infinity)
1603     {
1604       unsigned char *ptr = base_pat + i;
1605       i += direction;
1606       if (i == dirlen)
1607         i = infinity;
1608       if (! NILP (trt))
1609         {
1610           int ch;
1611           int untranslated;
1612           int this_translated = 1;
1613
1614           if (multibyte
1615               /* Is *PTR the last byte of a character?  */
1616               && (pat_end - ptr == 1 || CHAR_HEAD_P (ptr[1])))
1617             {
1618               unsigned char *charstart = ptr;
1619               while (! CHAR_HEAD_P (*charstart))
1620                 charstart--;
1621               untranslated = STRING_CHAR (charstart, ptr - charstart + 1);
1622               if (charset_base == (untranslated & ~CHAR_FIELD3_MASK))
1623                 {
1624                   TRANSLATE (ch, trt, untranslated);
1625                   if (! CHAR_HEAD_P (*ptr))
1626                     {
1627                       translate_prev_byte = ptr[-1];
1628                       if (! CHAR_HEAD_P (translate_prev_byte))
1629                         translate_anteprev_byte = ptr[-2];
1630                     }
1631                 }
1632               else
1633                 {
1634                   this_translated = 0;
1635                   ch = *ptr;
1636                 }
1637             }
1638           else if (!multibyte)
1639             TRANSLATE (ch, trt, *ptr);
1640           else
1641             {
1642               ch = *ptr;
1643               this_translated = 0;
1644             }
1645
1646           if (ch > 0400)
1647             j = ((unsigned char) ch) | 0200;
1648           else
1649             j = (unsigned char) ch;
1650
1651           if (i == infinity)
1652             stride_for_teases = BM_tab[j];
1653
1654           BM_tab[j] = dirlen - i;
1655           /* A translation table is accompanied by its inverse -- see */
1656           /* comment following downcase_table for details */
1657           if (this_translated)
1658             {
1659               int starting_ch = ch;
1660               int starting_j = j;
1661               while (1)
1662                 {
1663                   TRANSLATE (ch, inverse_trt, ch);
1664                   if (ch > 0400)
1665                     j = ((unsigned char) ch) | 0200;
1666                   else
1667                     j = (unsigned char) ch;
1668
1669                   /* For all the characters that map into CH,
1670                      set up simple_translate to map the last byte
1671                      into STARTING_J.  */
1672                   simple_translate[j] = starting_j;
1673                   if (ch == starting_ch)
1674                     break;
1675                   BM_tab[j] = dirlen - i;
1676                 }
1677             }
1678         }
1679       else
1680         {
1681           j = *ptr;
1682
1683           if (i == infinity)
1684             stride_for_teases = BM_tab[j];
1685           BM_tab[j] = dirlen - i;
1686         }
1687       /* stride_for_teases tells how much to stride if we get a */
1688       /* match on the far character but are subsequently */
1689       /* disappointed, by recording what the stride would have been */
1690       /* for that character if the last character had been */
1691       /* different. */
1692     }
1693   infinity = dirlen - infinity;
1694   pos_byte += dirlen - ((direction > 0) ? direction : 0);
1695   /* loop invariant - POS_BYTE points at where last char (first
1696      char if reverse) of pattern would align in a possible match.  */
1697   while (n != 0)
1698     {
1699       int tail_end;
1700       unsigned char *tail_end_ptr;
1701
1702       /* It's been reported that some (broken) compiler thinks that
1703          Boolean expressions in an arithmetic context are unsigned.
1704          Using an explicit ?1:0 prevents this.  */
1705       if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1706           < 0)
1707         return (n * (0 - direction));
1708       /* First we do the part we can by pointers (maybe nothing) */
1709       QUIT;
1710       pat = base_pat;
1711       limit = pos_byte - dirlen + direction;
1712       if (direction > 0)
1713         {
1714           limit = BUFFER_CEILING_OF (limit);
1715           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1716              can take on without hitting edge of buffer or the gap.  */
1717           limit = min (limit, pos_byte + 20000);
1718           limit = min (limit, lim_byte - 1);
1719         }
1720       else
1721         {
1722           limit = BUFFER_FLOOR_OF (limit);
1723           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1724              can take on without hitting edge of buffer or the gap.  */
1725           limit = max (limit, pos_byte - 20000);
1726           limit = max (limit, lim_byte);
1727         }
1728       tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1729       tail_end_ptr = BYTE_POS_ADDR (tail_end);
1730
1731       if ((limit - pos_byte) * direction > 20)
1732         {
1733           unsigned char *p2;
1734
1735           p_limit = BYTE_POS_ADDR (limit);
1736           p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1737           /* In this loop, pos + cursor - p2 is the surrogate for pos */
1738           while (1)             /* use one cursor setting as long as i can */
1739             {
1740               if (direction > 0) /* worth duplicating */
1741                 {
1742                   /* Use signed comparison if appropriate
1743                      to make cursor+infinity sure to be > p_limit.
1744                      Assuming that the buffer lies in a range of addresses
1745                      that are all "positive" (as ints) or all "negative",
1746                      either kind of comparison will work as long
1747                      as we don't step by infinity.  So pick the kind
1748                      that works when we do step by infinity.  */
1749                   if ((EMACS_INT) (p_limit + infinity) > (EMACS_INT) p_limit)
1750                     while ((EMACS_INT) cursor <= (EMACS_INT) p_limit)
1751                       cursor += BM_tab[*cursor];
1752                   else
1753                     while ((EMACS_UINT) cursor <= (EMACS_UINT) p_limit)
1754                       cursor += BM_tab[*cursor];
1755                 }
1756               else
1757                 {
1758                   if ((EMACS_INT) (p_limit + infinity) < (EMACS_INT) p_limit)
1759                     while ((EMACS_INT) cursor >= (EMACS_INT) p_limit)
1760                       cursor += BM_tab[*cursor];
1761                   else
1762                     while ((EMACS_UINT) cursor >= (EMACS_UINT) p_limit)
1763                       cursor += BM_tab[*cursor];
1764                 }
1765 /* If you are here, cursor is beyond the end of the searched region. */
1766 /* This can happen if you match on the far character of the pattern, */
1767 /* because the "stride" of that character is infinity, a number able */
1768 /* to throw you well beyond the end of the search.  It can also */
1769 /* happen if you fail to match within the permitted region and would */
1770 /* otherwise try a character beyond that region */
1771               if ((cursor - p_limit) * direction <= len_byte)
1772                 break;  /* a small overrun is genuine */
1773               cursor -= infinity; /* large overrun = hit */
1774               i = dirlen - direction;
1775               if (! NILP (trt))
1776                 {
1777                   while ((i -= direction) + direction != 0)
1778                     {
1779                       int ch;
1780                       cursor -= direction;
1781                       /* Translate only the last byte of a character.  */
1782                       if (! multibyte
1783                           || ((cursor == tail_end_ptr
1784                                || CHAR_HEAD_P (cursor[1]))
1785                               && (CHAR_HEAD_P (cursor[0])
1786                                   || (translate_prev_byte == cursor[-1]
1787                                       && (CHAR_HEAD_P (translate_prev_byte)
1788                                           || translate_anteprev_byte == cursor[-2])))))
1789                         ch = simple_translate[*cursor];
1790                       else
1791                         ch = *cursor;
1792                       if (pat[i] != ch)
1793                         break;
1794                     }
1795                 }
1796               else
1797                 {
1798                   while ((i -= direction) + direction != 0)
1799                     {
1800                       cursor -= direction;
1801                       if (pat[i] != *cursor)
1802                         break;
1803                     }
1804                 }
1805               cursor += dirlen - i - direction; /* fix cursor */
1806               if (i + direction == 0)
1807                 {
1808                   int position;
1809
1810                   cursor -= direction;
1811
1812                   position = pos_byte + cursor - p2 + ((direction > 0)
1813                                                        ? 1 - len_byte : 0);
1814                   set_search_regs (position, len_byte);
1815
1816                   if ((n -= direction) != 0)
1817                     cursor += dirlen; /* to resume search */
1818                   else
1819                     return ((direction > 0)
1820                             ? search_regs.end[0] : search_regs.start[0]);
1821                 }
1822               else
1823                 cursor += stride_for_teases; /* <sigh> we lose -  */
1824             }
1825           pos_byte += cursor - p2;
1826         }
1827       else
1828         /* Now we'll pick up a clump that has to be done the hard */
1829         /* way because it covers a discontinuity */
1830         {
1831           limit = ((direction > 0)
1832                    ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
1833                    : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
1834           limit = ((direction > 0)
1835                    ? min (limit + len_byte, lim_byte - 1)
1836                    : max (limit - len_byte, lim_byte));
1837           /* LIMIT is now the last value POS_BYTE can have
1838              and still be valid for a possible match.  */
1839           while (1)
1840             {
1841               /* This loop can be coded for space rather than */
1842               /* speed because it will usually run only once. */
1843               /* (the reach is at most len + 21, and typically */
1844               /* does not exceed len) */
1845               while ((limit - pos_byte) * direction >= 0)
1846                 pos_byte += BM_tab[FETCH_BYTE (pos_byte)];
1847               /* now run the same tests to distinguish going off the */
1848               /* end, a match or a phony match. */
1849               if ((pos_byte - limit) * direction <= len_byte)
1850                 break;  /* ran off the end */
1851               /* Found what might be a match.
1852                  Set POS_BYTE back to last (first if reverse) pos.  */
1853               pos_byte -= infinity;
1854               i = dirlen - direction;
1855               while ((i -= direction) + direction != 0)
1856                 {
1857                   int ch;
1858                   unsigned char *ptr;
1859                   pos_byte -= direction;
1860                   ptr = BYTE_POS_ADDR (pos_byte);
1861                   /* Translate only the last byte of a character.  */
1862                   if (! multibyte
1863                       || ((ptr == tail_end_ptr
1864                            || CHAR_HEAD_P (ptr[1]))
1865                           && (CHAR_HEAD_P (ptr[0])
1866                               || (translate_prev_byte == ptr[-1]
1867                                   && (CHAR_HEAD_P (translate_prev_byte)
1868                                       || translate_anteprev_byte == ptr[-2])))))
1869                     ch = simple_translate[*ptr];
1870                   else
1871                     ch = *ptr;
1872                   if (pat[i] != ch)
1873                     break;
1874                 }
1875               /* Above loop has moved POS_BYTE part or all the way
1876                  back to the first pos (last pos if reverse).
1877                  Set it once again at the last (first if reverse) char.  */
1878               pos_byte += dirlen - i- direction;
1879               if (i + direction == 0)
1880                 {
1881                   int position;
1882                   pos_byte -= direction;
1883
1884                   position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
1885
1886                   set_search_regs (position, len_byte);
1887
1888                   if ((n -= direction) != 0)
1889                     pos_byte += dirlen; /* to resume search */
1890                   else
1891                     return ((direction > 0)
1892                             ? search_regs.end[0] : search_regs.start[0]);
1893                 }
1894               else
1895                 pos_byte += stride_for_teases;
1896             }
1897           }
1898       /* We have done one clump.  Can we continue? */
1899       if ((lim_byte - pos_byte) * direction < 0)
1900         return ((0 - n) * direction);
1901     }
1902   return BYTE_TO_CHAR (pos_byte);
1903 }
1904
1905 /* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
1906    for the overall match just found in the current buffer.
1907    Also clear out the match data for registers 1 and up.  */
1908
1909 static void
1910 set_search_regs (beg_byte, nbytes)
1911      int beg_byte, nbytes;
1912 {
1913   int i;
1914
1915   /* Make sure we have registers in which to store
1916      the match position.  */
1917   if (search_regs.num_regs == 0)
1918     {
1919       search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
1920       search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
1921       search_regs.num_regs = 2;
1922     }
1923
1924   /* Clear out the other registers.  */
1925   for (i = 1; i < search_regs.num_regs; i++)
1926     {
1927       search_regs.start[i] = -1;
1928       search_regs.end[i] = -1;
1929     }
1930
1931   search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
1932   search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
1933   XSETBUFFER (last_thing_searched, current_buffer);
1934 }
1935 \f
1936 /* Given a string of words separated by word delimiters,
1937   compute a regexp that matches those exact words
1938   separated by arbitrary punctuation.  */
1939
1940 static Lisp_Object
1941 wordify (string)
1942      Lisp_Object string;
1943 {
1944   register unsigned char *p, *o;
1945   register int i, i_byte, len, punct_count = 0, word_count = 0;
1946   Lisp_Object val;
1947   int prev_c = 0;
1948   int adjust;
1949
1950   CHECK_STRING (string);
1951   p = SDATA (string);
1952   len = SCHARS (string);
1953
1954   for (i = 0, i_byte = 0; i < len; )
1955     {
1956       int c;
1957
1958       FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
1959
1960       if (SYNTAX (c) != Sword)
1961         {
1962           punct_count++;
1963           if (i > 0 && SYNTAX (prev_c) == Sword)
1964             word_count++;
1965         }
1966
1967       prev_c = c;
1968     }
1969
1970   if (SYNTAX (prev_c) == Sword)
1971     word_count++;
1972   if (!word_count)
1973     return empty_string;
1974
1975   adjust = - punct_count + 5 * (word_count - 1) + 4;
1976   if (STRING_MULTIBYTE (string))
1977     val = make_uninit_multibyte_string (len + adjust,
1978                                         SBYTES (string)
1979                                         + adjust);
1980   else
1981     val = make_uninit_string (len + adjust);
1982
1983   o = SDATA (val);
1984   *o++ = '\\';
1985   *o++ = 'b';
1986   prev_c = 0;
1987
1988   for (i = 0, i_byte = 0; i < len; )
1989     {
1990       int c;
1991       int i_byte_orig = i_byte;
1992
1993       FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
1994
1995       if (SYNTAX (c) == Sword)
1996         {
1997           bcopy (SDATA (string) + i_byte_orig, o,
1998                  i_byte - i_byte_orig);
1999           o += i_byte - i_byte_orig;
2000         }
2001       else if (i > 0 && SYNTAX (prev_c) == Sword && --word_count)
2002         {
2003           *o++ = '\\';
2004           *o++ = 'W';
2005           *o++ = '\\';
2006           *o++ = 'W';
2007           *o++ = '*';
2008         }
2009
2010       prev_c = c;
2011     }
2012
2013   *o++ = '\\';
2014   *o++ = 'b';
2015
2016   return val;
2017 }
2018 \f
2019 DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
2020        "MSearch backward: ",
2021        doc: /* Search backward from point for STRING.
2022 Set point to the beginning of the occurrence found, and return point.
2023 An optional second argument bounds the search; it is a buffer position.
2024 The match found must not extend before that position.
2025 Optional third argument, if t, means if fail just return nil (no error).
2026  If not nil and not t, position at limit of search and return nil.
2027 Optional fourth argument is repeat count--search for successive occurrences.
2028
2029 Search case-sensitivity is determined by the value of the variable
2030 `case-fold-search', which see.
2031
2032 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2033      (string, bound, noerror, count)
2034      Lisp_Object string, bound, noerror, count;
2035 {
2036   return search_command (string, bound, noerror, count, -1, 0, 0);
2037 }
2038
2039 DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
2040        doc: /* Search forward from point for STRING.
2041 Set point to the end of the occurrence found, and return point.
2042 An optional second argument bounds the search; it is a buffer position.
2043 The match found must not extend after that position.  nil is equivalent
2044   to (point-max).
2045 Optional third argument, if t, means if fail just return nil (no error).
2046   If not nil and not t, move to limit of search and return nil.
2047 Optional fourth argument is repeat count--search for successive occurrences.
2048
2049 Search case-sensitivity is determined by the value of the variable
2050 `case-fold-search', which see.
2051
2052 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2053      (string, bound, noerror, count)
2054      Lisp_Object string, bound, noerror, count;
2055 {
2056   return search_command (string, bound, noerror, count, 1, 0, 0);
2057 }
2058
2059 DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
2060        "sWord search backward: ",
2061        doc: /* Search backward from point for STRING, ignoring differences in punctuation.
2062 Set point to the beginning of the occurrence found, and return point.
2063 An optional second argument bounds the search; it is a buffer position.
2064 The match found must not extend before that position.
2065 Optional third argument, if t, means if fail just return nil (no error).
2066   If not nil and not t, move to limit of search and return nil.
2067 Optional fourth argument is repeat count--search for successive occurrences.  */)
2068      (string, bound, noerror, count)
2069      Lisp_Object string, bound, noerror, count;
2070 {
2071   return search_command (wordify (string), bound, noerror, count, -1, 1, 0);
2072 }
2073
2074 DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
2075        "sWord search: ",
2076        doc: /* Search forward from point for STRING, ignoring differences in punctuation.
2077 Set point to the end of the occurrence found, and return point.
2078 An optional second argument bounds the search; it is a buffer position.
2079 The match found must not extend after that position.
2080 Optional third argument, if t, means if fail just return nil (no error).
2081   If not nil and not t, move to limit of search and return nil.
2082 Optional fourth argument is repeat count--search for successive occurrences.  */)
2083      (string, bound, noerror, count)
2084      Lisp_Object string, bound, noerror, count;
2085 {
2086   return search_command (wordify (string), bound, noerror, count, 1, 1, 0);
2087 }
2088
2089 DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
2090        "sRE search backward: ",
2091        doc: /* Search backward from point for match for regular expression REGEXP.
2092 Set point to the beginning of the match, and return point.
2093 The match found is the one starting last in the buffer
2094 and yet ending before the origin of the search.
2095 An optional second argument bounds the search; it is a buffer position.
2096 The match found must start at or after that position.
2097 Optional third argument, if t, means if fail just return nil (no error).
2098   If not nil and not t, move to limit of search and return nil.
2099 Optional fourth argument is repeat count--search for successive occurrences.
2100 See also the functions `match-beginning', `match-end', `match-string',
2101 and `replace-match'.  */)
2102      (regexp, bound, noerror, count)
2103      Lisp_Object regexp, bound, noerror, count;
2104 {
2105   return search_command (regexp, bound, noerror, count, -1, 1, 0);
2106 }
2107
2108 DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
2109        "sRE search: ",
2110        doc: /* Search forward from point for regular expression REGEXP.
2111 Set point to the end of the occurrence found, and return point.
2112 An optional second argument bounds the search; it is a buffer position.
2113 The match found must not extend after that position.
2114 Optional third argument, if t, means if fail just return nil (no error).
2115   If not nil and not t, move to limit of search and return nil.
2116 Optional fourth argument is repeat count--search for successive occurrences.
2117 See also the functions `match-beginning', `match-end', `match-string',
2118 and `replace-match'.  */)
2119      (regexp, bound, noerror, count)
2120      Lisp_Object regexp, bound, noerror, count;
2121 {
2122   return search_command (regexp, bound, noerror, count, 1, 1, 0);
2123 }
2124
2125 DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
2126        "sPosix search backward: ",
2127        doc: /* Search backward from point for match for regular expression REGEXP.
2128 Find the longest match in accord with Posix regular expression rules.
2129 Set point to the beginning of the match, and return point.
2130 The match found is the one starting last in the buffer
2131 and yet ending before the origin of the search.
2132 An optional second argument bounds the search; it is a buffer position.
2133 The match found must start at or after that position.
2134 Optional third argument, if t, means if fail just return nil (no error).
2135   If not nil and not t, move to limit of search and return nil.
2136 Optional fourth argument is repeat count--search for successive occurrences.
2137 See also the functions `match-beginning', `match-end', `match-string',
2138 and `replace-match'.  */)
2139      (regexp, bound, noerror, count)
2140      Lisp_Object regexp, bound, noerror, count;
2141 {
2142   return search_command (regexp, bound, noerror, count, -1, 1, 1);
2143 }
2144
2145 DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
2146        "sPosix search: ",
2147        doc: /* Search forward from point for regular expression REGEXP.
2148 Find the longest match in accord with Posix regular expression rules.
2149 Set point to the end of the occurrence found, and return point.
2150 An optional second argument bounds the search; it is a buffer position.
2151 The match found must not extend after that position.
2152 Optional third argument, if t, means if fail just return nil (no error).
2153   If not nil and not t, move to limit of search and return nil.
2154 Optional fourth argument is repeat count--search for successive occurrences.
2155 See also the functions `match-beginning', `match-end', `match-string',
2156 and `replace-match'.  */)
2157      (regexp, bound, noerror, count)
2158      Lisp_Object regexp, bound, noerror, count;
2159 {
2160   return search_command (regexp, bound, noerror, count, 1, 1, 1);
2161 }
2162 \f
2163 DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
2164        doc: /* Replace text matched by last search with NEWTEXT.
2165 Leave point at the end of the replacement text.
2166
2167 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
2168 Otherwise maybe capitalize the whole text, or maybe just word initials,
2169 based on the replaced text.
2170 If the replaced text has only capital letters
2171 and has at least one multiletter word, convert NEWTEXT to all caps.
2172 Otherwise if all words are capitalized in the replaced text,
2173 capitalize each word in NEWTEXT.
2174
2175 If third arg LITERAL is non-nil, insert NEWTEXT literally.
2176 Otherwise treat `\\' as special:
2177   `\\&' in NEWTEXT means substitute original matched text.
2178   `\\N' means substitute what matched the Nth `\\(...\\)'.
2179        If Nth parens didn't match, substitute nothing.
2180   `\\\\' means insert one `\\'.
2181 Case conversion does not apply to these substitutions.
2182
2183 FIXEDCASE and LITERAL are optional arguments.
2184
2185 The optional fourth argument STRING can be a string to modify.
2186 This is meaningful when the previous match was done against STRING,
2187 using `string-match'.  When used this way, `replace-match'
2188 creates and returns a new string made by copying STRING and replacing
2189 the part of STRING that was matched.
2190
2191 The optional fifth argument SUBEXP specifies a subexpression;
2192 it says to replace just that subexpression with NEWTEXT,
2193 rather than replacing the entire matched text.
2194 This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
2195 `\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
2196 NEWTEXT in place of subexp N.
2197 This is useful only after a regular expression search or match,
2198 since only regular expressions have distinguished subexpressions.  */)
2199      (newtext, fixedcase, literal, string, subexp)
2200      Lisp_Object newtext, fixedcase, literal, string, subexp;
2201 {
2202   enum { nochange, all_caps, cap_initial } case_action;
2203   register int pos, pos_byte;
2204   int some_multiletter_word;
2205   int some_lowercase;
2206   int some_uppercase;
2207   int some_nonuppercase_initial;
2208   register int c, prevc;
2209   int sub;
2210   int opoint, newpoint;
2211
2212   CHECK_STRING (newtext);
2213
2214   if (! NILP (string))
2215     CHECK_STRING (string);
2216
2217   case_action = nochange;       /* We tried an initialization */
2218                                 /* but some C compilers blew it */
2219
2220   if (search_regs.num_regs <= 0)
2221     error ("replace-match called before any match found");
2222
2223   if (NILP (subexp))
2224     sub = 0;
2225   else
2226     {
2227       CHECK_NUMBER (subexp);
2228       sub = XINT (subexp);
2229       if (sub < 0 || sub >= search_regs.num_regs)
2230         args_out_of_range (subexp, make_number (search_regs.num_regs));
2231     }
2232
2233   if (NILP (string))
2234     {
2235       if (search_regs.start[sub] < BEGV
2236           || search_regs.start[sub] > search_regs.end[sub]
2237           || search_regs.end[sub] > ZV)
2238         args_out_of_range (make_number (search_regs.start[sub]),
2239                            make_number (search_regs.end[sub]));
2240     }
2241   else
2242     {
2243       if (search_regs.start[sub] < 0
2244           || search_regs.start[sub] > search_regs.end[sub]
2245           || search_regs.end[sub] > SCHARS (string))
2246         args_out_of_range (make_number (search_regs.start[sub]),
2247                            make_number (search_regs.end[sub]));
2248     }
2249
2250   if (NILP (fixedcase))
2251     {
2252       /* Decide how to casify by examining the matched text. */
2253       int last;
2254
2255       pos = search_regs.start[sub];
2256       last = search_regs.end[sub];
2257
2258       if (NILP (string))
2259         pos_byte = CHAR_TO_BYTE (pos);
2260       else
2261         pos_byte = string_char_to_byte (string, pos);
2262
2263       prevc = '\n';
2264       case_action = all_caps;
2265
2266       /* some_multiletter_word is set nonzero if any original word
2267          is more than one letter long. */
2268       some_multiletter_word = 0;
2269       some_lowercase = 0;
2270       some_nonuppercase_initial = 0;
2271       some_uppercase = 0;
2272
2273       while (pos < last)
2274         {
2275           if (NILP (string))
2276             {
2277               c = FETCH_CHAR (pos_byte);
2278               INC_BOTH (pos, pos_byte);
2279             }
2280           else
2281             FETCH_STRING_CHAR_ADVANCE (c, string, pos, pos_byte);
2282
2283           if (LOWERCASEP (c))
2284             {
2285               /* Cannot be all caps if any original char is lower case */
2286
2287               some_lowercase = 1;
2288               if (SYNTAX (prevc) != Sword)
2289                 some_nonuppercase_initial = 1;
2290               else
2291                 some_multiletter_word = 1;
2292             }
2293           else if (!NOCASEP (c))
2294             {
2295               some_uppercase = 1;
2296               if (SYNTAX (prevc) != Sword)
2297                 ;
2298               else
2299                 some_multiletter_word = 1;
2300             }
2301           else
2302             {
2303               /* If the initial is a caseless word constituent,
2304                  treat that like a lowercase initial.  */
2305               if (SYNTAX (prevc) != Sword)
2306                 some_nonuppercase_initial = 1;
2307             }
2308
2309           prevc = c;
2310         }
2311
2312       /* Convert to all caps if the old text is all caps
2313          and has at least one multiletter word.  */
2314       if (! some_lowercase && some_multiletter_word)
2315         case_action = all_caps;
2316       /* Capitalize each word, if the old text has all capitalized words.  */
2317       else if (!some_nonuppercase_initial && some_multiletter_word)
2318         case_action = cap_initial;
2319       else if (!some_nonuppercase_initial && some_uppercase)
2320         /* Should x -> yz, operating on X, give Yz or YZ?
2321            We'll assume the latter.  */
2322         case_action = all_caps;
2323       else
2324         case_action = nochange;
2325     }
2326
2327   /* Do replacement in a string.  */
2328   if (!NILP (string))
2329     {
2330       Lisp_Object before, after;
2331
2332       before = Fsubstring (string, make_number (0),
2333                            make_number (search_regs.start[sub]));
2334       after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
2335
2336       /* Substitute parts of the match into NEWTEXT
2337          if desired.  */
2338       if (NILP (literal))
2339         {
2340           int lastpos = 0;
2341           int lastpos_byte = 0;
2342           /* We build up the substituted string in ACCUM.  */
2343           Lisp_Object accum;
2344           Lisp_Object middle;
2345           int length = SBYTES (newtext);
2346
2347           accum = Qnil;
2348
2349           for (pos_byte = 0, pos = 0; pos_byte < length;)
2350             {
2351               int substart = -1;
2352               int subend = 0;
2353               int delbackslash = 0;
2354
2355               FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2356
2357               if (c == '\\')
2358                 {
2359                   FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2360
2361                   if (c == '&')
2362                     {
2363                       substart = search_regs.start[sub];
2364                       subend = search_regs.end[sub];
2365                     }
2366                   else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
2367                     {
2368                       if (search_regs.start[c - '0'] >= 0)
2369                         {
2370                           substart = search_regs.start[c - '0'];
2371                           subend = search_regs.end[c - '0'];
2372                         }
2373                     }
2374                   else if (c == '\\')
2375                     delbackslash = 1;
2376                   else
2377                     error ("Invalid use of `\\' in replacement text");
2378                 }
2379               if (substart >= 0)
2380                 {
2381                   if (pos - 2 != lastpos)
2382                     middle = substring_both (newtext, lastpos,
2383                                              lastpos_byte,
2384                                              pos - 2, pos_byte - 2);
2385                   else
2386                     middle = Qnil;
2387                   accum = concat3 (accum, middle,
2388                                    Fsubstring (string,
2389                                                make_number (substart),
2390                                                make_number (subend)));
2391                   lastpos = pos;
2392                   lastpos_byte = pos_byte;
2393                 }
2394               else if (delbackslash)
2395                 {
2396                   middle = substring_both (newtext, lastpos,
2397                                            lastpos_byte,
2398                                            pos - 1, pos_byte - 1);
2399
2400                   accum = concat2 (accum, middle);
2401                   lastpos = pos;
2402                   lastpos_byte = pos_byte;
2403                 }
2404             }
2405
2406           if (pos != lastpos)
2407             middle = substring_both (newtext, lastpos,
2408                                      lastpos_byte,
2409                                      pos, pos_byte);
2410           else
2411             middle = Qnil;
2412
2413           newtext = concat2 (accum, middle);
2414         }
2415
2416       /* Do case substitution in NEWTEXT if desired.  */
2417       if (case_action == all_caps)
2418         newtext = Fupcase (newtext);
2419       else if (case_action == cap_initial)
2420         newtext = Fupcase_initials (newtext);
2421
2422       return concat3 (before, newtext, after);
2423     }
2424
2425   /* Record point, then move (quietly) to the start of the match.  */
2426   if (PT >= search_regs.end[sub])
2427     opoint = PT - ZV;
2428   else if (PT > search_regs.start[sub])
2429     opoint = search_regs.end[sub] - ZV;
2430   else
2431     opoint = PT;
2432
2433   /* If we want non-literal replacement,
2434      perform substitution on the replacement string.  */
2435   if (NILP (literal))
2436     {
2437       int length = SBYTES (newtext);
2438       unsigned char *substed;
2439       int substed_alloc_size, substed_len;
2440       int buf_multibyte = !NILP (current_buffer->enable_multibyte_characters);
2441       int str_multibyte = STRING_MULTIBYTE (newtext);
2442       Lisp_Object rev_tbl;
2443       int really_changed = 0;
2444
2445       rev_tbl= (!buf_multibyte && CHAR_TABLE_P (Vnonascii_translation_table)
2446                 ? Fchar_table_extra_slot (Vnonascii_translation_table,
2447                                           make_number (0))
2448                 : Qnil);
2449
2450       substed_alloc_size = length * 2 + 100;
2451       substed = (unsigned char *) xmalloc (substed_alloc_size + 1);
2452       substed_len = 0;
2453
2454       /* Go thru NEWTEXT, producing the actual text to insert in
2455          SUBSTED while adjusting multibyteness to that of the current
2456          buffer.  */
2457
2458       for (pos_byte = 0, pos = 0; pos_byte < length;)
2459         {
2460           unsigned char str[MAX_MULTIBYTE_LENGTH];
2461           unsigned char *add_stuff = NULL;
2462           int add_len = 0;
2463           int idx = -1;
2464
2465           if (str_multibyte)
2466             {
2467               FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte);
2468               if (!buf_multibyte)
2469                 c = multibyte_char_to_unibyte (c, rev_tbl);
2470             }
2471           else
2472             {
2473               /* Note that we don't have to increment POS.  */
2474               c = SREF (newtext, pos_byte++);
2475               if (buf_multibyte)
2476                 c = unibyte_char_to_multibyte (c);
2477             }
2478
2479           /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2480              or set IDX to a match index, which means put that part
2481              of the buffer text into SUBSTED.  */
2482
2483           if (c == '\\')
2484             {
2485               really_changed = 1;
2486
2487               if (str_multibyte)
2488                 {
2489                   FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext,
2490                                                       pos, pos_byte);
2491                   if (!buf_multibyte && !SINGLE_BYTE_CHAR_P (c))
2492                     c = multibyte_char_to_unibyte (c, rev_tbl);
2493                 }
2494               else
2495                 {
2496                   c = SREF (newtext, pos_byte++);
2497                   if (buf_multibyte)
2498                     c = unibyte_char_to_multibyte (c);
2499                 }
2500
2501               if (c == '&')
2502                 idx = sub;
2503               else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
2504                 {
2505                   if (search_regs.start[c - '0'] >= 1)
2506                     idx = c - '0';
2507                 }
2508               else if (c == '\\')
2509                 add_len = 1, add_stuff = "\\";
2510               else
2511                 {
2512                   xfree (substed);
2513                   error ("Invalid use of `\\' in replacement text");
2514                 }
2515             }
2516           else
2517             {
2518               add_len = CHAR_STRING (c, str);
2519               add_stuff = str;
2520             }
2521
2522           /* If we want to copy part of a previous match,
2523              set up ADD_STUFF and ADD_LEN to point to it.  */
2524           if (idx >= 0)
2525             {
2526               int begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
2527               add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
2528               if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
2529                 move_gap (search_regs.start[idx]);
2530               add_stuff = BYTE_POS_ADDR (begbyte);
2531             }
2532
2533           /* Now the stuff we want to add to SUBSTED
2534              is invariably ADD_LEN bytes starting at ADD_STUFF.  */
2535
2536           /* Make sure SUBSTED is big enough.  */
2537           if (substed_len + add_len >= substed_alloc_size)
2538             {
2539               substed_alloc_size = substed_len + add_len + 500;
2540               substed = (unsigned char *) xrealloc (substed,
2541                                                     substed_alloc_size + 1);
2542             }
2543
2544           /* Now add to the end of SUBSTED.  */
2545           if (add_stuff)
2546             {
2547               bcopy (add_stuff, substed + substed_len, add_len);
2548               substed_len += add_len;
2549             }
2550         }
2551
2552       if (really_changed)
2553         newtext = make_string (substed, substed_len);
2554
2555       xfree (substed);
2556     }
2557
2558   /* Replace the old text with the new in the cleanest possible way.  */
2559   replace_range (search_regs.start[sub], search_regs.end[sub],
2560                  newtext, 1, 0, 1);
2561   newpoint = search_regs.start[sub] + SCHARS (newtext);
2562
2563   if (case_action == all_caps)
2564     Fupcase_region (make_number (search_regs.start[sub]),
2565                     make_number (newpoint));
2566   else if (case_action == cap_initial)
2567     Fupcase_initials_region (make_number (search_regs.start[sub]),
2568                              make_number (newpoint));
2569
2570   /* Adjust search data for this change.  */
2571   {
2572     int change = newpoint - search_regs.end[sub];
2573     int i;
2574
2575     for (i = 0; i < search_regs.num_regs; i++)
2576       {
2577         if (search_regs.start[i] > newpoint)
2578           search_regs.start[i] += change;
2579         if (search_regs.end[i] > newpoint)
2580           search_regs.end[i] += change;
2581       }
2582   }
2583
2584   /* Put point back where it was in the text.  */
2585   if (opoint <= 0)
2586     TEMP_SET_PT (opoint + ZV);
2587   else
2588     TEMP_SET_PT (opoint);
2589
2590   /* Now move point "officially" to the start of the inserted replacement.  */
2591   move_if_not_intangible (newpoint);
2592
2593   return Qnil;
2594 }
2595 \f
2596 static Lisp_Object
2597 match_limit (num, beginningp)
2598      Lisp_Object num;
2599      int beginningp;
2600 {
2601   register int n;
2602
2603   CHECK_NUMBER (num);
2604   n = XINT (num);
2605   if (n < 0 || n >= search_regs.num_regs)
2606     args_out_of_range (num, make_number (search_regs.num_regs));
2607   if (search_regs.num_regs <= 0
2608       || search_regs.start[n] < 0)
2609     return Qnil;
2610   return (make_number ((beginningp) ? search_regs.start[n]
2611                                     : search_regs.end[n]));
2612 }
2613
2614 DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
2615        doc: /* Return position of start of text matched by last search.
2616 SUBEXP, a number, specifies which parenthesized expression in the last
2617   regexp.
2618 Value is nil if SUBEXPth pair didn't match, or there were less than
2619   SUBEXP pairs.
2620 Zero means the entire text matched by the whole regexp or whole string.  */)
2621      (subexp)
2622      Lisp_Object subexp;
2623 {
2624   return match_limit (subexp, 1);
2625 }
2626
2627 DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
2628        doc: /* Return position of end of text matched by last search.
2629 SUBEXP, a number, specifies which parenthesized expression in the last
2630   regexp.
2631 Value is nil if SUBEXPth pair didn't match, or there were less than
2632   SUBEXP pairs.
2633 Zero means the entire text matched by the whole regexp or whole string.  */)
2634      (subexp)
2635      Lisp_Object subexp;
2636 {
2637   return match_limit (subexp, 0);
2638 }
2639
2640 DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 2, 0,
2641        doc: /* Return a list containing all info on what the last search matched.
2642 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2643 All the elements are markers or nil (nil if the Nth pair didn't match)
2644 if the last match was on a buffer; integers or nil if a string was matched.
2645 Use `store-match-data' to reinstate the data in this list.
2646
2647 If INTEGERS (the optional first argument) is non-nil, always use integers
2648 \(rather than markers) to represent buffer positions.
2649 If REUSE is a list, reuse it as part of the value.  If REUSE is long enough
2650 to hold all the values, and if INTEGERS is non-nil, no consing is done.  */)
2651      (integers, reuse)
2652      Lisp_Object integers, reuse;
2653 {
2654   Lisp_Object tail, prev;
2655   Lisp_Object *data;
2656   int i, len;
2657
2658   if (NILP (last_thing_searched))
2659     return Qnil;
2660
2661   prev = Qnil;
2662
2663   data = (Lisp_Object *) alloca ((2 * search_regs.num_regs)
2664                                  * sizeof (Lisp_Object));
2665
2666   len = -1;
2667   for (i = 0; i < search_regs.num_regs; i++)
2668     {
2669       int start = search_regs.start[i];
2670       if (start >= 0)
2671         {
2672           if (EQ (last_thing_searched, Qt)
2673               || ! NILP (integers))
2674             {
2675               XSETFASTINT (data[2 * i], start);
2676               XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
2677             }
2678           else if (BUFFERP (last_thing_searched))
2679             {
2680               data[2 * i] = Fmake_marker ();
2681               Fset_marker (data[2 * i],
2682                            make_number (start),
2683                            last_thing_searched);
2684               data[2 * i + 1] = Fmake_marker ();
2685               Fset_marker (data[2 * i + 1],
2686                            make_number (search_regs.end[i]),
2687                            last_thing_searched);
2688             }
2689           else
2690             /* last_thing_searched must always be Qt, a buffer, or Qnil.  */
2691             abort ();
2692
2693           len = i;
2694         }
2695       else
2696         data[2 * i] = data [2 * i + 1] = Qnil;
2697     }
2698
2699   /* If REUSE is not usable, cons up the values and return them.  */
2700   if (! CONSP (reuse))
2701     return Flist (2 * len + 2, data);
2702
2703   /* If REUSE is a list, store as many value elements as will fit
2704      into the elements of REUSE.  */
2705   for (i = 0, tail = reuse; CONSP (tail);
2706        i++, tail = XCDR (tail))
2707     {
2708       if (i < 2 * len + 2)
2709         XSETCAR (tail, data[i]);
2710       else
2711         XSETCAR (tail, Qnil);
2712       prev = tail;
2713     }
2714
2715   /* If we couldn't fit all value elements into REUSE,
2716      cons up the rest of them and add them to the end of REUSE.  */
2717   if (i < 2 * len + 2)
2718     XSETCDR (prev, Flist (2 * len + 2 - i, data + i));
2719
2720   return reuse;
2721 }
2722
2723
2724 DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 1, 0,
2725        doc: /* Set internal data on last search match from elements of LIST.
2726 LIST should have been created by calling `match-data' previously.  */)
2727      (list)
2728      register Lisp_Object list;
2729 {
2730   register int i;
2731   register Lisp_Object marker;
2732
2733   if (running_asynch_code)
2734     save_search_regs ();
2735
2736   if (!CONSP (list) && !NILP (list))
2737     list = wrong_type_argument (Qconsp, list);
2738
2739   /* Unless we find a marker with a buffer in LIST, assume that this
2740      match data came from a string.  */
2741   last_thing_searched = Qt;
2742
2743   /* Allocate registers if they don't already exist.  */
2744   {
2745     int length = XFASTINT (Flength (list)) / 2;
2746
2747     if (length > search_regs.num_regs)
2748       {
2749         if (search_regs.num_regs == 0)
2750           {
2751             search_regs.start
2752               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2753             search_regs.end
2754               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2755           }
2756         else
2757           {
2758             search_regs.start
2759               = (regoff_t *) xrealloc (search_regs.start,
2760                                        length * sizeof (regoff_t));
2761             search_regs.end
2762               = (regoff_t *) xrealloc (search_regs.end,
2763                                        length * sizeof (regoff_t));
2764           }
2765
2766         for (i = search_regs.num_regs; i < length; i++)
2767           search_regs.start[i] = -1;
2768
2769         search_regs.num_regs = length;
2770       }
2771   }
2772
2773   for (i = 0; i < search_regs.num_regs; i++)
2774     {
2775       marker = Fcar (list);
2776       if (NILP (marker))
2777         {
2778           search_regs.start[i] = -1;
2779           list = Fcdr (list);
2780         }
2781       else
2782         {
2783           int from;
2784
2785           if (MARKERP (marker))
2786             {
2787               if (XMARKER (marker)->buffer == 0)
2788                 XSETFASTINT (marker, 0);
2789               else
2790                 XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
2791             }
2792
2793           CHECK_NUMBER_COERCE_MARKER (marker);
2794           from = XINT (marker);
2795           list = Fcdr (list);
2796
2797           marker = Fcar (list);
2798           if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
2799             XSETFASTINT (marker, 0);
2800
2801           CHECK_NUMBER_COERCE_MARKER (marker);
2802           search_regs.start[i] = from;
2803           search_regs.end[i] = XINT (marker);
2804         }
2805       list = Fcdr (list);
2806     }
2807
2808   return Qnil;
2809 }
2810
2811 /* If non-zero the match data have been saved in saved_search_regs
2812    during the execution of a sentinel or filter. */
2813 static int search_regs_saved;
2814 static struct re_registers saved_search_regs;
2815
2816 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
2817    if asynchronous code (filter or sentinel) is running. */
2818 static void
2819 save_search_regs ()
2820 {
2821   if (!search_regs_saved)
2822     {
2823       saved_search_regs.num_regs = search_regs.num_regs;
2824       saved_search_regs.start = search_regs.start;
2825       saved_search_regs.end = search_regs.end;
2826       search_regs.num_regs = 0;
2827       search_regs.start = 0;
2828       search_regs.end = 0;
2829
2830       search_regs_saved = 1;
2831     }
2832 }
2833
2834 /* Called upon exit from filters and sentinels. */
2835 void
2836 restore_match_data ()
2837 {
2838   if (search_regs_saved)
2839     {
2840       if (search_regs.num_regs > 0)
2841         {
2842           xfree (search_regs.start);
2843           xfree (search_regs.end);
2844         }
2845       search_regs.num_regs = saved_search_regs.num_regs;
2846       search_regs.start = saved_search_regs.start;
2847       search_regs.end = saved_search_regs.end;
2848
2849       search_regs_saved = 0;
2850     }
2851 }
2852
2853 /* Quote a string to inactivate reg-expr chars */
2854
2855 DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
2856        doc: /* Return a regexp string which matches exactly STRING and nothing else.  */)
2857      (string)
2858      Lisp_Object string;
2859 {
2860   register unsigned char *in, *out, *end;
2861   register unsigned char *temp;
2862   int backslashes_added = 0;
2863
2864   CHECK_STRING (string);
2865
2866   temp = (unsigned char *) alloca (SBYTES (string) * 2);
2867
2868   /* Now copy the data into the new string, inserting escapes. */
2869
2870   in = SDATA (string);
2871   end = in + SBYTES (string);
2872   out = temp;
2873
2874   for (; in != end; in++)
2875     {
2876       if (*in == '[' || *in == ']'
2877           || *in == '*' || *in == '.' || *in == '\\'
2878           || *in == '?' || *in == '+'
2879           || *in == '^' || *in == '$')
2880         *out++ = '\\', backslashes_added++;
2881       *out++ = *in;
2882     }
2883
2884   return make_specified_string (temp,
2885                                 SCHARS (string) + backslashes_added,
2886                                 out - temp,
2887                                 STRING_MULTIBYTE (string));
2888 }
2889 \f
2890 void
2891 syms_of_search ()
2892 {
2893   register int i;
2894
2895   for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
2896     {
2897       searchbufs[i].buf.allocated = 100;
2898       searchbufs[i].buf.buffer = (unsigned char *) malloc (100);
2899       searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
2900       searchbufs[i].regexp = Qnil;
2901       staticpro (&searchbufs[i].regexp);
2902       searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
2903     }
2904   searchbuf_head = &searchbufs[0];
2905
2906   Qsearch_failed = intern ("search-failed");
2907   staticpro (&Qsearch_failed);
2908   Qinvalid_regexp = intern ("invalid-regexp");
2909   staticpro (&Qinvalid_regexp);
2910
2911   Fput (Qsearch_failed, Qerror_conditions,
2912         Fcons (Qsearch_failed, Fcons (Qerror, Qnil)));
2913   Fput (Qsearch_failed, Qerror_message,
2914         build_string ("Search failed"));
2915
2916   Fput (Qinvalid_regexp, Qerror_conditions,
2917         Fcons (Qinvalid_regexp, Fcons (Qerror, Qnil)));
2918   Fput (Qinvalid_regexp, Qerror_message,
2919         build_string ("Invalid regexp"));
2920
2921   last_thing_searched = Qnil;
2922   staticpro (&last_thing_searched);
2923
2924   defsubr (&Slooking_at);
2925   defsubr (&Sposix_looking_at);
2926   defsubr (&Sstring_match);
2927   defsubr (&Sposix_string_match);
2928   defsubr (&Ssearch_forward);
2929   defsubr (&Ssearch_backward);
2930   defsubr (&Sword_search_forward);
2931   defsubr (&Sword_search_backward);
2932   defsubr (&Sre_search_forward);
2933   defsubr (&Sre_search_backward);
2934   defsubr (&Sposix_search_forward);
2935   defsubr (&Sposix_search_backward);
2936   defsubr (&Sreplace_match);
2937   defsubr (&Smatch_beginning);
2938   defsubr (&Smatch_end);
2939   defsubr (&Smatch_data);
2940   defsubr (&Sset_match_data);
2941   defsubr (&Sregexp_quote);
2942 }