src/search.c

   1 /* String search routines for GNU Emacs.
   2    Copyright (C) 1985, 86,87,93,94,97,98, 1999 Free Software Foundation, Inc.
   3
   4 This file is part of GNU Emacs.
   5
   6 GNU Emacs is free software; you can redistribute it and/or modify
   7 it under the terms of the GNU General Public License as published by
   8 the Free Software Foundation; either version 2, or (at your option)
   9 any later version.
  10
  11 GNU Emacs is distributed in the hope that it will be useful,
  12 but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 GNU General Public License for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GNU Emacs; see the file COPYING.  If not, write to
  18 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  19 Boston, MA 02111-1307, USA.  */
  20
  21
  22 #include <config.h>
  23 #include "lisp.h"
  24 #include "syntax.h"
  25 #include "category.h"
  26 #include "buffer.h"
  27 #include "charset.h"
  28 #include "region-cache.h"
  29 #include "commands.h"
  30 #include "blockinput.h"
  31 #include "intervals.h"
  32
  33 #include <sys/types.h>
  34 #include "regex.h"
  35
/* Smaller and larger of two values.  Each argument appears twice in
   the expansion, so it may be evaluated twice; do not pass
   expressions with side effects.  */
#define min(a, b) ((a) < (b) ? (a) : (b))
#define max(a, b) ((a) > (b) ? (a) : (b))

/* Number of entries in the compiled-regexp cache (searchbufs).  */
#define REGEXP_CACHE_SIZE 20
  40
/* One entry of the compiled-regexp cache.
   If the regexp is non-nil, then the buffer contains the compiled form
   of that regexp, suitable for searching.  */
struct regexp_cache
{
  /* Next entry in the list headed by searchbuf_head, or 0 at the end.  */
  struct regexp_cache *next;
  /* Copy of the pattern string this entry was compiled from; nil
     while the entry holds no valid compiled pattern.  */
  Lisp_Object regexp;
  /* The compiled pattern.  */
  struct re_pattern_buffer buf;
  /* 0400 (octal) = 256 bytes of fastmap storage.
     NOTE(review): presumably attached to BUF by initialization code
     that is not visible here -- confirm.  */
  char fastmap[0400];
  /* Nonzero means regexp was compiled to do full POSIX backtracking.  */
  char posix;
};

/* The instances of that struct.  */
struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];

/* The head of the linked list; points to the most recently used buffer.  */
struct regexp_cache *searchbuf_head;
  58
  59
/* Every call to re_match, etc., must pass &search_regs as the regs
   argument unless you can show it is unnecessary (i.e., if re_match
   is certainly going to be called again before region-around-match
   can be called).

   Since the registers are now dynamically allocated, we need to make
   sure not to refer to the Nth register before checking that it has
   been allocated by checking search_regs.num_regs.

   The regex code keeps track of whether it has allocated the search
   buffer using bits in the re_pattern_buffer.  This means that whenever
   you compile a new pattern, it completely forgets whether it has
   allocated any registers, and will allocate new registers the next
   time you call a searching or matching function.  Therefore, we need
   to call re_set_registers after compiling a new pattern or after
   setting the match registers, so that the regex functions will be
   able to free or re-allocate it properly.  */
static struct re_registers search_regs;

/* The buffer in which the last search was performed, or
   Qt if the last search was done in a string;
   Qnil if no searching has been done yet.  */
static Lisp_Object last_thing_searched;

/* Error condition signaled when compiling a regexp fails
   (see compile_pattern_1).  */

Lisp_Object Qinvalid_regexp;

/* Forward declarations of static functions that are used before
   their definitions in this file.  */
static void set_search_regs ();
static void save_search_regs ();
static int simple_search ();
static int boyer_moore ();
static int search_buffer ();
  93
/* Report that the regexp matcher ran out of stack.  Callers in this
   file invoke it when re_match_2 or re_search returns -2.  */
static void
matcher_overflow ()
{
  error ("Stack overflow in regexp matcher");
}
  99
/* CONST expands to `const' when the compiler defines __STDC__,
   and to nothing otherwise.  */
#ifdef __STDC__
#define CONST const
#else
#define CONST
#endif
 105
 106 /* Compile a regexp and signal a Lisp error if anything goes wrong.
 107    PATTERN is the pattern to compile.
 108    CP is the place to put the result.
 109    TRANSLATE is a translation table for ignoring case, or nil for none.
 110    REGP is the structure that says where to store the "register"
 111    values that will result from matching this pattern.
 112    If it is 0, we should compile the pattern not to record any
 113    subexpression bounds.
 114    POSIX is nonzero if we want full backtracking (POSIX style)
 115    for this pattern.  0 means backtrack only enough to get a valid match.
 116    MULTIBYTE is nonzero if we want to handle multibyte characters in
 117    PATTERN.  0 means all multibyte characters are recognized just as
 118    sequences of binary data.  */
 119
 120 static void
 121 compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte)
 122      struct regexp_cache *cp;
 123      Lisp_Object pattern;
 124      Lisp_Object translate;
 125      struct re_registers *regp;
 126      int posix;
 127      int multibyte;
 128 {
 129   unsigned char *raw_pattern;
 130   int raw_pattern_size;
 131   char *val;
 132   reg_syntax_t old;
 133
 134   /* MULTIBYTE says whether the text to be searched is multibyte.
 135      We must convert PATTERN to match that, or we will not really
 136      find things right.  */
 137
 138   if (multibyte == STRING_MULTIBYTE (pattern))
 139     {
 140       raw_pattern = (unsigned char *) XSTRING (pattern)->data;
 141       raw_pattern_size = STRING_BYTES (XSTRING (pattern));
 142     }
 143   else if (multibyte)
 144     {
 145       raw_pattern_size = count_size_as_multibyte (XSTRING (pattern)->data,
 146                                                   XSTRING (pattern)->size);
 147       raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
 148       copy_text (XSTRING (pattern)->data, raw_pattern,
 149                  XSTRING (pattern)->size, 0, 1);
 150     }
 151   else
 152     {
 153       /* Converting multibyte to single-byte.
 154
 155          ??? Perhaps this conversion should be done in a special way
 156          by subtracting nonascii-insert-offset from each non-ASCII char,
 157          so that only the multibyte chars which really correspond to
 158          the chosen single-byte character set can possibly match.  */
 159       raw_pattern_size = XSTRING (pattern)->size;
 160       raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
 161       copy_text (XSTRING (pattern)->data, raw_pattern,
 162                  STRING_BYTES (XSTRING (pattern)), 1, 0);
 163     }
 164
 165   cp->regexp = Qnil;
 166   cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
 167   cp->posix = posix;
 168   cp->buf.multibyte = multibyte;
 169   BLOCK_INPUT;
 170   old = re_set_syntax (RE_SYNTAX_EMACS
 171                        | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
 172   val = (char *) re_compile_pattern ((char *)raw_pattern,
 173                                      raw_pattern_size, &cp->buf);
 174   re_set_syntax (old);
 175   UNBLOCK_INPUT;
 176   if (val)
 177     Fsignal (Qinvalid_regexp, Fcons (build_string (val), Qnil));
 178
 179   cp->regexp = Fcopy_sequence (pattern);
 180 }
 181
 182 /* Shrink each compiled regexp buffer in the cache
 183    to the size actually used right now.
 184    This is called from garbage collection.  */
 185
 186 void
 187 shrink_regexp_cache ()
 188 {
 189   struct regexp_cache *cp, **cpp;
 190
 191   for (cp = searchbuf_head; cp != 0; cp = cp->next)
 192     {
 193       cp->buf.allocated = cp->buf.used;
 194       cp->buf.buffer
 195         = (unsigned char *) realloc (cp->buf.buffer, cp->buf.used);
 196     }
 197 }
 198
 199 /* Compile a regexp if necessary, but first check to see if there's one in
 200    the cache.
 201    PATTERN is the pattern to compile.
 202    TRANSLATE is a translation table for ignoring case, or nil for none.
 203    REGP is the structure that says where to store the "register"
 204    values that will result from matching this pattern.
 205    If it is 0, we should compile the pattern not to record any
 206    subexpression bounds.
 207    POSIX is nonzero if we want full backtracking (POSIX style)
 208    for this pattern.  0 means backtrack only enough to get a valid match.  */
 209
 210 struct re_pattern_buffer *
 211 compile_pattern (pattern, regp, translate, posix, multibyte)
 212      Lisp_Object pattern;
 213      struct re_registers *regp;
 214      Lisp_Object translate;
 215      int posix, multibyte;
 216 {
 217   struct regexp_cache *cp, **cpp;
 218
 219   for (cpp = &searchbuf_head; ; cpp = &cp->next)
 220     {
 221       cp = *cpp;
 222       /* Entries are initialized to nil, and may be set to nil by
 223          compile_pattern_1 if the pattern isn't valid.  Don't apply
 224          XSTRING in those cases.  However, compile_pattern_1 is only
 225          applied to the cache entry we pick here to reuse.  So nil
 226          should never appear before a non-nil entry.  */
 227       if (cp->regexp == Qnil)
 228         goto compile_it;
 229       if (XSTRING (cp->regexp)->size == XSTRING (pattern)->size
 230           && !NILP (Fstring_equal (cp->regexp, pattern))
 231           && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
 232           && cp->posix == posix
 233           && cp->buf.multibyte == multibyte)
 234         break;
 235
 236       /* If we're at the end of the cache, compile into the nil cell
 237          we found, or the last (least recently used) cell with a
 238          string value.  */
 239       if (cp->next == 0)
 240         {
 241         compile_it:
 242           compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte);
 243           break;
 244         }
 245     }
 246
 247   /* When we get here, cp (aka *cpp) contains the compiled pattern,
 248      either because we found it in the cache or because we just compiled it.
 249      Move it to the front of the queue to mark it as most recently used.  */
 250   *cpp = cp->next;
 251   cp->next = searchbuf_head;
 252   searchbuf_head = cp;
 253
 254   /* Advise the searching functions about the space we have allocated
 255      for register data.  */
 256   if (regp)
 257     re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
 258
 259   return &cp->buf;
 260 }
 261
 262 /* Error condition used for failing searches */
 263 Lisp_Object Qsearch_failed;
 264
 265 Lisp_Object
 266 signal_failure (arg)
 267      Lisp_Object arg;
 268 {
 269   Fsignal (Qsearch_failed, Fcons (arg, Qnil));
 270   return Qnil;
 271 }
 272 \f
 273 static Lisp_Object
 274 looking_at_1 (string, posix)
 275      Lisp_Object string;
 276      int posix;
 277 {
 278   Lisp_Object val;
 279   unsigned char *p1, *p2;
 280   int s1, s2;
 281   register int i;
 282   struct re_pattern_buffer *bufp;
 283
 284   if (running_asynch_code)
 285     save_search_regs ();
 286
 287   CHECK_STRING (string, 0);
 288   bufp = compile_pattern (string, &search_regs,
 289                           (!NILP (current_buffer->case_fold_search)
 290                            ? DOWNCASE_TABLE : Qnil),
 291                           posix,
 292                           !NILP (current_buffer->enable_multibyte_characters));
 293
 294   immediate_quit = 1;
 295   QUIT;                 /* Do a pending quit right away, to avoid paradoxical behavior */
 296
 297   /* Get pointers and sizes of the two strings
 298      that make up the visible portion of the buffer. */
 299
 300   p1 = BEGV_ADDR;
 301   s1 = GPT_BYTE - BEGV_BYTE;
 302   p2 = GAP_END_ADDR;
 303   s2 = ZV_BYTE - GPT_BYTE;
 304   if (s1 < 0)
 305     {
 306       p2 = p1;
 307       s2 = ZV_BYTE - BEGV_BYTE;
 308       s1 = 0;
 309     }
 310   if (s2 < 0)
 311     {
 312       s1 = ZV_BYTE - BEGV_BYTE;
 313       s2 = 0;
 314     }
 315
 316   re_match_object = Qnil;
 317
 318   i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
 319                   PT_BYTE - BEGV_BYTE, &search_regs,
 320                   ZV_BYTE - BEGV_BYTE);
 321   immediate_quit = 0;
 322
 323   if (i == -2)
 324     matcher_overflow ();
 325
 326   val = (0 <= i ? Qt : Qnil);
 327   if (i >= 0)
 328     for (i = 0; i < search_regs.num_regs; i++)
 329       if (search_regs.start[i] >= 0)
 330         {
 331           search_regs.start[i]
 332             = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
 333           search_regs.end[i]
 334             = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
 335         }
 336   XSETBUFFER (last_thing_searched, current_buffer);
 337   return val;
 338 }
 339
 340 DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
 341   "Return t if text after point matches regular expression REGEXP.\n\
 342 This function modifies the match data that `match-beginning',\n\
 343 `match-end' and `match-data' access; save and restore the match\n\
 344 data if you want to preserve them.")
 345   (regexp)
 346      Lisp_Object regexp;
 347 {
 348   return looking_at_1 (regexp, 0);
 349 }
 350
 351 DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
 352   "Return t if text after point matches regular expression REGEXP.\n\
 353 Find the longest match, in accord with Posix regular expression rules.\n\
 354 This function modifies the match data that `match-beginning',\n\
 355 `match-end' and `match-data' access; save and restore the match\n\
 356 data if you want to preserve them.")
 357   (regexp)
 358      Lisp_Object regexp;
 359 {
 360   return looking_at_1 (regexp, 1);
 361 }
 362 \f
 363 static Lisp_Object
 364 string_match_1 (regexp, string, start, posix)
 365      Lisp_Object regexp, string, start;
 366      int posix;
 367 {
 368   int val;
 369   struct re_pattern_buffer *bufp;
 370   int pos, pos_byte;
 371   int i;
 372
 373   if (running_asynch_code)
 374     save_search_regs ();
 375
 376   CHECK_STRING (regexp, 0);
 377   CHECK_STRING (string, 1);
 378
 379   if (NILP (start))
 380     pos = 0, pos_byte = 0;
 381   else
 382     {
 383       int len = XSTRING (string)->size;
 384
 385       CHECK_NUMBER (start, 2);
 386       pos = XINT (start);
 387       if (pos < 0 && -pos <= len)
 388         pos = len + pos;
 389       else if (0 > pos || pos > len)
 390         args_out_of_range (string, start);
 391       pos_byte = string_char_to_byte (string, pos);
 392     }
 393
 394   bufp = compile_pattern (regexp, &search_regs,
 395                           (!NILP (current_buffer->case_fold_search)
 396                            ? DOWNCASE_TABLE : Qnil),
 397                           posix,
 398                           STRING_MULTIBYTE (string));
 399   immediate_quit = 1;
 400   re_match_object = string;
 401
 402   val = re_search (bufp, (char *) XSTRING (string)->data,
 403                    STRING_BYTES (XSTRING (string)), pos_byte,
 404                    STRING_BYTES (XSTRING (string)) - pos_byte,
 405                    &search_regs);
 406   immediate_quit = 0;
 407   last_thing_searched = Qt;
 408   if (val == -2)
 409     matcher_overflow ();
 410   if (val < 0) return Qnil;
 411
 412   for (i = 0; i < search_regs.num_regs; i++)
 413     if (search_regs.start[i] >= 0)
 414       {
 415         search_regs.start[i]
 416           = string_byte_to_char (string, search_regs.start[i]);
 417         search_regs.end[i]
 418           = string_byte_to_char (string, search_regs.end[i]);
 419       }
 420
 421   return make_number (string_byte_to_char (string, val));
 422 }
 423
 424 DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
 425   "Return index of start of first match for REGEXP in STRING, or nil.\n\
 426 Case is ignored if `case-fold-search' is non-nil in the current buffer.\n\
 427 If third arg START is non-nil, start search at that index in STRING.\n\
 428 For index of first char beyond the match, do (match-end 0).\n\
 429 `match-end' and `match-beginning' also give indices of substrings\n\
 430 matched by parenthesis constructs in the pattern.")
 431   (regexp, string, start)
 432      Lisp_Object regexp, string, start;
 433 {
 434   return string_match_1 (regexp, string, start, 0);
 435 }
 436
 437 DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
 438   "Return index of start of first match for REGEXP in STRING, or nil.\n\
 439 Find the longest match, in accord with Posix regular expression rules.\n\
 440 Case is ignored if `case-fold-search' is non-nil in the current buffer.\n\
 441 If third arg START is non-nil, start search at that index in STRING.\n\
 442 For index of first char beyond the match, do (match-end 0).\n\
 443 `match-end' and `match-beginning' also give indices of substrings\n\
 444 matched by parenthesis constructs in the pattern.")
 445   (regexp, string, start)
 446      Lisp_Object regexp, string, start;
 447 {
 448   return string_match_1 (regexp, string, start, 1);
 449 }
 450
 451 /* Match REGEXP against STRING, searching all of STRING,
 452    and return the index of the match, or negative on failure.
 453    This does not clobber the match data.  */
 454
 455 int
 456 fast_string_match (regexp, string)
 457      Lisp_Object regexp, string;
 458 {
 459   int val;
 460   struct re_pattern_buffer *bufp;
 461
 462   bufp = compile_pattern (regexp, 0, Qnil,
 463                           0, STRING_MULTIBYTE (string));
 464   immediate_quit = 1;
 465   re_match_object = string;
 466
 467   val = re_search (bufp, (char *) XSTRING (string)->data,
 468                    STRING_BYTES (XSTRING (string)), 0,
 469                    STRING_BYTES (XSTRING (string)), 0);
 470   immediate_quit = 0;
 471   return val;
 472 }
 473
 474 /* Match REGEXP against STRING, searching all of STRING ignoring case,
 475    and return the index of the match, or negative on failure.
 476    This does not clobber the match data.
 477    We assume that STRING contains single-byte characters.  */
 478
 479 extern Lisp_Object Vascii_downcase_table;
 480
 481 int
 482 fast_c_string_match_ignore_case (regexp, string)
 483      Lisp_Object regexp;
 484      char *string;
 485 {
 486   int val;
 487   struct re_pattern_buffer *bufp;
 488   int len = strlen (string);
 489
 490   regexp = string_make_unibyte (regexp);
 491   re_match_object = Qt;
 492   bufp = compile_pattern (regexp, 0,
 493                           Vascii_downcase_table, 0,
 494                           0);
 495   immediate_quit = 1;
 496   val = re_search (bufp, string, len, 0, len, 0);
 497   immediate_quit = 0;
 498   return val;
 499 }
 500 \f
 501 /* The newline cache: remembering which sections of text have no newlines.  */
 502
 503 /* If the user has requested newline caching, make sure it's on.
 504    Otherwise, make sure it's off.
 505    This is our cheezy way of associating an action with the change of
 506    state of a buffer-local variable.  */
 507 static void
 508 newline_cache_on_off (buf)
 509      struct buffer *buf;
 510 {
 511   if (NILP (buf->cache_long_line_scans))
 512     {
 513       /* It should be off.  */
 514       if (buf->newline_cache)
 515         {
 516           free_region_cache (buf->newline_cache);
 517           buf->newline_cache = 0;
 518         }
 519     }
 520   else
 521     {
 522       /* It should be on.  */
 523       if (buf->newline_cache == 0)
 524         buf->newline_cache = new_region_cache ();
 525     }
 526 }
 527
 528 \f
 529 /* Search for COUNT instances of the character TARGET between START and END.
 530
 531    If COUNT is positive, search forwards; END must be >= START.
 532    If COUNT is negative, search backwards for the -COUNTth instance;
 533       END must be <= START.
 534    If COUNT is zero, do anything you please; run rogue, for all I care.
 535
 536    If END is zero, use BEGV or ZV instead, as appropriate for the
 537    direction indicated by COUNT.
 538
 539    If we find COUNT instances, set *SHORTAGE to zero, and return the
 540    position after the COUNTth match.  Note that for reverse motion
 541    this is not the same as the usual convention for Emacs motion commands.
 542
 543    If we don't find COUNT instances before reaching END, set *SHORTAGE
 544    to the number of TARGETs left unfound, and return END.
 545
 546    If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
 547    except when inside redisplay.  */
 548
 549 int
 550 scan_buffer (target, start, end, count, shortage, allow_quit)
 551      register int target;
 552      int start, end;
 553      int count;
 554      int *shortage;
 555      int allow_quit;
 556 {
 557   struct region_cache *newline_cache;
 558   int direction;
 559
 560   if (count > 0)
 561     {
 562       direction = 1;
 563       if (! end) end = ZV;
 564     }
 565   else
 566     {
 567       direction = -1;
 568       if (! end) end = BEGV;
 569     }
 570
 571   newline_cache_on_off (current_buffer);
 572   newline_cache = current_buffer->newline_cache;
 573
 574   if (shortage != 0)
 575     *shortage = 0;
 576
 577   immediate_quit = allow_quit;
 578
 579   if (count > 0)
 580     while (start != end)
 581       {
 582         /* Our innermost scanning loop is very simple; it doesn't know
 583            about gaps, buffer ends, or the newline cache.  ceiling is
 584            the position of the last character before the next such
 585            obstacle --- the last character the dumb search loop should
 586            examine.  */
 587         int ceiling_byte = CHAR_TO_BYTE (end) - 1;
 588         int start_byte = CHAR_TO_BYTE (start);
 589         int tem;
 590
 591         /* If we're looking for a newline, consult the newline cache
 592            to see where we can avoid some scanning.  */
 593         if (target == '\n' && newline_cache)
 594           {
 595             int next_change;
 596             immediate_quit = 0;
 597             while (region_cache_forward
 598                    (current_buffer, newline_cache, start_byte, &next_change))
 599               start_byte = next_change;
 600             immediate_quit = allow_quit;
 601
 602             /* START should never be after END.  */
 603             if (start_byte > ceiling_byte)
 604               start_byte = ceiling_byte;
 605
 606             /* Now the text after start is an unknown region, and
 607                next_change is the position of the next known region. */
 608             ceiling_byte = min (next_change - 1, ceiling_byte);
 609           }
 610
 611         /* The dumb loop can only scan text stored in contiguous
 612            bytes. BUFFER_CEILING_OF returns the last character
 613            position that is contiguous, so the ceiling is the
 614            position after that.  */
 615         tem = BUFFER_CEILING_OF (start_byte);
 616         ceiling_byte = min (tem, ceiling_byte);
 617
 618         {
 619           /* The termination address of the dumb loop.  */
 620           register unsigned char *ceiling_addr
 621             = BYTE_POS_ADDR (ceiling_byte) + 1;
 622           register unsigned char *cursor
 623             = BYTE_POS_ADDR (start_byte);
 624           unsigned char *base = cursor;
 625
 626           while (cursor < ceiling_addr)
 627             {
 628               unsigned char *scan_start = cursor;
 629
 630               /* The dumb loop.  */
 631               while (*cursor != target && ++cursor < ceiling_addr)
 632                 ;
 633
 634               /* If we're looking for newlines, cache the fact that
 635                  the region from start to cursor is free of them. */
 636               if (target == '\n' && newline_cache)
 637                 know_region_cache (current_buffer, newline_cache,
 638                                    start_byte + scan_start - base,
 639                                    start_byte + cursor - base);
 640
 641               /* Did we find the target character?  */
 642               if (cursor < ceiling_addr)
 643                 {
 644                   if (--count == 0)
 645                     {
 646                       immediate_quit = 0;
 647                       return BYTE_TO_CHAR (start_byte + cursor - base + 1);
 648                     }
 649                   cursor++;
 650                 }
 651             }
 652
 653           start = BYTE_TO_CHAR (start_byte + cursor - base);
 654         }
 655       }
 656   else
 657     while (start > end)
 658       {
 659         /* The last character to check before the next obstacle.  */
 660         int ceiling_byte = CHAR_TO_BYTE (end);
 661         int start_byte = CHAR_TO_BYTE (start);
 662         int tem;
 663
 664         /* Consult the newline cache, if appropriate.  */
 665         if (target == '\n' && newline_cache)
 666           {
 667             int next_change;
 668             immediate_quit = 0;
 669             while (region_cache_backward
 670                    (current_buffer, newline_cache, start_byte, &next_change))
 671               start_byte = next_change;
 672             immediate_quit = allow_quit;
 673
 674             /* Start should never be at or before end.  */
 675             if (start_byte <= ceiling_byte)
 676               start_byte = ceiling_byte + 1;
 677
 678             /* Now the text before start is an unknown region, and
 679                next_change is the position of the next known region. */
 680             ceiling_byte = max (next_change, ceiling_byte);
 681           }
 682
 683         /* Stop scanning before the gap.  */
 684         tem = BUFFER_FLOOR_OF (start_byte - 1);
 685         ceiling_byte = max (tem, ceiling_byte);
 686
 687         {
 688           /* The termination address of the dumb loop.  */
 689           register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
 690           register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
 691           unsigned char *base = cursor;
 692
 693           while (cursor >= ceiling_addr)
 694             {
 695               unsigned char *scan_start = cursor;
 696
 697               while (*cursor != target && --cursor >= ceiling_addr)
 698                 ;
 699
 700               /* If we're looking for newlines, cache the fact that
 701                  the region from after the cursor to start is free of them.  */
 702               if (target == '\n' && newline_cache)
 703                 know_region_cache (current_buffer, newline_cache,
 704                                    start_byte + cursor - base,
 705                                    start_byte + scan_start - base);
 706
 707               /* Did we find the target character?  */
 708               if (cursor >= ceiling_addr)
 709                 {
 710                   if (++count >= 0)
 711                     {
 712                       immediate_quit = 0;
 713                       return BYTE_TO_CHAR (start_byte + cursor - base);
 714                     }
 715                   cursor--;
 716                 }
 717             }
 718
 719           start = BYTE_TO_CHAR (start_byte + cursor - base);
 720         }
 721       }
 722
 723   immediate_quit = 0;
 724   if (shortage != 0)
 725     *shortage = count * direction;
 726   return start;
 727 }
 728 \f
 729 /* Search for COUNT instances of a line boundary, which means either a
 730    newline or (if selective display enabled) a carriage return.
 731    Start at START.  If COUNT is negative, search backwards.
 732
 733    We report the resulting position by calling TEMP_SET_PT_BOTH.
 734
 735    If we find COUNT instances. we position after (always after,
 736    even if scanning backwards) the COUNTth match, and return 0.
 737
 738    If we don't find COUNT instances before reaching the end of the
 739    buffer (or the beginning, if scanning backwards), we return
 740    the number of line boundaries left unfound, and position at
 741    the limit we bumped up against.
 742
 743    If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
 744    except in special cases.  */
 745
 746 int
 747 scan_newline (start, start_byte, limit, limit_byte, count, allow_quit)
 748      int start, start_byte;
 749      int limit, limit_byte;
 750      register int count;
 751      int allow_quit;
 752 {
 753   int direction = ((count > 0) ? 1 : -1);
 754
 755   register unsigned char *cursor;
 756   unsigned char *base;
 757
 758   register int ceiling;
 759   register unsigned char *ceiling_addr;
 760
 761   int old_immediate_quit = immediate_quit;
 762
 763   /* If we are not in selective display mode,
 764      check only for newlines.  */
 765   int selective_display = (!NILP (current_buffer->selective_display)
 766                            && !INTEGERP (current_buffer->selective_display));
 767
 768   /* The code that follows is like scan_buffer
 769      but checks for either newline or carriage return.  */
 770
 771   if (allow_quit)
 772     immediate_quit++;
 773
 774   start_byte = CHAR_TO_BYTE (start);
 775
 776   if (count > 0)
 777     {
 778       while (start_byte < limit_byte)
 779         {
 780           ceiling =  BUFFER_CEILING_OF (start_byte);
 781           ceiling = min (limit_byte - 1, ceiling);
 782           ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
 783           base = (cursor = BYTE_POS_ADDR (start_byte));
 784           while (1)
 785             {
 786               while (*cursor != '\n' && ++cursor != ceiling_addr)
 787                 ;
 788
 789               if (cursor != ceiling_addr)
 790                 {
 791                   if (--count == 0)
 792                     {
 793                       immediate_quit = old_immediate_quit;
 794                       start_byte = start_byte + cursor - base + 1;
 795                       start = BYTE_TO_CHAR (start_byte);
 796                       TEMP_SET_PT_BOTH (start, start_byte);
 797                       return 0;
 798                     }
 799                   else
 800                     if (++cursor == ceiling_addr)
 801                       break;
 802                 }
 803               else
 804                 break;
 805             }
 806           start_byte += cursor - base;
 807         }
 808     }
 809   else
 810     {
 811       while (start_byte > limit_byte)
 812         {
 813           ceiling = BUFFER_FLOOR_OF (start_byte - 1);
 814           ceiling = max (limit_byte, ceiling);
 815           ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
 816           base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
 817           while (1)
 818             {
 819               while (--cursor != ceiling_addr && *cursor != '\n')
 820                 ;
 821
 822               if (cursor != ceiling_addr)
 823                 {
 824                   if (++count == 0)
 825                     {
 826                       immediate_quit = old_immediate_quit;
 827                       /* Return the position AFTER the match we found.  */
 828                       start_byte = start_byte + cursor - base + 1;
 829                       start = BYTE_TO_CHAR (start_byte);
 830                       TEMP_SET_PT_BOTH (start, start_byte);
 831                       return 0;
 832                     }
 833                 }
 834               else
 835                 break;
 836             }
 837           /* Here we add 1 to compensate for the last decrement
 838              of CURSOR, which took it past the valid range.  */
 839           start_byte += cursor - base + 1;
 840         }
 841     }
 842
 843   TEMP_SET_PT_BOTH (limit, limit_byte);
 844   immediate_quit = old_immediate_quit;
 845
 846   return count * direction;
 847 }
 848
 849 int
 850 find_next_newline_no_quit (from, cnt)
 851      register int from, cnt;
 852 {
 853   return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0);
 854 }
 855
 856 /* Like find_next_newline, but returns position before the newline,
 857    not after, and only search up to TO.  This isn't just
 858    find_next_newline (...)-1, because you might hit TO.  */
 859
 860 int
 861 find_before_next_newline (from, to, cnt)
 862      int from, to, cnt;
 863 {
 864   int shortage;
 865   int pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);
 866
 867   if (shortage == 0)
 868     pos--;
 869
 870   return pos;
 871 }
 872 \f
 873 /* Subroutines of Lisp buffer search functions. */
 874
 875 static Lisp_Object
 876 search_command (string, bound, noerror, count, direction, RE, posix)
 877      Lisp_Object string, bound, noerror, count;
 878      int direction;
 879      int RE;
 880      int posix;
 881 {
 882   register int np;
 883   int lim, lim_byte;
 884   int n = direction;
 885
 886   if (!NILP (count))
 887     {
 888       CHECK_NUMBER (count, 3);
 889       n *= XINT (count);
 890     }
 891
 892   CHECK_STRING (string, 0);
 893   if (NILP (bound))
 894     {
 895       if (n > 0)
 896         lim = ZV, lim_byte = ZV_BYTE;
 897       else
 898         lim = BEGV, lim_byte = BEGV_BYTE;
 899     }
 900   else
 901     {
 902       CHECK_NUMBER_COERCE_MARKER (bound, 1);
 903       lim = XINT (bound);
 904       if (n > 0 ? lim < PT : lim > PT)
 905         error ("Invalid search bound (wrong side of point)");
 906       if (lim > ZV)
 907         lim = ZV, lim_byte = ZV_BYTE;
 908       else if (lim < BEGV)
 909         lim = BEGV, lim_byte = BEGV_BYTE;
 910       else
 911         lim_byte = CHAR_TO_BYTE (lim);
 912     }
 913
 914   np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
 915                       (!NILP (current_buffer->case_fold_search)
 916                        ? current_buffer->case_canon_table
 917                        : Qnil),
 918                       (!NILP (current_buffer->case_fold_search)
 919                        ? current_buffer->case_eqv_table
 920                        : Qnil),
 921                       posix);
 922   if (np <= 0)
 923     {
 924       if (NILP (noerror))
 925         return signal_failure (string);
 926       if (!EQ (noerror, Qt))
 927         {
 928           if (lim < BEGV || lim > ZV)
 929             abort ();
 930           SET_PT_BOTH (lim, lim_byte);
 931           return Qnil;
 932 #if 0 /* This would be clean, but maybe programs depend on
 933          a value of nil here.  */
 934           np = lim;
 935 #endif
 936         }
 937       else
 938         return Qnil;
 939     }
 940
 941   if (np < BEGV || np > ZV)
 942     abort ();
 943
 944   SET_PT (np);
 945
 946   return make_number (np);
 947 }
 948 \f
 949 /* Return 1 if REGEXP it matches just one constant string.  */
 950
 951 static int
 952 trivial_regexp_p (regexp)
 953      Lisp_Object regexp;
 954 {
 955   int len = STRING_BYTES (XSTRING (regexp));
 956   unsigned char *s = XSTRING (regexp)->data;
 957   unsigned char c;
 958   while (--len >= 0)
 959     {
 960       switch (*s++)
 961         {
 962         case '.': case '*': case '+': case '?': case '[': case '^': case '$':
 963           return 0;
 964         case '\\':
 965           if (--len < 0)
 966             return 0;
 967           switch (*s++)
 968             {
 969             case '|': case '(': case ')': case '`': case '\'': case 'b':
 970             case 'B': case '<': case '>': case 'w': case 'W': case 's':
 971             case 'S': case '=':
 972             case 'c': case 'C': /* for categoryspec and notcategoryspec */
 973             case '1': case '2': case '3': case '4': case '5':
 974             case '6': case '7': case '8': case '9':
 975               return 0;
 976             }
 977         }
 978     }
 979   return 1;
 980 }
 981
 982 /* Search for the n'th occurrence of STRING in the current buffer,
 983    starting at position POS and stopping at position LIM,
 984    treating STRING as a literal string if RE is false or as
 985    a regular expression if RE is true.
 986
 987    If N is positive, searching is forward and LIM must be greater than POS.
 988    If N is negative, searching is backward and LIM must be less than POS.
 989
 990    Returns -x if x occurrences remain to be found (x > 0),
 991    or else the position at the beginning of the Nth occurrence
 992    (if searching backward) or the end (if searching forward).
 993
 994    POSIX is nonzero if we want full backtracking (POSIX style)
 995    for this pattern.  0 means backtrack only enough to get a valid match.  */
 996
 997 #define TRANSLATE(out, trt, d)                  \
 998 do                                              \
 999   {                                             \
1000     if (! NILP (trt))                           \
1001       {                                         \
1002         Lisp_Object temp;                       \
1003         temp = Faref (trt, make_number (d));    \
1004         if (INTEGERP (temp))                    \
1005           out = XINT (temp);                    \
1006         else                                    \
1007           out = d;                              \
1008       }                                         \
1009     else                                        \
1010       out = d;                                  \
1011   }                                             \
1012 while (0)
1013
1014 static int
1015 search_buffer (string, pos, pos_byte, lim, lim_byte, n,
1016                RE, trt, inverse_trt, posix)
1017      Lisp_Object string;
1018      int pos;
1019      int pos_byte;
1020      int lim;
1021      int lim_byte;
1022      int n;
1023      int RE;
1024      Lisp_Object trt;
1025      Lisp_Object inverse_trt;
1026      int posix;
1027 {
1028   int len = XSTRING (string)->size;
1029   int len_byte = STRING_BYTES (XSTRING (string));
1030   register int i;
1031
1032   if (running_asynch_code)
1033     save_search_regs ();
1034
1035   /* Searching 0 times means don't move.  */
1036   /* Null string is found at starting position.  */
1037   if (len == 0 || n == 0)
1038     {
1039       set_search_regs (pos, 0);
1040       return pos;
1041     }
1042
1043   if (RE && !trivial_regexp_p (string))
1044     {
1045       unsigned char *p1, *p2;
1046       int s1, s2;
1047       struct re_pattern_buffer *bufp;
1048
1049       bufp = compile_pattern (string, &search_regs, trt, posix,
1050                               !NILP (current_buffer->enable_multibyte_characters));
1051
1052       immediate_quit = 1;       /* Quit immediately if user types ^G,
1053                                    because letting this function finish
1054                                    can take too long. */
1055       QUIT;                     /* Do a pending quit right away,
1056                                    to avoid paradoxical behavior */
1057       /* Get pointers and sizes of the two strings
1058          that make up the visible portion of the buffer. */
1059
1060       p1 = BEGV_ADDR;
1061       s1 = GPT_BYTE - BEGV_BYTE;
1062       p2 = GAP_END_ADDR;
1063       s2 = ZV_BYTE - GPT_BYTE;
1064       if (s1 < 0)
1065         {
1066           p2 = p1;
1067           s2 = ZV_BYTE - BEGV_BYTE;
1068           s1 = 0;
1069         }
1070       if (s2 < 0)
1071         {
1072           s1 = ZV_BYTE - BEGV_BYTE;
1073           s2 = 0;
1074         }
1075       re_match_object = Qnil;
1076
1077       while (n < 0)
1078         {
1079           int val;
1080           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1081                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1082                              &search_regs,
1083                              /* Don't allow match past current point */
1084                              pos_byte - BEGV_BYTE);
1085           if (val == -2)
1086             {
1087               matcher_overflow ();
1088             }
1089           if (val >= 0)
1090             {
1091               pos_byte = search_regs.start[0] + BEGV_BYTE;
1092               for (i = 0; i < search_regs.num_regs; i++)
1093                 if (search_regs.start[i] >= 0)
1094                   {
1095                     search_regs.start[i]
1096                       = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1097                     search_regs.end[i]
1098                       = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1099                   }
1100               XSETBUFFER (last_thing_searched, current_buffer);
1101               /* Set pos to the new position. */
1102               pos = search_regs.start[0];
1103             }
1104           else
1105             {
1106               immediate_quit = 0;
1107               return (n);
1108             }
1109           n++;
1110         }
1111       while (n > 0)
1112         {
1113           int val;
1114           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1115                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1116                              &search_regs,
1117                              lim_byte - BEGV_BYTE);
1118           if (val == -2)
1119             {
1120               matcher_overflow ();
1121             }
1122           if (val >= 0)
1123             {
1124               pos_byte = search_regs.end[0] + BEGV_BYTE;
1125               for (i = 0; i < search_regs.num_regs; i++)
1126                 if (search_regs.start[i] >= 0)
1127                   {
1128                     search_regs.start[i]
1129                       = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1130                     search_regs.end[i]
1131                       = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1132                   }
1133               XSETBUFFER (last_thing_searched, current_buffer);
1134               pos = search_regs.end[0];
1135             }
1136           else
1137             {
1138               immediate_quit = 0;
1139               return (0 - n);
1140             }
1141           n--;
1142         }
1143       immediate_quit = 0;
1144       return (pos);
1145     }
1146   else                          /* non-RE case */
1147     {
1148       unsigned char *raw_pattern, *pat;
1149       int raw_pattern_size;
1150       int raw_pattern_size_byte;
1151       unsigned char *patbuf;
1152       int multibyte = !NILP (current_buffer->enable_multibyte_characters);
1153       unsigned char *base_pat = XSTRING (string)->data;
1154       int charset_base = -1;
1155       int boyer_moore_ok = 1;
1156
1157       /* MULTIBYTE says whether the text to be searched is multibyte.
1158          We must convert PATTERN to match that, or we will not really
1159          find things right.  */
1160
1161       if (multibyte == STRING_MULTIBYTE (string))
1162         {
1163           raw_pattern = (unsigned char *) XSTRING (string)->data;
1164           raw_pattern_size = XSTRING (string)->size;
1165           raw_pattern_size_byte = STRING_BYTES (XSTRING (string));
1166         }
1167       else if (multibyte)
1168         {
1169           raw_pattern_size = XSTRING (string)->size;
1170           raw_pattern_size_byte
1171             = count_size_as_multibyte (XSTRING (string)->data,
1172                                        raw_pattern_size);
1173           raw_pattern = (unsigned char *) alloca (raw_pattern_size_byte + 1);
1174           copy_text (XSTRING (string)->data, raw_pattern,
1175                      XSTRING (string)->size, 0, 1);
1176         }
1177       else
1178         {
1179           /* Converting multibyte to single-byte.
1180
1181              ??? Perhaps this conversion should be done in a special way
1182              by subtracting nonascii-insert-offset from each non-ASCII char,
1183              so that only the multibyte chars which really correspond to
1184              the chosen single-byte character set can possibly match.  */
1185           raw_pattern_size = XSTRING (string)->size;
1186           raw_pattern_size_byte = XSTRING (string)->size;
1187           raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
1188           copy_text (XSTRING (string)->data, raw_pattern,
1189                      STRING_BYTES (XSTRING (string)), 1, 0);
1190         }
1191
1192       /* Copy and optionally translate the pattern.  */
1193       len = raw_pattern_size;
1194       len_byte = raw_pattern_size_byte;
1195       patbuf = (unsigned char *) alloca (len_byte);
1196       pat = patbuf;
1197       base_pat = raw_pattern;
1198       if (multibyte)
1199         {
1200           while (--len >= 0)
1201             {
1202               unsigned char str[MAX_MULTIBYTE_LENGTH];
1203               int c, translated, inverse;
1204               int in_charlen, charlen;
1205
1206               /* If we got here and the RE flag is set, it's because we're
1207                  dealing with a regexp known to be trivial, so the backslash
1208                  just quotes the next character.  */
1209               if (RE && *base_pat == '\\')
1210                 {
1211                   len--;
1212                   len_byte--;
1213                   base_pat++;
1214                 }
1215
1216               c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen);
1217
1218               /* Translate the character, if requested.  */
1219               TRANSLATE (translated, trt, c);
1220               /* If translation changed the byte-length, go back
1221                  to the original character.  */
1222               charlen = CHAR_STRING (translated, str);
1223               if (in_charlen != charlen)
1224                 {
1225                   translated = c;
1226                   charlen = CHAR_STRING (c, str);
1227                 }
1228
1229               /* If we are searching for something strange,
1230                  an invalid multibyte code, don't use boyer-moore.  */
1231               if (! ASCII_BYTE_P (translated)
1232                   && (charlen == 1 /* 8bit code */
1233                       || charlen != in_charlen /* invalid multibyte code */
1234                       ))
1235                 boyer_moore_ok = 0;
1236
1237               TRANSLATE (inverse, inverse_trt, c);
1238
1239               /* Did this char actually get translated?
1240                  Would any other char get translated into it?  */
1241               if (translated != c || inverse != c)
1242                 {
1243                   /* Keep track of which character set row
1244                      contains the characters that need translation.  */
1245                   int charset_base_code = c & ~CHAR_FIELD3_MASK;
1246                   if (charset_base == -1)
1247                     charset_base = charset_base_code;
1248                   else if (charset_base != charset_base_code)
1249                     /* If two different rows appear, needing translation,
1250                        then we cannot use boyer_moore search.  */
1251                     boyer_moore_ok = 0;
1252                 }
1253
1254               /* Store this character into the translated pattern.  */
1255               bcopy (str, pat, charlen);
1256               pat += charlen;
1257               base_pat += in_charlen;
1258               len_byte -= in_charlen;
1259             }
1260         }
1261       else
1262         {
1263           /* Unibyte buffer.  */
1264           charset_base = 0;
1265           while (--len >= 0)
1266             {
1267               int c, translated;
1268
1269               /* If we got here and the RE flag is set, it's because we're
1270                  dealing with a regexp known to be trivial, so the backslash
1271                  just quotes the next character.  */
1272               if (RE && *base_pat == '\\')
1273                 {
1274                   len--;
1275                   base_pat++;
1276                 }
1277               c = *base_pat++;
1278               TRANSLATE (translated, trt, c);
1279               *pat++ = translated;
1280             }
1281         }
1282
1283       len_byte = pat - patbuf;
1284       len = raw_pattern_size;
1285       pat = base_pat = patbuf;
1286
1287       if (boyer_moore_ok)
1288         return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
1289                             pos, pos_byte, lim, lim_byte,
1290                             charset_base);
1291       else
1292         return simple_search (n, pat, len, len_byte, trt,
1293                               pos, pos_byte, lim, lim_byte);
1294     }
1295 }
1296 \f
1297 /* Do a simple string search N times for the string PAT,
1298    whose length is LEN/LEN_BYTE,
1299    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1300    TRT is the translation table.
1301
1302    Return the character position where the match is found.
1303    Otherwise, if M matches remained to be found, return -M.
1304
1305    This kind of search works regardless of what is in PAT and
1306    regardless of what is in TRT.  It is used in cases where
1307    boyer_moore cannot work.  */
1308
1309 static int
1310 simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
1311      int n;
1312      unsigned char *pat;
1313      int len, len_byte;
1314      Lisp_Object trt;
1315      int pos, pos_byte;
1316      int lim, lim_byte;
1317 {
1318   int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1319   int forward = n > 0;
1320
1321   if (lim > pos && multibyte)
1322     while (n > 0)
1323       {
1324         while (1)
1325           {
1326             /* Try matching at position POS.  */
1327             int this_pos = pos;
1328             int this_pos_byte = pos_byte;
1329             int this_len = len;
1330             int this_len_byte = len_byte;
1331             unsigned char *p = pat;
1332             if (pos + len > lim)
1333               goto stop;
1334
1335             while (this_len > 0)
1336               {
1337                 int charlen, buf_charlen;
1338                 int pat_ch, buf_ch;
1339
1340                 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1341                 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1342                                                  ZV_BYTE - this_pos_byte,
1343                                                  buf_charlen);
1344                 TRANSLATE (buf_ch, trt, buf_ch);
1345
1346                 if (buf_ch != pat_ch)
1347                   break;
1348
1349                 this_len_byte -= charlen;
1350                 this_len--;
1351                 p += charlen;
1352
1353                 this_pos_byte += buf_charlen;
1354                 this_pos++;
1355               }
1356
1357             if (this_len == 0)
1358               {
1359                 pos += len;
1360                 pos_byte += len_byte;
1361                 break;
1362               }
1363
1364             INC_BOTH (pos, pos_byte);
1365           }
1366
1367         n--;
1368       }
1369   else if (lim > pos)
1370     while (n > 0)
1371       {
1372         while (1)
1373           {
1374             /* Try matching at position POS.  */
1375             int this_pos = pos;
1376             int this_len = len;
1377             unsigned char *p = pat;
1378
1379             if (pos + len > lim)
1380               goto stop;
1381
1382             while (this_len > 0)
1383               {
1384                 int pat_ch = *p++;
1385                 int buf_ch = FETCH_BYTE (this_pos);
1386                 TRANSLATE (buf_ch, trt, buf_ch);
1387
1388                 if (buf_ch != pat_ch)
1389                   break;
1390
1391                 this_len--;
1392                 this_pos++;
1393               }
1394
1395             if (this_len == 0)
1396               {
1397                 pos += len;
1398                 break;
1399               }
1400
1401             pos++;
1402           }
1403
1404         n--;
1405       }
1406   /* Backwards search.  */
1407   else if (lim < pos && multibyte)
1408     while (n < 0)
1409       {
1410         while (1)
1411           {
1412             /* Try matching at position POS.  */
1413             int this_pos = pos - len;
1414             int this_pos_byte = pos_byte - len_byte;
1415             int this_len = len;
1416             int this_len_byte = len_byte;
1417             unsigned char *p = pat;
1418
1419             if (pos - len < lim)
1420               goto stop;
1421
1422             while (this_len > 0)
1423               {
1424                 int charlen, buf_charlen;
1425                 int pat_ch, buf_ch;
1426
1427                 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1428                 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1429                                                  ZV_BYTE - this_pos_byte,
1430                                                  buf_charlen);
1431                 TRANSLATE (buf_ch, trt, buf_ch);
1432
1433                 if (buf_ch != pat_ch)
1434                   break;
1435
1436                 this_len_byte -= charlen;
1437                 this_len--;
1438                 p += charlen;
1439                 this_pos_byte += buf_charlen;
1440                 this_pos++;
1441               }
1442
1443             if (this_len == 0)
1444               {
1445                 pos -= len;
1446                 pos_byte -= len_byte;
1447                 break;
1448               }
1449
1450             DEC_BOTH (pos, pos_byte);
1451           }
1452
1453         n++;
1454       }
1455   else if (lim < pos)
1456     while (n < 0)
1457       {
1458         while (1)
1459           {
1460             /* Try matching at position POS.  */
1461             int this_pos = pos - len;
1462             int this_len = len;
1463             unsigned char *p = pat;
1464
1465             if (pos - len < lim)
1466               goto stop;
1467
1468             while (this_len > 0)
1469               {
1470                 int pat_ch = *p++;
1471                 int buf_ch = FETCH_BYTE (this_pos);
1472                 TRANSLATE (buf_ch, trt, buf_ch);
1473
1474                 if (buf_ch != pat_ch)
1475                   break;
1476                 this_len--;
1477                 this_pos++;
1478               }
1479
1480             if (this_len == 0)
1481               {
1482                 pos -= len;
1483                 break;
1484               }
1485
1486             pos--;
1487           }
1488
1489         n++;
1490       }
1491
1492  stop:
1493   if (n == 0)
1494     {
1495       if (forward)
1496         set_search_regs ((multibyte ? pos_byte : pos) - len_byte, len_byte);
1497       else
1498         set_search_regs (multibyte ? pos_byte : pos, len_byte);
1499
1500       return pos;
1501     }
1502   else if (n > 0)
1503     return -n;
1504   else
1505     return n;
1506 }
1507 \f
1508 /* Do Boyer-Moore search N times for the string PAT,
1509    whose length is LEN/LEN_BYTE,
1510    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
 1511    The sign of N says which direction we search in.
1512    TRT and INVERSE_TRT are translation tables.
1513
1514    This kind of search works if all the characters in PAT that have
1515    nontrivial translation are the same aside from the last byte.  This
1516    makes it possible to translate just the last byte of a character,
1517    and do so after just a simple test of the context.
1518
1519    If that criterion is not satisfied, do not call this function.  */
1520
1521 static int
1522 boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
1523              pos, pos_byte, lim, lim_byte, charset_base)
1524      int n;
1525      unsigned char *base_pat;
1526      int len, len_byte;
1527      Lisp_Object trt;
1528      Lisp_Object inverse_trt;
1529      int pos, pos_byte;
1530      int lim, lim_byte;
1531      int charset_base;
1532 {
1533   int direction = ((n > 0) ? 1 : -1);
1534   register int dirlen;
1535   int infinity, limit, k, stride_for_teases;
1536   register int *BM_tab;
1537   int *BM_tab_base;
1538   register unsigned char *cursor, *p_limit;
1539   register int i, j;
1540   unsigned char *pat, *pat_end;
1541   int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1542
1543   unsigned char simple_translate[0400];
1544   int translate_prev_byte;
1545   int translate_anteprev_byte;
1546
1547 #ifdef C_ALLOCA
1548   int BM_tab_space[0400];
1549   BM_tab = &BM_tab_space[0];
1550 #else
1551   BM_tab = (int *) alloca (0400 * sizeof (int));
1552 #endif
1553   /* The general approach is that we are going to maintain that we know */
1554   /* the first (closest to the present position, in whatever direction */
1555   /* we're searching) character that could possibly be the last */
1556   /* (furthest from present position) character of a valid match.  We */
1557   /* advance the state of our knowledge by looking at that character */
1558   /* and seeing whether it indeed matches the last character of the */
1559   /* pattern.  If it does, we take a closer look.  If it does not, we */
1560   /* move our pointer (to putative last characters) as far as is */
1561   /* logically possible.  This amount of movement, which I call a */
1562   /* stride, will be the length of the pattern if the actual character */
1563   /* appears nowhere in the pattern, otherwise it will be the distance */
1564   /* from the last occurrence of that character to the end of the */
1565   /* pattern. */
1566   /* As a coding trick, an enormous stride is coded into the table for */
1567   /* characters that match the last character.  This allows use of only */
1568   /* a single test, a test for having gone past the end of the */
1569   /* permissible match region, to test for both possible matches (when */
1570   /* the stride goes past the end immediately) and failure to */
1571   /* match (where you get nudged past the end one stride at a time). */
1572
1573   /* Here we make a "mickey mouse" BM table.  The stride of the search */
1574   /* is determined only by the last character of the putative match. */
1575   /* If that character does not match, we will stride the proper */
1576   /* distance to propose a match that superimposes it on the last */
1577   /* instance of a character that matches it (per trt), or misses */
1578   /* it entirely if there is none. */
1579
1580   dirlen = len_byte * direction;
1581   infinity = dirlen - (lim_byte + pos_byte + len_byte + len_byte) * direction;
1582
1583   /* Record position after the end of the pattern.  */
1584   pat_end = base_pat + len_byte;
1585   /* BASE_PAT points to a character that we start scanning from.
1586      It is the first character in a forward search,
1587      the last character in a backward search.  */
1588   if (direction < 0)
1589     base_pat = pat_end - 1;
1590
1591   BM_tab_base = BM_tab;
1592   BM_tab += 0400;
1593   j = dirlen;           /* to get it in a register */
1594   /* A character that does not appear in the pattern induces a */
1595   /* stride equal to the pattern length. */
1596   while (BM_tab_base != BM_tab)
1597     {
1598       *--BM_tab = j;
1599       *--BM_tab = j;
1600       *--BM_tab = j;
1601       *--BM_tab = j;
1602     }
1603
1604   /* We use this for translation, instead of TRT itself.
1605      We fill this in to handle the characters that actually
1606      occur in the pattern.  Others don't matter anyway!  */
1607   bzero (simple_translate, sizeof simple_translate);
1608   for (i = 0; i < 0400; i++)
1609     simple_translate[i] = i;
1610
1611   i = 0;
1612   while (i != infinity)
1613     {
1614       unsigned char *ptr = base_pat + i;
1615       i += direction;
1616       if (i == dirlen)
1617         i = infinity;
1618       if (! NILP (trt))
1619         {
1620           int ch;
1621           int untranslated;
1622           int this_translated = 1;
1623
1624           if (multibyte
1625               /* Is *PTR the last byte of a character?  */
1626               && (pat_end - ptr == 1 || CHAR_HEAD_P (ptr[1])))
1627             {
1628               unsigned char *charstart = ptr;
1629               while (! CHAR_HEAD_P (*charstart))
1630                 charstart--;
1631               untranslated = STRING_CHAR (charstart, ptr - charstart + 1);
1632               if (charset_base == (untranslated & ~CHAR_FIELD3_MASK))
1633                 {
1634                   TRANSLATE (ch, trt, untranslated);
1635                   if (! CHAR_HEAD_P (*ptr))
1636                     {
1637                       translate_prev_byte = ptr[-1];
1638                       if (! CHAR_HEAD_P (translate_prev_byte))
1639                         translate_anteprev_byte = ptr[-2];
1640                     }
1641                 }
1642               else
1643                 {
1644                   this_translated = 0;
1645                   ch = *ptr;
1646                 }
1647             }
1648           else if (!multibyte)
1649             TRANSLATE (ch, trt, *ptr);
1650           else
1651             {
1652               ch = *ptr;
1653               this_translated = 0;
1654             }
1655
1656           if (ch > 0400)
1657             j = ((unsigned char) ch) | 0200;
1658           else
1659             j = (unsigned char) ch;
1660
1661           if (i == infinity)
1662             stride_for_teases = BM_tab[j];
1663
1664           BM_tab[j] = dirlen - i;
1665           /* A translation table is accompanied by its inverse -- see */
1666           /* comment following downcase_table for details */
1667           if (this_translated)
1668             {
1669               int starting_ch = ch;
1670               int starting_j = j;
1671               while (1)
1672                 {
1673                   TRANSLATE (ch, inverse_trt, ch);
1674                   if (ch > 0400)
1675                     j = ((unsigned char) ch) | 0200;
1676                   else
1677                     j = (unsigned char) ch;
1678
1679                   /* For all the characters that map into CH,
1680                      set up simple_translate to map the last byte
1681                      into STARTING_J.  */
1682                   simple_translate[j] = starting_j;
1683                   if (ch == starting_ch)
1684                     break;
1685                   BM_tab[j] = dirlen - i;
1686                 }
1687             }
1688         }
1689       else
1690         {
1691           j = *ptr;
1692
1693           if (i == infinity)
1694             stride_for_teases = BM_tab[j];
1695           BM_tab[j] = dirlen - i;
1696         }
1697       /* stride_for_teases tells how much to stride if we get a */
1698       /* match on the far character but are subsequently */
1699       /* disappointed, by recording what the stride would have been */
1700       /* for that character if the last character had been */
1701       /* different. */
1702     }
1703   infinity = dirlen - infinity;
1704   pos_byte += dirlen - ((direction > 0) ? direction : 0);
1705   /* loop invariant - POS_BYTE points at where last char (first
1706      char if reverse) of pattern would align in a possible match.  */
1707   while (n != 0)
1708     {
1709       int tail_end;
1710       unsigned char *tail_end_ptr;
1711
1712       /* It's been reported that some (broken) compiler thinks that
1713          Boolean expressions in an arithmetic context are unsigned.
1714          Using an explicit ?1:0 prevents this.  */
1715       if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1716           < 0)
1717         return (n * (0 - direction));
1718       /* First we do the part we can by pointers (maybe nothing) */
1719       QUIT;
1720       pat = base_pat;
1721       limit = pos_byte - dirlen + direction;
1722       if (direction > 0)
1723         {
1724           limit = BUFFER_CEILING_OF (limit);
1725           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1726              can take on without hitting edge of buffer or the gap.  */
1727           limit = min (limit, pos_byte + 20000);
1728           limit = min (limit, lim_byte - 1);
1729         }
1730       else
1731         {
1732           limit = BUFFER_FLOOR_OF (limit);
1733           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1734              can take on without hitting edge of buffer or the gap.  */
1735           limit = max (limit, pos_byte - 20000);
1736           limit = max (limit, lim_byte);
1737         }
1738       tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1739       tail_end_ptr = BYTE_POS_ADDR (tail_end);
1740
1741       if ((limit - pos_byte) * direction > 20)
1742         {
1743           unsigned char *p2;
1744
1745           p_limit = BYTE_POS_ADDR (limit);
1746           p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1747           /* In this loop, pos + cursor - p2 is the surrogate for pos */
1748           while (1)             /* use one cursor setting as long as i can */
1749             {
1750               if (direction > 0) /* worth duplicating */
1751                 {
1752                   /* Use signed comparison if appropriate
1753                      to make cursor+infinity sure to be > p_limit.
1754                      Assuming that the buffer lies in a range of addresses
1755                      that are all "positive" (as ints) or all "negative",
1756                      either kind of comparison will work as long
1757                      as we don't step by infinity.  So pick the kind
1758                      that works when we do step by infinity.  */
1759                   if ((EMACS_INT) (p_limit + infinity) > (EMACS_INT) p_limit)
1760                     while ((EMACS_INT) cursor <= (EMACS_INT) p_limit)
1761                       cursor += BM_tab[*cursor];
1762                   else
1763                     while ((EMACS_UINT) cursor <= (EMACS_UINT) p_limit)
1764                       cursor += BM_tab[*cursor];
1765                 }
1766               else
1767                 {
1768                   if ((EMACS_INT) (p_limit + infinity) < (EMACS_INT) p_limit)
1769                     while ((EMACS_INT) cursor >= (EMACS_INT) p_limit)
1770                       cursor += BM_tab[*cursor];
1771                   else
1772                     while ((EMACS_UINT) cursor >= (EMACS_UINT) p_limit)
1773                       cursor += BM_tab[*cursor];
1774                 }
1775 /* If you are here, cursor is beyond the end of the searched region. */
1776 /* This can happen if you match on the far character of the pattern, */
1777 /* because the "stride" of that character is infinity, a number able */
1778 /* to throw you well beyond the end of the search.  It can also */
1779 /* happen if you fail to match within the permitted region and would */
1780 /* otherwise try a character beyond that region */
1781               if ((cursor - p_limit) * direction <= len_byte)
1782                 break;  /* a small overrun is genuine */
1783               cursor -= infinity; /* large overrun = hit */
1784               i = dirlen - direction;
1785               if (! NILP (trt))
1786                 {
1787                   while ((i -= direction) + direction != 0)
1788                     {
1789                       int ch;
1790                       cursor -= direction;
1791                       /* Translate only the last byte of a character.  */
1792                       if (! multibyte
1793                           || ((cursor == tail_end_ptr
1794                                || CHAR_HEAD_P (cursor[1]))
1795                               && (CHAR_HEAD_P (cursor[0])
1796                                   || (translate_prev_byte == cursor[-1]
1797                                       && (CHAR_HEAD_P (translate_prev_byte)
1798                                           || translate_anteprev_byte == cursor[-2])))))
1799                         ch = simple_translate[*cursor];
1800                       else
1801                         ch = *cursor;
1802                       if (pat[i] != ch)
1803                         break;
1804                     }
1805                 }
1806               else
1807                 {
1808                   while ((i -= direction) + direction != 0)
1809                     {
1810                       cursor -= direction;
1811                       if (pat[i] != *cursor)
1812                         break;
1813                     }
1814                 }
1815               cursor += dirlen - i - direction; /* fix cursor */
1816               if (i + direction == 0)
1817                 {
1818                   int position;
1819
1820                   cursor -= direction;
1821
1822                   position = pos_byte + cursor - p2 + ((direction > 0)
1823                                                        ? 1 - len_byte : 0);
1824                   set_search_regs (position, len_byte);
1825
1826                   if ((n -= direction) != 0)
1827                     cursor += dirlen; /* to resume search */
1828                   else
1829                     return ((direction > 0)
1830                             ? search_regs.end[0] : search_regs.start[0]);
1831                 }
1832               else
1833                 cursor += stride_for_teases; /* <sigh> we lose -  */
1834             }
1835           pos_byte += cursor - p2;
1836         }
1837       else
1838         /* Now we'll pick up a clump that has to be done the hard */
1839         /* way because it covers a discontinuity */
1840         {
1841           limit = ((direction > 0)
1842                    ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
1843                    : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
1844           limit = ((direction > 0)
1845                    ? min (limit + len_byte, lim_byte - 1)
1846                    : max (limit - len_byte, lim_byte));
1847           /* LIMIT is now the last value POS_BYTE can have
1848              and still be valid for a possible match.  */
1849           while (1)
1850             {
1851               /* This loop can be coded for space rather than */
1852               /* speed because it will usually run only once. */
1853               /* (the reach is at most len + 21, and typically */
1854               /* does not exceed len) */
1855               while ((limit - pos_byte) * direction >= 0)
1856                 pos_byte += BM_tab[FETCH_BYTE (pos_byte)];
1857               /* now run the same tests to distinguish going off the */
1858               /* end, a match or a phony match. */
1859               if ((pos_byte - limit) * direction <= len_byte)
1860                 break;  /* ran off the end */
1861               /* Found what might be a match.
1862                  Set POS_BYTE back to last (first if reverse) pos.  */
1863               pos_byte -= infinity;
1864               i = dirlen - direction;
1865               while ((i -= direction) + direction != 0)
1866                 {
1867                   int ch;
1868                   unsigned char *ptr;
1869                   pos_byte -= direction;
1870                   ptr = BYTE_POS_ADDR (pos_byte);
1871                   /* Translate only the last byte of a character.  */
1872                   if (! multibyte
1873                       || ((ptr == tail_end_ptr
1874                            || CHAR_HEAD_P (ptr[1]))
1875                           && (CHAR_HEAD_P (ptr[0])
1876                               || (translate_prev_byte == ptr[-1]
1877                                   && (CHAR_HEAD_P (translate_prev_byte)
1878                                       || translate_anteprev_byte == ptr[-2])))))
1879                     ch = simple_translate[*ptr];
1880                   else
1881                     ch = *ptr;
1882                   if (pat[i] != ch)
1883                     break;
1884                 }
1885               /* Above loop has moved POS_BYTE part or all the way
1886                  back to the first pos (last pos if reverse).
1887                  Set it once again at the last (first if reverse) char.  */
1888               pos_byte += dirlen - i- direction;
1889               if (i + direction == 0)
1890                 {
1891                   int position;
1892                   pos_byte -= direction;
1893
1894                   position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
1895
1896                   set_search_regs (position, len_byte);
1897
1898                   if ((n -= direction) != 0)
1899                     pos_byte += dirlen; /* to resume search */
1900                   else
1901                     return ((direction > 0)
1902                             ? search_regs.end[0] : search_regs.start[0]);
1903                 }
1904               else
1905                 pos_byte += stride_for_teases;
1906             }
1907           }
1908       /* We have done one clump.  Can we continue? */
1909       if ((lim_byte - pos_byte) * direction < 0)
1910         return ((0 - n) * direction);
1911     }
1912   return BYTE_TO_CHAR (pos_byte);
1913 }
1914
1915 /* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
1916    for the overall match just found in the current buffer.
1917    Also clear out the match data for registers 1 and up.  */
1918
1919 static void
1920 set_search_regs (beg_byte, nbytes)
1921      int beg_byte, nbytes;
1922 {
1923   int i;
1924
1925   /* Make sure we have registers in which to store
1926      the match position.  */
1927   if (search_regs.num_regs == 0)
1928     {
1929       search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
1930       search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
1931       search_regs.num_regs = 2;
1932     }
1933
1934   /* Clear out the other registers.  */
1935   for (i = 1; i < search_regs.num_regs; i++)
1936     {
1937       search_regs.start[i] = -1;
1938       search_regs.end[i] = -1;
1939     }
1940
1941   search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
1942   search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
1943   XSETBUFFER (last_thing_searched, current_buffer);
1944 }
1945 \f
1946 /* Given a string of words separated by word delimiters,
1947   compute a regexp that matches those exact words
1948   separated by arbitrary punctuation.  */
1949
1950 static Lisp_Object
1951 wordify (string)
1952      Lisp_Object string;
1953 {
1954   register unsigned char *p, *o;
1955   register int i, i_byte, len, punct_count = 0, word_count = 0;
1956   Lisp_Object val;
1957   int prev_c = 0;
1958   int adjust;
1959
1960   CHECK_STRING (string, 0);
1961   p = XSTRING (string)->data;
1962   len = XSTRING (string)->size;
1963
1964   for (i = 0, i_byte = 0; i < len; )
1965     {
1966       int c;
1967
1968       if (STRING_MULTIBYTE (string))
1969         FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
1970       else
1971         c = XSTRING (string)->data[i++];
1972
1973       if (SYNTAX (c) != Sword)
1974         {
1975           punct_count++;
1976           if (i > 0 && SYNTAX (prev_c) == Sword)
1977             word_count++;
1978         }
1979
1980       prev_c = c;
1981     }
1982
1983   if (SYNTAX (prev_c) == Sword)
1984     word_count++;
1985   if (!word_count)
1986     return build_string ("");
1987
1988   adjust = - punct_count + 5 * (word_count - 1) + 4;
1989   if (STRING_MULTIBYTE (string))
1990     val = make_uninit_multibyte_string (len + adjust,
1991                                         STRING_BYTES (XSTRING (string))
1992                                         + adjust);
1993   else
1994     val = make_uninit_string (len + adjust);
1995
1996   o = XSTRING (val)->data;
1997   *o++ = '\\';
1998   *o++ = 'b';
1999   prev_c = 0;
2000
2001   for (i = 0, i_byte = 0; i < len; )
2002     {
2003       int c;
2004       int i_byte_orig = i_byte;
2005
2006       if (STRING_MULTIBYTE (string))
2007         FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
2008       else
2009         {
2010           c = XSTRING (string)->data[i++];
2011           i_byte++;
2012         }
2013
2014       if (SYNTAX (c) == Sword)
2015         {
2016           bcopy (&XSTRING (string)->data[i_byte_orig], o,
2017                  i_byte - i_byte_orig);
2018           o += i_byte - i_byte_orig;
2019         }
2020       else if (i > 0 && SYNTAX (prev_c) == Sword && --word_count)
2021         {
2022           *o++ = '\\';
2023           *o++ = 'W';
2024           *o++ = '\\';
2025           *o++ = 'W';
2026           *o++ = '*';
2027         }
2028
2029       prev_c = c;
2030     }
2031
2032   *o++ = '\\';
2033   *o++ = 'b';
2034
2035   return val;
2036 }
2037 \f
2038 DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
2039   "MSearch backward: ",
2040   "Search backward from point for STRING.\n\
2041 Set point to the beginning of the occurrence found, and return point.\n\
2042 An optional second argument bounds the search; it is a buffer position.\n\
2043 The match found must not extend before that position.\n\
2044 Optional third argument, if t, means if fail just return nil (no error).\n\
2045  If not nil and not t, position at limit of search and return nil.\n\
2046 Optional fourth argument is repeat count--search for successive occurrences.\n\
2047 See also the functions `match-beginning', `match-end' and `replace-match'.")
2048   (string, bound, noerror, count)
2049      Lisp_Object string, bound, noerror, count;
2050 {
2051   return search_command (string, bound, noerror, count, -1, 0, 0);
2052 }
2053
2054 DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
2055   "Search forward from point for STRING.\n\
2056 Set point to the end of the occurrence found, and return point.\n\
2057 An optional second argument bounds the search; it is a buffer position.\n\
2058 The match found must not extend after that position.  nil is equivalent\n\
2059   to (point-max).\n\
2060 Optional third argument, if t, means if fail just return nil (no error).\n\
2061   If not nil and not t, move to limit of search and return nil.\n\
2062 Optional fourth argument is repeat count--search for successive occurrences.\n\
2063 See also the functions `match-beginning', `match-end' and `replace-match'.")
2064   (string, bound, noerror, count)
2065      Lisp_Object string, bound, noerror, count;
2066 {
2067   return search_command (string, bound, noerror, count, 1, 0, 0);
2068 }
2069
2070 DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
2071   "sWord search backward: ",
2072   "Search backward from point for STRING, ignoring differences in punctuation.\n\
2073 Set point to the beginning of the occurrence found, and return point.\n\
2074 An optional second argument bounds the search; it is a buffer position.\n\
2075 The match found must not extend before that position.\n\
2076 Optional third argument, if t, means if fail just return nil (no error).\n\
2077   If not nil and not t, move to limit of search and return nil.\n\
2078 Optional fourth argument is repeat count--search for successive occurrences.")
2079   (string, bound, noerror, count)
2080      Lisp_Object string, bound, noerror, count;
2081 {
2082   return search_command (wordify (string), bound, noerror, count, -1, 1, 0);
2083 }
2084
2085 DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
2086   "sWord search: ",
2087   "Search forward from point for STRING, ignoring differences in punctuation.\n\
2088 Set point to the end of the occurrence found, and return point.\n\
2089 An optional second argument bounds the search; it is a buffer position.\n\
2090 The match found must not extend after that position.\n\
2091 Optional third argument, if t, means if fail just return nil (no error).\n\
2092   If not nil and not t, move to limit of search and return nil.\n\
2093 Optional fourth argument is repeat count--search for successive occurrences.")
2094   (string, bound, noerror, count)
2095      Lisp_Object string, bound, noerror, count;
2096 {
2097   return search_command (wordify (string), bound, noerror, count, 1, 1, 0);
2098 }
2099
2100 DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
2101   "sRE search backward: ",
2102   "Search backward from point for match for regular expression REGEXP.\n\
2103 Set point to the beginning of the match, and return point.\n\
2104 The match found is the one starting last in the buffer\n\
2105 and yet ending before the origin of the search.\n\
2106 An optional second argument bounds the search; it is a buffer position.\n\
2107 The match found must start at or after that position.\n\
2108 Optional third argument, if t, means if fail just return nil (no error).\n\
2109   If not nil and not t, move to limit of search and return nil.\n\
2110 Optional fourth argument is repeat count--search for successive occurrences.\n\
2111 See also the functions `match-beginning', `match-end' and `replace-match'.")
2112   (regexp, bound, noerror, count)
2113      Lisp_Object regexp, bound, noerror, count;
2114 {
2115   return search_command (regexp, bound, noerror, count, -1, 1, 0);
2116 }
2117
2118 DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
2119   "sRE search: ",
2120   "Search forward from point for regular expression REGEXP.\n\
2121 Set point to the end of the occurrence found, and return point.\n\
2122 An optional second argument bounds the search; it is a buffer position.\n\
2123 The match found must not extend after that position.\n\
2124 Optional third argument, if t, means if fail just return nil (no error).\n\
2125   If not nil and not t, move to limit of search and return nil.\n\
2126 Optional fourth argument is repeat count--search for successive occurrences.\n\
2127 See also the functions `match-beginning', `match-end' and `replace-match'.")
2128   (regexp, bound, noerror, count)
2129      Lisp_Object regexp, bound, noerror, count;
2130 {
2131   return search_command (regexp, bound, noerror, count, 1, 1, 0);
2132 }
2133
2134 DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
2135   "sPosix search backward: ",
2136   "Search backward from point for match for regular expression REGEXP.\n\
2137 Find the longest match in accord with Posix regular expression rules.\n\
2138 Set point to the beginning of the match, and return point.\n\
2139 The match found is the one starting last in the buffer\n\
2140 and yet ending before the origin of the search.\n\
2141 An optional second argument bounds the search; it is a buffer position.\n\
2142 The match found must start at or after that position.\n\
2143 Optional third argument, if t, means if fail just return nil (no error).\n\
2144   If not nil and not t, move to limit of search and return nil.\n\
2145 Optional fourth argument is repeat count--search for successive occurrences.\n\
2146 See also the functions `match-beginning', `match-end' and `replace-match'.")
2147   (regexp, bound, noerror, count)
2148      Lisp_Object regexp, bound, noerror, count;
2149 {
2150   return search_command (regexp, bound, noerror, count, -1, 1, 1);
2151 }
2152
2153 DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
2154   "sPosix search: ",
2155   "Search forward from point for regular expression REGEXP.\n\
2156 Find the longest match in accord with Posix regular expression rules.\n\
2157 Set point to the end of the occurrence found, and return point.\n\
2158 An optional second argument bounds the search; it is a buffer position.\n\
2159 The match found must not extend after that position.\n\
2160 Optional third argument, if t, means if fail just return nil (no error).\n\
2161   If not nil and not t, move to limit of search and return nil.\n\
2162 Optional fourth argument is repeat count--search for successive occurrences.\n\
2163 See also the functions `match-beginning', `match-end' and `replace-match'.")
2164   (regexp, bound, noerror, count)
2165      Lisp_Object regexp, bound, noerror, count;
2166 {
2167   return search_command (regexp, bound, noerror, count, 1, 1, 1);
2168 }
2169 \f
2170 DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
2171   "Replace text matched by last search with NEWTEXT.\n\
2172 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.\n\
2173 Otherwise maybe capitalize the whole text, or maybe just word initials,\n\
2174 based on the replaced text.\n\
2175 If the replaced text has only capital letters\n\
2176 and has at least one multiletter word, convert NEWTEXT to all caps.\n\
2177 If the replaced text has at least one word starting with a capital letter,\n\
2178 then capitalize each word in NEWTEXT.\n\n\
2179 If third arg LITERAL is non-nil, insert NEWTEXT literally.\n\
2180 Otherwise treat `\\' as special:\n\
2181   `\\&' in NEWTEXT means substitute original matched text.\n\
2182   `\\N' means substitute what matched the Nth `\\(...\\)'.\n\
2183        If Nth parens didn't match, substitute nothing.\n\
2184   `\\\\' means insert one `\\'.\n\
2185 FIXEDCASE and LITERAL are optional arguments.\n\
2186 Leaves point at end of replacement text.\n\
2187 \n\
2188 The optional fourth argument STRING can be a string to modify.\n\
2189 In that case, this function creates and returns a new string\n\
2190 which is made by replacing the part of STRING that was matched.\n\
2191 \n\
2192 The optional fifth argument SUBEXP specifies a subexpression of the match.\n\
2193 It says to replace just that subexpression instead of the whole match.\n\
2194 This is useful only after a regular expression search or match\n\
2195 since only regular expressions have distinguished subexpressions.")
2196   (newtext, fixedcase, literal, string, subexp)
2197      Lisp_Object newtext, fixedcase, literal, string, subexp;
2198 {
2199   enum { nochange, all_caps, cap_initial } case_action;
2200   register int pos, pos_byte;
2201   int some_multiletter_word;
2202   int some_lowercase;
2203   int some_uppercase;
2204   int some_nonuppercase_initial;
2205   register int c, prevc;
2206   int inslen;
2207   int sub;
2208   int opoint, newpoint;
2209
2210   CHECK_STRING (newtext, 0);
2211
2212   if (! NILP (string))
2213     CHECK_STRING (string, 4);
2214
2215   case_action = nochange;       /* We tried an initialization */
2216                                 /* but some C compilers blew it */
2217
2218   if (search_regs.num_regs <= 0)
2219     error ("replace-match called before any match found");
2220
2221   if (NILP (subexp))
2222     sub = 0;
2223   else
2224     {
2225       CHECK_NUMBER (subexp, 3);
2226       sub = XINT (subexp);
2227       if (sub < 0 || sub >= search_regs.num_regs)
2228         args_out_of_range (subexp, make_number (search_regs.num_regs));
2229     }
2230
2231   if (NILP (string))
2232     {
2233       if (search_regs.start[sub] < BEGV
2234           || search_regs.start[sub] > search_regs.end[sub]
2235           || search_regs.end[sub] > ZV)
2236         args_out_of_range (make_number (search_regs.start[sub]),
2237                            make_number (search_regs.end[sub]));
2238     }
2239   else
2240     {
2241       if (search_regs.start[sub] < 0
2242           || search_regs.start[sub] > search_regs.end[sub]
2243           || search_regs.end[sub] > XSTRING (string)->size)
2244         args_out_of_range (make_number (search_regs.start[sub]),
2245                            make_number (search_regs.end[sub]));
2246     }
2247
2248   if (NILP (fixedcase))
2249     {
2250       /* Decide how to casify by examining the matched text. */
2251       int last;
2252
2253       pos = search_regs.start[sub];
2254       last = search_regs.end[sub];
2255
2256       if (NILP (string))
2257         pos_byte = CHAR_TO_BYTE (pos);
2258       else
2259         pos_byte = string_char_to_byte (string, pos);
2260
2261       prevc = '\n';
2262       case_action = all_caps;
2263
2264       /* some_multiletter_word is set nonzero if any original word
2265          is more than one letter long. */
2266       some_multiletter_word = 0;
2267       some_lowercase = 0;
2268       some_nonuppercase_initial = 0;
2269       some_uppercase = 0;
2270
2271       while (pos < last)
2272         {
2273           if (NILP (string))
2274             {
2275               c = FETCH_CHAR (pos_byte);
2276               INC_BOTH (pos, pos_byte);
2277             }
2278           else
2279             FETCH_STRING_CHAR_ADVANCE (c, string, pos, pos_byte);
2280
2281           if (LOWERCASEP (c))
2282             {
2283               /* Cannot be all caps if any original char is lower case */
2284
2285               some_lowercase = 1;
2286               if (SYNTAX (prevc) != Sword)
2287                 some_nonuppercase_initial = 1;
2288               else
2289                 some_multiletter_word = 1;
2290             }
2291           else if (!NOCASEP (c))
2292             {
2293               some_uppercase = 1;
2294               if (SYNTAX (prevc) != Sword)
2295                 ;
2296               else
2297                 some_multiletter_word = 1;
2298             }
2299           else
2300             {
2301               /* If the initial is a caseless word constituent,
2302                  treat that like a lowercase initial.  */
2303               if (SYNTAX (prevc) != Sword)
2304                 some_nonuppercase_initial = 1;
2305             }
2306
2307           prevc = c;
2308         }
2309
2310       /* Convert to all caps if the old text is all caps
2311          and has at least one multiletter word.  */
2312       if (! some_lowercase && some_multiletter_word)
2313         case_action = all_caps;
2314       /* Capitalize each word, if the old text has all capitalized words.  */
2315       else if (!some_nonuppercase_initial && some_multiletter_word)
2316         case_action = cap_initial;
2317       else if (!some_nonuppercase_initial && some_uppercase)
2318         /* Should x -> yz, operating on X, give Yz or YZ?
2319            We'll assume the latter.  */
2320         case_action = all_caps;
2321       else
2322         case_action = nochange;
2323     }
2324
2325   /* Do replacement in a string.  */
2326   if (!NILP (string))
2327     {
2328       Lisp_Object before, after;
2329
2330       before = Fsubstring (string, make_number (0),
2331                            make_number (search_regs.start[sub]));
2332       after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
2333
2334       /* Substitute parts of the match into NEWTEXT
2335          if desired.  */
2336       if (NILP (literal))
2337         {
2338           int lastpos = 0;
2339           int lastpos_byte = 0;
2340           /* We build up the substituted string in ACCUM.  */
2341           Lisp_Object accum;
2342           Lisp_Object middle;
2343           int length = STRING_BYTES (XSTRING (newtext));
2344
2345           accum = Qnil;
2346
2347           for (pos_byte = 0, pos = 0; pos_byte < length;)
2348             {
2349               int substart = -1;
2350               int subend;
2351               int delbackslash = 0;
2352
2353               FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2354
2355               if (c == '\\')
2356                 {
2357                   FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2358                   if (c == '&')
2359                     {
2360                       substart = search_regs.start[sub];
2361                       subend = search_regs.end[sub];
2362                     }
2363                   else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
2364                     {
2365                       if (search_regs.start[c - '0'] >= 0)
2366                         {
2367                           substart = search_regs.start[c - '0'];
2368                           subend = search_regs.end[c - '0'];
2369                         }
2370                     }
2371                   else if (c == '\\')
2372                     delbackslash = 1;
2373                   else
2374                     error ("Invalid use of `\\' in replacement text");
2375                 }
2376               if (substart >= 0)
2377                 {
2378                   if (pos - 2 != lastpos)
2379                     middle = substring_both (newtext, lastpos,
2380                                              lastpos_byte,
2381                                              pos - 2, pos_byte - 2);
2382                   else
2383                     middle = Qnil;
2384                   accum = concat3 (accum, middle,
2385                                    Fsubstring (string,
2386                                                make_number (substart),
2387                                                make_number (subend)));
2388                   lastpos = pos;
2389                   lastpos_byte = pos_byte;
2390                 }
2391               else if (delbackslash)
2392                 {
2393                   middle = substring_both (newtext, lastpos,
2394                                            lastpos_byte,
2395                                            pos - 1, pos_byte - 1);
2396
2397                   accum = concat2 (accum, middle);
2398                   lastpos = pos;
2399                   lastpos_byte = pos_byte;
2400                 }
2401             }
2402
2403           if (pos != lastpos)
2404             middle = substring_both (newtext, lastpos,
2405                                      lastpos_byte,
2406                                      pos, pos_byte);
2407           else
2408             middle = Qnil;
2409
2410           newtext = concat2 (accum, middle);
2411         }
2412
2413       /* Do case substitution in NEWTEXT if desired.  */
2414       if (case_action == all_caps)
2415         newtext = Fupcase (newtext);
2416       else if (case_action == cap_initial)
2417         newtext = Fupcase_initials (newtext);
2418
2419       return concat3 (before, newtext, after);
2420     }
2421
2422   /* Record point, the move (quietly) to the start of the match.  */
2423   if (PT >= search_regs.end[sub])
2424     opoint = PT - ZV;
2425   else if (PT > search_regs.start[sub])
2426     opoint = search_regs.end[sub] - ZV;
2427   else
2428     opoint = PT;
2429
2430   TEMP_SET_PT (search_regs.start[sub]);
2431
2432   /* We insert the replacement text before the old text, and then
2433      delete the original text.  This means that markers at the
2434      beginning or end of the original will float to the corresponding
2435      position in the replacement.  */
2436   if (!NILP (literal))
2437     Finsert_and_inherit (1, &newtext);
2438   else
2439     {
2440       int length = STRING_BYTES (XSTRING (newtext));
2441       unsigned char *substed;
2442       int substed_alloc_size, substed_len;
2443
2444       substed_alloc_size = length * 2 + 100;
2445       substed = (unsigned char *) xmalloc (substed_alloc_size + 1);
2446       substed_len = 0;
2447
2448       /* Go thru NEWTEXT, producing the actual text to insert in SUBSTED.  */
2449
2450       for (pos_byte = 0, pos = 0; pos_byte < length;)
2451         {
2452           unsigned char str[MAX_MULTIBYTE_LENGTH];
2453           unsigned char *add_stuff;
2454           int add_len;
2455           int idx = -1;
2456
2457           FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2458
2459           /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2460              or set IDX to a match index, which means put that part
2461              of the buffer text into SUBSTED.  */
2462
2463           if (c == '\\')
2464             {
2465               FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2466               if (c == '&')
2467                 idx = sub;
2468               else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
2469                 {
2470                   if (search_regs.start[c - '0'] >= 1)
2471                     idx = c - '0';
2472                 }
2473               else if (c == '\\')
2474                 add_len = 1, add_stuff = "\\";
2475               else
2476                 error ("Invalid use of `\\' in replacement text");
2477             }
2478           else
2479             {
2480               add_len = CHAR_STRING (c, str);
2481               add_stuff = str;
2482             }
2483
2484           /* If we want to copy part of a previous match,
2485              set up ADD_STUFF and ADD_LEN to point to it.  */
2486           if (idx >= 0)
2487             {
2488               int begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
2489               add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
2490               if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
2491                 move_gap (search_regs.start[idx]);
2492               add_stuff = BYTE_POS_ADDR (begbyte);
2493             }
2494
2495           /* Now the stuff we want to add to SUBSTED
2496              is invariably ADD_LEN bytes starting at ADD_STUFF.  */
2497
2498           /* Make sure SUBSTED is big enough.  */
2499           if (substed_len + add_len >= substed_alloc_size)
2500             {
2501               substed_alloc_size = substed_len + add_len + 500;
2502               substed = (unsigned char *) xrealloc (substed,
2503                                                     substed_alloc_size + 1);
2504             }
2505
2506           /* Now add to the end of SUBSTED.  */
2507           bcopy (add_stuff, substed + substed_len, add_len);
2508           substed_len += add_len;
2509         }
2510
2511       /* Now insert what we accumulated.  */
2512       insert_and_inherit (substed, substed_len);
2513
2514       xfree (substed);
2515     }
2516
2517   inslen = PT - (search_regs.start[sub]);
2518   del_range (search_regs.start[sub] + inslen, search_regs.end[sub] + inslen);
2519
2520   if (case_action == all_caps)
2521     Fupcase_region (make_number (PT - inslen), make_number (PT));
2522   else if (case_action == cap_initial)
2523     Fupcase_initials_region (make_number (PT - inslen), make_number (PT));
2524
2525   newpoint = PT;
2526
2527   /* Put point back where it was in the text.  */
2528   if (opoint <= 0)
2529     TEMP_SET_PT (opoint + ZV);
2530   else
2531     TEMP_SET_PT (opoint);
2532
2533   /* Now move point "officially" to the start of the inserted replacement.  */
2534   move_if_not_intangible (newpoint);
2535
2536   return Qnil;
2537 }
2538 \f
2539 static Lisp_Object
2540 match_limit (num, beginningp)
2541      Lisp_Object num;
2542      int beginningp;
2543 {
2544   register int n;
2545
2546   CHECK_NUMBER (num, 0);
2547   n = XINT (num);
2548   if (n < 0 || n >= search_regs.num_regs)
2549     args_out_of_range (num, make_number (search_regs.num_regs));
2550   if (search_regs.num_regs <= 0
2551       || search_regs.start[n] < 0)
2552     return Qnil;
2553   return (make_number ((beginningp) ? search_regs.start[n]
2554                                     : search_regs.end[n]));
2555 }
2556
/* Lisp primitive `match-beginning'.  Takes exactly one argument, SUBEXP,
   and hands it to match_limit with a nonzero second argument, which
   selects the start position of the register rather than its end.  */
DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
  "Return position of start of text matched by last search.\n\
SUBEXP, a number, specifies which parenthesized expression in the last\n\
  regexp.\n\
Value is nil if SUBEXPth pair didn't match, or there were less than\n\
  SUBEXP pairs.\n\
Zero means the entire text matched by the whole regexp or whole string.")
  (subexp)
     Lisp_Object subexp;
{
  return match_limit (subexp, 1);
}
2569
/* Lisp primitive `match-end'.  Takes exactly one argument, SUBEXP, and
   hands it to match_limit with a zero second argument, which selects
   the end position of the register rather than its start.  */
DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
  "Return position of end of text matched by last search.\n\
SUBEXP, a number, specifies which parenthesized expression in the last\n\
  regexp.\n\
Value is nil if SUBEXPth pair didn't match, or there were less than\n\
  SUBEXP pairs.\n\
Zero means the entire text matched by the whole regexp or whole string.")
  (subexp)
     Lisp_Object subexp;
{
  return match_limit (subexp, 0);
}
2582
2583 DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 2, 0,
2584   "Return a list containing all info on what the last search matched.\n\
2585 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.\n\
2586 All the elements are markers or nil (nil if the Nth pair didn't match)\n\
2587 if the last match was on a buffer; integers or nil if a string was matched.\n\
2588 Use `store-match-data' to reinstate the data in this list.\n\
2589 \n\
2590 If INTEGERS (the optional first argument) is non-nil, always use integers\n\
2591 \(rather than markers) to represent buffer positions.\n\
2592 If REUSE is a list, reuse it as part of the value.  If REUSE is long enough\n\
2593 to hold all the values, and if INTEGERS is non-nil, no consing is done.")
2594   (integers, reuse)
2595      Lisp_Object integers, reuse;
2596 {
2597   Lisp_Object tail, prev;
2598   Lisp_Object *data;
2599   int i, len;
2600
2601   if (NILP (last_thing_searched))
2602     return Qnil;
2603
2604   data = (Lisp_Object *) alloca ((2 * search_regs.num_regs)
2605                                  * sizeof (Lisp_Object));
2606
2607   len = -1;
2608   for (i = 0; i < search_regs.num_regs; i++)
2609     {
2610       int start = search_regs.start[i];
2611       if (start >= 0)
2612         {
2613           if (EQ (last_thing_searched, Qt)
2614               || ! NILP (integers))
2615             {
2616               XSETFASTINT (data[2 * i], start);
2617               XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
2618             }
2619           else if (BUFFERP (last_thing_searched))
2620             {
2621               data[2 * i] = Fmake_marker ();
2622               Fset_marker (data[2 * i],
2623                            make_number (start),
2624                            last_thing_searched);
2625               data[2 * i + 1] = Fmake_marker ();
2626               Fset_marker (data[2 * i + 1],
2627                            make_number (search_regs.end[i]),
2628                            last_thing_searched);
2629             }
2630           else
2631             /* last_thing_searched must always be Qt, a buffer, or Qnil.  */
2632             abort ();
2633
2634           len = i;
2635         }
2636       else
2637         data[2 * i] = data [2 * i + 1] = Qnil;
2638     }
2639
2640   /* If REUSE is not usable, cons up the values and return them.  */
2641   if (! CONSP (reuse))
2642     return Flist (2 * len + 2, data);
2643
2644   /* If REUSE is a list, store as many value elements as will fit
2645      into the elements of REUSE.  */
2646   for (i = 0, tail = reuse; CONSP (tail);
2647        i++, tail = XCDR (tail))
2648     {
2649       if (i < 2 * len + 2)
2650         XCAR (tail) = data[i];
2651       else
2652         XCAR (tail) = Qnil;
2653       prev = tail;
2654     }
2655
2656   /* If we couldn't fit all value elements into REUSE,
2657      cons up the rest of them and add them to the end of REUSE.  */
2658   if (i < 2 * len + 2)
2659     XCDR (prev) = Flist (2 * len + 2 - i, data + i);
2660
2661   return reuse;
2662 }
2663
2664
2665 DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 1, 0,
2666   "Set internal data on last search match from elements of LIST.\n\
2667 LIST should have been created by calling `match-data' previously.")
2668   (list)
2669      register Lisp_Object list;
2670 {
2671   register int i;
2672   register Lisp_Object marker;
2673
2674   if (running_asynch_code)
2675     save_search_regs ();
2676
2677   if (!CONSP (list) && !NILP (list))
2678     list = wrong_type_argument (Qconsp, list);
2679
2680   /* Unless we find a marker with a buffer in LIST, assume that this
2681      match data came from a string.  */
2682   last_thing_searched = Qt;
2683
2684   /* Allocate registers if they don't already exist.  */
2685   {
2686     int length = XFASTINT (Flength (list)) / 2;
2687
2688     if (length > search_regs.num_regs)
2689       {
2690         if (search_regs.num_regs == 0)
2691           {
2692             search_regs.start
2693               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2694             search_regs.end
2695               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2696           }
2697         else
2698           {
2699             search_regs.start
2700               = (regoff_t *) xrealloc (search_regs.start,
2701                                        length * sizeof (regoff_t));
2702             search_regs.end
2703               = (regoff_t *) xrealloc (search_regs.end,
2704                                        length * sizeof (regoff_t));
2705           }
2706
2707         search_regs.num_regs = length;
2708       }
2709   }
2710
2711   for (i = 0; i < search_regs.num_regs; i++)
2712     {
2713       marker = Fcar (list);
2714       if (NILP (marker))
2715         {
2716           search_regs.start[i] = -1;
2717           list = Fcdr (list);
2718         }
2719       else
2720         {
2721           if (MARKERP (marker))
2722             {
2723               if (XMARKER (marker)->buffer == 0)
2724                 XSETFASTINT (marker, 0);
2725               else
2726                 XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
2727             }
2728
2729           CHECK_NUMBER_COERCE_MARKER (marker, 0);
2730           search_regs.start[i] = XINT (marker);
2731           list = Fcdr (list);
2732
2733           marker = Fcar (list);
2734           if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
2735             XSETFASTINT (marker, 0);
2736
2737           CHECK_NUMBER_COERCE_MARKER (marker, 0);
2738           search_regs.end[i] = XINT (marker);
2739         }
2740       list = Fcdr (list);
2741     }
2742
2743   return Qnil;
2744 }
2745
/* If non-zero the match data have been saved in saved_search_regs
   during the execution of a sentinel or filter.  Set to 1 by
   save_search_regs and reset to 0 by restore_match_data.  */
static int search_regs_saved;
/* Holds the register count and the start/end arrays taken from
   search_regs while search_regs_saved is non-zero.  */
static struct re_registers saved_search_regs;
2750
2751 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
2752    if asynchronous code (filter or sentinel) is running. */
2753 static void
2754 save_search_regs ()
2755 {
2756   if (!search_regs_saved)
2757     {
2758       saved_search_regs.num_regs = search_regs.num_regs;
2759       saved_search_regs.start = search_regs.start;
2760       saved_search_regs.end = search_regs.end;
2761       search_regs.num_regs = 0;
2762       search_regs.start = 0;
2763       search_regs.end = 0;
2764
2765       search_regs_saved = 1;
2766     }
2767 }
2768
2769 /* Called upon exit from filters and sentinels. */
2770 void
2771 restore_match_data ()
2772 {
2773   if (search_regs_saved)
2774     {
2775       if (search_regs.num_regs > 0)
2776         {
2777           xfree (search_regs.start);
2778           xfree (search_regs.end);
2779         }
2780       search_regs.num_regs = saved_search_regs.num_regs;
2781       search_regs.start = saved_search_regs.start;
2782       search_regs.end = saved_search_regs.end;
2783
2784       search_regs_saved = 0;
2785     }
2786 }
2787
2788 /* Quote a string to inactivate reg-expr chars */
2789
2790 DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
2791   "Return a regexp string which matches exactly STRING and nothing else.")
2792   (string)
2793      Lisp_Object string;
2794 {
2795   register unsigned char *in, *out, *end;
2796   register unsigned char *temp;
2797   int backslashes_added = 0;
2798
2799   CHECK_STRING (string, 0);
2800
2801   temp = (unsigned char *) alloca (STRING_BYTES (XSTRING (string)) * 2);
2802
2803   /* Now copy the data into the new string, inserting escapes. */
2804
2805   in = XSTRING (string)->data;
2806   end = in + STRING_BYTES (XSTRING (string));
2807   out = temp;
2808
2809   for (; in != end; in++)
2810     {
2811       if (*in == '[' || *in == ']'
2812           || *in == '*' || *in == '.' || *in == '\\'
2813           || *in == '?' || *in == '+'
2814           || *in == '^' || *in == '$')
2815         *out++ = '\\', backslashes_added++;
2816       *out++ = *in;
2817     }
2818
2819   return make_specified_string (temp,
2820                                 XSTRING (string)->size + backslashes_added,
2821                                 out - temp,
2822                                 STRING_MULTIBYTE (string));
2823 }
2824 \f
2825 void
2826 syms_of_search ()
2827 {
2828   register int i;
2829
2830   for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
2831     {
2832       searchbufs[i].buf.allocated = 100;
2833       searchbufs[i].buf.buffer = (unsigned char *) malloc (100);
2834       searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
2835       searchbufs[i].regexp = Qnil;
2836       staticpro (&searchbufs[i].regexp);
2837       searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
2838     }
2839   searchbuf_head = &searchbufs[0];
2840
2841   Qsearch_failed = intern ("search-failed");
2842   staticpro (&Qsearch_failed);
2843   Qinvalid_regexp = intern ("invalid-regexp");
2844   staticpro (&Qinvalid_regexp);
2845
2846   Fput (Qsearch_failed, Qerror_conditions,
2847         Fcons (Qsearch_failed, Fcons (Qerror, Qnil)));
2848   Fput (Qsearch_failed, Qerror_message,
2849         build_string ("Search failed"));
2850
2851   Fput (Qinvalid_regexp, Qerror_conditions,
2852         Fcons (Qinvalid_regexp, Fcons (Qerror, Qnil)));
2853   Fput (Qinvalid_regexp, Qerror_message,
2854         build_string ("Invalid regexp"));
2855
2856   last_thing_searched = Qnil;
2857   staticpro (&last_thing_searched);
2858
2859   defsubr (&Slooking_at);
2860   defsubr (&Sposix_looking_at);
2861   defsubr (&Sstring_match);
2862   defsubr (&Sposix_string_match);
2863   defsubr (&Ssearch_forward);
2864   defsubr (&Ssearch_backward);
2865   defsubr (&Sword_search_forward);
2866   defsubr (&Sword_search_backward);
2867   defsubr (&Sre_search_forward);
2868   defsubr (&Sre_search_backward);
2869   defsubr (&Sposix_search_forward);
2870   defsubr (&Sposix_search_backward);
2871   defsubr (&Sreplace_match);
2872   defsubr (&Smatch_beginning);
2873   defsubr (&Smatch_end);
2874   defsubr (&Smatch_data);
2875   defsubr (&Sset_match_data);
2876   defsubr (&Sregexp_quote);
2877 }