src/search.c

   1 /* String search routines for GNU Emacs.
   2    Copyright (C) 1985, 86, 87, 93, 94, 97, 1998 Free Software Foundation, Inc.
   3
   4 This file is part of GNU Emacs.
   5
   6 GNU Emacs is free software; you can redistribute it and/or modify
   7 it under the terms of the GNU General Public License as published by
   8 the Free Software Foundation; either version 2, or (at your option)
   9 any later version.
  10
  11 GNU Emacs is distributed in the hope that it will be useful,
  12 but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 GNU General Public License for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GNU Emacs; see the file COPYING.  If not, write to
  18 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  19 Boston, MA 02111-1307, USA.  */
  20
  21
  22 #include <config.h>
  23 #ifdef STDC_HEADERS
  24 #include <stdlib.h>
  25 #endif
  26 #include "lisp.h"
  27 #include "syntax.h"
  28 #include "category.h"
  29 #include "buffer.h"
  30 #include "charset.h"
  31 #include "region-cache.h"
  32 #include "commands.h"
  33 #include "blockinput.h"
  34 #include "intervals.h"
  35
  36 #include <sys/types.h>
  37 #include "regex.h"
  38
  39 #define min(a, b) ((a) < (b) ? (a) : (b))
  40 #define max(a, b) ((a) > (b) ? (a) : (b))
  41
  42 #define REGEXP_CACHE_SIZE 20
  43
  44 /* If the regexp is non-nil, then the buffer contains the compiled form
  45    of that regexp, suitable for searching.  */
  46 struct regexp_cache
  47 {
  48   struct regexp_cache *next;
  49   Lisp_Object regexp;
  50   struct re_pattern_buffer buf;
  51   char fastmap[0400];
  52   /* Nonzero means regexp was compiled to do full POSIX backtracking.  */
  53   char posix;
  54 };
  55
  56 /* The instances of that struct.  */
  57 struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
  58
  59 /* The head of the linked list; points to the most recently used buffer.  */
  60 struct regexp_cache *searchbuf_head;
  61
  62
  63 /* Every call to re_match, etc., must pass &search_regs as the regs
  64    argument unless you can show it is unnecessary (i.e., if re_match
  65    is certainly going to be called again before region-around-match
  66    can be called).
  67
  68    Since the registers are now dynamically allocated, we need to make
  69    sure not to refer to the Nth register before checking that it has
  70    been allocated by checking search_regs.num_regs.
  71
  72    The regex code keeps track of whether it has allocated the search
  73    buffer using bits in the re_pattern_buffer.  This means that whenever
  74    you compile a new pattern, it completely forgets whether it has
  75    allocated any registers, and will allocate new registers the next
  76    time you call a searching or matching function.  Therefore, we need
  77    to call re_set_registers after compiling a new pattern or after
  78    setting the match registers, so that the regex functions will be
  79    able to free or re-allocate it properly.  */
  80 static struct re_registers search_regs;
  81
  82 /* The buffer in which the last search was performed, or
  83    Qt if the last search was done in a string;
  84    Qnil if no searching has been done yet.  */
  85 static Lisp_Object last_thing_searched;
  86
  87 /* error condition signaled when regexp compile_pattern fails */
  88
  89 Lisp_Object Qinvalid_regexp;
  90
  91 static void set_search_regs ();
  92 static void save_search_regs ();
  93 static int simple_search ();
  94 static int boyer_moore ();
  95 static int search_buffer ();
  96
  97 static void
  98 matcher_overflow ()
  99 {
 100   error ("Stack overflow in regexp matcher");
 101 }
 102
 103 #ifdef __STDC__
 104 #define CONST const
 105 #else
 106 #define CONST
 107 #endif
 108
 109 /* Compile a regexp and signal a Lisp error if anything goes wrong.
 110    PATTERN is the pattern to compile.
 111    CP is the place to put the result.
 112    TRANSLATE is a translation table for ignoring case, or nil for none.
 113    REGP is the structure that says where to store the "register"
 114    values that will result from matching this pattern.
 115    If it is 0, we should compile the pattern not to record any
 116    subexpression bounds.
 117    POSIX is nonzero if we want full backtracking (POSIX style)
 118    for this pattern.  0 means backtrack only enough to get a valid match.
 119    MULTIBYTE is nonzero if we want to handle multibyte characters in
 120    PATTERN.  0 means all multibyte characters are recognized just as
 121    sequences of binary data.  */
 122
 123 static void
 124 compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte)
 125      struct regexp_cache *cp;
 126      Lisp_Object pattern;
 127      Lisp_Object translate;
 128      struct re_registers *regp;
 129      int posix;
 130      int multibyte;
 131 {
 132   unsigned char *raw_pattern;
 133   int raw_pattern_size;
 134   char *val;
 135   reg_syntax_t old;
 136
 137   /* MULTIBYTE says whether the text to be searched is multibyte.
 138      We must convert PATTERN to match that, or we will not really
 139      find things right.  */
 140
 141   if (multibyte == STRING_MULTIBYTE (pattern))
 142     {
 143       raw_pattern = (unsigned char *) XSTRING (pattern)->data;
 144       raw_pattern_size = STRING_BYTES (XSTRING (pattern));
 145     }
 146   else if (multibyte)
 147     {
 148       raw_pattern_size = count_size_as_multibyte (XSTRING (pattern)->data,
 149                                                   XSTRING (pattern)->size);
 150       raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
 151       copy_text (XSTRING (pattern)->data, raw_pattern,
 152                  XSTRING (pattern)->size, 0, 1);
 153     }
 154   else
 155     {
 156       /* Converting multibyte to single-byte.
 157
 158          ??? Perhaps this conversion should be done in a special way
 159          by subtracting nonascii-insert-offset from each non-ASCII char,
 160          so that only the multibyte chars which really correspond to
 161          the chosen single-byte character set can possibly match.  */
 162       raw_pattern_size = XSTRING (pattern)->size;
 163       raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
 164       copy_text (XSTRING (pattern)->data, raw_pattern,
 165                  STRING_BYTES (XSTRING (pattern)), 1, 0);
 166     }
 167
 168   cp->regexp = Qnil;
 169   cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
 170   cp->posix = posix;
 171   cp->buf.multibyte = multibyte;
 172   BLOCK_INPUT;
 173   old = re_set_syntax (RE_SYNTAX_EMACS
 174                        | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
 175   val = (char *) re_compile_pattern ((char *)raw_pattern,
 176                                      raw_pattern_size, &cp->buf);
 177   re_set_syntax (old);
 178   UNBLOCK_INPUT;
 179   if (val)
 180     Fsignal (Qinvalid_regexp, Fcons (build_string (val), Qnil));
 181
 182   cp->regexp = Fcopy_sequence (pattern);
 183 }
 184
 185 /* Shrink each compiled regexp buffer in the cache
 186    to the size actually used right now.
 187    This is called from garbage collection.  */
 188
 189 void
 190 shrink_regexp_cache ()
 191 {
 192   struct regexp_cache *cp, **cpp;
 193
 194   for (cp = searchbuf_head; cp != 0; cp = cp->next)
 195     {
 196       cp->buf.allocated = cp->buf.used;
 197       cp->buf.buffer
 198         = (unsigned char *) realloc (cp->buf.buffer, cp->buf.used);
 199     }
 200 }
 201
 202 /* Compile a regexp if necessary, but first check to see if there's one in
 203    the cache.
 204    PATTERN is the pattern to compile.
 205    TRANSLATE is a translation table for ignoring case, or nil for none.
 206    REGP is the structure that says where to store the "register"
 207    values that will result from matching this pattern.
 208    If it is 0, we should compile the pattern not to record any
 209    subexpression bounds.
 210    POSIX is nonzero if we want full backtracking (POSIX style)
 211    for this pattern.  0 means backtrack only enough to get a valid match.  */
 212
 213 struct re_pattern_buffer *
 214 compile_pattern (pattern, regp, translate, posix, multibyte)
 215      Lisp_Object pattern;
 216      struct re_registers *regp;
 217      Lisp_Object translate;
 218      int posix, multibyte;
 219 {
 220   struct regexp_cache *cp, **cpp;
 221
 222   for (cpp = &searchbuf_head; ; cpp = &cp->next)
 223     {
 224       cp = *cpp;
 225       if (XSTRING (cp->regexp)->size == XSTRING (pattern)->size
 226           && !NILP (Fstring_equal (cp->regexp, pattern))
 227           && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
 228           && cp->posix == posix
 229           && cp->buf.multibyte == multibyte)
 230         break;
 231
 232       /* If we're at the end of the cache, compile into the last cell.  */
 233       if (cp->next == 0)
 234         {
 235           compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte);
 236           break;
 237         }
 238     }
 239
 240   /* When we get here, cp (aka *cpp) contains the compiled pattern,
 241      either because we found it in the cache or because we just compiled it.
 242      Move it to the front of the queue to mark it as most recently used.  */
 243   *cpp = cp->next;
 244   cp->next = searchbuf_head;
 245   searchbuf_head = cp;
 246
 247   /* Advise the searching functions about the space we have allocated
 248      for register data.  */
 249   if (regp)
 250     re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
 251
 252   return &cp->buf;
 253 }
 254
 255 /* Error condition used for failing searches */
 256 Lisp_Object Qsearch_failed;
 257
 258 Lisp_Object
 259 signal_failure (arg)
 260      Lisp_Object arg;
 261 {
 262   Fsignal (Qsearch_failed, Fcons (arg, Qnil));
 263   return Qnil;
 264 }
 265 \f
 266 static Lisp_Object
 267 looking_at_1 (string, posix)
 268      Lisp_Object string;
 269      int posix;
 270 {
 271   Lisp_Object val;
 272   unsigned char *p1, *p2;
 273   int s1, s2;
 274   register int i;
 275   struct re_pattern_buffer *bufp;
 276
 277   if (running_asynch_code)
 278     save_search_regs ();
 279
 280   CHECK_STRING (string, 0);
 281   bufp = compile_pattern (string, &search_regs,
 282                           (!NILP (current_buffer->case_fold_search)
 283                            ? DOWNCASE_TABLE : Qnil),
 284                           posix,
 285                           !NILP (current_buffer->enable_multibyte_characters));
 286
 287   immediate_quit = 1;
 288   QUIT;                 /* Do a pending quit right away, to avoid paradoxical behavior */
 289
 290   /* Get pointers and sizes of the two strings
 291      that make up the visible portion of the buffer. */
 292
 293   p1 = BEGV_ADDR;
 294   s1 = GPT_BYTE - BEGV_BYTE;
 295   p2 = GAP_END_ADDR;
 296   s2 = ZV_BYTE - GPT_BYTE;
 297   if (s1 < 0)
 298     {
 299       p2 = p1;
 300       s2 = ZV_BYTE - BEGV_BYTE;
 301       s1 = 0;
 302     }
 303   if (s2 < 0)
 304     {
 305       s1 = ZV_BYTE - BEGV_BYTE;
 306       s2 = 0;
 307     }
 308
 309   re_match_object = Qnil;
 310
 311   i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
 312                   PT_BYTE - BEGV_BYTE, &search_regs,
 313                   ZV_BYTE - BEGV_BYTE);
 314   if (i == -2)
 315     matcher_overflow ();
 316
 317   val = (0 <= i ? Qt : Qnil);
 318   if (i >= 0)
 319     for (i = 0; i < search_regs.num_regs; i++)
 320       if (search_regs.start[i] >= 0)
 321         {
 322           search_regs.start[i]
 323             = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
 324           search_regs.end[i]
 325             = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
 326         }
 327   XSETBUFFER (last_thing_searched, current_buffer);
 328   immediate_quit = 0;
 329   return val;
 330 }
 331
 332 DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
 333   "Return t if text after point matches regular expression REGEXP.\n\
 334 This function modifies the match data that `match-beginning',\n\
 335 `match-end' and `match-data' access; save and restore the match\n\
 336 data if you want to preserve them.")
 337   (regexp)
 338      Lisp_Object regexp;
 339 {
 340   return looking_at_1 (regexp, 0);
 341 }
 342
 343 DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
 344   "Return t if text after point matches regular expression REGEXP.\n\
 345 Find the longest match, in accord with Posix regular expression rules.\n\
 346 This function modifies the match data that `match-beginning',\n\
 347 `match-end' and `match-data' access; save and restore the match\n\
 348 data if you want to preserve them.")
 349   (regexp)
 350      Lisp_Object regexp;
 351 {
 352   return looking_at_1 (regexp, 1);
 353 }
 354 \f
 355 static Lisp_Object
 356 string_match_1 (regexp, string, start, posix)
 357      Lisp_Object regexp, string, start;
 358      int posix;
 359 {
 360   int val;
 361   struct re_pattern_buffer *bufp;
 362   int pos, pos_byte;
 363   int i;
 364
 365   if (running_asynch_code)
 366     save_search_regs ();
 367
 368   CHECK_STRING (regexp, 0);
 369   CHECK_STRING (string, 1);
 370
 371   if (NILP (start))
 372     pos = 0, pos_byte = 0;
 373   else
 374     {
 375       int len = XSTRING (string)->size;
 376
 377       CHECK_NUMBER (start, 2);
 378       pos = XINT (start);
 379       if (pos < 0 && -pos <= len)
 380         pos = len + pos;
 381       else if (0 > pos || pos > len)
 382         args_out_of_range (string, start);
 383       pos_byte = string_char_to_byte (string, pos);
 384     }
 385
 386   bufp = compile_pattern (regexp, &search_regs,
 387                           (!NILP (current_buffer->case_fold_search)
 388                            ? DOWNCASE_TABLE : Qnil),
 389                           posix,
 390                           STRING_MULTIBYTE (string));
 391   immediate_quit = 1;
 392   re_match_object = string;
 393
 394   val = re_search (bufp, (char *) XSTRING (string)->data,
 395                    STRING_BYTES (XSTRING (string)), pos_byte,
 396                    STRING_BYTES (XSTRING (string)) - pos_byte,
 397                    &search_regs);
 398   immediate_quit = 0;
 399   last_thing_searched = Qt;
 400   if (val == -2)
 401     matcher_overflow ();
 402   if (val < 0) return Qnil;
 403
 404   for (i = 0; i < search_regs.num_regs; i++)
 405     if (search_regs.start[i] >= 0)
 406       {
 407         search_regs.start[i]
 408           = string_byte_to_char (string, search_regs.start[i]);
 409         search_regs.end[i]
 410           = string_byte_to_char (string, search_regs.end[i]);
 411       }
 412
 413   return make_number (string_byte_to_char (string, val));
 414 }
 415
 416 DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
 417   "Return index of start of first match for REGEXP in STRING, or nil.\n\
 418 If third arg START is non-nil, start search at that index in STRING.\n\
 419 For index of first char beyond the match, do (match-end 0).\n\
 420 `match-end' and `match-beginning' also give indices of substrings\n\
 421 matched by parenthesis constructs in the pattern.")
 422   (regexp, string, start)
 423      Lisp_Object regexp, string, start;
 424 {
 425   return string_match_1 (regexp, string, start, 0);
 426 }
 427
 428 DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
 429   "Return index of start of first match for REGEXP in STRING, or nil.\n\
 430 Find the longest match, in accord with Posix regular expression rules.\n\
 431 If third arg START is non-nil, start search at that index in STRING.\n\
 432 For index of first char beyond the match, do (match-end 0).\n\
 433 `match-end' and `match-beginning' also give indices of substrings\n\
 434 matched by parenthesis constructs in the pattern.")
 435   (regexp, string, start)
 436      Lisp_Object regexp, string, start;
 437 {
 438   return string_match_1 (regexp, string, start, 1);
 439 }
 440
 441 /* Match REGEXP against STRING, searching all of STRING,
 442    and return the index of the match, or negative on failure.
 443    This does not clobber the match data.  */
 444
 445 int
 446 fast_string_match (regexp, string)
 447      Lisp_Object regexp, string;
 448 {
 449   int val;
 450   struct re_pattern_buffer *bufp;
 451
 452   bufp = compile_pattern (regexp, 0, Qnil,
 453                           0, STRING_MULTIBYTE (string));
 454   immediate_quit = 1;
 455   re_match_object = string;
 456
 457   val = re_search (bufp, (char *) XSTRING (string)->data,
 458                    STRING_BYTES (XSTRING (string)), 0,
 459                    STRING_BYTES (XSTRING (string)), 0);
 460   immediate_quit = 0;
 461   return val;
 462 }
 463
 464 /* Match REGEXP against STRING, searching all of STRING ignoring case,
 465    and return the index of the match, or negative on failure.
 466    This does not clobber the match data.
 467    We assume that STRING contains single-byte characters.  */
 468
 469 extern Lisp_Object Vascii_downcase_table;
 470
 471 int
 472 fast_c_string_match_ignore_case (regexp, string)
 473      Lisp_Object regexp;
 474      char *string;
 475 {
 476   int val;
 477   struct re_pattern_buffer *bufp;
 478   int len = strlen (string);
 479
 480   regexp = string_make_unibyte (regexp);
 481   re_match_object = Qt;
 482   bufp = compile_pattern (regexp, 0,
 483                           Vascii_downcase_table, 0,
 484                           0);
 485   immediate_quit = 1;
 486   val = re_search (bufp, string, len, 0, len, 0);
 487   immediate_quit = 0;
 488   return val;
 489 }
 490 \f
 491 /* The newline cache: remembering which sections of text have no newlines.  */
 492
 493 /* If the user has requested newline caching, make sure it's on.
 494    Otherwise, make sure it's off.
 495    This is our cheezy way of associating an action with the change of
 496    state of a buffer-local variable.  */
 497 static void
 498 newline_cache_on_off (buf)
 499      struct buffer *buf;
 500 {
 501   if (NILP (buf->cache_long_line_scans))
 502     {
 503       /* It should be off.  */
 504       if (buf->newline_cache)
 505         {
 506           free_region_cache (buf->newline_cache);
 507           buf->newline_cache = 0;
 508         }
 509     }
 510   else
 511     {
 512       /* It should be on.  */
 513       if (buf->newline_cache == 0)
 514         buf->newline_cache = new_region_cache ();
 515     }
 516 }
 517
 518 \f
 519 /* Search for COUNT instances of the character TARGET between START and END.
 520
 521    If COUNT is positive, search forwards; END must be >= START.
 522    If COUNT is negative, search backwards for the -COUNTth instance;
 523       END must be <= START.
 524    If COUNT is zero, do anything you please; run rogue, for all I care.
 525
 526    If END is zero, use BEGV or ZV instead, as appropriate for the
 527    direction indicated by COUNT.
 528
 529    If we find COUNT instances, set *SHORTAGE to zero, and return the
 530    position after the COUNTth match.  Note that for reverse motion
 531    this is not the same as the usual convention for Emacs motion commands.
 532
 533    If we don't find COUNT instances before reaching END, set *SHORTAGE
 534    to the number of TARGETs left unfound, and return END.
 535
 536    If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
 537    except when inside redisplay.  */
 538
 539 int
 540 scan_buffer (target, start, end, count, shortage, allow_quit)
 541      register int target;
 542      int start, end;
 543      int count;
 544      int *shortage;
 545      int allow_quit;
 546 {
 547   struct region_cache *newline_cache;
 548   int direction;
 549
 550   if (count > 0)
 551     {
 552       direction = 1;
 553       if (! end) end = ZV;
 554     }
 555   else
 556     {
 557       direction = -1;
 558       if (! end) end = BEGV;
 559     }
 560
 561   newline_cache_on_off (current_buffer);
 562   newline_cache = current_buffer->newline_cache;
 563
 564   if (shortage != 0)
 565     *shortage = 0;
 566
 567   immediate_quit = allow_quit;
 568
 569   if (count > 0)
 570     while (start != end)
 571       {
 572         /* Our innermost scanning loop is very simple; it doesn't know
 573            about gaps, buffer ends, or the newline cache.  ceiling is
 574            the position of the last character before the next such
 575            obstacle --- the last character the dumb search loop should
 576            examine.  */
 577         int ceiling_byte = CHAR_TO_BYTE (end) - 1;
 578         int start_byte = CHAR_TO_BYTE (start);
 579         int tem;
 580
 581         /* If we're looking for a newline, consult the newline cache
 582            to see where we can avoid some scanning.  */
 583         if (target == '\n' && newline_cache)
 584           {
 585             int next_change;
 586             immediate_quit = 0;
 587             while (region_cache_forward
 588                    (current_buffer, newline_cache, start_byte, &next_change))
 589               start_byte = next_change;
 590             immediate_quit = allow_quit;
 591
 592             /* START should never be after END.  */
 593             if (start_byte > ceiling_byte)
 594               start_byte = ceiling_byte;
 595
 596             /* Now the text after start is an unknown region, and
 597                next_change is the position of the next known region. */
 598             ceiling_byte = min (next_change - 1, ceiling_byte);
 599           }
 600
 601         /* The dumb loop can only scan text stored in contiguous
 602            bytes. BUFFER_CEILING_OF returns the last character
 603            position that is contiguous, so the ceiling is the
 604            position after that.  */
 605         tem = BUFFER_CEILING_OF (start_byte);
 606         ceiling_byte = min (tem, ceiling_byte);
 607
 608         {
 609           /* The termination address of the dumb loop.  */
 610           register unsigned char *ceiling_addr
 611             = BYTE_POS_ADDR (ceiling_byte) + 1;
 612           register unsigned char *cursor
 613             = BYTE_POS_ADDR (start_byte);
 614           unsigned char *base = cursor;
 615
 616           while (cursor < ceiling_addr)
 617             {
 618               unsigned char *scan_start = cursor;
 619
 620               /* The dumb loop.  */
 621               while (*cursor != target && ++cursor < ceiling_addr)
 622                 ;
 623
 624               /* If we're looking for newlines, cache the fact that
 625                  the region from start to cursor is free of them. */
 626               if (target == '\n' && newline_cache)
 627                 know_region_cache (current_buffer, newline_cache,
 628                                    start_byte + scan_start - base,
 629                                    start_byte + cursor - base);
 630
 631               /* Did we find the target character?  */
 632               if (cursor < ceiling_addr)
 633                 {
 634                   if (--count == 0)
 635                     {
 636                       immediate_quit = 0;
 637                       return BYTE_TO_CHAR (start_byte + cursor - base + 1);
 638                     }
 639                   cursor++;
 640                 }
 641             }
 642
 643           start = BYTE_TO_CHAR (start_byte + cursor - base);
 644         }
 645       }
 646   else
 647     while (start > end)
 648       {
 649         /* The last character to check before the next obstacle.  */
 650         int ceiling_byte = CHAR_TO_BYTE (end);
 651         int start_byte = CHAR_TO_BYTE (start);
 652         int tem;
 653
 654         /* Consult the newline cache, if appropriate.  */
 655         if (target == '\n' && newline_cache)
 656           {
 657             int next_change;
 658             immediate_quit = 0;
 659             while (region_cache_backward
 660                    (current_buffer, newline_cache, start_byte, &next_change))
 661               start_byte = next_change;
 662             immediate_quit = allow_quit;
 663
 664             /* Start should never be at or before end.  */
 665             if (start_byte <= ceiling_byte)
 666               start_byte = ceiling_byte + 1;
 667
 668             /* Now the text before start is an unknown region, and
 669                next_change is the position of the next known region. */
 670             ceiling_byte = max (next_change, ceiling_byte);
 671           }
 672
 673         /* Stop scanning before the gap.  */
 674         tem = BUFFER_FLOOR_OF (start_byte - 1);
 675         ceiling_byte = max (tem, ceiling_byte);
 676
 677         {
 678           /* The termination address of the dumb loop.  */
 679           register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
 680           register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
 681           unsigned char *base = cursor;
 682
 683           while (cursor >= ceiling_addr)
 684             {
 685               unsigned char *scan_start = cursor;
 686
 687               while (*cursor != target && --cursor >= ceiling_addr)
 688                 ;
 689
 690               /* If we're looking for newlines, cache the fact that
 691                  the region from after the cursor to start is free of them.  */
 692               if (target == '\n' && newline_cache)
 693                 know_region_cache (current_buffer, newline_cache,
 694                                    start_byte + cursor - base,
 695                                    start_byte + scan_start - base);
 696
 697               /* Did we find the target character?  */
 698               if (cursor >= ceiling_addr)
 699                 {
 700                   if (++count >= 0)
 701                     {
 702                       immediate_quit = 0;
 703                       return BYTE_TO_CHAR (start_byte + cursor - base);
 704                     }
 705                   cursor--;
 706                 }
 707             }
 708
 709           start = BYTE_TO_CHAR (start_byte + cursor - base);
 710         }
 711       }
 712
 713   immediate_quit = 0;
 714   if (shortage != 0)
 715     *shortage = count * direction;
 716   return start;
 717 }
 718 \f
 719 /* Search for COUNT instances of a line boundary, which means either a
 720    newline or (if selective display enabled) a carriage return.
 721    Start at START.  If COUNT is negative, search backwards.
 722
 723    We report the resulting position by calling TEMP_SET_PT_BOTH.
 724
 725    If we find COUNT instances. we position after (always after,
 726    even if scanning backwards) the COUNTth match, and return 0.
 727
 728    If we don't find COUNT instances before reaching the end of the
 729    buffer (or the beginning, if scanning backwards), we return
 730    the number of line boundaries left unfound, and position at
 731    the limit we bumped up against.
 732
 733    If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
 734    except in special cases.  */
 735
 736 int
 737 scan_newline (start, start_byte, limit, limit_byte, count, allow_quit)
 738      int start, start_byte;
 739      int limit, limit_byte;
 740      register int count;
 741      int allow_quit;
 742 {
 743   int direction = ((count > 0) ? 1 : -1);
 744
 745   register unsigned char *cursor;
 746   unsigned char *base;
 747
 748   register int ceiling;
 749   register unsigned char *ceiling_addr;
 750
 751   int old_immediate_quit = immediate_quit;
 752
 753   /* If we are not in selective display mode,
 754      check only for newlines.  */
 755   int selective_display = (!NILP (current_buffer->selective_display)
 756                            && !INTEGERP (current_buffer->selective_display));
 757
 758   /* The code that follows is like scan_buffer
 759      but checks for either newline or carriage return.  */
 760
 761   if (allow_quit)
 762     immediate_quit++;
 763
 764   start_byte = CHAR_TO_BYTE (start);
 765
 766   if (count > 0)
 767     {
 768       while (start_byte < limit_byte)
 769         {
 770           ceiling =  BUFFER_CEILING_OF (start_byte);
 771           ceiling = min (limit_byte - 1, ceiling);
 772           ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
 773           base = (cursor = BYTE_POS_ADDR (start_byte));
 774           while (1)
 775             {
 776               while (*cursor != '\n' && ++cursor != ceiling_addr)
 777                 ;
 778
 779               if (cursor != ceiling_addr)
 780                 {
 781                   if (--count == 0)
 782                     {
 783                       immediate_quit = old_immediate_quit;
 784                       start_byte = start_byte + cursor - base + 1;
 785                       start = BYTE_TO_CHAR (start_byte);
 786                       TEMP_SET_PT_BOTH (start, start_byte);
 787                       return 0;
 788                     }
 789                   else
 790                     if (++cursor == ceiling_addr)
 791                       break;
 792                 }
 793               else
 794                 break;
 795             }
 796           start_byte += cursor - base;
 797         }
 798     }
 799   else
 800     {
 801       while (start_byte > limit_byte)
 802         {
 803           ceiling = BUFFER_FLOOR_OF (start_byte - 1);
 804           ceiling = max (limit_byte, ceiling);
 805           ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
 806           base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
 807           while (1)
 808             {
 809               while (--cursor != ceiling_addr && *cursor != '\n')
 810                 ;
 811
 812               if (cursor != ceiling_addr)
 813                 {
 814                   if (++count == 0)
 815                     {
 816                       immediate_quit = old_immediate_quit;
 817                       /* Return the position AFTER the match we found.  */
 818                       start_byte = start_byte + cursor - base + 1;
 819                       start = BYTE_TO_CHAR (start_byte);
 820                       TEMP_SET_PT_BOTH (start, start_byte);
 821                       return 0;
 822                     }
 823                 }
 824               else
 825                 break;
 826             }
 827           /* Here we add 1 to compensate for the last decrement
 828              of CURSOR, which took it past the valid range.  */
 829           start_byte += cursor - base + 1;
 830         }
 831     }
 832
 833   TEMP_SET_PT_BOTH (limit, limit_byte);
 834   immediate_quit = old_immediate_quit;
 835
 836   return count * direction;
 837 }
 838
 839 int
 840 find_next_newline_no_quit (from, cnt)
 841      register int from, cnt;
 842 {
 843   return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0);
 844 }
 845
 846 /* Like find_next_newline, but returns position before the newline,
 847    not after, and only search up to TO.  This isn't just
 848    find_next_newline (...)-1, because you might hit TO.  */
 849
 850 int
 851 find_before_next_newline (from, to, cnt)
 852      int from, to, cnt;
 853 {
 854   int shortage;
 855   int pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);
 856
 857   if (shortage == 0)
 858     pos--;
 859
 860   return pos;
 861 }
 862 \f
 863 /* Subroutines of Lisp buffer search functions. */
 864
 865 static Lisp_Object
 866 search_command (string, bound, noerror, count, direction, RE, posix)
 867      Lisp_Object string, bound, noerror, count;
 868      int direction;
 869      int RE;
 870      int posix;
 871 {
 872   register int np;
 873   int lim, lim_byte;
 874   int n = direction;
 875
 876   if (!NILP (count))
 877     {
 878       CHECK_NUMBER (count, 3);
 879       n *= XINT (count);
 880     }
 881
 882   CHECK_STRING (string, 0);
 883   if (NILP (bound))
 884     {
 885       if (n > 0)
 886         lim = ZV, lim_byte = ZV_BYTE;
 887       else
 888         lim = BEGV, lim_byte = BEGV_BYTE;
 889     }
 890   else
 891     {
 892       CHECK_NUMBER_COERCE_MARKER (bound, 1);
 893       lim = XINT (bound);
 894       if (n > 0 ? lim < PT : lim > PT)
 895         error ("Invalid search bound (wrong side of point)");
 896       if (lim > ZV)
 897         lim = ZV, lim_byte = ZV_BYTE;
 898       else if (lim < BEGV)
 899         lim = BEGV, lim_byte = BEGV_BYTE;
 900       else
 901         lim_byte = CHAR_TO_BYTE (lim);
 902     }
 903
 904   np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
 905                       (!NILP (current_buffer->case_fold_search)
 906                        ? current_buffer->case_canon_table
 907                        : Qnil),
 908                       (!NILP (current_buffer->case_fold_search)
 909                        ? current_buffer->case_eqv_table
 910                        : Qnil),
 911                       posix);
 912   if (np <= 0)
 913     {
 914       if (NILP (noerror))
 915         return signal_failure (string);
 916       if (!EQ (noerror, Qt))
 917         {
 918           if (lim < BEGV || lim > ZV)
 919             abort ();
 920           SET_PT_BOTH (lim, lim_byte);
 921           return Qnil;
 922 #if 0 /* This would be clean, but maybe programs depend on
 923          a value of nil here.  */
 924           np = lim;
 925 #endif
 926         }
 927       else
 928         return Qnil;
 929     }
 930
 931   if (np < BEGV || np > ZV)
 932     abort ();
 933
 934   SET_PT (np);
 935
 936   return make_number (np);
 937 }
 938 \f
 939 /* Return 1 if REGEXP it matches just one constant string.  */
 940
 941 static int
 942 trivial_regexp_p (regexp)
 943      Lisp_Object regexp;
 944 {
 945   int len = STRING_BYTES (XSTRING (regexp));
 946   unsigned char *s = XSTRING (regexp)->data;
 947   unsigned char c;
 948   while (--len >= 0)
 949     {
 950       switch (*s++)
 951         {
 952         case '.': case '*': case '+': case '?': case '[': case '^': case '$':
 953           return 0;
 954         case '\\':
 955           if (--len < 0)
 956             return 0;
 957           switch (*s++)
 958             {
 959             case '|': case '(': case ')': case '`': case '\'': case 'b':
 960             case 'B': case '<': case '>': case 'w': case 'W': case 's':
 961             case 'S': case '=':
 962             case 'c': case 'C': /* for categoryspec and notcategoryspec */
 963             case '1': case '2': case '3': case '4': case '5':
 964             case '6': case '7': case '8': case '9':
 965               return 0;
 966             }
 967         }
 968     }
 969   return 1;
 970 }
 971
 972 /* Search for the n'th occurrence of STRING in the current buffer,
 973    starting at position POS and stopping at position LIM,
 974    treating STRING as a literal string if RE is false or as
 975    a regular expression if RE is true.
 976
 977    If N is positive, searching is forward and LIM must be greater than POS.
 978    If N is negative, searching is backward and LIM must be less than POS.
 979
 980    Returns -x if x occurrences remain to be found (x > 0),
 981    or else the position at the beginning of the Nth occurrence
 982    (if searching backward) or the end (if searching forward).
 983
 984    POSIX is nonzero if we want full backtracking (POSIX style)
 985    for this pattern.  0 means backtrack only enough to get a valid match.  */
 986
 987 #define TRANSLATE(out, trt, d)                  \
 988 do                                              \
 989   {                                             \
 990     if (! NILP (trt))                           \
 991       {                                         \
 992         Lisp_Object temp;                       \
 993         temp = Faref (trt, make_number (d));    \
 994         if (INTEGERP (temp))                    \
 995           out = XINT (temp);                    \
 996         else                                    \
 997           out = d;                              \
 998       }                                         \
 999     else                                        \
1000       out = d;                                  \
1001   }                                             \
1002 while (0)
1003
1004 static int
1005 search_buffer (string, pos, pos_byte, lim, lim_byte, n,
1006                RE, trt, inverse_trt, posix)
1007      Lisp_Object string;
1008      int pos;
1009      int pos_byte;
1010      int lim;
1011      int lim_byte;
1012      int n;
1013      int RE;
1014      Lisp_Object trt;
1015      Lisp_Object inverse_trt;
1016      int posix;
1017 {
1018   int len = XSTRING (string)->size;
1019   int len_byte = STRING_BYTES (XSTRING (string));
1020   register int i;
1021
1022   if (running_asynch_code)
1023     save_search_regs ();
1024
1025   /* Searching 0 times means don't move.  */
1026   /* Null string is found at starting position.  */
1027   if (len == 0 || n == 0)
1028     {
1029       set_search_regs (pos, 0);
1030       return pos;
1031     }
1032
1033   if (RE && !trivial_regexp_p (string))
1034     {
1035       unsigned char *p1, *p2;
1036       int s1, s2;
1037       struct re_pattern_buffer *bufp;
1038
1039       bufp = compile_pattern (string, &search_regs, trt, posix,
1040                               !NILP (current_buffer->enable_multibyte_characters));
1041
1042       immediate_quit = 1;       /* Quit immediately if user types ^G,
1043                                    because letting this function finish
1044                                    can take too long. */
1045       QUIT;                     /* Do a pending quit right away,
1046                                    to avoid paradoxical behavior */
1047       /* Get pointers and sizes of the two strings
1048          that make up the visible portion of the buffer. */
1049
1050       p1 = BEGV_ADDR;
1051       s1 = GPT_BYTE - BEGV_BYTE;
1052       p2 = GAP_END_ADDR;
1053       s2 = ZV_BYTE - GPT_BYTE;
1054       if (s1 < 0)
1055         {
1056           p2 = p1;
1057           s2 = ZV_BYTE - BEGV_BYTE;
1058           s1 = 0;
1059         }
1060       if (s2 < 0)
1061         {
1062           s1 = ZV_BYTE - BEGV_BYTE;
1063           s2 = 0;
1064         }
1065       re_match_object = Qnil;
1066
1067       while (n < 0)
1068         {
1069           int val;
1070           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1071                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1072                              &search_regs,
1073                              /* Don't allow match past current point */
1074                              pos_byte - BEGV_BYTE);
1075           if (val == -2)
1076             {
1077               matcher_overflow ();
1078             }
1079           if (val >= 0)
1080             {
1081               pos_byte = search_regs.start[0] + BEGV_BYTE;
1082               for (i = 0; i < search_regs.num_regs; i++)
1083                 if (search_regs.start[i] >= 0)
1084                   {
1085                     search_regs.start[i]
1086                       = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1087                     search_regs.end[i]
1088                       = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1089                   }
1090               XSETBUFFER (last_thing_searched, current_buffer);
1091               /* Set pos to the new position. */
1092               pos = search_regs.start[0];
1093             }
1094           else
1095             {
1096               immediate_quit = 0;
1097               return (n);
1098             }
1099           n++;
1100         }
1101       while (n > 0)
1102         {
1103           int val;
1104           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1105                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1106                              &search_regs,
1107                              lim_byte - BEGV_BYTE);
1108           if (val == -2)
1109             {
1110               matcher_overflow ();
1111             }
1112           if (val >= 0)
1113             {
1114               pos_byte = search_regs.end[0] + BEGV_BYTE;
1115               for (i = 0; i < search_regs.num_regs; i++)
1116                 if (search_regs.start[i] >= 0)
1117                   {
1118                     search_regs.start[i]
1119                       = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1120                     search_regs.end[i]
1121                       = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1122                   }
1123               XSETBUFFER (last_thing_searched, current_buffer);
1124               pos = search_regs.end[0];
1125             }
1126           else
1127             {
1128               immediate_quit = 0;
1129               return (0 - n);
1130             }
1131           n--;
1132         }
1133       immediate_quit = 0;
1134       return (pos);
1135     }
1136   else                          /* non-RE case */
1137     {
1138       unsigned char *raw_pattern, *pat;
1139       int raw_pattern_size;
1140       int raw_pattern_size_byte;
1141       unsigned char *patbuf;
1142       int multibyte = !NILP (current_buffer->enable_multibyte_characters);
1143       unsigned char *base_pat = XSTRING (string)->data;
1144       int charset_base = -1;
1145       int simple = 1;
1146
1147       /* MULTIBYTE says whether the text to be searched is multibyte.
1148          We must convert PATTERN to match that, or we will not really
1149          find things right.  */
1150
1151       if (multibyte == STRING_MULTIBYTE (string))
1152         {
1153           raw_pattern = (unsigned char *) XSTRING (string)->data;
1154           raw_pattern_size = XSTRING (string)->size;
1155           raw_pattern_size_byte = STRING_BYTES (XSTRING (string));
1156         }
1157       else if (multibyte)
1158         {
1159           raw_pattern_size = XSTRING (string)->size;
1160           raw_pattern_size_byte
1161             = count_size_as_multibyte (XSTRING (string)->data,
1162                                        raw_pattern_size);
1163           raw_pattern = (unsigned char *) alloca (raw_pattern_size_byte + 1);
1164           copy_text (XSTRING (string)->data, raw_pattern,
1165                      XSTRING (string)->size, 0, 1);
1166         }
1167       else
1168         {
1169           /* Converting multibyte to single-byte.
1170
1171              ??? Perhaps this conversion should be done in a special way
1172              by subtracting nonascii-insert-offset from each non-ASCII char,
1173              so that only the multibyte chars which really correspond to
1174              the chosen single-byte character set can possibly match.  */
1175           raw_pattern_size = XSTRING (string)->size;
1176           raw_pattern_size_byte = XSTRING (string)->size;
1177           raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
1178           copy_text (XSTRING (string)->data, raw_pattern,
1179                      STRING_BYTES (XSTRING (string)), 1, 0);
1180         }
1181
1182       /* Copy and optionally translate the pattern.  */
1183       len = raw_pattern_size;
1184       len_byte = raw_pattern_size_byte;
1185       patbuf = (unsigned char *) alloca (len_byte);
1186       pat = patbuf;
1187       base_pat = raw_pattern;
1188       if (multibyte)
1189         {
1190           while (--len >= 0)
1191             {
1192               unsigned char workbuf[4], *str;
1193               int c, translated, inverse;
1194               int in_charlen, charlen;
1195
1196               /* If we got here and the RE flag is set, it's because we're
1197                  dealing with a regexp known to be trivial, so the backslash
1198                  just quotes the next character.  */
1199               if (RE && *base_pat == '\\')
1200                 {
1201                   len--;
1202                   len_byte--;
1203                   base_pat++;
1204                 }
1205
1206               c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen);
1207               /* Translate the character, if requested.  */
1208               TRANSLATE (translated, trt, c);
1209               /* If translation changed the byte-length, go back
1210                  to the original character.  */
1211               charlen = CHAR_STRING (translated, workbuf, str);
1212               if (in_charlen != charlen)
1213                 {
1214                   translated = c;
1215                   charlen = CHAR_STRING (c, workbuf, str);
1216                 }
1217
1218               TRANSLATE (inverse, inverse_trt, c);
1219
1220               /* Did this char actually get translated?
1221                  Would any other char get translated into it?  */
1222               if (translated != c || inverse != c)
1223                 {
1224                   /* Keep track of which character set row
1225                      contains the characters that need translation.  */
1226                   int charset_base_code = c & ~0xff;
1227                   if (charset_base == -1)
1228                     charset_base = charset_base_code;
1229                   else if (charset_base != charset_base_code)
1230                     /* If two different rows appear, needing translation,
1231                        then we cannot use boyer_moore search.  */
1232                     simple = 0;
1233                     /* ??? Handa: this must do simple = 0
1234                        if c is a composite character.  */
1235                 }
1236
1237               /* Store this character into the translated pattern.  */
1238               bcopy (str, pat, charlen);
1239               pat += charlen;
1240               base_pat += in_charlen;
1241               len_byte -= in_charlen;
1242             }
1243         }
1244       else
1245         {
1246           while (--len >= 0)
1247             {
1248               int c, translated, inverse;
1249
1250               /* If we got here and the RE flag is set, it's because we're
1251                  dealing with a regexp known to be trivial, so the backslash
1252                  just quotes the next character.  */
1253               if (RE && *base_pat == '\\')
1254                 {
1255                   len--;
1256                   base_pat++;
1257                 }
1258               c = *base_pat++;
1259               TRANSLATE (translated, trt, c);
1260               TRANSLATE (inverse, inverse_trt, c);
1261
1262               /* Did this char actually get translated?
1263                  Would any other char get translated into it?  */
1264               if (translated != c || inverse != c)
1265                 {
1266                   /* Keep track of which character set row
1267                      contains the characters that need translation.  */
1268                   int charset_base_code = c & ~0xff;
1269                   if (charset_base == -1)
1270                     charset_base = charset_base_code;
1271                   else if (charset_base != charset_base_code)
1272                     /* If two different rows appear, needing translation,
1273                        then we cannot use boyer_moore search.  */
1274                     simple = 0;
1275                 }
1276               *pat++ = translated;
1277             }
1278         }
1279
1280       len_byte = pat - patbuf;
1281       len = raw_pattern_size;
1282       pat = base_pat = patbuf;
1283
1284       if (simple)
1285         return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
1286                             pos, pos_byte, lim, lim_byte,
1287                             charset_base);
1288       else
1289         return simple_search (n, pat, len, len_byte, trt,
1290                               pos, pos_byte, lim, lim_byte);
1291     }
1292 }
1293 \f
1294 /* Do a simple string search N times for the string PAT,
1295    whose length is LEN/LEN_BYTE,
1296    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1297    TRT is the translation table.
1298
1299    Return the character position where the match is found.
1300    Otherwise, if M matches remained to be found, return -M.
1301
1302    This kind of search works regardless of what is in PAT and
1303    regardless of what is in TRT.  It is used in cases where
1304    boyer_moore cannot work.  */
1305
1306 static int
1307 simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
1308      int n;
1309      unsigned char *pat;
1310      int len, len_byte;
1311      Lisp_Object trt;
1312      int pos, pos_byte;
1313      int lim, lim_byte;
1314 {
1315   int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1316   int forward = n > 0;
1317
1318   if (lim > pos && multibyte)
1319     while (n > 0)
1320       {
1321         while (1)
1322           {
1323             /* Try matching at position POS.  */
1324             int this_pos = pos;
1325             int this_pos_byte = pos_byte;
1326             int this_len = len;
1327             int this_len_byte = len_byte;
1328             unsigned char *p = pat;
1329             if (pos + len > lim)
1330               goto stop;
1331
1332             while (this_len > 0)
1333               {
1334                 int charlen, buf_charlen;
1335                 int pat_ch, buf_ch;
1336
1337                 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1338                 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1339                                                  ZV_BYTE - this_pos_byte,
1340                                                  buf_charlen);
1341                 TRANSLATE (buf_ch, trt, buf_ch);
1342
1343                 if (buf_ch != pat_ch)
1344                   break;
1345
1346                 this_len_byte -= charlen;
1347                 this_len--;
1348                 p += charlen;
1349
1350                 this_pos_byte += buf_charlen;
1351                 this_pos++;
1352               }
1353
1354             if (this_len == 0)
1355               {
1356                 pos += len;
1357                 pos_byte += len_byte;
1358                 break;
1359               }
1360
1361             INC_BOTH (pos, pos_byte);
1362           }
1363
1364         n--;
1365       }
1366   else if (lim > pos)
1367     while (n > 0)
1368       {
1369         while (1)
1370           {
1371             /* Try matching at position POS.  */
1372             int this_pos = pos;
1373             int this_len = len;
1374             unsigned char *p = pat;
1375
1376             if (pos + len > lim)
1377               goto stop;
1378
1379             while (this_len > 0)
1380               {
1381                 int pat_ch = *p++;
1382                 int buf_ch = FETCH_BYTE (this_pos);
1383                 TRANSLATE (buf_ch, trt, buf_ch);
1384
1385                 if (buf_ch != pat_ch)
1386                   break;
1387
1388                 this_len--;
1389                 this_pos++;
1390               }
1391
1392             if (this_len == 0)
1393               {
1394                 pos += len;
1395                 break;
1396               }
1397
1398             pos++;
1399           }
1400
1401         n--;
1402       }
1403   /* Backwards search.  */
1404   else if (lim < pos && multibyte)
1405     while (n < 0)
1406       {
1407         while (1)
1408           {
1409             /* Try matching at position POS.  */
1410             int this_pos = pos - len;
1411             int this_pos_byte = pos_byte - len_byte;
1412             int this_len = len;
1413             int this_len_byte = len_byte;
1414             unsigned char *p = pat;
1415
1416             if (pos - len < lim)
1417               goto stop;
1418
1419             while (this_len > 0)
1420               {
1421                 int charlen, buf_charlen;
1422                 int pat_ch, buf_ch;
1423
1424                 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1425                 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1426                                                  ZV_BYTE - this_pos_byte,
1427                                                  buf_charlen);
1428                 TRANSLATE (buf_ch, trt, buf_ch);
1429
1430                 if (buf_ch != pat_ch)
1431                   break;
1432
1433                 this_len_byte -= charlen;
1434                 this_len--;
1435                 p += charlen;
1436                 this_pos_byte += buf_charlen;
1437                 this_pos++;
1438               }
1439
1440             if (this_len == 0)
1441               {
1442                 pos -= len;
1443                 pos_byte -= len_byte;
1444                 break;
1445               }
1446
1447             DEC_BOTH (pos, pos_byte);
1448           }
1449
1450         n++;
1451       }
1452   else if (lim < pos)
1453     while (n < 0)
1454       {
1455         while (1)
1456           {
1457             /* Try matching at position POS.  */
1458             int this_pos = pos - len;
1459             int this_len = len;
1460             unsigned char *p = pat;
1461
1462             if (pos - len < lim)
1463               goto stop;
1464
1465             while (this_len > 0)
1466               {
1467                 int pat_ch = *p++;
1468                 int buf_ch = FETCH_BYTE (this_pos);
1469                 TRANSLATE (buf_ch, trt, buf_ch);
1470
1471                 if (buf_ch != pat_ch)
1472                   break;
1473                 this_len--;
1474                 this_pos++;
1475               }
1476
1477             if (this_len == 0)
1478               {
1479                 pos -= len;
1480                 break;
1481               }
1482
1483             pos--;
1484           }
1485
1486         n++;
1487       }
1488
1489  stop:
1490   if (n == 0)
1491     {
1492       if (forward)
1493         set_search_regs ((multibyte ? pos_byte : pos) - len_byte, len_byte);
1494       else
1495         set_search_regs (multibyte ? pos_byte : pos, len_byte);
1496
1497       return pos;
1498     }
1499   else if (n > 0)
1500     return -n;
1501   else
1502     return n;
1503 }
1504 \f
1505 /* Do Boyer-Moore search N times for the string PAT,
1506    whose length is LEN/LEN_BYTE,
1507    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1508    DIRECTION says which direction we search in.
1509    TRT and INVERSE_TRT are translation tables.
1510
1511    This kind of search works if all the characters in PAT that have
1512    nontrivial translation are the same aside from the last byte.  This
1513    makes it possible to translate just the last byte of a character,
1514    and do so after just a simple test of the context.
1515
1516    If that criterion is not satisfied, do not call this function.  */
1517
1518 static int
1519 boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
1520              pos, pos_byte, lim, lim_byte, charset_base)
1521      int n;
1522      unsigned char *base_pat;
1523      int len, len_byte;
1524      Lisp_Object trt;
1525      Lisp_Object inverse_trt;
1526      int pos, pos_byte;
1527      int lim, lim_byte;
1528      int charset_base;
1529 {
1530   int direction = ((n > 0) ? 1 : -1);
1531   register int dirlen;
1532   int infinity, limit, k, stride_for_teases;
1533   register int *BM_tab;
1534   int *BM_tab_base;
1535   register unsigned char *cursor, *p_limit;
1536   register int i, j;
1537   unsigned char *pat, *pat_end;
1538   int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1539
1540   unsigned char simple_translate[0400];
1541   int translate_prev_byte;
1542   int translate_anteprev_byte;
1543
1544 #ifdef C_ALLOCA
1545   int BM_tab_space[0400];
1546   BM_tab = &BM_tab_space[0];
1547 #else
1548   BM_tab = (int *) alloca (0400 * sizeof (int));
1549 #endif
1550   /* The general approach is that we are going to maintain that we know */
1551   /* the first (closest to the present position, in whatever direction */
1552   /* we're searching) character that could possibly be the last */
1553   /* (furthest from present position) character of a valid match.  We */
1554   /* advance the state of our knowledge by looking at that character */
1555   /* and seeing whether it indeed matches the last character of the */
1556   /* pattern.  If it does, we take a closer look.  If it does not, we */
1557   /* move our pointer (to putative last characters) as far as is */
1558   /* logically possible.  This amount of movement, which I call a */
1559   /* stride, will be the length of the pattern if the actual character */
1560   /* appears nowhere in the pattern, otherwise it will be the distance */
1561   /* from the last occurrence of that character to the end of the */
1562   /* pattern. */
1563   /* As a coding trick, an enormous stride is coded into the table for */
1564   /* characters that match the last character.  This allows use of only */
1565   /* a single test, a test for having gone past the end of the */
1566   /* permissible match region, to test for both possible matches (when */
1567   /* the stride goes past the end immediately) and failure to */
1568   /* match (where you get nudged past the end one stride at a time). */
1569
1570   /* Here we make a "mickey mouse" BM table.  The stride of the search */
1571   /* is determined only by the last character of the putative match. */
1572   /* If that character does not match, we will stride the proper */
1573   /* distance to propose a match that superimposes it on the last */
1574   /* instance of a character that matches it (per trt), or misses */
1575   /* it entirely if there is none. */
1576
1577   dirlen = len_byte * direction;
1578   infinity = dirlen - (lim_byte + pos_byte + len_byte + len_byte) * direction;
1579
1580   /* Record position after the end of the pattern.  */
1581   pat_end = base_pat + len_byte;
1582   /* BASE_PAT points to a character that we start scanning from.
1583      It is the first character in a forward search,
1584      the last character in a backward search.  */
1585   if (direction < 0)
1586     base_pat = pat_end - 1;
1587
1588   BM_tab_base = BM_tab;
1589   BM_tab += 0400;
1590   j = dirlen;           /* to get it in a register */
1591   /* A character that does not appear in the pattern induces a */
1592   /* stride equal to the pattern length. */
1593   while (BM_tab_base != BM_tab)
1594     {
1595       *--BM_tab = j;
1596       *--BM_tab = j;
1597       *--BM_tab = j;
1598       *--BM_tab = j;
1599     }
1600
1601   /* We use this for translation, instead of TRT itself.
1602      We fill this in to handle the characters that actually
1603      occur in the pattern.  Others don't matter anyway!  */
1604   bzero (simple_translate, sizeof simple_translate);
1605   for (i = 0; i < 0400; i++)
1606     simple_translate[i] = i;
1607
1608   i = 0;
1609   while (i != infinity)
1610     {
1611       unsigned char *ptr = base_pat + i;
1612       i += direction;
1613       if (i == dirlen)
1614         i = infinity;
1615       if (! NILP (trt))
1616         {
1617           int ch;
1618           int untranslated;
1619           int this_translated = 1;
1620
1621           if (multibyte
1622               /* Is *PTR the last byte of a character?  */
1623               && (pat_end - ptr == 1 || CHAR_HEAD_P (ptr[1])))
1624             {
1625               unsigned char *charstart = ptr;
1626               while (! CHAR_HEAD_P (*charstart))
1627                 charstart--;
1628               untranslated = STRING_CHAR (charstart, ptr - charstart + 1);
1629               if (charset_base == (untranslated & ~0xff))
1630                 {
1631                   TRANSLATE (ch, trt, untranslated);
1632                   if (! CHAR_HEAD_P (*ptr))
1633                     {
1634                       translate_prev_byte = ptr[-1];
1635                       if (! CHAR_HEAD_P (translate_prev_byte))
1636                         translate_anteprev_byte = ptr[-2];
1637                     }
1638                 }
1639               else
1640                 {
1641                   this_translated = 0;
1642                   ch = *ptr;
1643                 }
1644             }
1645           else if (!multibyte)
1646             TRANSLATE (ch, trt, *ptr);
1647           else
1648             {
1649               ch = *ptr;
1650               this_translated = 0;
1651             }
1652
1653           if (ch > 0400)
1654             j = ((unsigned char) ch) | 0200;
1655           else
1656             j = (unsigned char) ch;
1657
1658           if (i == infinity)
1659             stride_for_teases = BM_tab[j];
1660
1661           BM_tab[j] = dirlen - i;
1662           /* A translation table is accompanied by its inverse -- see */
1663           /* comment following downcase_table for details */
1664           if (this_translated)
1665             {
1666               int starting_ch = ch;
1667               int starting_j = j;
1668               while (1)
1669                 {
1670                   TRANSLATE (ch, inverse_trt, ch);
1671                   if (ch > 0400)
1672                     j = ((unsigned char) ch) | 0200;
1673                   else
1674                     j = (unsigned char) ch;
1675
1676                   /* For all the characters that map into CH,
1677                      set up simple_translate to map the last byte
1678                      into STARTING_J.  */
1679                   simple_translate[j] = starting_j;
1680                   if (ch == starting_ch)
1681                     break;
1682                   BM_tab[j] = dirlen - i;
1683                 }
1684             }
1685         }
1686       else
1687         {
1688           j = *ptr;
1689
1690           if (i == infinity)
1691             stride_for_teases = BM_tab[j];
1692           BM_tab[j] = dirlen - i;
1693         }
1694       /* stride_for_teases tells how much to stride if we get a */
1695       /* match on the far character but are subsequently */
1696       /* disappointed, by recording what the stride would have been */
1697       /* for that character if the last character had been */
1698       /* different. */
1699     }
1700   infinity = dirlen - infinity;
1701   pos_byte += dirlen - ((direction > 0) ? direction : 0);
1702   /* loop invariant - POS_BYTE points at where last char (first
1703      char if reverse) of pattern would align in a possible match.  */
1704   while (n != 0)
1705     {
1706       int tail_end;
1707       unsigned char *tail_end_ptr;
1708
1709       /* It's been reported that some (broken) compiler thinks that
1710          Boolean expressions in an arithmetic context are unsigned.
1711          Using an explicit ?1:0 prevents this.  */
1712       if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1713           < 0)
1714         return (n * (0 - direction));
1715       /* First we do the part we can by pointers (maybe nothing) */
1716       QUIT;
1717       pat = base_pat;
1718       limit = pos_byte - dirlen + direction;
1719       if (direction > 0)
1720         {
1721           limit = BUFFER_CEILING_OF (limit);
1722           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1723              can take on without hitting edge of buffer or the gap.  */
1724           limit = min (limit, pos_byte + 20000);
1725           limit = min (limit, lim_byte - 1);
1726         }
1727       else
1728         {
1729           limit = BUFFER_FLOOR_OF (limit);
1730           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1731              can take on without hitting edge of buffer or the gap.  */
1732           limit = max (limit, pos_byte - 20000);
1733           limit = max (limit, lim_byte);
1734         }
1735       tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1736       tail_end_ptr = BYTE_POS_ADDR (tail_end);
1737
1738       if ((limit - pos_byte) * direction > 20)
1739         {
1740           unsigned char *p2;
1741
1742           p_limit = BYTE_POS_ADDR (limit);
1743           p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1744           /* In this loop, pos + cursor - p2 is the surrogate for pos */
1745           while (1)             /* use one cursor setting as long as i can */
1746             {
1747               if (direction > 0) /* worth duplicating */
1748                 {
1749                   /* Use signed comparison if appropriate
1750                      to make cursor+infinity sure to be > p_limit.
1751                      Assuming that the buffer lies in a range of addresses
1752                      that are all "positive" (as ints) or all "negative",
1753                      either kind of comparison will work as long
1754                      as we don't step by infinity.  So pick the kind
1755                      that works when we do step by infinity.  */
1756                   if ((EMACS_INT) (p_limit + infinity) > (EMACS_INT) p_limit)
1757                     while ((EMACS_INT) cursor <= (EMACS_INT) p_limit)
1758                       cursor += BM_tab[*cursor];
1759                   else
1760                     while ((EMACS_UINT) cursor <= (EMACS_UINT) p_limit)
1761                       cursor += BM_tab[*cursor];
1762                 }
1763               else
1764                 {
1765                   if ((EMACS_INT) (p_limit + infinity) < (EMACS_INT) p_limit)
1766                     while ((EMACS_INT) cursor >= (EMACS_INT) p_limit)
1767                       cursor += BM_tab[*cursor];
1768                   else
1769                     while ((EMACS_UINT) cursor >= (EMACS_UINT) p_limit)
1770                       cursor += BM_tab[*cursor];
1771                 }
1772 /* If you are here, cursor is beyond the end of the searched region. */
1773 /* This can happen if you match on the far character of the pattern, */
1774 /* because the "stride" of that character is infinity, a number able */
1775 /* to throw you well beyond the end of the search.  It can also */
1776 /* happen if you fail to match within the permitted region and would */
1777 /* otherwise try a character beyond that region */
1778               if ((cursor - p_limit) * direction <= len_byte)
1779                 break;  /* a small overrun is genuine */
1780               cursor -= infinity; /* large overrun = hit */
1781               i = dirlen - direction;
1782               if (! NILP (trt))
1783                 {
1784                   while ((i -= direction) + direction != 0)
1785                     {
1786                       int ch;
1787                       cursor -= direction;
1788                       /* Translate only the last byte of a character.  */
1789                       if (! multibyte
1790                           || ((cursor == tail_end_ptr
1791                                || CHAR_HEAD_P (cursor[1]))
1792                               && (CHAR_HEAD_P (cursor[0])
1793                                   || (translate_prev_byte == cursor[-1]
1794                                       && (CHAR_HEAD_P (translate_prev_byte)
1795                                           || translate_anteprev_byte == cursor[-2])))))
1796                         ch = simple_translate[*cursor];
1797                       else
1798                         ch = *cursor;
1799                       if (pat[i] != ch)
1800                         break;
1801                     }
1802                 }
1803               else
1804                 {
1805                   while ((i -= direction) + direction != 0)
1806                     {
1807                       cursor -= direction;
1808                       if (pat[i] != *cursor)
1809                         break;
1810                     }
1811                 }
1812               cursor += dirlen - i - direction; /* fix cursor */
1813               if (i + direction == 0)
1814                 {
1815                   int position;
1816
1817                   cursor -= direction;
1818
1819                   position = pos_byte + cursor - p2 + ((direction > 0)
1820                                                        ? 1 - len_byte : 0);
1821                   set_search_regs (position, len_byte);
1822
1823                   if ((n -= direction) != 0)
1824                     cursor += dirlen; /* to resume search */
1825                   else
1826                     return ((direction > 0)
1827                             ? search_regs.end[0] : search_regs.start[0]);
1828                 }
1829               else
1830                 cursor += stride_for_teases; /* <sigh> we lose -  */
1831             }
1832           pos_byte += cursor - p2;
1833         }
1834       else
1835         /* Now we'll pick up a clump that has to be done the hard */
1836         /* way because it covers a discontinuity */
1837         {
1838           limit = ((direction > 0)
1839                    ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
1840                    : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
1841           limit = ((direction > 0)
1842                    ? min (limit + len_byte, lim_byte - 1)
1843                    : max (limit - len_byte, lim_byte));
1844           /* LIMIT is now the last value POS_BYTE can have
1845              and still be valid for a possible match.  */
1846           while (1)
1847             {
1848               /* This loop can be coded for space rather than */
1849               /* speed because it will usually run only once. */
1850               /* (the reach is at most len + 21, and typically */
1851               /* does not exceed len) */
1852               while ((limit - pos_byte) * direction >= 0)
1853                 pos_byte += BM_tab[FETCH_BYTE (pos_byte)];
1854               /* now run the same tests to distinguish going off the */
1855               /* end, a match or a phony match. */
1856               if ((pos_byte - limit) * direction <= len_byte)
1857                 break;  /* ran off the end */
1858               /* Found what might be a match.
1859                  Set POS_BYTE back to last (first if reverse) pos.  */
1860               pos_byte -= infinity;
1861               i = dirlen - direction;
1862               while ((i -= direction) + direction != 0)
1863                 {
1864                   int ch;
1865                   unsigned char *ptr;
1866                   pos_byte -= direction;
1867                   ptr = BYTE_POS_ADDR (pos_byte);
1868                   /* Translate only the last byte of a character.  */
1869                   if (! multibyte
1870                       || ((ptr == tail_end_ptr
1871                            || CHAR_HEAD_P (ptr[1]))
1872                           && (CHAR_HEAD_P (ptr[0])
1873                               || (translate_prev_byte == ptr[-1]
1874                                   && (CHAR_HEAD_P (translate_prev_byte)
1875                                       || translate_anteprev_byte == ptr[-2])))))
1876                     ch = simple_translate[*ptr];
1877                   else
1878                     ch = *ptr;
1879                   if (pat[i] != ch)
1880                     break;
1881                 }
1882               /* Above loop has moved POS_BYTE part or all the way
1883                  back to the first pos (last pos if reverse).
1884                  Set it once again at the last (first if reverse) char.  */
1885               pos_byte += dirlen - i- direction;
1886               if (i + direction == 0)
1887                 {
1888                   int position;
1889                   pos_byte -= direction;
1890
1891                   position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
1892
1893                   set_search_regs (position, len_byte);
1894
1895                   if ((n -= direction) != 0)
1896                     pos_byte += dirlen; /* to resume search */
1897                   else
1898                     return ((direction > 0)
1899                             ? search_regs.end[0] : search_regs.start[0]);
1900                 }
1901               else
1902                 pos_byte += stride_for_teases;
1903             }
1904           }
1905       /* We have done one clump.  Can we continue? */
1906       if ((lim_byte - pos_byte) * direction < 0)
1907         return ((0 - n) * direction);
1908     }
1909   return BYTE_TO_CHAR (pos_byte);
1910 }
1911
1912 /* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
1913    for the overall match just found in the current buffer.
1914    Also clear out the match data for registers 1 and up.  */
1915
1916 static void
1917 set_search_regs (beg_byte, nbytes)
1918      int beg_byte, nbytes;
1919 {
1920   int i;
1921
1922   /* Make sure we have registers in which to store
1923      the match position.  */
1924   if (search_regs.num_regs == 0)
1925     {
1926       search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
1927       search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
1928       search_regs.num_regs = 2;
1929     }
1930
1931   /* Clear out the other registers.  */
1932   for (i = 1; i < search_regs.num_regs; i++)
1933     {
1934       search_regs.start[i] = -1;
1935       search_regs.end[i] = -1;
1936     }
1937
1938   search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
1939   search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
1940   XSETBUFFER (last_thing_searched, current_buffer);
1941 }
1942 \f
1943 /* Given a string of words separated by word delimiters,
1944   compute a regexp that matches those exact words
1945   separated by arbitrary punctuation.  */
1946
1947 static Lisp_Object
1948 wordify (string)
1949      Lisp_Object string;
1950 {
1951   register unsigned char *p, *o;
1952   register int i, i_byte, len, punct_count = 0, word_count = 0;
1953   Lisp_Object val;
1954   int prev_c = 0;
1955   int adjust;
1956
1957   CHECK_STRING (string, 0);
1958   p = XSTRING (string)->data;
1959   len = XSTRING (string)->size;
1960
1961   for (i = 0, i_byte = 0; i < len; )
1962     {
1963       int c;
1964
1965       if (STRING_MULTIBYTE (string))
1966         FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
1967       else
1968         c = XSTRING (string)->data[i++];
1969
1970       if (SYNTAX (c) != Sword)
1971         {
1972           punct_count++;
1973           if (i > 0 && SYNTAX (prev_c) == Sword)
1974             word_count++;
1975         }
1976
1977       prev_c = c;
1978     }
1979
1980   if (SYNTAX (prev_c) == Sword)
1981     word_count++;
1982   if (!word_count)
1983     return build_string ("");
1984
1985   adjust = - punct_count + 5 * (word_count - 1) + 4;
1986   if (STRING_MULTIBYTE (string))
1987     val = make_uninit_multibyte_string (len + adjust,
1988                                         STRING_BYTES (XSTRING (string))
1989                                         + adjust);
1990   else
1991     val = make_uninit_string (len + adjust);
1992
1993   o = XSTRING (val)->data;
1994   *o++ = '\\';
1995   *o++ = 'b';
1996   prev_c = 0;
1997
1998   for (i = 0, i_byte = 0; i < len; )
1999     {
2000       int c;
2001       int i_byte_orig = i_byte;
2002
2003       if (STRING_MULTIBYTE (string))
2004         FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
2005       else
2006         {
2007           c = XSTRING (string)->data[i++];
2008           i_byte++;
2009         }
2010
2011       if (SYNTAX (c) == Sword)
2012         {
2013           bcopy (&XSTRING (string)->data[i_byte_orig], o,
2014                  i_byte - i_byte_orig);
2015           o += i_byte - i_byte_orig;
2016         }
2017       else if (i > 0 && SYNTAX (prev_c) == Sword && --word_count)
2018         {
2019           *o++ = '\\';
2020           *o++ = 'W';
2021           *o++ = '\\';
2022           *o++ = 'W';
2023           *o++ = '*';
2024         }
2025
2026       prev_c = c;
2027     }
2028
2029   *o++ = '\\';
2030   *o++ = 'b';
2031
2032   return val;
2033 }
2034 \f
2035 DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
2036   "MSearch backward: ",
2037   "Search backward from point for STRING.\n\
2038 Set point to the beginning of the occurrence found, and return point.\n\
2039 An optional second argument bounds the search; it is a buffer position.\n\
2040 The match found must not extend before that position.\n\
2041 Optional third argument, if t, means if fail just return nil (no error).\n\
2042  If not nil and not t, position at limit of search and return nil.\n\
2043 Optional fourth argument is repeat count--search for successive occurrences.\n\
2044 See also the functions `match-beginning', `match-end' and `replace-match'.")
2045   (string, bound, noerror, count)
2046      Lisp_Object string, bound, noerror, count;
2047 {
2048   return search_command (string, bound, noerror, count, -1, 0, 0);
2049 }
2050
2051 DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
2052   "Search forward from point for STRING.\n\
2053 Set point to the end of the occurrence found, and return point.\n\
2054 An optional second argument bounds the search; it is a buffer position.\n\
2055 The match found must not extend after that position.  nil is equivalent\n\
2056   to (point-max).\n\
2057 Optional third argument, if t, means if fail just return nil (no error).\n\
2058   If not nil and not t, move to limit of search and return nil.\n\
2059 Optional fourth argument is repeat count--search for successive occurrences.\n\
2060 See also the functions `match-beginning', `match-end' and `replace-match'.")
2061   (string, bound, noerror, count)
2062      Lisp_Object string, bound, noerror, count;
2063 {
2064   return search_command (string, bound, noerror, count, 1, 0, 0);
2065 }
2066
2067 DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
2068   "sWord search backward: ",
2069   "Search backward from point for STRING, ignoring differences in punctuation.\n\
2070 Set point to the beginning of the occurrence found, and return point.\n\
2071 An optional second argument bounds the search; it is a buffer position.\n\
2072 The match found must not extend before that position.\n\
2073 Optional third argument, if t, means if fail just return nil (no error).\n\
2074   If not nil and not t, move to limit of search and return nil.\n\
2075 Optional fourth argument is repeat count--search for successive occurrences.")
2076   (string, bound, noerror, count)
2077      Lisp_Object string, bound, noerror, count;
2078 {
2079   return search_command (wordify (string), bound, noerror, count, -1, 1, 0);
2080 }
2081
2082 DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
2083   "sWord search: ",
2084   "Search forward from point for STRING, ignoring differences in punctuation.\n\
2085 Set point to the end of the occurrence found, and return point.\n\
2086 An optional second argument bounds the search; it is a buffer position.\n\
2087 The match found must not extend after that position.\n\
2088 Optional third argument, if t, means if fail just return nil (no error).\n\
2089   If not nil and not t, move to limit of search and return nil.\n\
2090 Optional fourth argument is repeat count--search for successive occurrences.")
2091   (string, bound, noerror, count)
2092      Lisp_Object string, bound, noerror, count;
2093 {
2094   return search_command (wordify (string), bound, noerror, count, 1, 1, 0);
2095 }
2096
2097 DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
2098   "sRE search backward: ",
2099   "Search backward from point for match for regular expression REGEXP.\n\
2100 Set point to the beginning of the match, and return point.\n\
2101 The match found is the one starting last in the buffer\n\
2102 and yet ending before the origin of the search.\n\
2103 An optional second argument bounds the search; it is a buffer position.\n\
2104 The match found must start at or after that position.\n\
2105 Optional third argument, if t, means if fail just return nil (no error).\n\
2106   If not nil and not t, move to limit of search and return nil.\n\
2107 Optional fourth argument is repeat count--search for successive occurrences.\n\
2108 See also the functions `match-beginning', `match-end' and `replace-match'.")
2109   (regexp, bound, noerror, count)
2110      Lisp_Object regexp, bound, noerror, count;
2111 {
2112   return search_command (regexp, bound, noerror, count, -1, 1, 0);
2113 }
2114
2115 DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
2116   "sRE search: ",
2117   "Search forward from point for regular expression REGEXP.\n\
2118 Set point to the end of the occurrence found, and return point.\n\
2119 An optional second argument bounds the search; it is a buffer position.\n\
2120 The match found must not extend after that position.\n\
2121 Optional third argument, if t, means if fail just return nil (no error).\n\
2122   If not nil and not t, move to limit of search and return nil.\n\
2123 Optional fourth argument is repeat count--search for successive occurrences.\n\
2124 See also the functions `match-beginning', `match-end' and `replace-match'.")
2125   (regexp, bound, noerror, count)
2126      Lisp_Object regexp, bound, noerror, count;
2127 {
2128   return search_command (regexp, bound, noerror, count, 1, 1, 0);
2129 }
2130
2131 DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
2132   "sPosix search backward: ",
2133   "Search backward from point for match for regular expression REGEXP.\n\
2134 Find the longest match in accord with Posix regular expression rules.\n\
2135 Set point to the beginning of the match, and return point.\n\
2136 The match found is the one starting last in the buffer\n\
2137 and yet ending before the origin of the search.\n\
2138 An optional second argument bounds the search; it is a buffer position.\n\
2139 The match found must start at or after that position.\n\
2140 Optional third argument, if t, means if fail just return nil (no error).\n\
2141   If not nil and not t, move to limit of search and return nil.\n\
2142 Optional fourth argument is repeat count--search for successive occurrences.\n\
2143 See also the functions `match-beginning', `match-end' and `replace-match'.")
2144   (regexp, bound, noerror, count)
2145      Lisp_Object regexp, bound, noerror, count;
2146 {
2147   return search_command (regexp, bound, noerror, count, -1, 1, 1);
2148 }
2149
2150 DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
2151   "sPosix search: ",
2152   "Search forward from point for regular expression REGEXP.\n\
2153 Find the longest match in accord with Posix regular expression rules.\n\
2154 Set point to the end of the occurrence found, and return point.\n\
2155 An optional second argument bounds the search; it is a buffer position.\n\
2156 The match found must not extend after that position.\n\
2157 Optional third argument, if t, means if fail just return nil (no error).\n\
2158   If not nil and not t, move to limit of search and return nil.\n\
2159 Optional fourth argument is repeat count--search for successive occurrences.\n\
2160 See also the functions `match-beginning', `match-end' and `replace-match'.")
2161   (regexp, bound, noerror, count)
2162      Lisp_Object regexp, bound, noerror, count;
2163 {
2164   return search_command (regexp, bound, noerror, count, 1, 1, 1);
2165 }
2166 \f
2167 DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
2168   "Replace text matched by last search with NEWTEXT.\n\
2169 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.\n\
2170 Otherwise maybe capitalize the whole text, or maybe just word initials,\n\
2171 based on the replaced text.\n\
2172 If the replaced text has only capital letters\n\
2173 and has at least one multiletter word, convert NEWTEXT to all caps.\n\
2174 If the replaced text has at least one word starting with a capital letter,\n\
2175 then capitalize each word in NEWTEXT.\n\n\
2176 If third arg LITERAL is non-nil, insert NEWTEXT literally.\n\
2177 Otherwise treat `\\' as special:\n\
2178   `\\&' in NEWTEXT means substitute original matched text.\n\
2179   `\\N' means substitute what matched the Nth `\\(...\\)'.\n\
2180        If Nth parens didn't match, substitute nothing.\n\
2181   `\\\\' means insert one `\\'.\n\
2182 FIXEDCASE and LITERAL are optional arguments.\n\
2183 Leaves point at end of replacement text.\n\
2184 \n\
2185 The optional fourth argument STRING can be a string to modify.\n\
2186 In that case, this function creates and returns a new string\n\
2187 which is made by replacing the part of STRING that was matched.\n\
2188 \n\
2189 The optional fifth argument SUBEXP specifies a subexpression of the match.\n\
2190 It says to replace just that subexpression instead of the whole match.\n\
2191 This is useful only after a regular expression search or match\n\
2192 since only regular expressions have distinguished subexpressions.")
2193   (newtext, fixedcase, literal, string, subexp)
2194      Lisp_Object newtext, fixedcase, literal, string, subexp;
2195 {
2196   enum { nochange, all_caps, cap_initial } case_action;
2197   register int pos, pos_byte;
2198   int some_multiletter_word;
2199   int some_lowercase;
2200   int some_uppercase;
2201   int some_nonuppercase_initial;
2202   register int c, prevc;
2203   int inslen;
2204   int sub;
2205   int opoint, newpoint;
2206
2207   CHECK_STRING (newtext, 0);
2208
2209   if (! NILP (string))
2210     CHECK_STRING (string, 4);
2211
2212   case_action = nochange;       /* We tried an initialization */
2213                                 /* but some C compilers blew it */
2214
2215   if (search_regs.num_regs <= 0)
2216     error ("replace-match called before any match found");
2217
2218   if (NILP (subexp))
2219     sub = 0;
2220   else
2221     {
2222       CHECK_NUMBER (subexp, 3);
2223       sub = XINT (subexp);
2224       if (sub < 0 || sub >= search_regs.num_regs)
2225         args_out_of_range (subexp, make_number (search_regs.num_regs));
2226     }
2227
2228   if (NILP (string))
2229     {
2230       if (search_regs.start[sub] < BEGV
2231           || search_regs.start[sub] > search_regs.end[sub]
2232           || search_regs.end[sub] > ZV)
2233         args_out_of_range (make_number (search_regs.start[sub]),
2234                            make_number (search_regs.end[sub]));
2235     }
2236   else
2237     {
2238       if (search_regs.start[sub] < 0
2239           || search_regs.start[sub] > search_regs.end[sub]
2240           || search_regs.end[sub] > XSTRING (string)->size)
2241         args_out_of_range (make_number (search_regs.start[sub]),
2242                            make_number (search_regs.end[sub]));
2243     }
2244
2245   if (NILP (fixedcase))
2246     {
2247       /* Decide how to casify by examining the matched text. */
2248       int last;
2249
2250       pos = search_regs.start[sub];
2251       last = search_regs.end[sub];
2252
2253       if (NILP (string))
2254         pos_byte = CHAR_TO_BYTE (pos);
2255       else
2256         pos_byte = string_char_to_byte (string, pos);
2257
2258       prevc = '\n';
2259       case_action = all_caps;
2260
2261       /* some_multiletter_word is set nonzero if any original word
2262          is more than one letter long. */
2263       some_multiletter_word = 0;
2264       some_lowercase = 0;
2265       some_nonuppercase_initial = 0;
2266       some_uppercase = 0;
2267
2268       while (pos < last)
2269         {
2270           if (NILP (string))
2271             {
2272               c = FETCH_CHAR (pos_byte);
2273               INC_BOTH (pos, pos_byte);
2274             }
2275           else
2276             FETCH_STRING_CHAR_ADVANCE (c, string, pos, pos_byte);
2277
2278           if (LOWERCASEP (c))
2279             {
2280               /* Cannot be all caps if any original char is lower case */
2281
2282               some_lowercase = 1;
2283               if (SYNTAX (prevc) != Sword)
2284                 some_nonuppercase_initial = 1;
2285               else
2286                 some_multiletter_word = 1;
2287             }
2288           else if (!NOCASEP (c))
2289             {
2290               some_uppercase = 1;
2291               if (SYNTAX (prevc) != Sword)
2292                 ;
2293               else
2294                 some_multiletter_word = 1;
2295             }
2296           else
2297             {
2298               /* If the initial is a caseless word constituent,
2299                  treat that like a lowercase initial.  */
2300               if (SYNTAX (prevc) != Sword)
2301                 some_nonuppercase_initial = 1;
2302             }
2303
2304           prevc = c;
2305         }
2306
2307       /* Convert to all caps if the old text is all caps
2308          and has at least one multiletter word.  */
2309       if (! some_lowercase && some_multiletter_word)
2310         case_action = all_caps;
2311       /* Capitalize each word, if the old text has all capitalized words.  */
2312       else if (!some_nonuppercase_initial && some_multiletter_word)
2313         case_action = cap_initial;
2314       else if (!some_nonuppercase_initial && some_uppercase)
2315         /* Should x -> yz, operating on X, give Yz or YZ?
2316            We'll assume the latter.  */
2317         case_action = all_caps;
2318       else
2319         case_action = nochange;
2320     }
2321
2322   /* Do replacement in a string.  */
2323   if (!NILP (string))
2324     {
2325       Lisp_Object before, after;
2326
2327       before = Fsubstring (string, make_number (0),
2328                            make_number (search_regs.start[sub]));
2329       after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
2330
2331       /* Substitute parts of the match into NEWTEXT
2332          if desired.  */
2333       if (NILP (literal))
2334         {
2335           int lastpos = 0;
2336           int lastpos_byte = 0;
2337           /* We build up the substituted string in ACCUM.  */
2338           Lisp_Object accum;
2339           Lisp_Object middle;
2340           int length = STRING_BYTES (XSTRING (newtext));
2341
2342           accum = Qnil;
2343
2344           for (pos_byte = 0, pos = 0; pos_byte < length;)
2345             {
2346               int substart = -1;
2347               int subend;
2348               int delbackslash = 0;
2349
2350               FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2351
2352               if (c == '\\')
2353                 {
2354                   FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2355                   if (c == '&')
2356                     {
2357                       substart = search_regs.start[sub];
2358                       subend = search_regs.end[sub];
2359                     }
2360                   else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
2361                     {
2362                       if (search_regs.start[c - '0'] >= 0)
2363                         {
2364                           substart = search_regs.start[c - '0'];
2365                           subend = search_regs.end[c - '0'];
2366                         }
2367                     }
2368                   else if (c == '\\')
2369                     delbackslash = 1;
2370                   else
2371                     error ("Invalid use of `\\' in replacement text");
2372                 }
2373               if (substart >= 0)
2374                 {
2375                   if (pos - 2 != lastpos)
2376                     middle = substring_both (newtext, lastpos,
2377                                              lastpos_byte,
2378                                              pos - 2, pos_byte - 2);
2379                   else
2380                     middle = Qnil;
2381                   accum = concat3 (accum, middle,
2382                                    Fsubstring (string,
2383                                                make_number (substart),
2384                                                make_number (subend)));
2385                   lastpos = pos;
2386                   lastpos_byte = pos_byte;
2387                 }
2388               else if (delbackslash)
2389                 {
2390                   middle = substring_both (newtext, lastpos,
2391                                            lastpos_byte,
2392                                            pos - 1, pos_byte - 1);
2393
2394                   accum = concat2 (accum, middle);
2395                   lastpos = pos;
2396                   lastpos_byte = pos_byte;
2397                 }
2398             }
2399
2400           if (pos != lastpos)
2401             middle = substring_both (newtext, lastpos,
2402                                      lastpos_byte,
2403                                      pos, pos_byte);
2404           else
2405             middle = Qnil;
2406
2407           newtext = concat2 (accum, middle);
2408         }
2409
2410       /* Do case substitution in NEWTEXT if desired.  */
2411       if (case_action == all_caps)
2412         newtext = Fupcase (newtext);
2413       else if (case_action == cap_initial)
2414         newtext = Fupcase_initials (newtext);
2415
2416       return concat3 (before, newtext, after);
2417     }
2418
2419   /* Record point, the move (quietly) to the start of the match.  */
2420   if (PT > search_regs.start[sub])
2421     opoint = PT - ZV;
2422   else
2423     opoint = PT;
2424
2425   TEMP_SET_PT (search_regs.start[sub]);
2426
2427   /* We insert the replacement text before the old text, and then
2428      delete the original text.  This means that markers at the
2429      beginning or end of the original will float to the corresponding
2430      position in the replacement.  */
2431   if (!NILP (literal))
2432     Finsert_and_inherit (1, &newtext);
2433   else
2434     {
2435       struct gcpro gcpro1;
2436       int length = STRING_BYTES (XSTRING (newtext));
2437
2438       GCPRO1 (newtext);
2439
2440       for (pos_byte = 0, pos = 0; pos_byte < length;)
2441         {
2442           int offset = PT - search_regs.start[sub];
2443
2444           FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2445
2446           if (c == '\\')
2447             {
2448               FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2449               if (c == '&')
2450                 Finsert_buffer_substring
2451                   (Fcurrent_buffer (),
2452                    make_number (search_regs.start[sub] + offset),
2453                    make_number (search_regs.end[sub] + offset));
2454               else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
2455                 {
2456                   if (search_regs.start[c - '0'] >= 1)
2457                     Finsert_buffer_substring
2458                       (Fcurrent_buffer (),
2459                        make_number (search_regs.start[c - '0'] + offset),
2460                        make_number (search_regs.end[c - '0'] + offset));
2461                 }
2462               else if (c == '\\')
2463                 insert_char (c);
2464               else
2465                 error ("Invalid use of `\\' in replacement text");
2466             }
2467           else
2468             insert_char (c);
2469         }
2470       UNGCPRO;
2471     }
2472
2473   inslen = PT - (search_regs.start[sub]);
2474   del_range (search_regs.start[sub] + inslen, search_regs.end[sub] + inslen);
2475
2476   if (case_action == all_caps)
2477     Fupcase_region (make_number (PT - inslen), make_number (PT));
2478   else if (case_action == cap_initial)
2479     Fupcase_initials_region (make_number (PT - inslen), make_number (PT));
2480
2481   newpoint = PT;
2482
2483   /* Put point back where it was in the text.  */
2484   if (opoint <= 0)
2485     TEMP_SET_PT (opoint + ZV);
2486   else
2487     TEMP_SET_PT (opoint);
2488
2489   /* Now move point "officially" to the start of the inserted replacement.  */
2490   move_if_not_intangible (newpoint);
2491
2492   return Qnil;
2493 }
2494 \f
2495 static Lisp_Object
2496 match_limit (num, beginningp)
2497      Lisp_Object num;
2498      int beginningp;
2499 {
2500   register int n;
2501
2502   CHECK_NUMBER (num, 0);
2503   n = XINT (num);
2504   if (n < 0 || n >= search_regs.num_regs)
2505     args_out_of_range (num, make_number (search_regs.num_regs));
2506   if (search_regs.num_regs <= 0
2507       || search_regs.start[n] < 0)
2508     return Qnil;
2509   return (make_number ((beginningp) ? search_regs.start[n]
2510                                     : search_regs.end[n]));
2511 }
2512
2513 DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
2514   "Return position of start of text matched by last search.\n\
2515 SUBEXP, a number, specifies which parenthesized expression in the last\n\
2516   regexp.\n\
2517 Value is nil if SUBEXPth pair didn't match, or there were less than\n\
2518   SUBEXP pairs.\n\
2519 Zero means the entire text matched by the whole regexp or whole string.")
2520   (subexp)
2521      Lisp_Object subexp;
2522 {
2523   return match_limit (subexp, 1);
2524 }
2525
2526 DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
2527   "Return position of end of text matched by last search.\n\
2528 SUBEXP, a number, specifies which parenthesized expression in the last\n\
2529   regexp.\n\
2530 Value is nil if SUBEXPth pair didn't match, or there were less than\n\
2531   SUBEXP pairs.\n\
2532 Zero means the entire text matched by the whole regexp or whole string.")
2533   (subexp)
2534      Lisp_Object subexp;
2535 {
2536   return match_limit (subexp, 0);
2537 }
2538
2539 DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 2, 0,
2540   "Return a list containing all info on what the last search matched.\n\
2541 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.\n\
2542 All the elements are markers or nil (nil if the Nth pair didn't match)\n\
2543 if the last match was on a buffer; integers or nil if a string was matched.\n\
2544 Use `store-match-data' to reinstate the data in this list.\n\
2545 \n\
2546 If INTEGERS (the optional first argument) is non-nil, always use integers\n\
2547 \(rather than markers) to represent buffer positions.\n\
2548 If REUSE is a list, reuse it as part of the value.  If REUSE is long enough\n\
2549 to hold all the values, and if INTEGERS is non-nil, no consing is done.")
2550   (integers, reuse)
2551      Lisp_Object integers, reuse;
2552 {
2553   Lisp_Object tail, prev;
2554   Lisp_Object *data;
2555   int i, len;
2556
2557   if (NILP (last_thing_searched))
2558     return Qnil;
2559
2560   data = (Lisp_Object *) alloca ((2 * search_regs.num_regs)
2561                                  * sizeof (Lisp_Object));
2562
2563   len = -1;
2564   for (i = 0; i < search_regs.num_regs; i++)
2565     {
2566       int start = search_regs.start[i];
2567       if (start >= 0)
2568         {
2569           if (EQ (last_thing_searched, Qt)
2570               || ! NILP (integers))
2571             {
2572               XSETFASTINT (data[2 * i], start);
2573               XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
2574             }
2575           else if (BUFFERP (last_thing_searched))
2576             {
2577               data[2 * i] = Fmake_marker ();
2578               Fset_marker (data[2 * i],
2579                            make_number (start),
2580                            last_thing_searched);
2581               data[2 * i + 1] = Fmake_marker ();
2582               Fset_marker (data[2 * i + 1],
2583                            make_number (search_regs.end[i]),
2584                            last_thing_searched);
2585             }
2586           else
2587             /* last_thing_searched must always be Qt, a buffer, or Qnil.  */
2588             abort ();
2589
2590           len = i;
2591         }
2592       else
2593         data[2 * i] = data [2 * i + 1] = Qnil;
2594     }
2595
2596   /* If REUSE is not usable, cons up the values and return them.  */
2597   if (! CONSP (reuse))
2598     return Flist (2 * len + 2, data);
2599
2600   /* If REUSE is a list, store as many value elements as will fit
2601      into the elements of REUSE.  */
2602   for (i = 0, tail = reuse; CONSP (tail);
2603        i++, tail = XCONS (tail)->cdr)
2604     {
2605       if (i < 2 * len + 2)
2606         XCONS (tail)->car = data[i];
2607       else
2608         XCONS (tail)->car = Qnil;
2609       prev = tail;
2610     }
2611
2612   /* If we couldn't fit all value elements into REUSE,
2613      cons up the rest of them and add them to the end of REUSE.  */
2614   if (i < 2 * len + 2)
2615     XCONS (prev)->cdr = Flist (2 * len + 2 - i, data + i);
2616
2617   return reuse;
2618 }
2619
2620
2621 DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 1, 0,
2622   "Set internal data on last search match from elements of LIST.\n\
2623 LIST should have been created by calling `match-data' previously.")
2624   (list)
2625      register Lisp_Object list;
2626 {
2627   register int i;
2628   register Lisp_Object marker;
2629
2630   if (running_asynch_code)
2631     save_search_regs ();
2632
2633   if (!CONSP (list) && !NILP (list))
2634     list = wrong_type_argument (Qconsp, list);
2635
2636   /* Unless we find a marker with a buffer in LIST, assume that this
2637      match data came from a string.  */
2638   last_thing_searched = Qt;
2639
2640   /* Allocate registers if they don't already exist.  */
2641   {
2642     int length = XFASTINT (Flength (list)) / 2;
2643
2644     if (length > search_regs.num_regs)
2645       {
2646         if (search_regs.num_regs == 0)
2647           {
2648             search_regs.start
2649               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2650             search_regs.end
2651               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2652           }
2653         else
2654           {
2655             search_regs.start
2656               = (regoff_t *) xrealloc (search_regs.start,
2657                                        length * sizeof (regoff_t));
2658             search_regs.end
2659               = (regoff_t *) xrealloc (search_regs.end,
2660                                        length * sizeof (regoff_t));
2661           }
2662
2663         search_regs.num_regs = length;
2664       }
2665   }
2666
2667   for (i = 0; i < search_regs.num_regs; i++)
2668     {
2669       marker = Fcar (list);
2670       if (NILP (marker))
2671         {
2672           search_regs.start[i] = -1;
2673           list = Fcdr (list);
2674         }
2675       else
2676         {
2677           if (MARKERP (marker))
2678             {
2679               if (XMARKER (marker)->buffer == 0)
2680                 XSETFASTINT (marker, 0);
2681               else
2682                 XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
2683             }
2684
2685           CHECK_NUMBER_COERCE_MARKER (marker, 0);
2686           search_regs.start[i] = XINT (marker);
2687           list = Fcdr (list);
2688
2689           marker = Fcar (list);
2690           if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
2691             XSETFASTINT (marker, 0);
2692
2693           CHECK_NUMBER_COERCE_MARKER (marker, 0);
2694           search_regs.end[i] = XINT (marker);
2695         }
2696       list = Fcdr (list);
2697     }
2698
2699   return Qnil;
2700 }
2701
2702 /* If non-zero the match data have been saved in saved_search_regs
2703    during the execution of a sentinel or filter. */
2704 static int search_regs_saved;
2705 static struct re_registers saved_search_regs;
2706
2707 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
2708    if asynchronous code (filter or sentinel) is running. */
2709 static void
2710 save_search_regs ()
2711 {
2712   if (!search_regs_saved)
2713     {
2714       saved_search_regs.num_regs = search_regs.num_regs;
2715       saved_search_regs.start = search_regs.start;
2716       saved_search_regs.end = search_regs.end;
2717       search_regs.num_regs = 0;
2718       search_regs.start = 0;
2719       search_regs.end = 0;
2720
2721       search_regs_saved = 1;
2722     }
2723 }
2724
2725 /* Called upon exit from filters and sentinels. */
2726 void
2727 restore_match_data ()
2728 {
2729   if (search_regs_saved)
2730     {
2731       if (search_regs.num_regs > 0)
2732         {
2733           xfree (search_regs.start);
2734           xfree (search_regs.end);
2735         }
2736       search_regs.num_regs = saved_search_regs.num_regs;
2737       search_regs.start = saved_search_regs.start;
2738       search_regs.end = saved_search_regs.end;
2739
2740       search_regs_saved = 0;
2741     }
2742 }
2743
2744 /* Quote a string to inactivate reg-expr chars */
2745
2746 DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
2747   "Return a regexp string which matches exactly STRING and nothing else.")
2748   (string)
2749      Lisp_Object string;
2750 {
2751   register unsigned char *in, *out, *end;
2752   register unsigned char *temp;
2753   int backslashes_added = 0;
2754
2755   CHECK_STRING (string, 0);
2756
2757   temp = (unsigned char *) alloca (STRING_BYTES (XSTRING (string)) * 2);
2758
2759   /* Now copy the data into the new string, inserting escapes. */
2760
2761   in = XSTRING (string)->data;
2762   end = in + STRING_BYTES (XSTRING (string));
2763   out = temp;
2764
2765   for (; in != end; in++)
2766     {
2767       if (*in == '[' || *in == ']'
2768           || *in == '*' || *in == '.' || *in == '\\'
2769           || *in == '?' || *in == '+'
2770           || *in == '^' || *in == '$')
2771         *out++ = '\\', backslashes_added++;
2772       *out++ = *in;
2773     }
2774
2775   return make_specified_string (temp,
2776                                 XSTRING (string)->size + backslashes_added,
2777                                 out - temp,
2778                                 STRING_MULTIBYTE (string));
2779 }
2780 \f
2781 void
2782 syms_of_search ()
2783 {
2784   register int i;
2785
2786   for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
2787     {
2788       searchbufs[i].buf.allocated = 100;
2789       searchbufs[i].buf.buffer = (unsigned char *) malloc (100);
2790       searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
2791       searchbufs[i].regexp = Qnil;
2792       staticpro (&searchbufs[i].regexp);
2793       searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
2794     }
2795   searchbuf_head = &searchbufs[0];
2796
2797   Qsearch_failed = intern ("search-failed");
2798   staticpro (&Qsearch_failed);
2799   Qinvalid_regexp = intern ("invalid-regexp");
2800   staticpro (&Qinvalid_regexp);
2801
2802   Fput (Qsearch_failed, Qerror_conditions,
2803         Fcons (Qsearch_failed, Fcons (Qerror, Qnil)));
2804   Fput (Qsearch_failed, Qerror_message,
2805         build_string ("Search failed"));
2806
2807   Fput (Qinvalid_regexp, Qerror_conditions,
2808         Fcons (Qinvalid_regexp, Fcons (Qerror, Qnil)));
2809   Fput (Qinvalid_regexp, Qerror_message,
2810         build_string ("Invalid regexp"));
2811
2812   last_thing_searched = Qnil;
2813   staticpro (&last_thing_searched);
2814
2815   defsubr (&Slooking_at);
2816   defsubr (&Sposix_looking_at);
2817   defsubr (&Sstring_match);
2818   defsubr (&Sposix_string_match);
2819   defsubr (&Ssearch_forward);
2820   defsubr (&Ssearch_backward);
2821   defsubr (&Sword_search_forward);
2822   defsubr (&Sword_search_backward);
2823   defsubr (&Sre_search_forward);
2824   defsubr (&Sre_search_backward);
2825   defsubr (&Sposix_search_forward);
2826   defsubr (&Sposix_search_backward);
2827   defsubr (&Sreplace_match);
2828   defsubr (&Smatch_beginning);
2829   defsubr (&Smatch_end);
2830   defsubr (&Smatch_data);
2831   defsubr (&Sset_match_data);
2832   defsubr (&Sregexp_quote);
2833 }