src/search.c

   1 /* String search routines for GNU Emacs.
   2    Copyright (C) 1985, 86,87,93,94,97,98, 1999 Free Software Foundation, Inc.
   3
   4 This file is part of GNU Emacs.
   5
   6 GNU Emacs is free software; you can redistribute it and/or modify
   7 it under the terms of the GNU General Public License as published by
   8 the Free Software Foundation; either version 2, or (at your option)
   9 any later version.
  10
  11 GNU Emacs is distributed in the hope that it will be useful,
  12 but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 GNU General Public License for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GNU Emacs; see the file COPYING.  If not, write to
  18 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  19 Boston, MA 02111-1307, USA.  */
  20
  21
  22 #include <config.h>
  23 #include "lisp.h"
  24 #include "syntax.h"
  25 #include "category.h"
  26 #include "buffer.h"
  27 #include "charset.h"
  28 #include "region-cache.h"
  29 #include "commands.h"
  30 #include "blockinput.h"
  31 #include "intervals.h"
  32
  33 #include <sys/types.h>
  34 #include "regex.h"
  35
/* Smaller and larger of two values.  These are macros, so each
   argument may be evaluated twice; do not pass expressions with
   side effects.  */
#define min(a, b) ((a) < (b) ? (a) : (b))
#define max(a, b) ((a) > (b) ? (a) : (b))

/* Number of entries in the compiled-regexp cache (see searchbufs).  */
#define REGEXP_CACHE_SIZE 20
  40
/* One entry of the compiled-regexp cache.
   If the regexp is non-nil, then the buffer contains the compiled form
   of that regexp, suitable for searching.  */
struct regexp_cache
{
  /* Next entry in the list that starts at searchbuf_head; 0 marks
     the end of the list.  */
  struct regexp_cache *next;
  /* A copy of the pattern string this entry was compiled from, or nil
     while the entry holds no usable compiled pattern.  */
  Lisp_Object regexp;
  /* The compiled pattern itself.  */
  struct re_pattern_buffer buf;
  /* 0400 (octal) = 256 bytes, one per possible byte value.
     NOTE(review): presumably the fastmap storage for BUF; where it is
     attached to BUF is not visible in this part of the file.  */
  char fastmap[0400];
  /* Nonzero means regexp was compiled to do full POSIX backtracking.  */
  char posix;
};

/* The instances of that struct.  */
struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];

/* The head of the linked list; points to the most recently used buffer.  */
struct regexp_cache *searchbuf_head;
  58
  59
/* Every call to re_match, etc., must pass &search_regs as the regs
   argument unless you can show it is unnecessary (i.e., if re_match
   is certainly going to be called again before region-around-match
   can be called).

   Since the registers are now dynamically allocated, we need to make
   sure not to refer to the Nth register before checking that it has
   been allocated by checking search_regs.num_regs.

   The regex code keeps track of whether it has allocated the search
   buffer using bits in the re_pattern_buffer.  This means that whenever
   you compile a new pattern, it completely forgets whether it has
   allocated any registers, and will allocate new registers the next
   time you call a searching or matching function.  Therefore, we need
   to call re_set_registers after compiling a new pattern or after
   setting the match registers, so that the regex functions will be
   able to free or re-allocate it properly.  */
static struct re_registers search_regs;

/* The buffer in which the last search was performed, or
   Qt if the last search was done in a string;
   Qnil if no searching has been done yet.  */
static Lisp_Object last_thing_searched;

/* Error condition signaled when compiling a regexp fails
   (see compile_pattern_1).  */

Lisp_Object Qinvalid_regexp;

/* Forward declarations of static functions that are called in this
   file before their definitions appear.  */
static void set_search_regs ();
static void save_search_regs ();
static int simple_search ();
static int boyer_moore ();
static int search_buffer ();
  93
/* Signal a Lisp error saying that the regexp matcher overflowed its
   stack.  The callers in this file invoke it when a matching or
   searching function returns -2.  */
static void
matcher_overflow ()
{
  error ("Stack overflow in regexp matcher");
}
  99
/* CONST expands to `const' when the compiler announces ANSI C
   (__STDC__ defined) and to nothing otherwise.  */
#ifdef __STDC__
#define CONST const
#else
#define CONST
#endif
 105
 106 /* Compile a regexp and signal a Lisp error if anything goes wrong.
 107    PATTERN is the pattern to compile.
 108    CP is the place to put the result.
 109    TRANSLATE is a translation table for ignoring case, or nil for none.
 110    REGP is the structure that says where to store the "register"
 111    values that will result from matching this pattern.
 112    If it is 0, we should compile the pattern not to record any
 113    subexpression bounds.
 114    POSIX is nonzero if we want full backtracking (POSIX style)
 115    for this pattern.  0 means backtrack only enough to get a valid match.
 116    MULTIBYTE is nonzero if we want to handle multibyte characters in
 117    PATTERN.  0 means all multibyte characters are recognized just as
 118    sequences of binary data.  */
 119
 120 static void
 121 compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte)
 122      struct regexp_cache *cp;
 123      Lisp_Object pattern;
 124      Lisp_Object translate;
 125      struct re_registers *regp;
 126      int posix;
 127      int multibyte;
 128 {
 129   unsigned char *raw_pattern;
 130   int raw_pattern_size;
 131   char *val;
 132   reg_syntax_t old;
 133
 134   /* MULTIBYTE says whether the text to be searched is multibyte.
 135      We must convert PATTERN to match that, or we will not really
 136      find things right.  */
 137
 138   if (multibyte == STRING_MULTIBYTE (pattern))
 139     {
 140       raw_pattern = (unsigned char *) XSTRING (pattern)->data;
 141       raw_pattern_size = STRING_BYTES (XSTRING (pattern));
 142     }
 143   else if (multibyte)
 144     {
 145       raw_pattern_size = count_size_as_multibyte (XSTRING (pattern)->data,
 146                                                   XSTRING (pattern)->size);
 147       raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
 148       copy_text (XSTRING (pattern)->data, raw_pattern,
 149                  XSTRING (pattern)->size, 0, 1);
 150     }
 151   else
 152     {
 153       /* Converting multibyte to single-byte.
 154
 155          ??? Perhaps this conversion should be done in a special way
 156          by subtracting nonascii-insert-offset from each non-ASCII char,
 157          so that only the multibyte chars which really correspond to
 158          the chosen single-byte character set can possibly match.  */
 159       raw_pattern_size = XSTRING (pattern)->size;
 160       raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
 161       copy_text (XSTRING (pattern)->data, raw_pattern,
 162                  STRING_BYTES (XSTRING (pattern)), 1, 0);
 163     }
 164
 165   cp->regexp = Qnil;
 166   cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
 167   cp->posix = posix;
 168   cp->buf.multibyte = multibyte;
 169   BLOCK_INPUT;
 170   old = re_set_syntax (RE_SYNTAX_EMACS | RE_CHAR_CLASSES
 171                        | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
 172   val = (char *) re_compile_pattern ((char *)raw_pattern,
 173                                      raw_pattern_size, &cp->buf);
 174   re_set_syntax (old);
 175   UNBLOCK_INPUT;
 176   if (val)
 177     Fsignal (Qinvalid_regexp, Fcons (build_string (val), Qnil));
 178
 179   cp->regexp = Fcopy_sequence (pattern);
 180 }
 181
 182 /* Shrink each compiled regexp buffer in the cache
 183    to the size actually used right now.
 184    This is called from garbage collection.  */
 185
 186 void
 187 shrink_regexp_cache ()
 188 {
 189   struct regexp_cache *cp, **cpp;
 190
 191   for (cp = searchbuf_head; cp != 0; cp = cp->next)
 192     {
 193       cp->buf.allocated = cp->buf.used;
 194       cp->buf.buffer
 195         = (unsigned char *) realloc (cp->buf.buffer, cp->buf.used);
 196     }
 197 }
 198
 199 /* Compile a regexp if necessary, but first check to see if there's one in
 200    the cache.
 201    PATTERN is the pattern to compile.
 202    TRANSLATE is a translation table for ignoring case, or nil for none.
 203    REGP is the structure that says where to store the "register"
 204    values that will result from matching this pattern.
 205    If it is 0, we should compile the pattern not to record any
 206    subexpression bounds.
 207    POSIX is nonzero if we want full backtracking (POSIX style)
 208    for this pattern.  0 means backtrack only enough to get a valid match.  */
 209
 210 struct re_pattern_buffer *
 211 compile_pattern (pattern, regp, translate, posix, multibyte)
 212      Lisp_Object pattern;
 213      struct re_registers *regp;
 214      Lisp_Object translate;
 215      int posix, multibyte;
 216 {
 217   struct regexp_cache *cp, **cpp;
 218
 219   for (cpp = &searchbuf_head; ; cpp = &cp->next)
 220     {
 221       cp = *cpp;
 222       if (XSTRING (cp->regexp)->size == XSTRING (pattern)->size
 223           && !NILP (Fstring_equal (cp->regexp, pattern))
 224           && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
 225           && cp->posix == posix
 226           && cp->buf.multibyte == multibyte)
 227         break;
 228
 229       /* If we're at the end of the cache, compile into the last cell.  */
 230       if (cp->next == 0)
 231         {
 232           compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte);
 233           break;
 234         }
 235     }
 236
 237   /* When we get here, cp (aka *cpp) contains the compiled pattern,
 238      either because we found it in the cache or because we just compiled it.
 239      Move it to the front of the queue to mark it as most recently used.  */
 240   *cpp = cp->next;
 241   cp->next = searchbuf_head;
 242   searchbuf_head = cp;
 243
 244   /* Advise the searching functions about the space we have allocated
 245      for register data.  */
 246   if (regp)
 247     re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
 248
 249   return &cp->buf;
 250 }
 251
/* Error condition used for failing searches */
Lisp_Object Qsearch_failed;

/* Signal Qsearch_failed with a one-element data list holding ARG.
   The trailing `return Qnil' only gives the function a value of the
   declared type for callers that write `return signal_failure (...)'.  */
Lisp_Object
signal_failure (arg)
     Lisp_Object arg;
{
  Fsignal (Qsearch_failed, Fcons (arg, Qnil));
  return Qnil;
}
 262 \f
 263 static Lisp_Object
 264 looking_at_1 (string, posix)
 265      Lisp_Object string;
 266      int posix;
 267 {
 268   Lisp_Object val;
 269   unsigned char *p1, *p2;
 270   int s1, s2;
 271   register int i;
 272   struct re_pattern_buffer *bufp;
 273
 274   if (running_asynch_code)
 275     save_search_regs ();
 276
 277   CHECK_STRING (string, 0);
 278   bufp = compile_pattern (string, &search_regs,
 279                           (!NILP (current_buffer->case_fold_search)
 280                            ? DOWNCASE_TABLE : Qnil),
 281                           posix,
 282                           !NILP (current_buffer->enable_multibyte_characters));
 283
 284   immediate_quit = 1;
 285   QUIT;                 /* Do a pending quit right away, to avoid paradoxical behavior */
 286
 287   /* Get pointers and sizes of the two strings
 288      that make up the visible portion of the buffer. */
 289
 290   p1 = BEGV_ADDR;
 291   s1 = GPT_BYTE - BEGV_BYTE;
 292   p2 = GAP_END_ADDR;
 293   s2 = ZV_BYTE - GPT_BYTE;
 294   if (s1 < 0)
 295     {
 296       p2 = p1;
 297       s2 = ZV_BYTE - BEGV_BYTE;
 298       s1 = 0;
 299     }
 300   if (s2 < 0)
 301     {
 302       s1 = ZV_BYTE - BEGV_BYTE;
 303       s2 = 0;
 304     }
 305
 306   re_match_object = Qnil;
 307
 308   i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
 309                   PT_BYTE - BEGV_BYTE, &search_regs,
 310                   ZV_BYTE - BEGV_BYTE);
 311   if (i == -2)
 312     matcher_overflow ();
 313
 314   val = (0 <= i ? Qt : Qnil);
 315   if (i >= 0)
 316     for (i = 0; i < search_regs.num_regs; i++)
 317       if (search_regs.start[i] >= 0)
 318         {
 319           search_regs.start[i]
 320             = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
 321           search_regs.end[i]
 322             = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
 323         }
 324   XSETBUFFER (last_thing_searched, current_buffer);
 325   immediate_quit = 0;
 326   return val;
 327 }
 328
/* Lisp entry point `looking-at': looking_at_1 without POSIX
   backtracking (POSIX argument 0).  */
DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
  "Return t if text after point matches regular expression REGEXP.\n\
This function modifies the match data that `match-beginning',\n\
`match-end' and `match-data' access; save and restore the match\n\
data if you want to preserve them.")
  (regexp)
     Lisp_Object regexp;
{
  return looking_at_1 (regexp, 0);
}
 339
/* Lisp entry point `posix-looking-at': looking_at_1 with full POSIX
   backtracking (POSIX argument 1).  */
DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
  "Return t if text after point matches regular expression REGEXP.\n\
Find the longest match, in accord with Posix regular expression rules.\n\
This function modifies the match data that `match-beginning',\n\
`match-end' and `match-data' access; save and restore the match\n\
data if you want to preserve them.")
  (regexp)
     Lisp_Object regexp;
{
  return looking_at_1 (regexp, 1);
}
 351 \f
 352 static Lisp_Object
 353 string_match_1 (regexp, string, start, posix)
 354      Lisp_Object regexp, string, start;
 355      int posix;
 356 {
 357   int val;
 358   struct re_pattern_buffer *bufp;
 359   int pos, pos_byte;
 360   int i;
 361
 362   if (running_asynch_code)
 363     save_search_regs ();
 364
 365   CHECK_STRING (regexp, 0);
 366   CHECK_STRING (string, 1);
 367
 368   if (NILP (start))
 369     pos = 0, pos_byte = 0;
 370   else
 371     {
 372       int len = XSTRING (string)->size;
 373
 374       CHECK_NUMBER (start, 2);
 375       pos = XINT (start);
 376       if (pos < 0 && -pos <= len)
 377         pos = len + pos;
 378       else if (0 > pos || pos > len)
 379         args_out_of_range (string, start);
 380       pos_byte = string_char_to_byte (string, pos);
 381     }
 382
 383   bufp = compile_pattern (regexp, &search_regs,
 384                           (!NILP (current_buffer->case_fold_search)
 385                            ? DOWNCASE_TABLE : Qnil),
 386                           posix,
 387                           STRING_MULTIBYTE (string));
 388   immediate_quit = 1;
 389   re_match_object = string;
 390
 391   val = re_search (bufp, (char *) XSTRING (string)->data,
 392                    STRING_BYTES (XSTRING (string)), pos_byte,
 393                    STRING_BYTES (XSTRING (string)) - pos_byte,
 394                    &search_regs);
 395   immediate_quit = 0;
 396   last_thing_searched = Qt;
 397   if (val == -2)
 398     matcher_overflow ();
 399   if (val < 0) return Qnil;
 400
 401   for (i = 0; i < search_regs.num_regs; i++)
 402     if (search_regs.start[i] >= 0)
 403       {
 404         search_regs.start[i]
 405           = string_byte_to_char (string, search_regs.start[i]);
 406         search_regs.end[i]
 407           = string_byte_to_char (string, search_regs.end[i]);
 408       }
 409
 410   return make_number (string_byte_to_char (string, val));
 411 }
 412
/* Lisp entry point `string-match': string_match_1 without POSIX
   backtracking (POSIX argument 0).  */
DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
  "Return index of start of first match for REGEXP in STRING, or nil.\n\
Case is ignored if `case-fold-search' is non-nil in the current buffer.\n\
If third arg START is non-nil, start search at that index in STRING.\n\
For index of first char beyond the match, do (match-end 0).\n\
`match-end' and `match-beginning' also give indices of substrings\n\
matched by parenthesis constructs in the pattern.")
  (regexp, string, start)
     Lisp_Object regexp, string, start;
{
  return string_match_1 (regexp, string, start, 0);
}
 425
/* Lisp entry point `posix-string-match': string_match_1 with full
   POSIX backtracking (POSIX argument 1).  */
DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
  "Return index of start of first match for REGEXP in STRING, or nil.\n\
Find the longest match, in accord with Posix regular expression rules.\n\
Case is ignored if `case-fold-search' is non-nil in the current buffer.\n\
If third arg START is non-nil, start search at that index in STRING.\n\
For index of first char beyond the match, do (match-end 0).\n\
`match-end' and `match-beginning' also give indices of substrings\n\
matched by parenthesis constructs in the pattern.")
  (regexp, string, start)
     Lisp_Object regexp, string, start;
{
  return string_match_1 (regexp, string, start, 1);
}
 439
 440 /* Match REGEXP against STRING, searching all of STRING,
 441    and return the index of the match, or negative on failure.
 442    This does not clobber the match data.  */
 443
 444 int
 445 fast_string_match (regexp, string)
 446      Lisp_Object regexp, string;
 447 {
 448   int val;
 449   struct re_pattern_buffer *bufp;
 450
 451   bufp = compile_pattern (regexp, 0, Qnil,
 452                           0, STRING_MULTIBYTE (string));
 453   immediate_quit = 1;
 454   re_match_object = string;
 455
 456   val = re_search (bufp, (char *) XSTRING (string)->data,
 457                    STRING_BYTES (XSTRING (string)), 0,
 458                    STRING_BYTES (XSTRING (string)), 0);
 459   immediate_quit = 0;
 460   return val;
 461 }
 462
 463 /* Match REGEXP against STRING, searching all of STRING ignoring case,
 464    and return the index of the match, or negative on failure.
 465    This does not clobber the match data.
 466    We assume that STRING contains single-byte characters.  */
 467
 468 extern Lisp_Object Vascii_downcase_table;
 469
 470 int
 471 fast_c_string_match_ignore_case (regexp, string)
 472      Lisp_Object regexp;
 473      char *string;
 474 {
 475   int val;
 476   struct re_pattern_buffer *bufp;
 477   int len = strlen (string);
 478
 479   regexp = string_make_unibyte (regexp);
 480   re_match_object = Qt;
 481   bufp = compile_pattern (regexp, 0,
 482                           Vascii_downcase_table, 0,
 483                           0);
 484   immediate_quit = 1;
 485   val = re_search (bufp, string, len, 0, len, 0);
 486   immediate_quit = 0;
 487   return val;
 488 }
 489 \f
 490 /* The newline cache: remembering which sections of text have no newlines.  */
 491
 492 /* If the user has requested newline caching, make sure it's on.
 493    Otherwise, make sure it's off.
 494    This is our cheezy way of associating an action with the change of
 495    state of a buffer-local variable.  */
 496 static void
 497 newline_cache_on_off (buf)
 498      struct buffer *buf;
 499 {
 500   if (NILP (buf->cache_long_line_scans))
 501     {
 502       /* It should be off.  */
 503       if (buf->newline_cache)
 504         {
 505           free_region_cache (buf->newline_cache);
 506           buf->newline_cache = 0;
 507         }
 508     }
 509   else
 510     {
 511       /* It should be on.  */
 512       if (buf->newline_cache == 0)
 513         buf->newline_cache = new_region_cache ();
 514     }
 515 }
 516
 517 \f
/* Search for COUNT instances of the character TARGET between START and END.

   START, END and the return value are character positions; the scan
   itself runs over byte positions and converts at the boundaries.

   If COUNT is positive, search forwards; END must be >= START.
   If COUNT is negative, search backwards for the -COUNTth instance;
      END must be <= START.
   If COUNT is zero, do anything you please; run rogue, for all I care.

   If END is zero, use BEGV or ZV instead, as appropriate for the
   direction indicated by COUNT.

   If we find COUNT instances, set *SHORTAGE to zero, and return the
   position after the COUNTth match.  Note that for reverse motion
   this is not the same as the usual convention for Emacs motion commands.

   If we don't find COUNT instances before reaching END, set *SHORTAGE
   to the number of TARGETs left unfound, and return END.

   SHORTAGE may be a null pointer, in which case nothing is stored.

   If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
   except when inside redisplay.  immediate_quit is always reset to 0
   before returning.  */

int
scan_buffer (target, start, end, count, shortage, allow_quit)
     register int target;
     int start, end;
     int count;
     int *shortage;
     int allow_quit;
{
  struct region_cache *newline_cache;
  int direction;

  /* Work out the direction and supply the default END for it.  A COUNT
     of zero is treated like a backward search.  */
  if (count > 0)
    {
      direction = 1;
      if (! end) end = ZV;
    }
  else
    {
      direction = -1;
      if (! end) end = BEGV;
    }

  /* Create or free the buffer's newline cache as its settings require,
     then pick up whatever is there now (possibly none).  */
  newline_cache_on_off (current_buffer);
  newline_cache = current_buffer->newline_cache;

  if (shortage != 0)
    *shortage = 0;

  immediate_quit = allow_quit;

  if (count > 0)
    while (start != end)
      {
        /* Our innermost scanning loop is very simple; it doesn't know
           about gaps, buffer ends, or the newline cache.  ceiling is
           the position of the last character before the next such
           obstacle --- the last character the dumb search loop should
           examine.  */
        int ceiling_byte = CHAR_TO_BYTE (end) - 1;
        int start_byte = CHAR_TO_BYTE (start);
        int tem;

        /* If we're looking for a newline, consult the newline cache
           to see where we can avoid some scanning.  */
        if (target == '\n' && newline_cache)
          {
            int next_change;
            /* immediate_quit is switched off around the cache queries
               and restored afterwards.  */
            immediate_quit = 0;
            while (region_cache_forward
                   (current_buffer, newline_cache, start_byte, &next_change))
              start_byte = next_change;
            immediate_quit = allow_quit;

            /* START should never be after END.  */
            if (start_byte > ceiling_byte)
              start_byte = ceiling_byte;

            /* Now the text after start is an unknown region, and
               next_change is the position of the next known region. */
            ceiling_byte = min (next_change - 1, ceiling_byte);
          }

        /* The dumb loop can only scan text stored in contiguous
           bytes. BUFFER_CEILING_OF returns the last character
           position that is contiguous, so the ceiling is the
           position after that.  */
        tem = BUFFER_CEILING_OF (start_byte);
        ceiling_byte = min (tem, ceiling_byte);

        {
          /* The termination address of the dumb loop.  */
          register unsigned char *ceiling_addr
            = BYTE_POS_ADDR (ceiling_byte) + 1;
          register unsigned char *cursor
            = BYTE_POS_ADDR (start_byte);
          /* BASE is the address that corresponds to START_BYTE, so
             start_byte + (p - base) is the byte position of address P.  */
          unsigned char *base = cursor;

          while (cursor < ceiling_addr)
            {
              unsigned char *scan_start = cursor;

              /* The dumb loop.  */
              while (*cursor != target && ++cursor < ceiling_addr)
                ;

              /* If we're looking for newlines, cache the fact that
                 the region from start to cursor is free of them. */
              if (target == '\n' && newline_cache)
                know_region_cache (current_buffer, newline_cache,
                                   start_byte + scan_start - base,
                                   start_byte + cursor - base);

              /* Did we find the target character?  */
              if (cursor < ceiling_addr)
                {
                  if (--count == 0)
                    {
                      immediate_quit = 0;
                      /* The + 1 yields the position after the match.  */
                      return BYTE_TO_CHAR (start_byte + cursor - base + 1);
                    }
                  cursor++;
                }
            }

          /* This stretch is exhausted; resume from where the cursor
             stopped.  */
          start = BYTE_TO_CHAR (start_byte + cursor - base);
        }
      }
  else
    while (start > end)
      {
        /* The last character to check before the next obstacle.  */
        int ceiling_byte = CHAR_TO_BYTE (end);
        int start_byte = CHAR_TO_BYTE (start);
        int tem;

        /* Consult the newline cache, if appropriate.  */
        if (target == '\n' && newline_cache)
          {
            int next_change;
            immediate_quit = 0;
            while (region_cache_backward
                   (current_buffer, newline_cache, start_byte, &next_change))
              start_byte = next_change;
            immediate_quit = allow_quit;

            /* Start should never be at or before end.  */
            if (start_byte <= ceiling_byte)
              start_byte = ceiling_byte + 1;

            /* Now the text before start is an unknown region, and
               next_change is the position of the next known region. */
            ceiling_byte = max (next_change, ceiling_byte);
          }

        /* Stop scanning before the gap.  */
        tem = BUFFER_FLOOR_OF (start_byte - 1);
        ceiling_byte = max (tem, ceiling_byte);

        {
          /* The termination address of the dumb loop.  */
          register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
          register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
          /* Here BASE is the address of the byte just before START_BYTE,
             i.e. the first byte examined when scanning backwards.  */
          unsigned char *base = cursor;

          while (cursor >= ceiling_addr)
            {
              unsigned char *scan_start = cursor;

              while (*cursor != target && --cursor >= ceiling_addr)
                ;

              /* If we're looking for newlines, cache the fact that
                 the region from after the cursor to start is free of them.  */
              if (target == '\n' && newline_cache)
                know_region_cache (current_buffer, newline_cache,
                                   start_byte + cursor - base,
                                   start_byte + scan_start - base);

              /* Did we find the target character?  */
              if (cursor >= ceiling_addr)
                {
                  /* `>= 0' rather than `== 0' so that a COUNT of zero
                     also stops at the first match.  */
                  if (++count >= 0)
                    {
                      immediate_quit = 0;
                      return BYTE_TO_CHAR (start_byte + cursor - base);
                    }
                  cursor--;
                }
            }

          start = BYTE_TO_CHAR (start_byte + cursor - base);
        }
      }

  /* Ran into END before finding COUNT instances.  COUNT now holds the
     signed number still missing; multiplying by DIRECTION makes it
     non-negative.  */
  immediate_quit = 0;
  if (shortage != 0)
    *shortage = count * direction;
  return start;
}
 717 \f
 718 /* Search for COUNT instances of a line boundary, which means either a
 719    newline or (if selective display enabled) a carriage return.
 720    Start at START.  If COUNT is negative, search backwards.
 721
 722    We report the resulting position by calling TEMP_SET_PT_BOTH.
 723
 724    If we find COUNT instances. we position after (always after,
 725    even if scanning backwards) the COUNTth match, and return 0.
 726
 727    If we don't find COUNT instances before reaching the end of the
 728    buffer (or the beginning, if scanning backwards), we return
 729    the number of line boundaries left unfound, and position at
 730    the limit we bumped up against.
 731
 732    If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
 733    except in special cases.  */
 734
 735 int
 736 scan_newline (start, start_byte, limit, limit_byte, count, allow_quit)
 737      int start, start_byte;
 738      int limit, limit_byte;
 739      register int count;
 740      int allow_quit;
 741 {
 742   int direction = ((count > 0) ? 1 : -1);
 743
 744   register unsigned char *cursor;
 745   unsigned char *base;
 746
 747   register int ceiling;
 748   register unsigned char *ceiling_addr;
 749
 750   int old_immediate_quit = immediate_quit;
 751
 752   /* If we are not in selective display mode,
 753      check only for newlines.  */
 754   int selective_display = (!NILP (current_buffer->selective_display)
 755                            && !INTEGERP (current_buffer->selective_display));
 756
 757   /* The code that follows is like scan_buffer
 758      but checks for either newline or carriage return.  */
 759
 760   if (allow_quit)
 761     immediate_quit++;
 762
 763   start_byte = CHAR_TO_BYTE (start);
 764
 765   if (count > 0)
 766     {
 767       while (start_byte < limit_byte)
 768         {
 769           ceiling =  BUFFER_CEILING_OF (start_byte);
 770           ceiling = min (limit_byte - 1, ceiling);
 771           ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
 772           base = (cursor = BYTE_POS_ADDR (start_byte));
 773           while (1)
 774             {
 775               while (*cursor != '\n' && ++cursor != ceiling_addr)
 776                 ;
 777
 778               if (cursor != ceiling_addr)
 779                 {
 780                   if (--count == 0)
 781                     {
 782                       immediate_quit = old_immediate_quit;
 783                       start_byte = start_byte + cursor - base + 1;
 784                       start = BYTE_TO_CHAR (start_byte);
 785                       TEMP_SET_PT_BOTH (start, start_byte);
 786                       return 0;
 787                     }
 788                   else
 789                     if (++cursor == ceiling_addr)
 790                       break;
 791                 }
 792               else
 793                 break;
 794             }
 795           start_byte += cursor - base;
 796         }
 797     }
 798   else
 799     {
 800       while (start_byte > limit_byte)
 801         {
 802           ceiling = BUFFER_FLOOR_OF (start_byte - 1);
 803           ceiling = max (limit_byte, ceiling);
 804           ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
 805           base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
 806           while (1)
 807             {
 808               while (--cursor != ceiling_addr && *cursor != '\n')
 809                 ;
 810
 811               if (cursor != ceiling_addr)
 812                 {
 813                   if (++count == 0)
 814                     {
 815                       immediate_quit = old_immediate_quit;
 816                       /* Return the position AFTER the match we found.  */
 817                       start_byte = start_byte + cursor - base + 1;
 818                       start = BYTE_TO_CHAR (start_byte);
 819                       TEMP_SET_PT_BOTH (start, start_byte);
 820                       return 0;
 821                     }
 822                 }
 823               else
 824                 break;
 825             }
 826           /* Here we add 1 to compensate for the last decrement
 827              of CURSOR, which took it past the valid range.  */
 828           start_byte += cursor - base + 1;
 829         }
 830     }
 831
 832   TEMP_SET_PT_BOTH (limit, limit_byte);
 833   immediate_quit = old_immediate_quit;
 834
 835   return count * direction;
 836 }
 837
/* Scan from FROM for CNT newlines with scan_buffer, using the default
   end of scan (END argument 0), recording no shortage, and with
   quitting disabled (ALLOW_QUIT 0).  Return scan_buffer's result.  */
int
find_next_newline_no_quit (from, cnt)
     register int from, cnt;
{
  return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0);
}
 844
 845 /* Like find_next_newline, but returns position before the newline,
 846    not after, and only search up to TO.  This isn't just
 847    find_next_newline (...)-1, because you might hit TO.  */
 848
 849 int
 850 find_before_next_newline (from, to, cnt)
 851      int from, to, cnt;
 852 {
 853   int shortage;
 854   int pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);
 855
 856   if (shortage == 0)
 857     pos--;
 858
 859   return pos;
 860 }
 861 \f
 862 /* Subroutines of Lisp buffer search functions. */
 863
 864 static Lisp_Object
 865 search_command (string, bound, noerror, count, direction, RE, posix)
 866      Lisp_Object string, bound, noerror, count;
 867      int direction;
 868      int RE;
 869      int posix;
 870 {
 871   register int np;
 872   int lim, lim_byte;
 873   int n = direction;
 874
 875   if (!NILP (count))
 876     {
 877       CHECK_NUMBER (count, 3);
 878       n *= XINT (count);
 879     }
 880
 881   CHECK_STRING (string, 0);
 882   if (NILP (bound))
 883     {
 884       if (n > 0)
 885         lim = ZV, lim_byte = ZV_BYTE;
 886       else
 887         lim = BEGV, lim_byte = BEGV_BYTE;
 888     }
 889   else
 890     {
 891       CHECK_NUMBER_COERCE_MARKER (bound, 1);
 892       lim = XINT (bound);
 893       if (n > 0 ? lim < PT : lim > PT)
 894         error ("Invalid search bound (wrong side of point)");
 895       if (lim > ZV)
 896         lim = ZV, lim_byte = ZV_BYTE;
 897       else if (lim < BEGV)
 898         lim = BEGV, lim_byte = BEGV_BYTE;
 899       else
 900         lim_byte = CHAR_TO_BYTE (lim);
 901     }
 902
 903   np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
 904                       (!NILP (current_buffer->case_fold_search)
 905                        ? current_buffer->case_canon_table
 906                        : Qnil),
 907                       (!NILP (current_buffer->case_fold_search)
 908                        ? current_buffer->case_eqv_table
 909                        : Qnil),
 910                       posix);
 911   if (np <= 0)
 912     {
 913       if (NILP (noerror))
 914         return signal_failure (string);
 915       if (!EQ (noerror, Qt))
 916         {
 917           if (lim < BEGV || lim > ZV)
 918             abort ();
 919           SET_PT_BOTH (lim, lim_byte);
 920           return Qnil;
 921 #if 0 /* This would be clean, but maybe programs depend on
 922          a value of nil here.  */
 923           np = lim;
 924 #endif
 925         }
 926       else
 927         return Qnil;
 928     }
 929
 930   if (np < BEGV || np > ZV)
 931     abort ();
 932
 933   SET_PT (np);
 934
 935   return make_number (np);
 936 }
 937 \f
 938 /* Return 1 if REGEXP it matches just one constant string.  */
 939
 940 static int
 941 trivial_regexp_p (regexp)
 942      Lisp_Object regexp;
 943 {
 944   int len = STRING_BYTES (XSTRING (regexp));
 945   unsigned char *s = XSTRING (regexp)->data;
 946   unsigned char c;
 947   while (--len >= 0)
 948     {
 949       switch (*s++)
 950         {
 951         case '.': case '*': case '+': case '?': case '[': case '^': case '$':
 952           return 0;
 953         case '\\':
 954           if (--len < 0)
 955             return 0;
 956           switch (*s++)
 957             {
 958             case '|': case '(': case ')': case '`': case '\'': case 'b':
 959             case 'B': case '<': case '>': case 'w': case 'W': case 's':
 960             case 'S': case '=':
 961             case 'c': case 'C': /* for categoryspec and notcategoryspec */
 962             case '1': case '2': case '3': case '4': case '5':
 963             case '6': case '7': case '8': case '9':
 964               return 0;
 965             }
 966         }
 967     }
 968   return 1;
 969 }
 970
 971 /* Search for the n'th occurrence of STRING in the current buffer,
 972    starting at position POS and stopping at position LIM,
 973    treating STRING as a literal string if RE is false or as
 974    a regular expression if RE is true.
 975
 976    If N is positive, searching is forward and LIM must be greater than POS.
 977    If N is negative, searching is backward and LIM must be less than POS.
 978
 979    Returns -x if x occurrences remain to be found (x > 0),
 980    or else the position at the beginning of the Nth occurrence
 981    (if searching backward) or the end (if searching forward).
 982
 983    POSIX is nonzero if we want full backtracking (POSIX style)
 984    for this pattern.  0 means backtrack only enough to get a valid match.  */
 985
 986 #define TRANSLATE(out, trt, d)                  \
 987 do                                              \
 988   {                                             \
 989     if (! NILP (trt))                           \
 990       {                                         \
 991         Lisp_Object temp;                       \
 992         temp = Faref (trt, make_number (d));    \
 993         if (INTEGERP (temp))                    \
 994           out = XINT (temp);                    \
 995         else                                    \
 996           out = d;                              \
 997       }                                         \
 998     else                                        \
 999       out = d;                                  \
1000   }                                             \
1001 while (0)
1002
1003 static int
1004 search_buffer (string, pos, pos_byte, lim, lim_byte, n,
1005                RE, trt, inverse_trt, posix)
1006      Lisp_Object string;
1007      int pos;
1008      int pos_byte;
1009      int lim;
1010      int lim_byte;
1011      int n;
1012      int RE;
1013      Lisp_Object trt;
1014      Lisp_Object inverse_trt;
1015      int posix;
1016 {
1017   int len = XSTRING (string)->size;
1018   int len_byte = STRING_BYTES (XSTRING (string));
1019   register int i;
1020
1021   if (running_asynch_code)
1022     save_search_regs ();
1023
1024   /* Searching 0 times means don't move.  */
1025   /* Null string is found at starting position.  */
1026   if (len == 0 || n == 0)
1027     {
1028       set_search_regs (pos, 0);
1029       return pos;
1030     }
1031
1032   if (RE && !trivial_regexp_p (string))
1033     {
1034       unsigned char *p1, *p2;
1035       int s1, s2;
1036       struct re_pattern_buffer *bufp;
1037
1038       bufp = compile_pattern (string, &search_regs, trt, posix,
1039                               !NILP (current_buffer->enable_multibyte_characters));
1040
1041       immediate_quit = 1;       /* Quit immediately if user types ^G,
1042                                    because letting this function finish
1043                                    can take too long. */
1044       QUIT;                     /* Do a pending quit right away,
1045                                    to avoid paradoxical behavior */
1046       /* Get pointers and sizes of the two strings
1047          that make up the visible portion of the buffer. */
1048
1049       p1 = BEGV_ADDR;
1050       s1 = GPT_BYTE - BEGV_BYTE;
1051       p2 = GAP_END_ADDR;
1052       s2 = ZV_BYTE - GPT_BYTE;
1053       if (s1 < 0)
1054         {
1055           p2 = p1;
1056           s2 = ZV_BYTE - BEGV_BYTE;
1057           s1 = 0;
1058         }
1059       if (s2 < 0)
1060         {
1061           s1 = ZV_BYTE - BEGV_BYTE;
1062           s2 = 0;
1063         }
1064       re_match_object = Qnil;
1065
1066       while (n < 0)
1067         {
1068           int val;
1069           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1070                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1071                              &search_regs,
1072                              /* Don't allow match past current point */
1073                              pos_byte - BEGV_BYTE);
1074           if (val == -2)
1075             {
1076               matcher_overflow ();
1077             }
1078           if (val >= 0)
1079             {
1080               pos_byte = search_regs.start[0] + BEGV_BYTE;
1081               for (i = 0; i < search_regs.num_regs; i++)
1082                 if (search_regs.start[i] >= 0)
1083                   {
1084                     search_regs.start[i]
1085                       = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1086                     search_regs.end[i]
1087                       = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1088                   }
1089               XSETBUFFER (last_thing_searched, current_buffer);
1090               /* Set pos to the new position. */
1091               pos = search_regs.start[0];
1092             }
1093           else
1094             {
1095               immediate_quit = 0;
1096               return (n);
1097             }
1098           n++;
1099         }
1100       while (n > 0)
1101         {
1102           int val;
1103           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1104                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1105                              &search_regs,
1106                              lim_byte - BEGV_BYTE);
1107           if (val == -2)
1108             {
1109               matcher_overflow ();
1110             }
1111           if (val >= 0)
1112             {
1113               pos_byte = search_regs.end[0] + BEGV_BYTE;
1114               for (i = 0; i < search_regs.num_regs; i++)
1115                 if (search_regs.start[i] >= 0)
1116                   {
1117                     search_regs.start[i]
1118                       = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1119                     search_regs.end[i]
1120                       = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1121                   }
1122               XSETBUFFER (last_thing_searched, current_buffer);
1123               pos = search_regs.end[0];
1124             }
1125           else
1126             {
1127               immediate_quit = 0;
1128               return (0 - n);
1129             }
1130           n--;
1131         }
1132       immediate_quit = 0;
1133       return (pos);
1134     }
1135   else                          /* non-RE case */
1136     {
1137       unsigned char *raw_pattern, *pat;
1138       int raw_pattern_size;
1139       int raw_pattern_size_byte;
1140       unsigned char *patbuf;
1141       int multibyte = !NILP (current_buffer->enable_multibyte_characters);
1142       unsigned char *base_pat = XSTRING (string)->data;
1143       int charset_base = -1;
1144       int boyer_moore_ok = 1;
1145
1146       /* MULTIBYTE says whether the text to be searched is multibyte.
1147          We must convert PATTERN to match that, or we will not really
1148          find things right.  */
1149
1150       if (multibyte == STRING_MULTIBYTE (string))
1151         {
1152           raw_pattern = (unsigned char *) XSTRING (string)->data;
1153           raw_pattern_size = XSTRING (string)->size;
1154           raw_pattern_size_byte = STRING_BYTES (XSTRING (string));
1155         }
1156       else if (multibyte)
1157         {
1158           raw_pattern_size = XSTRING (string)->size;
1159           raw_pattern_size_byte
1160             = count_size_as_multibyte (XSTRING (string)->data,
1161                                        raw_pattern_size);
1162           raw_pattern = (unsigned char *) alloca (raw_pattern_size_byte + 1);
1163           copy_text (XSTRING (string)->data, raw_pattern,
1164                      XSTRING (string)->size, 0, 1);
1165         }
1166       else
1167         {
1168           /* Converting multibyte to single-byte.
1169
1170              ??? Perhaps this conversion should be done in a special way
1171              by subtracting nonascii-insert-offset from each non-ASCII char,
1172              so that only the multibyte chars which really correspond to
1173              the chosen single-byte character set can possibly match.  */
1174           raw_pattern_size = XSTRING (string)->size;
1175           raw_pattern_size_byte = XSTRING (string)->size;
1176           raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
1177           copy_text (XSTRING (string)->data, raw_pattern,
1178                      STRING_BYTES (XSTRING (string)), 1, 0);
1179         }
1180
1181       /* Copy and optionally translate the pattern.  */
1182       len = raw_pattern_size;
1183       len_byte = raw_pattern_size_byte;
1184       patbuf = (unsigned char *) alloca (len_byte);
1185       pat = patbuf;
1186       base_pat = raw_pattern;
1187       if (multibyte)
1188         {
1189           while (--len >= 0)
1190             {
1191               unsigned char workbuf[4], *str;
1192               int c, translated, inverse;
1193               int in_charlen, charlen;
1194
1195               /* If we got here and the RE flag is set, it's because we're
1196                  dealing with a regexp known to be trivial, so the backslash
1197                  just quotes the next character.  */
1198               if (RE && *base_pat == '\\')
1199                 {
1200                   len--;
1201                   len_byte--;
1202                   base_pat++;
1203                 }
1204
1205               c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen);
1206
1207               /* Translate the character, if requested.  */
1208               TRANSLATE (translated, trt, c);
1209               /* If translation changed the byte-length, go back
1210                  to the original character.  */
1211               charlen = CHAR_STRING (translated, workbuf, str);
1212               if (in_charlen != charlen)
1213                 {
1214                   translated = c;
1215                   charlen = CHAR_STRING (c, workbuf, str);
1216                 }
1217
1218               /* If we are searching for something strange,
1219                  an invalid multibyte code, don't use boyer-moore.  */
1220               if (! ASCII_BYTE_P (translated)
1221                   && (charlen == 1 /* 8bit code */
1222                       || charlen != in_charlen /* invalid multibyte code */
1223                       ))
1224                 boyer_moore_ok = 0;
1225
1226               TRANSLATE (inverse, inverse_trt, c);
1227
1228               /* Did this char actually get translated?
1229                  Would any other char get translated into it?  */
1230               if (translated != c || inverse != c)
1231                 {
1232                   /* Keep track of which character set row
1233                      contains the characters that need translation.  */
1234                   int charset_base_code = c & ~CHAR_FIELD3_MASK;
1235                   if (charset_base == -1)
1236                     charset_base = charset_base_code;
1237                   else if (charset_base != charset_base_code)
1238                     /* If two different rows appear, needing translation,
1239                        then we cannot use boyer_moore search.  */
1240                     boyer_moore_ok = 0;
1241                     /* ??? Handa: this must do boyer_moore_ok = 0
1242                        if c is a composite character.  */
1243                 }
1244
1245               /* Store this character into the translated pattern.  */
1246               bcopy (str, pat, charlen);
1247               pat += charlen;
1248               base_pat += in_charlen;
1249               len_byte -= in_charlen;
1250             }
1251         }
1252       else
1253         {
1254           /* Unibyte buffer.  */
1255           charset_base = 0;
1256           while (--len >= 0)
1257             {
1258               int c, translated;
1259
1260               /* If we got here and the RE flag is set, it's because we're
1261                  dealing with a regexp known to be trivial, so the backslash
1262                  just quotes the next character.  */
1263               if (RE && *base_pat == '\\')
1264                 {
1265                   len--;
1266                   base_pat++;
1267                 }
1268               c = *base_pat++;
1269               TRANSLATE (translated, trt, c);
1270               *pat++ = translated;
1271             }
1272         }
1273
1274       len_byte = pat - patbuf;
1275       len = raw_pattern_size;
1276       pat = base_pat = patbuf;
1277
1278       if (boyer_moore_ok)
1279         return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
1280                             pos, pos_byte, lim, lim_byte,
1281                             charset_base);
1282       else
1283         return simple_search (n, pat, len, len_byte, trt,
1284                               pos, pos_byte, lim, lim_byte);
1285     }
1286 }
1287 \f
1288 /* Do a simple string search N times for the string PAT,
1289    whose length is LEN/LEN_BYTE,
1290    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1291    TRT is the translation table.
1292
1293    Return the character position where the match is found.
1294    Otherwise, if M matches remained to be found, return -M.
1295
1296    This kind of search works regardless of what is in PAT and
1297    regardless of what is in TRT.  It is used in cases where
1298    boyer_moore cannot work.  */
1299
1300 static int
1301 simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
1302      int n;
1303      unsigned char *pat;
1304      int len, len_byte;
1305      Lisp_Object trt;
1306      int pos, pos_byte;
1307      int lim, lim_byte;
1308 {
1309   int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1310   int forward = n > 0;
1311
1312   if (lim > pos && multibyte)
1313     while (n > 0)
1314       {
1315         while (1)
1316           {
1317             /* Try matching at position POS.  */
1318             int this_pos = pos;
1319             int this_pos_byte = pos_byte;
1320             int this_len = len;
1321             int this_len_byte = len_byte;
1322             unsigned char *p = pat;
1323             if (pos + len > lim)
1324               goto stop;
1325
1326             while (this_len > 0)
1327               {
1328                 int charlen, buf_charlen;
1329                 int pat_ch, buf_ch;
1330
1331                 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1332                 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1333                                                  ZV_BYTE - this_pos_byte,
1334                                                  buf_charlen);
1335                 TRANSLATE (buf_ch, trt, buf_ch);
1336
1337                 if (buf_ch != pat_ch)
1338                   break;
1339
1340                 this_len_byte -= charlen;
1341                 this_len--;
1342                 p += charlen;
1343
1344                 this_pos_byte += buf_charlen;
1345                 this_pos++;
1346               }
1347
1348             if (this_len == 0)
1349               {
1350                 pos += len;
1351                 pos_byte += len_byte;
1352                 break;
1353               }
1354
1355             INC_BOTH (pos, pos_byte);
1356           }
1357
1358         n--;
1359       }
1360   else if (lim > pos)
1361     while (n > 0)
1362       {
1363         while (1)
1364           {
1365             /* Try matching at position POS.  */
1366             int this_pos = pos;
1367             int this_len = len;
1368             unsigned char *p = pat;
1369
1370             if (pos + len > lim)
1371               goto stop;
1372
1373             while (this_len > 0)
1374               {
1375                 int pat_ch = *p++;
1376                 int buf_ch = FETCH_BYTE (this_pos);
1377                 TRANSLATE (buf_ch, trt, buf_ch);
1378
1379                 if (buf_ch != pat_ch)
1380                   break;
1381
1382                 this_len--;
1383                 this_pos++;
1384               }
1385
1386             if (this_len == 0)
1387               {
1388                 pos += len;
1389                 break;
1390               }
1391
1392             pos++;
1393           }
1394
1395         n--;
1396       }
1397   /* Backwards search.  */
1398   else if (lim < pos && multibyte)
1399     while (n < 0)
1400       {
1401         while (1)
1402           {
1403             /* Try matching at position POS.  */
1404             int this_pos = pos - len;
1405             int this_pos_byte = pos_byte - len_byte;
1406             int this_len = len;
1407             int this_len_byte = len_byte;
1408             unsigned char *p = pat;
1409
1410             if (pos - len < lim)
1411               goto stop;
1412
1413             while (this_len > 0)
1414               {
1415                 int charlen, buf_charlen;
1416                 int pat_ch, buf_ch;
1417
1418                 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1419                 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1420                                                  ZV_BYTE - this_pos_byte,
1421                                                  buf_charlen);
1422                 TRANSLATE (buf_ch, trt, buf_ch);
1423
1424                 if (buf_ch != pat_ch)
1425                   break;
1426
1427                 this_len_byte -= charlen;
1428                 this_len--;
1429                 p += charlen;
1430                 this_pos_byte += buf_charlen;
1431                 this_pos++;
1432               }
1433
1434             if (this_len == 0)
1435               {
1436                 pos -= len;
1437                 pos_byte -= len_byte;
1438                 break;
1439               }
1440
1441             DEC_BOTH (pos, pos_byte);
1442           }
1443
1444         n++;
1445       }
1446   else if (lim < pos)
1447     while (n < 0)
1448       {
1449         while (1)
1450           {
1451             /* Try matching at position POS.  */
1452             int this_pos = pos - len;
1453             int this_len = len;
1454             unsigned char *p = pat;
1455
1456             if (pos - len < lim)
1457               goto stop;
1458
1459             while (this_len > 0)
1460               {
1461                 int pat_ch = *p++;
1462                 int buf_ch = FETCH_BYTE (this_pos);
1463                 TRANSLATE (buf_ch, trt, buf_ch);
1464
1465                 if (buf_ch != pat_ch)
1466                   break;
1467                 this_len--;
1468                 this_pos++;
1469               }
1470
1471             if (this_len == 0)
1472               {
1473                 pos -= len;
1474                 break;
1475               }
1476
1477             pos--;
1478           }
1479
1480         n++;
1481       }
1482
1483  stop:
1484   if (n == 0)
1485     {
1486       if (forward)
1487         set_search_regs ((multibyte ? pos_byte : pos) - len_byte, len_byte);
1488       else
1489         set_search_regs (multibyte ? pos_byte : pos, len_byte);
1490
1491       return pos;
1492     }
1493   else if (n > 0)
1494     return -n;
1495   else
1496     return n;
1497 }
1498 \f
1499 /* Do Boyer-Moore search N times for the string PAT,
1500    whose length is LEN/LEN_BYTE,
1501    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1502    DIRECTION says which direction we search in.
1503    TRT and INVERSE_TRT are translation tables.
1504
1505    This kind of search works if all the characters in PAT that have
1506    nontrivial translation are the same aside from the last byte.  This
1507    makes it possible to translate just the last byte of a character,
1508    and do so after just a simple test of the context.
1509
1510    If that criterion is not satisfied, do not call this function.  */
1511
1512 static int
1513 boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
1514              pos, pos_byte, lim, lim_byte, charset_base)
1515      int n;
1516      unsigned char *base_pat;
1517      int len, len_byte;
1518      Lisp_Object trt;
1519      Lisp_Object inverse_trt;
1520      int pos, pos_byte;
1521      int lim, lim_byte;
1522      int charset_base;
1523 {
1524   int direction = ((n > 0) ? 1 : -1);
1525   register int dirlen;
1526   int infinity, limit, k, stride_for_teases;
1527   register int *BM_tab;
1528   int *BM_tab_base;
1529   register unsigned char *cursor, *p_limit;
1530   register int i, j;
1531   unsigned char *pat, *pat_end;
1532   int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1533
1534   unsigned char simple_translate[0400];
1535   int translate_prev_byte;
1536   int translate_anteprev_byte;
1537
1538 #ifdef C_ALLOCA
1539   int BM_tab_space[0400];
1540   BM_tab = &BM_tab_space[0];
1541 #else
1542   BM_tab = (int *) alloca (0400 * sizeof (int));
1543 #endif
1544   /* The general approach is that we are going to maintain that we know */
1545   /* the first (closest to the present position, in whatever direction */
1546   /* we're searching) character that could possibly be the last */
1547   /* (furthest from present position) character of a valid match.  We */
1548   /* advance the state of our knowledge by looking at that character */
1549   /* and seeing whether it indeed matches the last character of the */
1550   /* pattern.  If it does, we take a closer look.  If it does not, we */
1551   /* move our pointer (to putative last characters) as far as is */
1552   /* logically possible.  This amount of movement, which I call a */
1553   /* stride, will be the length of the pattern if the actual character */
1554   /* appears nowhere in the pattern, otherwise it will be the distance */
1555   /* from the last occurrence of that character to the end of the */
1556   /* pattern. */
1557   /* As a coding trick, an enormous stride is coded into the table for */
1558   /* characters that match the last character.  This allows use of only */
1559   /* a single test, a test for having gone past the end of the */
1560   /* permissible match region, to test for both possible matches (when */
1561   /* the stride goes past the end immediately) and failure to */
1562   /* match (where you get nudged past the end one stride at a time). */
1563
1564   /* Here we make a "mickey mouse" BM table.  The stride of the search */
1565   /* is determined only by the last character of the putative match. */
1566   /* If that character does not match, we will stride the proper */
1567   /* distance to propose a match that superimposes it on the last */
1568   /* instance of a character that matches it (per trt), or misses */
1569   /* it entirely if there is none. */
1570
1571   dirlen = len_byte * direction;
1572   infinity = dirlen - (lim_byte + pos_byte + len_byte + len_byte) * direction;
1573
1574   /* Record position after the end of the pattern.  */
1575   pat_end = base_pat + len_byte;
1576   /* BASE_PAT points to a character that we start scanning from.
1577      It is the first character in a forward search,
1578      the last character in a backward search.  */
1579   if (direction < 0)
1580     base_pat = pat_end - 1;
1581
1582   BM_tab_base = BM_tab;
1583   BM_tab += 0400;
1584   j = dirlen;           /* to get it in a register */
1585   /* A character that does not appear in the pattern induces a */
1586   /* stride equal to the pattern length. */
1587   while (BM_tab_base != BM_tab)
1588     {
1589       *--BM_tab = j;
1590       *--BM_tab = j;
1591       *--BM_tab = j;
1592       *--BM_tab = j;
1593     }
1594
1595   /* We use this for translation, instead of TRT itself.
1596      We fill this in to handle the characters that actually
1597      occur in the pattern.  Others don't matter anyway!  */
1598   bzero (simple_translate, sizeof simple_translate);
1599   for (i = 0; i < 0400; i++)
1600     simple_translate[i] = i;
1601
1602   i = 0;
1603   while (i != infinity)
1604     {
1605       unsigned char *ptr = base_pat + i;
1606       i += direction;
1607       if (i == dirlen)
1608         i = infinity;
1609       if (! NILP (trt))
1610         {
1611           int ch;
1612           int untranslated;
1613           int this_translated = 1;
1614
1615           if (multibyte
1616               /* Is *PTR the last byte of a character?  */
1617               && (pat_end - ptr == 1 || CHAR_HEAD_P (ptr[1])))
1618             {
1619               unsigned char *charstart = ptr;
1620               while (! CHAR_HEAD_P (*charstart))
1621                 charstart--;
1622               untranslated = STRING_CHAR (charstart, ptr - charstart + 1);
1623               if (charset_base == (untranslated & ~CHAR_FIELD3_MASK))
1624                 {
1625                   TRANSLATE (ch, trt, untranslated);
1626                   if (! CHAR_HEAD_P (*ptr))
1627                     {
1628                       translate_prev_byte = ptr[-1];
1629                       if (! CHAR_HEAD_P (translate_prev_byte))
1630                         translate_anteprev_byte = ptr[-2];
1631                     }
1632                 }
1633               else
1634                 {
1635                   this_translated = 0;
1636                   ch = *ptr;
1637                 }
1638             }
1639           else if (!multibyte)
1640             TRANSLATE (ch, trt, *ptr);
1641           else
1642             {
1643               ch = *ptr;
1644               this_translated = 0;
1645             }
1646
1647           if (ch > 0400)
1648             j = ((unsigned char) ch) | 0200;
1649           else
1650             j = (unsigned char) ch;
1651
1652           if (i == infinity)
1653             stride_for_teases = BM_tab[j];
1654
1655           BM_tab[j] = dirlen - i;
1656           /* A translation table is accompanied by its inverse -- see */
1657           /* comment following downcase_table for details */
1658           if (this_translated)
1659             {
1660               int starting_ch = ch;
1661               int starting_j = j;
1662               while (1)
1663                 {
1664                   TRANSLATE (ch, inverse_trt, ch);
1665                   if (ch > 0400)
1666                     j = ((unsigned char) ch) | 0200;
1667                   else
1668                     j = (unsigned char) ch;
1669
1670                   /* For all the characters that map into CH,
1671                      set up simple_translate to map the last byte
1672                      into STARTING_J.  */
1673                   simple_translate[j] = starting_j;
1674                   if (ch == starting_ch)
1675                     break;
1676                   BM_tab[j] = dirlen - i;
1677                 }
1678             }
1679         }
1680       else
1681         {
1682           j = *ptr;
1683
1684           if (i == infinity)
1685             stride_for_teases = BM_tab[j];
1686           BM_tab[j] = dirlen - i;
1687         }
1688       /* stride_for_teases tells how much to stride if we get a */
1689       /* match on the far character but are subsequently */
1690       /* disappointed, by recording what the stride would have been */
1691       /* for that character if the last character had been */
1692       /* different. */
1693     }
1694   infinity = dirlen - infinity;
1695   pos_byte += dirlen - ((direction > 0) ? direction : 0);
1696   /* loop invariant - POS_BYTE points at where last char (first
1697      char if reverse) of pattern would align in a possible match.  */
1698   while (n != 0)
1699     {
1700       int tail_end;
1701       unsigned char *tail_end_ptr;
1702
1703       /* It's been reported that some (broken) compiler thinks that
1704          Boolean expressions in an arithmetic context are unsigned.
1705          Using an explicit ?1:0 prevents this.  */
1706       if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1707           < 0)
1708         return (n * (0 - direction));
1709       /* First we do the part we can by pointers (maybe nothing) */
1710       QUIT;
1711       pat = base_pat;
1712       limit = pos_byte - dirlen + direction;
1713       if (direction > 0)
1714         {
1715           limit = BUFFER_CEILING_OF (limit);
1716           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1717              can take on without hitting edge of buffer or the gap.  */
1718           limit = min (limit, pos_byte + 20000);
1719           limit = min (limit, lim_byte - 1);
1720         }
1721       else
1722         {
1723           limit = BUFFER_FLOOR_OF (limit);
1724           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1725              can take on without hitting edge of buffer or the gap.  */
1726           limit = max (limit, pos_byte - 20000);
1727           limit = max (limit, lim_byte);
1728         }
1729       tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1730       tail_end_ptr = BYTE_POS_ADDR (tail_end);
1731
1732       if ((limit - pos_byte) * direction > 20)
1733         {
1734           unsigned char *p2;
1735
1736           p_limit = BYTE_POS_ADDR (limit);
1737           p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1738           /* In this loop, pos + cursor - p2 is the surrogate for pos */
1739           while (1)             /* use one cursor setting as long as i can */
1740             {
1741               if (direction > 0) /* worth duplicating */
1742                 {
1743                   /* Use signed comparison if appropriate
1744                      to make cursor+infinity sure to be > p_limit.
1745                      Assuming that the buffer lies in a range of addresses
1746                      that are all "positive" (as ints) or all "negative",
1747                      either kind of comparison will work as long
1748                      as we don't step by infinity.  So pick the kind
1749                      that works when we do step by infinity.  */
1750                   if ((EMACS_INT) (p_limit + infinity) > (EMACS_INT) p_limit)
1751                     while ((EMACS_INT) cursor <= (EMACS_INT) p_limit)
1752                       cursor += BM_tab[*cursor];
1753                   else
1754                     while ((EMACS_UINT) cursor <= (EMACS_UINT) p_limit)
1755                       cursor += BM_tab[*cursor];
1756                 }
1757               else
1758                 {
1759                   if ((EMACS_INT) (p_limit + infinity) < (EMACS_INT) p_limit)
1760                     while ((EMACS_INT) cursor >= (EMACS_INT) p_limit)
1761                       cursor += BM_tab[*cursor];
1762                   else
1763                     while ((EMACS_UINT) cursor >= (EMACS_UINT) p_limit)
1764                       cursor += BM_tab[*cursor];
1765                 }
1766 /* If you are here, cursor is beyond the end of the searched region. */
1767 /* This can happen if you match on the far character of the pattern, */
1768 /* because the "stride" of that character is infinity, a number able */
1769 /* to throw you well beyond the end of the search.  It can also */
1770 /* happen if you fail to match within the permitted region and would */
1771 /* otherwise try a character beyond that region */
1772               if ((cursor - p_limit) * direction <= len_byte)
1773                 break;  /* a small overrun is genuine */
1774               cursor -= infinity; /* large overrun = hit */
1775               i = dirlen - direction;
1776               if (! NILP (trt))
1777                 {
1778                   while ((i -= direction) + direction != 0)
1779                     {
1780                       int ch;
1781                       cursor -= direction;
1782                       /* Translate only the last byte of a character.  */
1783                       if (! multibyte
1784                           || ((cursor == tail_end_ptr
1785                                || CHAR_HEAD_P (cursor[1]))
1786                               && (CHAR_HEAD_P (cursor[0])
1787                                   || (translate_prev_byte == cursor[-1]
1788                                       && (CHAR_HEAD_P (translate_prev_byte)
1789                                           || translate_anteprev_byte == cursor[-2])))))
1790                         ch = simple_translate[*cursor];
1791                       else
1792                         ch = *cursor;
1793                       if (pat[i] != ch)
1794                         break;
1795                     }
1796                 }
1797               else
1798                 {
1799                   while ((i -= direction) + direction != 0)
1800                     {
1801                       cursor -= direction;
1802                       if (pat[i] != *cursor)
1803                         break;
1804                     }
1805                 }
1806               cursor += dirlen - i - direction; /* fix cursor */
1807               if (i + direction == 0)
1808                 {
1809                   int position;
1810
1811                   cursor -= direction;
1812
1813                   position = pos_byte + cursor - p2 + ((direction > 0)
1814                                                        ? 1 - len_byte : 0);
1815                   set_search_regs (position, len_byte);
1816
1817                   if ((n -= direction) != 0)
1818                     cursor += dirlen; /* to resume search */
1819                   else
1820                     return ((direction > 0)
1821                             ? search_regs.end[0] : search_regs.start[0]);
1822                 }
1823               else
1824                 cursor += stride_for_teases; /* <sigh> we lose -  */
1825             }
1826           pos_byte += cursor - p2;
1827         }
1828       else
1829         /* Now we'll pick up a clump that has to be done the hard */
1830         /* way because it covers a discontinuity */
1831         {
1832           limit = ((direction > 0)
1833                    ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
1834                    : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
1835           limit = ((direction > 0)
1836                    ? min (limit + len_byte, lim_byte - 1)
1837                    : max (limit - len_byte, lim_byte));
1838           /* LIMIT is now the last value POS_BYTE can have
1839              and still be valid for a possible match.  */
1840           while (1)
1841             {
1842               /* This loop can be coded for space rather than */
1843               /* speed because it will usually run only once. */
1844               /* (the reach is at most len + 21, and typically */
1845               /* does not exceed len) */
1846               while ((limit - pos_byte) * direction >= 0)
1847                 pos_byte += BM_tab[FETCH_BYTE (pos_byte)];
1848               /* now run the same tests to distinguish going off the */
1849               /* end, a match or a phony match. */
1850               if ((pos_byte - limit) * direction <= len_byte)
1851                 break;  /* ran off the end */
1852               /* Found what might be a match.
1853                  Set POS_BYTE back to last (first if reverse) pos.  */
1854               pos_byte -= infinity;
1855               i = dirlen - direction;
1856               while ((i -= direction) + direction != 0)
1857                 {
1858                   int ch;
1859                   unsigned char *ptr;
1860                   pos_byte -= direction;
1861                   ptr = BYTE_POS_ADDR (pos_byte);
1862                   /* Translate only the last byte of a character.  */
1863                   if (! multibyte
1864                       || ((ptr == tail_end_ptr
1865                            || CHAR_HEAD_P (ptr[1]))
1866                           && (CHAR_HEAD_P (ptr[0])
1867                               || (translate_prev_byte == ptr[-1]
1868                                   && (CHAR_HEAD_P (translate_prev_byte)
1869                                       || translate_anteprev_byte == ptr[-2])))))
1870                     ch = simple_translate[*ptr];
1871                   else
1872                     ch = *ptr;
1873                   if (pat[i] != ch)
1874                     break;
1875                 }
1876               /* Above loop has moved POS_BYTE part or all the way
1877                  back to the first pos (last pos if reverse).
1878                  Set it once again at the last (first if reverse) char.  */
1879               pos_byte += dirlen - i- direction;
1880               if (i + direction == 0)
1881                 {
1882                   int position;
1883                   pos_byte -= direction;
1884
1885                   position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
1886
1887                   set_search_regs (position, len_byte);
1888
1889                   if ((n -= direction) != 0)
1890                     pos_byte += dirlen; /* to resume search */
1891                   else
1892                     return ((direction > 0)
1893                             ? search_regs.end[0] : search_regs.start[0]);
1894                 }
1895               else
1896                 pos_byte += stride_for_teases;
1897             }
1898           }
1899       /* We have done one clump.  Can we continue? */
1900       if ((lim_byte - pos_byte) * direction < 0)
1901         return ((0 - n) * direction);
1902     }
1903   return BYTE_TO_CHAR (pos_byte);
1904 }
1905
1906 /* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
1907    for the overall match just found in the current buffer.
1908    Also clear out the match data for registers 1 and up.  */
1909
1910 static void
1911 set_search_regs (beg_byte, nbytes)
1912      int beg_byte, nbytes;
1913 {
1914   int i;
1915
1916   /* Make sure we have registers in which to store
1917      the match position.  */
1918   if (search_regs.num_regs == 0)
1919     {
1920       search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
1921       search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
1922       search_regs.num_regs = 2;
1923     }
1924
1925   /* Clear out the other registers.  */
1926   for (i = 1; i < search_regs.num_regs; i++)
1927     {
1928       search_regs.start[i] = -1;
1929       search_regs.end[i] = -1;
1930     }
1931
1932   search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
1933   search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
1934   XSETBUFFER (last_thing_searched, current_buffer);
1935 }
1936 \f
1937 /* Given a string of words separated by word delimiters,
1938   compute a regexp that matches those exact words
1939   separated by arbitrary punctuation.  */
1940
1941 static Lisp_Object
1942 wordify (string)
1943      Lisp_Object string;
1944 {
1945   register unsigned char *p, *o;
1946   register int i, i_byte, len, punct_count = 0, word_count = 0;
1947   Lisp_Object val;
1948   int prev_c = 0;
1949   int adjust;
1950
1951   CHECK_STRING (string, 0);
1952   p = XSTRING (string)->data;
1953   len = XSTRING (string)->size;
1954
1955   for (i = 0, i_byte = 0; i < len; )
1956     {
1957       int c;
1958
1959       if (STRING_MULTIBYTE (string))
1960         FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
1961       else
1962         c = XSTRING (string)->data[i++];
1963
1964       if (SYNTAX (c) != Sword)
1965         {
1966           punct_count++;
1967           if (i > 0 && SYNTAX (prev_c) == Sword)
1968             word_count++;
1969         }
1970
1971       prev_c = c;
1972     }
1973
1974   if (SYNTAX (prev_c) == Sword)
1975     word_count++;
1976   if (!word_count)
1977     return build_string ("");
1978
1979   adjust = - punct_count + 5 * (word_count - 1) + 4;
1980   if (STRING_MULTIBYTE (string))
1981     val = make_uninit_multibyte_string (len + adjust,
1982                                         STRING_BYTES (XSTRING (string))
1983                                         + adjust);
1984   else
1985     val = make_uninit_string (len + adjust);
1986
1987   o = XSTRING (val)->data;
1988   *o++ = '\\';
1989   *o++ = 'b';
1990   prev_c = 0;
1991
1992   for (i = 0, i_byte = 0; i < len; )
1993     {
1994       int c;
1995       int i_byte_orig = i_byte;
1996
1997       if (STRING_MULTIBYTE (string))
1998         FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
1999       else
2000         {
2001           c = XSTRING (string)->data[i++];
2002           i_byte++;
2003         }
2004
2005       if (SYNTAX (c) == Sword)
2006         {
2007           bcopy (&XSTRING (string)->data[i_byte_orig], o,
2008                  i_byte - i_byte_orig);
2009           o += i_byte - i_byte_orig;
2010         }
2011       else if (i > 0 && SYNTAX (prev_c) == Sword && --word_count)
2012         {
2013           *o++ = '\\';
2014           *o++ = 'W';
2015           *o++ = '\\';
2016           *o++ = 'W';
2017           *o++ = '*';
2018         }
2019
2020       prev_c = c;
2021     }
2022
2023   *o++ = '\\';
2024   *o++ = 'b';
2025
2026   return val;
2027 }
2028 \f
2029 DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
2030   "MSearch backward: ",
2031   "Search backward from point for STRING.\n\
2032 Set point to the beginning of the occurrence found, and return point.\n\
2033 An optional second argument bounds the search; it is a buffer position.\n\
2034 The match found must not extend before that position.\n\
2035 Optional third argument, if t, means if fail just return nil (no error).\n\
2036  If not nil and not t, position at limit of search and return nil.\n\
2037 Optional fourth argument is repeat count--search for successive occurrences.\n\
2038 See also the functions `match-beginning', `match-end' and `replace-match'.")
2039   (string, bound, noerror, count)
2040      Lisp_Object string, bound, noerror, count;
2041 {
2042   return search_command (string, bound, noerror, count, -1, 0, 0);
2043 }
2044
2045 DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
2046   "Search forward from point for STRING.\n\
2047 Set point to the end of the occurrence found, and return point.\n\
2048 An optional second argument bounds the search; it is a buffer position.\n\
2049 The match found must not extend after that position.  nil is equivalent\n\
2050   to (point-max).\n\
2051 Optional third argument, if t, means if fail just return nil (no error).\n\
2052   If not nil and not t, move to limit of search and return nil.\n\
2053 Optional fourth argument is repeat count--search for successive occurrences.\n\
2054 See also the functions `match-beginning', `match-end' and `replace-match'.")
2055   (string, bound, noerror, count)
2056      Lisp_Object string, bound, noerror, count;
2057 {
2058   return search_command (string, bound, noerror, count, 1, 0, 0);
2059 }
2060
2061 DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
2062   "sWord search backward: ",
2063   "Search backward from point for STRING, ignoring differences in punctuation.\n\
2064 Set point to the beginning of the occurrence found, and return point.\n\
2065 An optional second argument bounds the search; it is a buffer position.\n\
2066 The match found must not extend before that position.\n\
2067 Optional third argument, if t, means if fail just return nil (no error).\n\
2068   If not nil and not t, move to limit of search and return nil.\n\
2069 Optional fourth argument is repeat count--search for successive occurrences.")
2070   (string, bound, noerror, count)
2071      Lisp_Object string, bound, noerror, count;
2072 {
2073   return search_command (wordify (string), bound, noerror, count, -1, 1, 0);
2074 }
2075
2076 DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
2077   "sWord search: ",
2078   "Search forward from point for STRING, ignoring differences in punctuation.\n\
2079 Set point to the end of the occurrence found, and return point.\n\
2080 An optional second argument bounds the search; it is a buffer position.\n\
2081 The match found must not extend after that position.\n\
2082 Optional third argument, if t, means if fail just return nil (no error).\n\
2083   If not nil and not t, move to limit of search and return nil.\n\
2084 Optional fourth argument is repeat count--search for successive occurrences.")
2085   (string, bound, noerror, count)
2086      Lisp_Object string, bound, noerror, count;
2087 {
2088   return search_command (wordify (string), bound, noerror, count, 1, 1, 0);
2089 }
2090
2091 DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
2092   "sRE search backward: ",
2093   "Search backward from point for match for regular expression REGEXP.\n\
2094 Set point to the beginning of the match, and return point.\n\
2095 The match found is the one starting last in the buffer\n\
2096 and yet ending before the origin of the search.\n\
2097 An optional second argument bounds the search; it is a buffer position.\n\
2098 The match found must start at or after that position.\n\
2099 Optional third argument, if t, means if fail just return nil (no error).\n\
2100   If not nil and not t, move to limit of search and return nil.\n\
2101 Optional fourth argument is repeat count--search for successive occurrences.\n\
2102 See also the functions `match-beginning', `match-end' and `replace-match'.")
2103   (regexp, bound, noerror, count)
2104      Lisp_Object regexp, bound, noerror, count;
2105 {
2106   return search_command (regexp, bound, noerror, count, -1, 1, 0);
2107 }
2108
2109 DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
2110   "sRE search: ",
2111   "Search forward from point for regular expression REGEXP.\n\
2112 Set point to the end of the occurrence found, and return point.\n\
2113 An optional second argument bounds the search; it is a buffer position.\n\
2114 The match found must not extend after that position.\n\
2115 Optional third argument, if t, means if fail just return nil (no error).\n\
2116   If not nil and not t, move to limit of search and return nil.\n\
2117 Optional fourth argument is repeat count--search for successive occurrences.\n\
2118 See also the functions `match-beginning', `match-end' and `replace-match'.")
2119   (regexp, bound, noerror, count)
2120      Lisp_Object regexp, bound, noerror, count;
2121 {
2122   return search_command (regexp, bound, noerror, count, 1, 1, 0);
2123 }
2124
2125 DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
2126   "sPosix search backward: ",
2127   "Search backward from point for match for regular expression REGEXP.\n\
2128 Find the longest match in accord with Posix regular expression rules.\n\
2129 Set point to the beginning of the match, and return point.\n\
2130 The match found is the one starting last in the buffer\n\
2131 and yet ending before the origin of the search.\n\
2132 An optional second argument bounds the search; it is a buffer position.\n\
2133 The match found must start at or after that position.\n\
2134 Optional third argument, if t, means if fail just return nil (no error).\n\
2135   If not nil and not t, move to limit of search and return nil.\n\
2136 Optional fourth argument is repeat count--search for successive occurrences.\n\
2137 See also the functions `match-beginning', `match-end' and `replace-match'.")
2138   (regexp, bound, noerror, count)
2139      Lisp_Object regexp, bound, noerror, count;
2140 {
2141   return search_command (regexp, bound, noerror, count, -1, 1, 1);
2142 }
2143
2144 DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
2145   "sPosix search: ",
2146   "Search forward from point for regular expression REGEXP.\n\
2147 Find the longest match in accord with Posix regular expression rules.\n\
2148 Set point to the end of the occurrence found, and return point.\n\
2149 An optional second argument bounds the search; it is a buffer position.\n\
2150 The match found must not extend after that position.\n\
2151 Optional third argument, if t, means if fail just return nil (no error).\n\
2152   If not nil and not t, move to limit of search and return nil.\n\
2153 Optional fourth argument is repeat count--search for successive occurrences.\n\
2154 See also the functions `match-beginning', `match-end' and `replace-match'.")
2155   (regexp, bound, noerror, count)
2156      Lisp_Object regexp, bound, noerror, count;
2157 {
2158   return search_command (regexp, bound, noerror, count, 1, 1, 1);
2159 }
2160 \f
2161 DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
2162   "Replace text matched by last search with NEWTEXT.\n\
2163 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.\n\
2164 Otherwise maybe capitalize the whole text, or maybe just word initials,\n\
2165 based on the replaced text.\n\
2166 If the replaced text has only capital letters\n\
2167 and has at least one multiletter word, convert NEWTEXT to all caps.\n\
2168 If the replaced text has at least one word starting with a capital letter,\n\
2169 then capitalize each word in NEWTEXT.\n\n\
2170 If third arg LITERAL is non-nil, insert NEWTEXT literally.\n\
2171 Otherwise treat `\\' as special:\n\
2172   `\\&' in NEWTEXT means substitute original matched text.\n\
2173   `\\N' means substitute what matched the Nth `\\(...\\)'.\n\
2174        If Nth parens didn't match, substitute nothing.\n\
2175   `\\\\' means insert one `\\'.\n\
2176 FIXEDCASE and LITERAL are optional arguments.\n\
2177 Leaves point at end of replacement text.\n\
2178 \n\
2179 The optional fourth argument STRING can be a string to modify.\n\
2180 In that case, this function creates and returns a new string\n\
2181 which is made by replacing the part of STRING that was matched.\n\
2182 \n\
2183 The optional fifth argument SUBEXP specifies a subexpression of the match.\n\
2184 It says to replace just that subexpression instead of the whole match.\n\
2185 This is useful only after a regular expression search or match\n\
2186 since only regular expressions have distinguished subexpressions.")
2187   (newtext, fixedcase, literal, string, subexp)
2188      Lisp_Object newtext, fixedcase, literal, string, subexp;
2189 {
2190   enum { nochange, all_caps, cap_initial } case_action;
2191   register int pos, pos_byte;
2192   int some_multiletter_word;
2193   int some_lowercase;
2194   int some_uppercase;
2195   int some_nonuppercase_initial;
2196   register int c, prevc;
2197   int inslen;
2198   int sub;
2199   int opoint, newpoint;
2200
2201   CHECK_STRING (newtext, 0);
2202
2203   if (! NILP (string))
2204     CHECK_STRING (string, 4);
2205
2206   case_action = nochange;       /* We tried an initialization */
2207                                 /* but some C compilers blew it */
2208
2209   if (search_regs.num_regs <= 0)
2210     error ("replace-match called before any match found");
2211
2212   if (NILP (subexp))
2213     sub = 0;
2214   else
2215     {
2216       CHECK_NUMBER (subexp, 3);
2217       sub = XINT (subexp);
2218       if (sub < 0 || sub >= search_regs.num_regs)
2219         args_out_of_range (subexp, make_number (search_regs.num_regs));
2220     }
2221
2222   if (NILP (string))
2223     {
2224       if (search_regs.start[sub] < BEGV
2225           || search_regs.start[sub] > search_regs.end[sub]
2226           || search_regs.end[sub] > ZV)
2227         args_out_of_range (make_number (search_regs.start[sub]),
2228                            make_number (search_regs.end[sub]));
2229     }
2230   else
2231     {
2232       if (search_regs.start[sub] < 0
2233           || search_regs.start[sub] > search_regs.end[sub]
2234           || search_regs.end[sub] > XSTRING (string)->size)
2235         args_out_of_range (make_number (search_regs.start[sub]),
2236                            make_number (search_regs.end[sub]));
2237     }
2238
2239   if (NILP (fixedcase))
2240     {
2241       /* Decide how to casify by examining the matched text. */
2242       int last;
2243
2244       pos = search_regs.start[sub];
2245       last = search_regs.end[sub];
2246
2247       if (NILP (string))
2248         pos_byte = CHAR_TO_BYTE (pos);
2249       else
2250         pos_byte = string_char_to_byte (string, pos);
2251
2252       prevc = '\n';
2253       case_action = all_caps;
2254
2255       /* some_multiletter_word is set nonzero if any original word
2256          is more than one letter long. */
2257       some_multiletter_word = 0;
2258       some_lowercase = 0;
2259       some_nonuppercase_initial = 0;
2260       some_uppercase = 0;
2261
2262       while (pos < last)
2263         {
2264           if (NILP (string))
2265             {
2266               c = FETCH_CHAR (pos_byte);
2267               INC_BOTH (pos, pos_byte);
2268             }
2269           else
2270             FETCH_STRING_CHAR_ADVANCE (c, string, pos, pos_byte);
2271
2272           if (LOWERCASEP (c))
2273             {
2274               /* Cannot be all caps if any original char is lower case */
2275
2276               some_lowercase = 1;
2277               if (SYNTAX (prevc) != Sword)
2278                 some_nonuppercase_initial = 1;
2279               else
2280                 some_multiletter_word = 1;
2281             }
2282           else if (!NOCASEP (c))
2283             {
2284               some_uppercase = 1;
2285               if (SYNTAX (prevc) != Sword)
2286                 ;
2287               else
2288                 some_multiletter_word = 1;
2289             }
2290           else
2291             {
2292               /* If the initial is a caseless word constituent,
2293                  treat that like a lowercase initial.  */
2294               if (SYNTAX (prevc) != Sword)
2295                 some_nonuppercase_initial = 1;
2296             }
2297
2298           prevc = c;
2299         }
2300
2301       /* Convert to all caps if the old text is all caps
2302          and has at least one multiletter word.  */
2303       if (! some_lowercase && some_multiletter_word)
2304         case_action = all_caps;
2305       /* Capitalize each word, if the old text has all capitalized words.  */
2306       else if (!some_nonuppercase_initial && some_multiletter_word)
2307         case_action = cap_initial;
2308       else if (!some_nonuppercase_initial && some_uppercase)
2309         /* Should x -> yz, operating on X, give Yz or YZ?
2310            We'll assume the latter.  */
2311         case_action = all_caps;
2312       else
2313         case_action = nochange;
2314     }
2315
2316   /* Do replacement in a string.  */
2317   if (!NILP (string))
2318     {
2319       Lisp_Object before, after;
2320
2321       before = Fsubstring (string, make_number (0),
2322                            make_number (search_regs.start[sub]));
2323       after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
2324
2325       /* Substitute parts of the match into NEWTEXT
2326          if desired.  */
2327       if (NILP (literal))
2328         {
2329           int lastpos = 0;
2330           int lastpos_byte = 0;
2331           /* We build up the substituted string in ACCUM.  */
2332           Lisp_Object accum;
2333           Lisp_Object middle;
2334           int length = STRING_BYTES (XSTRING (newtext));
2335
2336           accum = Qnil;
2337
2338           for (pos_byte = 0, pos = 0; pos_byte < length;)
2339             {
2340               int substart = -1;
2341               int subend;
2342               int delbackslash = 0;
2343
2344               FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2345
2346               if (c == '\\')
2347                 {
2348                   FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2349                   if (c == '&')
2350                     {
2351                       substart = search_regs.start[sub];
2352                       subend = search_regs.end[sub];
2353                     }
2354                   else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
2355                     {
2356                       if (search_regs.start[c - '0'] >= 0)
2357                         {
2358                           substart = search_regs.start[c - '0'];
2359                           subend = search_regs.end[c - '0'];
2360                         }
2361                     }
2362                   else if (c == '\\')
2363                     delbackslash = 1;
2364                   else
2365                     error ("Invalid use of `\\' in replacement text");
2366                 }
2367               if (substart >= 0)
2368                 {
2369                   if (pos - 2 != lastpos)
2370                     middle = substring_both (newtext, lastpos,
2371                                              lastpos_byte,
2372                                              pos - 2, pos_byte - 2);
2373                   else
2374                     middle = Qnil;
2375                   accum = concat3 (accum, middle,
2376                                    Fsubstring (string,
2377                                                make_number (substart),
2378                                                make_number (subend)));
2379                   lastpos = pos;
2380                   lastpos_byte = pos_byte;
2381                 }
2382               else if (delbackslash)
2383                 {
2384                   middle = substring_both (newtext, lastpos,
2385                                            lastpos_byte,
2386                                            pos - 1, pos_byte - 1);
2387
2388                   accum = concat2 (accum, middle);
2389                   lastpos = pos;
2390                   lastpos_byte = pos_byte;
2391                 }
2392             }
2393
2394           if (pos != lastpos)
2395             middle = substring_both (newtext, lastpos,
2396                                      lastpos_byte,
2397                                      pos, pos_byte);
2398           else
2399             middle = Qnil;
2400
2401           newtext = concat2 (accum, middle);
2402         }
2403
2404       /* Do case substitution in NEWTEXT if desired.  */
2405       if (case_action == all_caps)
2406         newtext = Fupcase (newtext);
2407       else if (case_action == cap_initial)
2408         newtext = Fupcase_initials (newtext);
2409
2410       return concat3 (before, newtext, after);
2411     }
2412
2413   /* Record point, the move (quietly) to the start of the match.  */
2414   if (PT >= search_regs.end[sub])
2415     opoint = PT - ZV;
2416   else if (PT > search_regs.start[sub])
2417     opoint = search_regs.end[sub] - ZV;
2418   else
2419     opoint = PT;
2420
2421   TEMP_SET_PT (search_regs.start[sub]);
2422
2423   /* We insert the replacement text before the old text, and then
2424      delete the original text.  This means that markers at the
2425      beginning or end of the original will float to the corresponding
2426      position in the replacement.  */
2427   if (!NILP (literal))
2428     Finsert_and_inherit (1, &newtext);
2429   else
2430     {
2431       struct gcpro gcpro1;
2432       int length = STRING_BYTES (XSTRING (newtext));
2433
2434       GCPRO1 (newtext);
2435
2436       for (pos_byte = 0, pos = 0; pos_byte < length;)
2437         {
2438           int offset = PT - search_regs.start[sub];
2439
2440           FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2441
2442           if (c == '\\')
2443             {
2444               FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2445               if (c == '&')
2446                 Finsert_buffer_substring
2447                   (Fcurrent_buffer (),
2448                    make_number (search_regs.start[sub] + offset),
2449                    make_number (search_regs.end[sub] + offset));
2450               else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
2451                 {
2452                   if (search_regs.start[c - '0'] >= 1)
2453                     Finsert_buffer_substring
2454                       (Fcurrent_buffer (),
2455                        make_number (search_regs.start[c - '0'] + offset),
2456                        make_number (search_regs.end[c - '0'] + offset));
2457                 }
2458               else if (c == '\\')
2459                 insert_char (c);
2460               else
2461                 error ("Invalid use of `\\' in replacement text");
2462             }
2463           else
2464             insert_char (c);
2465         }
2466       UNGCPRO;
2467     }
2468
2469   inslen = PT - (search_regs.start[sub]);
2470   del_range (search_regs.start[sub] + inslen, search_regs.end[sub] + inslen);
2471
2472   if (case_action == all_caps)
2473     Fupcase_region (make_number (PT - inslen), make_number (PT));
2474   else if (case_action == cap_initial)
2475     Fupcase_initials_region (make_number (PT - inslen), make_number (PT));
2476
2477   newpoint = PT;
2478
2479   /* Put point back where it was in the text.  */
2480   if (opoint <= 0)
2481     TEMP_SET_PT (opoint + ZV);
2482   else
2483     TEMP_SET_PT (opoint);
2484
2485   /* Now move point "officially" to the start of the inserted replacement.  */
2486   move_if_not_intangible (newpoint);
2487
2488   return Qnil;
2489 }
2490 \f
2491 static Lisp_Object
2492 match_limit (num, beginningp)
2493      Lisp_Object num;
2494      int beginningp;
2495 {
2496   register int n;
2497
2498   CHECK_NUMBER (num, 0);
2499   n = XINT (num);
2500   if (n < 0 || n >= search_regs.num_regs)
2501     args_out_of_range (num, make_number (search_regs.num_regs));
2502   if (search_regs.num_regs <= 0
2503       || search_regs.start[n] < 0)
2504     return Qnil;
2505   return (make_number ((beginningp) ? search_regs.start[n]
2506                                     : search_regs.end[n]));
2507 }
2508
2509 DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
2510   "Return position of start of text matched by last search.\n\
2511 SUBEXP, a number, specifies which parenthesized expression in the last\n\
2512   regexp.\n\
2513 Value is nil if SUBEXPth pair didn't match, or there were less than\n\
2514   SUBEXP pairs.\n\
2515 Zero means the entire text matched by the whole regexp or whole string.")
2516   (subexp)
2517      Lisp_Object subexp;
2518 {
2519   return match_limit (subexp, 1);
2520 }
2521
2522 DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
2523   "Return position of end of text matched by last search.\n\
2524 SUBEXP, a number, specifies which parenthesized expression in the last\n\
2525   regexp.\n\
2526 Value is nil if SUBEXPth pair didn't match, or there were less than\n\
2527   SUBEXP pairs.\n\
2528 Zero means the entire text matched by the whole regexp or whole string.")
2529   (subexp)
2530      Lisp_Object subexp;
2531 {
2532   return match_limit (subexp, 0);
2533 }
2534
2535 DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 2, 0,
2536   "Return a list containing all info on what the last search matched.\n\
2537 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.\n\
2538 All the elements are markers or nil (nil if the Nth pair didn't match)\n\
2539 if the last match was on a buffer; integers or nil if a string was matched.\n\
2540 Use `store-match-data' to reinstate the data in this list.\n\
2541 \n\
2542 If INTEGERS (the optional first argument) is non-nil, always use integers\n\
2543 \(rather than markers) to represent buffer positions.\n\
2544 If REUSE is a list, reuse it as part of the value.  If REUSE is long enough\n\
2545 to hold all the values, and if INTEGERS is non-nil, no consing is done.")
2546   (integers, reuse)
2547      Lisp_Object integers, reuse;
2548 {
2549   Lisp_Object tail, prev;
2550   Lisp_Object *data;
2551   int i, len;
2552
2553   if (NILP (last_thing_searched))
2554     return Qnil;
2555
2556   data = (Lisp_Object *) alloca ((2 * search_regs.num_regs)
2557                                  * sizeof (Lisp_Object));
2558
2559   len = -1;
2560   for (i = 0; i < search_regs.num_regs; i++)
2561     {
2562       int start = search_regs.start[i];
2563       if (start >= 0)
2564         {
2565           if (EQ (last_thing_searched, Qt)
2566               || ! NILP (integers))
2567             {
2568               XSETFASTINT (data[2 * i], start);
2569               XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
2570             }
2571           else if (BUFFERP (last_thing_searched))
2572             {
2573               data[2 * i] = Fmake_marker ();
2574               Fset_marker (data[2 * i],
2575                            make_number (start),
2576                            last_thing_searched);
2577               data[2 * i + 1] = Fmake_marker ();
2578               Fset_marker (data[2 * i + 1],
2579                            make_number (search_regs.end[i]),
2580                            last_thing_searched);
2581             }
2582           else
2583             /* last_thing_searched must always be Qt, a buffer, or Qnil.  */
2584             abort ();
2585
2586           len = i;
2587         }
2588       else
2589         data[2 * i] = data [2 * i + 1] = Qnil;
2590     }
2591
2592   /* If REUSE is not usable, cons up the values and return them.  */
2593   if (! CONSP (reuse))
2594     return Flist (2 * len + 2, data);
2595
2596   /* If REUSE is a list, store as many value elements as will fit
2597      into the elements of REUSE.  */
2598   for (i = 0, tail = reuse; CONSP (tail);
2599        i++, tail = XCDR (tail))
2600     {
2601       if (i < 2 * len + 2)
2602         XCAR (tail) = data[i];
2603       else
2604         XCAR (tail) = Qnil;
2605       prev = tail;
2606     }
2607
2608   /* If we couldn't fit all value elements into REUSE,
2609      cons up the rest of them and add them to the end of REUSE.  */
2610   if (i < 2 * len + 2)
2611     XCDR (prev) = Flist (2 * len + 2 - i, data + i);
2612
2613   return reuse;
2614 }
2615
2616
2617 DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 1, 0,
2618   "Set internal data on last search match from elements of LIST.\n\
2619 LIST should have been created by calling `match-data' previously.")
2620   (list)
2621      register Lisp_Object list;
2622 {
2623   register int i;
2624   register Lisp_Object marker;
2625
2626   if (running_asynch_code)
2627     save_search_regs ();
2628
2629   if (!CONSP (list) && !NILP (list))
2630     list = wrong_type_argument (Qconsp, list);
2631
2632   /* Unless we find a marker with a buffer in LIST, assume that this
2633      match data came from a string.  */
2634   last_thing_searched = Qt;
2635
2636   /* Allocate registers if they don't already exist.  */
2637   {
2638     int length = XFASTINT (Flength (list)) / 2;
2639
2640     if (length > search_regs.num_regs)
2641       {
2642         if (search_regs.num_regs == 0)
2643           {
2644             search_regs.start
2645               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2646             search_regs.end
2647               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2648           }
2649         else
2650           {
2651             search_regs.start
2652               = (regoff_t *) xrealloc (search_regs.start,
2653                                        length * sizeof (regoff_t));
2654             search_regs.end
2655               = (regoff_t *) xrealloc (search_regs.end,
2656                                        length * sizeof (regoff_t));
2657           }
2658
2659         search_regs.num_regs = length;
2660       }
2661   }
2662
2663   for (i = 0; i < search_regs.num_regs; i++)
2664     {
2665       marker = Fcar (list);
2666       if (NILP (marker))
2667         {
2668           search_regs.start[i] = -1;
2669           list = Fcdr (list);
2670         }
2671       else
2672         {
2673           if (MARKERP (marker))
2674             {
2675               if (XMARKER (marker)->buffer == 0)
2676                 XSETFASTINT (marker, 0);
2677               else
2678                 XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
2679             }
2680
2681           CHECK_NUMBER_COERCE_MARKER (marker, 0);
2682           search_regs.start[i] = XINT (marker);
2683           list = Fcdr (list);
2684
2685           marker = Fcar (list);
2686           if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
2687             XSETFASTINT (marker, 0);
2688
2689           CHECK_NUMBER_COERCE_MARKER (marker, 0);
2690           search_regs.end[i] = XINT (marker);
2691         }
2692       list = Fcdr (list);
2693     }
2694
2695   return Qnil;
2696 }
2697
2698 /* If non-zero the match data have been saved in saved_search_regs
2699    during the execution of a sentinel or filter. */
2700 static int search_regs_saved;
2701 static struct re_registers saved_search_regs;
2702
2703 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
2704    if asynchronous code (filter or sentinel) is running. */
2705 static void
2706 save_search_regs ()
2707 {
2708   if (!search_regs_saved)
2709     {
2710       saved_search_regs.num_regs = search_regs.num_regs;
2711       saved_search_regs.start = search_regs.start;
2712       saved_search_regs.end = search_regs.end;
2713       search_regs.num_regs = 0;
2714       search_regs.start = 0;
2715       search_regs.end = 0;
2716
2717       search_regs_saved = 1;
2718     }
2719 }
2720
2721 /* Called upon exit from filters and sentinels. */
2722 void
2723 restore_match_data ()
2724 {
2725   if (search_regs_saved)
2726     {
2727       if (search_regs.num_regs > 0)
2728         {
2729           xfree (search_regs.start);
2730           xfree (search_regs.end);
2731         }
2732       search_regs.num_regs = saved_search_regs.num_regs;
2733       search_regs.start = saved_search_regs.start;
2734       search_regs.end = saved_search_regs.end;
2735
2736       search_regs_saved = 0;
2737     }
2738 }
2739
2740 /* Quote a string to inactivate reg-expr chars */
2741
2742 DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
2743   "Return a regexp string which matches exactly STRING and nothing else.")
2744   (string)
2745      Lisp_Object string;
2746 {
2747   register unsigned char *in, *out, *end;
2748   register unsigned char *temp;
2749   int backslashes_added = 0;
2750
2751   CHECK_STRING (string, 0);
2752
2753   temp = (unsigned char *) alloca (STRING_BYTES (XSTRING (string)) * 2);
2754
2755   /* Now copy the data into the new string, inserting escapes. */
2756
2757   in = XSTRING (string)->data;
2758   end = in + STRING_BYTES (XSTRING (string));
2759   out = temp;
2760
2761   for (; in != end; in++)
2762     {
2763       if (*in == '[' || *in == ']'
2764           || *in == '*' || *in == '.' || *in == '\\'
2765           || *in == '?' || *in == '+'
2766           || *in == '^' || *in == '$')
2767         *out++ = '\\', backslashes_added++;
2768       *out++ = *in;
2769     }
2770
2771   return make_specified_string (temp,
2772                                 XSTRING (string)->size + backslashes_added,
2773                                 out - temp,
2774                                 STRING_MULTIBYTE (string));
2775 }
2776 \f
2777 void
2778 syms_of_search ()
2779 {
2780   register int i;
2781
2782   for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
2783     {
2784       searchbufs[i].buf.allocated = 100;
2785       searchbufs[i].buf.buffer = (unsigned char *) malloc (100);
2786       searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
2787       searchbufs[i].regexp = Qnil;
2788       staticpro (&searchbufs[i].regexp);
2789       searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
2790     }
2791   searchbuf_head = &searchbufs[0];
2792
2793   Qsearch_failed = intern ("search-failed");
2794   staticpro (&Qsearch_failed);
2795   Qinvalid_regexp = intern ("invalid-regexp");
2796   staticpro (&Qinvalid_regexp);
2797
2798   Fput (Qsearch_failed, Qerror_conditions,
2799         Fcons (Qsearch_failed, Fcons (Qerror, Qnil)));
2800   Fput (Qsearch_failed, Qerror_message,
2801         build_string ("Search failed"));
2802
2803   Fput (Qinvalid_regexp, Qerror_conditions,
2804         Fcons (Qinvalid_regexp, Fcons (Qerror, Qnil)));
2805   Fput (Qinvalid_regexp, Qerror_message,
2806         build_string ("Invalid regexp"));
2807
2808   last_thing_searched = Qnil;
2809   staticpro (&last_thing_searched);
2810
2811   defsubr (&Slooking_at);
2812   defsubr (&Sposix_looking_at);
2813   defsubr (&Sstring_match);
2814   defsubr (&Sposix_string_match);
2815   defsubr (&Ssearch_forward);
2816   defsubr (&Ssearch_backward);
2817   defsubr (&Sword_search_forward);
2818   defsubr (&Sword_search_backward);
2819   defsubr (&Sre_search_forward);
2820   defsubr (&Sre_search_backward);
2821   defsubr (&Sposix_search_forward);
2822   defsubr (&Sposix_search_backward);
2823   defsubr (&Sreplace_match);
2824   defsubr (&Smatch_beginning);
2825   defsubr (&Smatch_end);
2826   defsubr (&Smatch_data);
2827   defsubr (&Sset_match_data);
2828   defsubr (&Sregexp_quote);
2829 }