src/search.c

   1 /* String search routines for GNU Emacs.
   2    Copyright (C) 1985, 1986, 1987, 1993, 1994, 1997, 1998, 1999, 2002, 2003,
   3                  2004, 2005, 2006 Free Software Foundation, Inc.
   4
   5 This file is part of GNU Emacs.
   6
   7 GNU Emacs is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2, or (at your option)
  10 any later version.
  11
  12 GNU Emacs is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU Emacs; see the file COPYING.  If not, write to
  19 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
  20 Boston, MA 02110-1301, USA.  */
  21
  22
  23 #include <config.h>
  24 #include "lisp.h"
  25 #include "syntax.h"
  26 #include "category.h"
  27 #include "buffer.h"
  28 #include "charset.h"
  29 #include "region-cache.h"
  30 #include "commands.h"
  31 #include "blockinput.h"
  32 #include "intervals.h"
  33
  34 #include <sys/types.h>
  35 #include "regex.h"
  36
  37 #define REGEXP_CACHE_SIZE 20
  38
  39 /* If the regexp is non-nil, then the buffer contains the compiled form
  40    of that regexp, suitable for searching.  */
  41 struct regexp_cache
  42 {
  43   struct regexp_cache *next;
  44   Lisp_Object regexp, whitespace_regexp;
  45   struct re_pattern_buffer buf;
  46   char fastmap[0400];
  47   /* Nonzero means regexp was compiled to do full POSIX backtracking.  */
  48   char posix;
  49 };
  50
  51 /* The instances of that struct.  */
  52 struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
  53
  54 /* The head of the linked list; points to the most recently used buffer.  */
  55 struct regexp_cache *searchbuf_head;
  56
  57
  58 /* Every call to re_match, etc., must pass &search_regs as the regs
  59    argument unless you can show it is unnecessary (i.e., if re_match
  60    is certainly going to be called again before region-around-match
  61    can be called).
  62
  63    Since the registers are now dynamically allocated, we need to make
  64    sure not to refer to the Nth register before checking that it has
  65    been allocated by checking search_regs.num_regs.
  66
  67    The regex code keeps track of whether it has allocated the search
  68    buffer using bits in the re_pattern_buffer.  This means that whenever
  69    you compile a new pattern, it completely forgets whether it has
  70    allocated any registers, and will allocate new registers the next
  71    time you call a searching or matching function.  Therefore, we need
  72    to call re_set_registers after compiling a new pattern or after
  73    setting the match registers, so that the regex functions will be
  74    able to free or re-allocate it properly.  */
  75 static struct re_registers search_regs;
  76
  77 /* The buffer in which the last search was performed, or
  78    Qt if the last search was done in a string;
  79    Qnil if no searching has been done yet.  */
  80 static Lisp_Object last_thing_searched;
  81
  82 /* error condition signaled when regexp compile_pattern fails */
  83
  84 Lisp_Object Qinvalid_regexp;
  85
  86 Lisp_Object Vsearch_spaces_regexp;
  87
  88 static void set_search_regs ();
  89 static void save_search_regs ();
  90 static int simple_search ();
  91 static int boyer_moore ();
  92 static int search_buffer ();
  93 static void matcher_overflow () NO_RETURN;
  94
  95 static void
  96 matcher_overflow ()
  97 {
  98   error ("Stack overflow in regexp matcher");
  99 }
 100
 101 /* Compile a regexp and signal a Lisp error if anything goes wrong.
 102    PATTERN is the pattern to compile.
 103    CP is the place to put the result.
 104    TRANSLATE is a translation table for ignoring case, or nil for none.
 105    REGP is the structure that says where to store the "register"
 106    values that will result from matching this pattern.
 107    If it is 0, we should compile the pattern not to record any
 108    subexpression bounds.
 109    POSIX is nonzero if we want full backtracking (POSIX style)
 110    for this pattern.  0 means backtrack only enough to get a valid match.
 111    MULTIBYTE is nonzero if we want to handle multibyte characters in
 112    PATTERN.  0 means all multibyte characters are recognized just as
 113    sequences of binary data.
 114
 115    The behavior also depends on Vsearch_spaces_regexp.  */
 116
 117 static void
 118 compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte)
 119      struct regexp_cache *cp;
 120      Lisp_Object pattern;
 121      Lisp_Object translate;
 122      struct re_registers *regp;
 123      int posix;
 124      int multibyte;
 125 {
 126   unsigned char *raw_pattern;
 127   int raw_pattern_size;
 128   char *val;
 129   reg_syntax_t old;
 130
 131   /* MULTIBYTE says whether the text to be searched is multibyte.
 132      We must convert PATTERN to match that, or we will not really
 133      find things right.  */
 134
 135   if (multibyte == STRING_MULTIBYTE (pattern))
 136     {
 137       raw_pattern = (unsigned char *) SDATA (pattern);
 138       raw_pattern_size = SBYTES (pattern);
 139     }
 140   else if (multibyte)
 141     {
 142       raw_pattern_size = count_size_as_multibyte (SDATA (pattern),
 143                                                   SCHARS (pattern));
 144       raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
 145       copy_text (SDATA (pattern), raw_pattern,
 146                  SCHARS (pattern), 0, 1);
 147     }
 148   else
 149     {
 150       /* Converting multibyte to single-byte.
 151
 152          ??? Perhaps this conversion should be done in a special way
 153          by subtracting nonascii-insert-offset from each non-ASCII char,
 154          so that only the multibyte chars which really correspond to
 155          the chosen single-byte character set can possibly match.  */
 156       raw_pattern_size = SCHARS (pattern);
 157       raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
 158       copy_text (SDATA (pattern), raw_pattern,
 159                  SBYTES (pattern), 1, 0);
 160     }
 161
 162   cp->regexp = Qnil;
 163   cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
 164   cp->posix = posix;
 165   cp->buf.multibyte = multibyte;
 166   cp->whitespace_regexp = Vsearch_spaces_regexp;
 167   BLOCK_INPUT;
 168   old = re_set_syntax (RE_SYNTAX_EMACS
 169                        | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
 170
 171   re_set_whitespace_regexp (NILP (Vsearch_spaces_regexp) ? NULL
 172                             : SDATA (Vsearch_spaces_regexp));
 173
 174   val = (char *) re_compile_pattern ((char *)raw_pattern,
 175                                      raw_pattern_size, &cp->buf);
 176
 177   re_set_whitespace_regexp (NULL);
 178
 179   re_set_syntax (old);
 180   UNBLOCK_INPUT;
 181   if (val)
 182     Fsignal (Qinvalid_regexp, Fcons (build_string (val), Qnil));
 183
 184   cp->regexp = Fcopy_sequence (pattern);
 185 }
 186
 187 /* Shrink each compiled regexp buffer in the cache
 188    to the size actually used right now.
 189    This is called from garbage collection.  */
 190
 191 void
 192 shrink_regexp_cache ()
 193 {
 194   struct regexp_cache *cp;
 195
 196   for (cp = searchbuf_head; cp != 0; cp = cp->next)
 197     {
 198       cp->buf.allocated = cp->buf.used;
 199       cp->buf.buffer
 200         = (unsigned char *) xrealloc (cp->buf.buffer, cp->buf.used);
 201     }
 202 }
 203
 204 /* Compile a regexp if necessary, but first check to see if there's one in
 205    the cache.
 206    PATTERN is the pattern to compile.
 207    TRANSLATE is a translation table for ignoring case, or nil for none.
 208    REGP is the structure that says where to store the "register"
 209    values that will result from matching this pattern.
 210    If it is 0, we should compile the pattern not to record any
 211    subexpression bounds.
 212    POSIX is nonzero if we want full backtracking (POSIX style)
 213    for this pattern.  0 means backtrack only enough to get a valid match.  */
 214
 215 struct re_pattern_buffer *
 216 compile_pattern (pattern, regp, translate, posix, multibyte)
 217      Lisp_Object pattern;
 218      struct re_registers *regp;
 219      Lisp_Object translate;
 220      int posix, multibyte;
 221 {
 222   struct regexp_cache *cp, **cpp;
 223
 224   for (cpp = &searchbuf_head; ; cpp = &cp->next)
 225     {
 226       cp = *cpp;
 227       /* Entries are initialized to nil, and may be set to nil by
 228          compile_pattern_1 if the pattern isn't valid.  Don't apply
 229          string accessors in those cases.  However, compile_pattern_1
 230          is only applied to the cache entry we pick here to reuse.  So
 231          nil should never appear before a non-nil entry.  */
 232       if (NILP (cp->regexp))
 233         goto compile_it;
 234       if (SCHARS (cp->regexp) == SCHARS (pattern)
 235           && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
 236           && !NILP (Fstring_equal (cp->regexp, pattern))
 237           && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
 238           && cp->posix == posix
 239           && cp->buf.multibyte == multibyte
 240           && !NILP (Fequal (cp->whitespace_regexp, Vsearch_spaces_regexp)))
 241         break;
 242
 243       /* If we're at the end of the cache, compile into the nil cell
 244          we found, or the last (least recently used) cell with a
 245          string value.  */
 246       if (cp->next == 0)
 247         {
 248         compile_it:
 249           compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte);
 250           break;
 251         }
 252     }
 253
 254   /* When we get here, cp (aka *cpp) contains the compiled pattern,
 255      either because we found it in the cache or because we just compiled it.
 256      Move it to the front of the queue to mark it as most recently used.  */
 257   *cpp = cp->next;
 258   cp->next = searchbuf_head;
 259   searchbuf_head = cp;
 260
 261   /* Advise the searching functions about the space we have allocated
 262      for register data.  */
 263   if (regp)
 264     re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
 265
 266   return &cp->buf;
 267 }
 268
 269 /* Error condition used for failing searches */
 270 Lisp_Object Qsearch_failed;
 271
 272 Lisp_Object
 273 signal_failure (arg)
 274      Lisp_Object arg;
 275 {
 276   Fsignal (Qsearch_failed, Fcons (arg, Qnil));
 277   return Qnil;
 278 }
 279 \f
 280 static Lisp_Object
 281 looking_at_1 (string, posix)
 282      Lisp_Object string;
 283      int posix;
 284 {
 285   Lisp_Object val;
 286   unsigned char *p1, *p2;
 287   int s1, s2;
 288   register int i;
 289   struct re_pattern_buffer *bufp;
 290
 291   if (running_asynch_code)
 292     save_search_regs ();
 293
 294   CHECK_STRING (string);
 295   bufp = compile_pattern (string, &search_regs,
 296                           (!NILP (current_buffer->case_fold_search)
 297                            ? current_buffer->case_canon_table : Qnil),
 298                           posix,
 299                           !NILP (current_buffer->enable_multibyte_characters));
 300
 301   immediate_quit = 1;
 302   QUIT;                 /* Do a pending quit right away, to avoid paradoxical behavior */
 303
 304   /* Get pointers and sizes of the two strings
 305      that make up the visible portion of the buffer. */
 306
 307   p1 = BEGV_ADDR;
 308   s1 = GPT_BYTE - BEGV_BYTE;
 309   p2 = GAP_END_ADDR;
 310   s2 = ZV_BYTE - GPT_BYTE;
 311   if (s1 < 0)
 312     {
 313       p2 = p1;
 314       s2 = ZV_BYTE - BEGV_BYTE;
 315       s1 = 0;
 316     }
 317   if (s2 < 0)
 318     {
 319       s1 = ZV_BYTE - BEGV_BYTE;
 320       s2 = 0;
 321     }
 322
 323   re_match_object = Qnil;
 324
 325   i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
 326                   PT_BYTE - BEGV_BYTE, &search_regs,
 327                   ZV_BYTE - BEGV_BYTE);
 328   immediate_quit = 0;
 329
 330   if (i == -2)
 331     matcher_overflow ();
 332
 333   val = (0 <= i ? Qt : Qnil);
 334   if (i >= 0)
 335     for (i = 0; i < search_regs.num_regs; i++)
 336       if (search_regs.start[i] >= 0)
 337         {
 338           search_regs.start[i]
 339             = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
 340           search_regs.end[i]
 341             = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
 342         }
 343   XSETBUFFER (last_thing_searched, current_buffer);
 344   return val;
 345 }
 346
 347 DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
 348        doc: /* Return t if text after point matches regular expression REGEXP.
 349 This function modifies the match data that `match-beginning',
 350 `match-end' and `match-data' access; save and restore the match
 351 data if you want to preserve them.  */)
 352      (regexp)
 353      Lisp_Object regexp;
 354 {
 355   return looking_at_1 (regexp, 0);
 356 }
 357
 358 DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
 359        doc: /* Return t if text after point matches regular expression REGEXP.
 360 Find the longest match, in accord with Posix regular expression rules.
 361 This function modifies the match data that `match-beginning',
 362 `match-end' and `match-data' access; save and restore the match
 363 data if you want to preserve them.  */)
 364      (regexp)
 365      Lisp_Object regexp;
 366 {
 367   return looking_at_1 (regexp, 1);
 368 }
 369 \f
 370 static Lisp_Object
 371 string_match_1 (regexp, string, start, posix)
 372      Lisp_Object regexp, string, start;
 373      int posix;
 374 {
 375   int val;
 376   struct re_pattern_buffer *bufp;
 377   int pos, pos_byte;
 378   int i;
 379
 380   if (running_asynch_code)
 381     save_search_regs ();
 382
 383   CHECK_STRING (regexp);
 384   CHECK_STRING (string);
 385
 386   if (NILP (start))
 387     pos = 0, pos_byte = 0;
 388   else
 389     {
 390       int len = SCHARS (string);
 391
 392       CHECK_NUMBER (start);
 393       pos = XINT (start);
 394       if (pos < 0 && -pos <= len)
 395         pos = len + pos;
 396       else if (0 > pos || pos > len)
 397         args_out_of_range (string, start);
 398       pos_byte = string_char_to_byte (string, pos);
 399     }
 400
 401   bufp = compile_pattern (regexp, &search_regs,
 402                           (!NILP (current_buffer->case_fold_search)
 403                            ? current_buffer->case_canon_table : Qnil),
 404                           posix,
 405                           STRING_MULTIBYTE (string));
 406   immediate_quit = 1;
 407   re_match_object = string;
 408
 409   val = re_search (bufp, (char *) SDATA (string),
 410                    SBYTES (string), pos_byte,
 411                    SBYTES (string) - pos_byte,
 412                    &search_regs);
 413   immediate_quit = 0;
 414   last_thing_searched = Qt;
 415   if (val == -2)
 416     matcher_overflow ();
 417   if (val < 0) return Qnil;
 418
 419   for (i = 0; i < search_regs.num_regs; i++)
 420     if (search_regs.start[i] >= 0)
 421       {
 422         search_regs.start[i]
 423           = string_byte_to_char (string, search_regs.start[i]);
 424         search_regs.end[i]
 425           = string_byte_to_char (string, search_regs.end[i]);
 426       }
 427
 428   return make_number (string_byte_to_char (string, val));
 429 }
 430
 431 DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
 432        doc: /* Return index of start of first match for REGEXP in STRING, or nil.
 433 Matching ignores case if `case-fold-search' is non-nil.
 434 If third arg START is non-nil, start search at that index in STRING.
 435 For index of first char beyond the match, do (match-end 0).
 436 `match-end' and `match-beginning' also give indices of substrings
 437 matched by parenthesis constructs in the pattern.
 438
 439 You can use the function `match-string' to extract the substrings
 440 matched by the parenthesis constructions in REGEXP. */)
 441      (regexp, string, start)
 442      Lisp_Object regexp, string, start;
 443 {
 444   return string_match_1 (regexp, string, start, 0);
 445 }
 446
 447 DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
 448        doc: /* Return index of start of first match for REGEXP in STRING, or nil.
 449 Find the longest match, in accord with Posix regular expression rules.
 450 Case is ignored if `case-fold-search' is non-nil in the current buffer.
 451 If third arg START is non-nil, start search at that index in STRING.
 452 For index of first char beyond the match, do (match-end 0).
 453 `match-end' and `match-beginning' also give indices of substrings
 454 matched by parenthesis constructs in the pattern.  */)
 455      (regexp, string, start)
 456      Lisp_Object regexp, string, start;
 457 {
 458   return string_match_1 (regexp, string, start, 1);
 459 }
 460
 461 /* Match REGEXP against STRING, searching all of STRING,
 462    and return the index of the match, or negative on failure.
 463    This does not clobber the match data.  */
 464
 465 int
 466 fast_string_match (regexp, string)
 467      Lisp_Object regexp, string;
 468 {
 469   int val;
 470   struct re_pattern_buffer *bufp;
 471
 472   bufp = compile_pattern (regexp, 0, Qnil,
 473                           0, STRING_MULTIBYTE (string));
 474   immediate_quit = 1;
 475   re_match_object = string;
 476
 477   val = re_search (bufp, (char *) SDATA (string),
 478                    SBYTES (string), 0,
 479                    SBYTES (string), 0);
 480   immediate_quit = 0;
 481   return val;
 482 }
 483
 484 /* Match REGEXP against STRING, searching all of STRING ignoring case,
 485    and return the index of the match, or negative on failure.
 486    This does not clobber the match data.
 487    We assume that STRING contains single-byte characters.  */
 488
 489 extern Lisp_Object Vascii_downcase_table;
 490
 491 int
 492 fast_c_string_match_ignore_case (regexp, string)
 493      Lisp_Object regexp;
 494      const char *string;
 495 {
 496   int val;
 497   struct re_pattern_buffer *bufp;
 498   int len = strlen (string);
 499
 500   regexp = string_make_unibyte (regexp);
 501   re_match_object = Qt;
 502   bufp = compile_pattern (regexp, 0,
 503                           Vascii_canon_table, 0,
 504                           0);
 505   immediate_quit = 1;
 506   val = re_search (bufp, string, len, 0, len, 0);
 507   immediate_quit = 0;
 508   return val;
 509 }
 510
 511 /* Like fast_string_match but ignore case.  */
 512
 513 int
 514 fast_string_match_ignore_case (regexp, string)
 515      Lisp_Object regexp, string;
 516 {
 517   int val;
 518   struct re_pattern_buffer *bufp;
 519
 520   bufp = compile_pattern (regexp, 0, Vascii_canon_table,
 521                           0, STRING_MULTIBYTE (string));
 522   immediate_quit = 1;
 523   re_match_object = string;
 524
 525   val = re_search (bufp, (char *) SDATA (string),
 526                    SBYTES (string), 0,
 527                    SBYTES (string), 0);
 528   immediate_quit = 0;
 529   return val;
 530 }
 531 \f
 532 /* The newline cache: remembering which sections of text have no newlines.  */
 533
 534 /* If the user has requested newline caching, make sure it's on.
 535    Otherwise, make sure it's off.
 536    This is our cheezy way of associating an action with the change of
 537    state of a buffer-local variable.  */
 538 static void
 539 newline_cache_on_off (buf)
 540      struct buffer *buf;
 541 {
 542   if (NILP (buf->cache_long_line_scans))
 543     {
 544       /* It should be off.  */
 545       if (buf->newline_cache)
 546         {
 547           free_region_cache (buf->newline_cache);
 548           buf->newline_cache = 0;
 549         }
 550     }
 551   else
 552     {
 553       /* It should be on.  */
 554       if (buf->newline_cache == 0)
 555         buf->newline_cache = new_region_cache ();
 556     }
 557 }
 558
 559 \f
 560 /* Search for COUNT instances of the character TARGET between START and END.
 561
 562    If COUNT is positive, search forwards; END must be >= START.
 563    If COUNT is negative, search backwards for the -COUNTth instance;
 564       END must be <= START.
 565    If COUNT is zero, do anything you please; run rogue, for all I care.
 566
 567    If END is zero, use BEGV or ZV instead, as appropriate for the
 568    direction indicated by COUNT.
 569
 570    If we find COUNT instances, set *SHORTAGE to zero, and return the
 571    position past the COUNTth match.  Note that for reverse motion
 572    this is not the same as the usual convention for Emacs motion commands.
 573
 574    If we don't find COUNT instances before reaching END, set *SHORTAGE
 575    to the number of TARGETs left unfound, and return END.
 576
 577    If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
 578    except when inside redisplay.  */
 579
 580 int
 581 scan_buffer (target, start, end, count, shortage, allow_quit)
 582      register int target;
 583      int start, end;
 584      int count;
 585      int *shortage;
 586      int allow_quit;
 587 {
 588   struct region_cache *newline_cache;
 589   int direction;
 590
 591   if (count > 0)
 592     {
 593       direction = 1;
 594       if (! end) end = ZV;
 595     }
 596   else
 597     {
 598       direction = -1;
 599       if (! end) end = BEGV;
 600     }
 601
 602   newline_cache_on_off (current_buffer);
 603   newline_cache = current_buffer->newline_cache;
 604
 605   if (shortage != 0)
 606     *shortage = 0;
 607
 608   immediate_quit = allow_quit;
 609
 610   if (count > 0)
 611     while (start != end)
 612       {
 613         /* Our innermost scanning loop is very simple; it doesn't know
 614            about gaps, buffer ends, or the newline cache.  ceiling is
 615            the position of the last character before the next such
 616            obstacle --- the last character the dumb search loop should
 617            examine.  */
 618         int ceiling_byte = CHAR_TO_BYTE (end) - 1;
 619         int start_byte = CHAR_TO_BYTE (start);
 620         int tem;
 621
 622         /* If we're looking for a newline, consult the newline cache
 623            to see where we can avoid some scanning.  */
 624         if (target == '\n' && newline_cache)
 625           {
 626             int next_change;
 627             immediate_quit = 0;
 628             while (region_cache_forward
 629                    (current_buffer, newline_cache, start_byte, &next_change))
 630               start_byte = next_change;
 631             immediate_quit = allow_quit;
 632
 633             /* START should never be after END.  */
 634             if (start_byte > ceiling_byte)
 635               start_byte = ceiling_byte;
 636
 637             /* Now the text after start is an unknown region, and
 638                next_change is the position of the next known region. */
 639             ceiling_byte = min (next_change - 1, ceiling_byte);
 640           }
 641
 642         /* The dumb loop can only scan text stored in contiguous
 643            bytes. BUFFER_CEILING_OF returns the last character
 644            position that is contiguous, so the ceiling is the
 645            position after that.  */
 646         tem = BUFFER_CEILING_OF (start_byte);
 647         ceiling_byte = min (tem, ceiling_byte);
 648
 649         {
 650           /* The termination address of the dumb loop.  */
 651           register unsigned char *ceiling_addr
 652             = BYTE_POS_ADDR (ceiling_byte) + 1;
 653           register unsigned char *cursor
 654             = BYTE_POS_ADDR (start_byte);
 655           unsigned char *base = cursor;
 656
 657           while (cursor < ceiling_addr)
 658             {
 659               unsigned char *scan_start = cursor;
 660
 661               /* The dumb loop.  */
 662               while (*cursor != target && ++cursor < ceiling_addr)
 663                 ;
 664
 665               /* If we're looking for newlines, cache the fact that
 666                  the region from start to cursor is free of them. */
 667               if (target == '\n' && newline_cache)
 668                 know_region_cache (current_buffer, newline_cache,
 669                                    start_byte + scan_start - base,
 670                                    start_byte + cursor - base);
 671
 672               /* Did we find the target character?  */
 673               if (cursor < ceiling_addr)
 674                 {
 675                   if (--count == 0)
 676                     {
 677                       immediate_quit = 0;
 678                       return BYTE_TO_CHAR (start_byte + cursor - base + 1);
 679                     }
 680                   cursor++;
 681                 }
 682             }
 683
 684           start = BYTE_TO_CHAR (start_byte + cursor - base);
 685         }
 686       }
 687   else
 688     while (start > end)
 689       {
 690         /* The last character to check before the next obstacle.  */
 691         int ceiling_byte = CHAR_TO_BYTE (end);
 692         int start_byte = CHAR_TO_BYTE (start);
 693         int tem;
 694
 695         /* Consult the newline cache, if appropriate.  */
 696         if (target == '\n' && newline_cache)
 697           {
 698             int next_change;
 699             immediate_quit = 0;
 700             while (region_cache_backward
 701                    (current_buffer, newline_cache, start_byte, &next_change))
 702               start_byte = next_change;
 703             immediate_quit = allow_quit;
 704
 705             /* Start should never be at or before end.  */
 706             if (start_byte <= ceiling_byte)
 707               start_byte = ceiling_byte + 1;
 708
 709             /* Now the text before start is an unknown region, and
 710                next_change is the position of the next known region. */
 711             ceiling_byte = max (next_change, ceiling_byte);
 712           }
 713
 714         /* Stop scanning before the gap.  */
 715         tem = BUFFER_FLOOR_OF (start_byte - 1);
 716         ceiling_byte = max (tem, ceiling_byte);
 717
 718         {
 719           /* The termination address of the dumb loop.  */
 720           register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
 721           register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
 722           unsigned char *base = cursor;
 723
 724           while (cursor >= ceiling_addr)
 725             {
 726               unsigned char *scan_start = cursor;
 727
 728               while (*cursor != target && --cursor >= ceiling_addr)
 729                 ;
 730
 731               /* If we're looking for newlines, cache the fact that
 732                  the region from after the cursor to start is free of them.  */
 733               if (target == '\n' && newline_cache)
 734                 know_region_cache (current_buffer, newline_cache,
 735                                    start_byte + cursor - base,
 736                                    start_byte + scan_start - base);
 737
 738               /* Did we find the target character?  */
 739               if (cursor >= ceiling_addr)
 740                 {
 741                   if (++count >= 0)
 742                     {
 743                       immediate_quit = 0;
 744                       return BYTE_TO_CHAR (start_byte + cursor - base);
 745                     }
 746                   cursor--;
 747                 }
 748             }
 749
 750           start = BYTE_TO_CHAR (start_byte + cursor - base);
 751         }
 752       }
 753
 754   immediate_quit = 0;
 755   if (shortage != 0)
 756     *shortage = count * direction;
 757   return start;
 758 }
 759 \f
 760 /* Search for COUNT instances of a line boundary, which means either a
 761    newline or (if selective display enabled) a carriage return.
 762    Start at START.  If COUNT is negative, search backwards.
 763
 764    We report the resulting position by calling TEMP_SET_PT_BOTH.
 765
 766    If we find COUNT instances. we position after (always after,
 767    even if scanning backwards) the COUNTth match, and return 0.
 768
 769    If we don't find COUNT instances before reaching the end of the
 770    buffer (or the beginning, if scanning backwards), we return
 771    the number of line boundaries left unfound, and position at
 772    the limit we bumped up against.
 773
 774    If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
 775    except in special cases.  */
 776
 777 int
 778 scan_newline (start, start_byte, limit, limit_byte, count, allow_quit)
 779      int start, start_byte;
 780      int limit, limit_byte;
 781      register int count;
 782      int allow_quit;
 783 {
 784   int direction = ((count > 0) ? 1 : -1);
 785
 786   register unsigned char *cursor;
 787   unsigned char *base;
 788
 789   register int ceiling;
 790   register unsigned char *ceiling_addr;
 791
 792   int old_immediate_quit = immediate_quit;
 793
 794   /* The code that follows is like scan_buffer
 795      but checks for either newline or carriage return.  */
 796
 797   if (allow_quit)
 798     immediate_quit++;
 799
 800   start_byte = CHAR_TO_BYTE (start);
 801
 802   if (count > 0)
 803     {
 804       while (start_byte < limit_byte)
 805         {
 806           ceiling =  BUFFER_CEILING_OF (start_byte);
 807           ceiling = min (limit_byte - 1, ceiling);
 808           ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
 809           base = (cursor = BYTE_POS_ADDR (start_byte));
 810           while (1)
 811             {
 812               while (*cursor != '\n' && ++cursor != ceiling_addr)
 813                 ;
 814
 815               if (cursor != ceiling_addr)
 816                 {
 817                   if (--count == 0)
 818                     {
 819                       immediate_quit = old_immediate_quit;
 820                       start_byte = start_byte + cursor - base + 1;
 821                       start = BYTE_TO_CHAR (start_byte);
 822                       TEMP_SET_PT_BOTH (start, start_byte);
 823                       return 0;
 824                     }
 825                   else
 826                     if (++cursor == ceiling_addr)
 827                       break;
 828                 }
 829               else
 830                 break;
 831             }
 832           start_byte += cursor - base;
 833         }
 834     }
 835   else
 836     {
 837       while (start_byte > limit_byte)
 838         {
 839           ceiling = BUFFER_FLOOR_OF (start_byte - 1);
 840           ceiling = max (limit_byte, ceiling);
 841           ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
 842           base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
 843           while (1)
 844             {
 845               while (--cursor != ceiling_addr && *cursor != '\n')
 846                 ;
 847
 848               if (cursor != ceiling_addr)
 849                 {
 850                   if (++count == 0)
 851                     {
 852                       immediate_quit = old_immediate_quit;
 853                       /* Return the position AFTER the match we found.  */
 854                       start_byte = start_byte + cursor - base + 1;
 855                       start = BYTE_TO_CHAR (start_byte);
 856                       TEMP_SET_PT_BOTH (start, start_byte);
 857                       return 0;
 858                     }
 859                 }
 860               else
 861                 break;
 862             }
 863           /* Here we add 1 to compensate for the last decrement
 864              of CURSOR, which took it past the valid range.  */
 865           start_byte += cursor - base + 1;
 866         }
 867     }
 868
 869   TEMP_SET_PT_BOTH (limit, limit_byte);
 870   immediate_quit = old_immediate_quit;
 871
 872   return count * direction;
 873 }
 874
     /* Return what scan_buffer returns for a newline search that starts
        at FROM with count CNT.  The end argument is 0, no shortage
        pointer is supplied, and the final argument is 0 -- presumably
        what makes this the "no quit" variant; confirm against
        scan_buffer, which is defined elsewhere in this file.  */
 875 int
 876 find_next_newline_no_quit (from, cnt)
 877      register int from, cnt;
 878 {
 879   return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0);
 880 }
 881
 882 /* Like find_next_newline, except that the search stops at TO and the
 883    value is the position before the newline rather than after it.
 884    This isn't just find_next_newline (...) - 1, because the scan may
 885    hit TO first, in which case the position is returned unadjusted.  */
 886
 887 int
 888 find_before_next_newline (from, to, cnt)
 889      int from, to, cnt;
 890 {
 891   int missing;
 892   int stop = scan_buffer ('\n', from, to, cnt, &missing, 1);
 893
 894   /* A shortage of zero presumably means all CNT newlines were found
 895      (confirm against scan_buffer); only then step back over the last.  */
 896   return missing == 0 ? stop - 1 : stop;
 897 }
 898 \f
 899 /* Subroutines of Lisp buffer search functions. */
 900
 901 /* Search from point for STRING, as a regexp if RE is nonzero (with
 902    POSIX backtracking if POSIX is nonzero).  DIRECTION times COUNT
 903    (COUNT nil means 1) is the signed number of matches to find.
 904    BOUND, if non-nil, limits the search.  On success, move point to
 905    the position search_buffer returns and return it.  On failure,
 906    signal via signal_failure if NOERROR is nil; otherwise return nil,
 907    after moving point to the limit unless NOERROR is t.  */
 908 static Lisp_Object
 909 search_command (string, bound, noerror, count, direction, RE, posix)
 910      Lisp_Object string, bound, noerror, count;
 911      int direction;
 912      int RE;
 913      int posix;
 914 {
 915   register int np;
 916   int lim, lim_byte;
 917   int n = direction;
 918   Lisp_Object canon = Qnil, eqv = Qnil;
 919
 920   if (!NILP (count))
 921     {
 922       CHECK_NUMBER (count);
 923       n *= XINT (count);
 924     }
 925   CHECK_STRING (string);
 926
 927   if (!NILP (bound))
 928     {
 929       CHECK_NUMBER_COERCE_MARKER (bound);
 930       lim = XINT (bound);
 931       if ((n > 0 && lim < PT) || (n <= 0 && lim > PT))
 932         error ("Invalid search bound (wrong side of point)");
 933       /* Clip the bound to the accessible portion of the buffer.  */
 934       if (lim > ZV)
 935         lim = ZV, lim_byte = ZV_BYTE;
 936       else if (lim < BEGV)
 937         lim = BEGV, lim_byte = BEGV_BYTE;
 938       else
 939         lim_byte = CHAR_TO_BYTE (lim);
 940     }
 941   else if (n > 0)
 942     lim = ZV, lim_byte = ZV_BYTE;
 943   else
 944     lim = BEGV, lim_byte = BEGV_BYTE;
 945
 946   /* Pass the case tables only if the buffer asks for case folding.  */
 947   if (!NILP (current_buffer->case_fold_search))
 948     {
 949       canon = current_buffer->case_canon_table;
 950       eqv = current_buffer->case_eqv_table;
 951     }
 952   np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
 953                       canon, eqv, posix);
 954
 955   if (np > 0)
 956     {
 957       if (np < BEGV || np > ZV)
 958         abort ();
 959       SET_PT (np);
 960       return make_number (np);
 961     }
 962
 963   if (NILP (noerror))
 964     return signal_failure (string);
 965   if (!EQ (noerror, Qt))
 966     {
 967       if (lim < BEGV || lim > ZV)
 968         abort ();
 969       /* Returning LIM would be cleaner, but maybe programs depend on
 970          a value of nil here.  */
 971       SET_PT_BOTH (lim, lim_byte);
 972     }
 973   return Qnil;
 974 \f
 975 /* Return 1 if REGEXP contains no special regexp construct, i.e. if it
 976    matches just one constant string; otherwise return 0.  */
 977 static int
 978 trivial_regexp_p (regexp)
 979      Lisp_Object regexp;
 980 {
 981   unsigned char *p = SDATA (regexp);
 982   unsigned char *end = p + SBYTES (regexp);
 983
 984   while (p < end)
 985     switch (*p++)
 986       {
 987       case '.': case '*': case '+': case '?': case '[': case '^': case '$':
 988         return 0;
 989       case '\\':
 990         /* A backslash with nothing after it is not trivial either.  */
 991         if (p == end)
 992           return 0;
 993         switch (*p++)
 994           {
 995           case '|': case '(': case ')': case '`': case '\'': case 'b':
 996           case 'B': case '<': case '>': case 'w': case 'W': case 's':
 997           case 'S': case '=': case '{': case '}': case '_':
 998           case 'c': case 'C': /* for categoryspec and notcategoryspec */
 999           case '1': case '2': case '3': case '4': case '5':
1000           case '6': case '7': case '8': case '9':
1001             return 0;
1002           }
1003       }
1004   return 1;
1005 }
1006
1007 /* Search for the n'th occurrence of STRING in the current buffer,
1008    starting at position POS and stopping at position LIM,
1009    treating STRING as a literal string if RE is false or as
1010    a regular expression if RE is true.
1011
1012    If N is positive, searching is forward and LIM must be greater than POS.
1013    If N is negative, searching is backward and LIM must be less than POS.
1014
1015    Returns -x if x occurrences remain to be found (x > 0),
1016    or else the position at the beginning of the Nth occurrence
1017    (if searching backward) or the end (if searching forward).
1018
1019    POSIX is nonzero if we want full backtracking (POSIX style)
1020    for this pattern.  0 means backtrack only enough to get a valid match.  */
1021
1022 /* Set OUT to character D translated through table TRT (D itself if TRT
1023    is nil or has no integer entry for D).  D is evaluated only once.  */
1024 #define TRANSLATE(out, trt, d)                          \
1025 do                                                      \
1026   {                                                     \
1027     int translate_ch_ = (d);                            \
1028     (out) = translate_ch_;                              \
1029     if (! NILP (trt))                                   \
1030       {                                                 \
1031         Lisp_Object translate_elt_                      \
1032           = Faref (trt, make_number (translate_ch_));   \
1033         if (INTEGERP (translate_elt_))                  \
1034           (out) = XINT (translate_elt_);                \
1035       }                                                 \
1036   }                                                     \
1037 while (0)
1038
1039 static int
1040 search_buffer (string, pos, pos_byte, lim, lim_byte, n,
1041                RE, trt, inverse_trt, posix)
1042      Lisp_Object string;
1043      int pos;
1044      int pos_byte;
1045      int lim;
1046      int lim_byte;
1047      int n;
1048      int RE;
1049      Lisp_Object trt;
1050      Lisp_Object inverse_trt;
1051      int posix;
1052 {
1053   int len = SCHARS (string);
1054   int len_byte = SBYTES (string);
1055   register int i;
1056
1057   if (running_asynch_code)
1058     save_search_regs ();
1059
1060   /* Searching 0 times means don't move.  */
1061   /* Null string is found at starting position.  */
1062   if (len == 0 || n == 0)
1063     {
1064       set_search_regs (pos_byte, 0);
1065       return pos;
1066     }
1067
1068   if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp)))
1069     {
1070       unsigned char *p1, *p2;
1071       int s1, s2;
1072       struct re_pattern_buffer *bufp;
1073
1074       bufp = compile_pattern (string, &search_regs, trt, posix,
1075                               !NILP (current_buffer->enable_multibyte_characters));
1076
1077       immediate_quit = 1;       /* Quit immediately if user types ^G,
1078                                    because letting this function finish
1079                                    can take too long. */
1080       QUIT;                     /* Do a pending quit right away,
1081                                    to avoid paradoxical behavior */
1082       /* Get pointers and sizes of the two strings
1083          that make up the visible portion of the buffer. */
1084
1085       p1 = BEGV_ADDR;
1086       s1 = GPT_BYTE - BEGV_BYTE;
1087       p2 = GAP_END_ADDR;
1088       s2 = ZV_BYTE - GPT_BYTE;
1089       if (s1 < 0)
1090         {
1091           p2 = p1;
1092           s2 = ZV_BYTE - BEGV_BYTE;
1093           s1 = 0;
1094         }
1095       if (s2 < 0)
1096         {
1097           s1 = ZV_BYTE - BEGV_BYTE;
1098           s2 = 0;
1099         }
1100       re_match_object = Qnil;
1101
1102       while (n < 0)
1103         {
1104           int val;
1105           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1106                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1107                              &search_regs,
1108                              /* Don't allow match past current point */
1109                              pos_byte - BEGV_BYTE);
1110           if (val == -2)
1111             {
1112               matcher_overflow ();
1113             }
1114           if (val >= 0)
1115             {
1116               pos_byte = search_regs.start[0] + BEGV_BYTE;
1117               for (i = 0; i < search_regs.num_regs; i++)
1118                 if (search_regs.start[i] >= 0)
1119                   {
1120                     search_regs.start[i]
1121                       = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1122                     search_regs.end[i]
1123                       = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1124                   }
1125               XSETBUFFER (last_thing_searched, current_buffer);
1126               /* Set pos to the new position. */
1127               pos = search_regs.start[0];
1128             }
1129           else
1130             {
1131               immediate_quit = 0;
1132               return (n);
1133             }
1134           n++;
1135         }
1136       while (n > 0)
1137         {
1138           int val;
1139           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1140                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1141                              &search_regs,
1142                              lim_byte - BEGV_BYTE);
1143           if (val == -2)
1144             {
1145               matcher_overflow ();
1146             }
1147           if (val >= 0)
1148             {
1149               pos_byte = search_regs.end[0] + BEGV_BYTE;
1150               for (i = 0; i < search_regs.num_regs; i++)
1151                 if (search_regs.start[i] >= 0)
1152                   {
1153                     search_regs.start[i]
1154                       = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1155                     search_regs.end[i]
1156                       = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1157                   }
1158               XSETBUFFER (last_thing_searched, current_buffer);
1159               pos = search_regs.end[0];
1160             }
1161           else
1162             {
1163               immediate_quit = 0;
1164               return (0 - n);
1165             }
1166           n--;
1167         }
1168       immediate_quit = 0;
1169       return (pos);
1170     }
1171   else                          /* non-RE case */
1172     {
1173       unsigned char *raw_pattern, *pat;
1174       int raw_pattern_size;
1175       int raw_pattern_size_byte;
1176       unsigned char *patbuf;
1177       int multibyte = !NILP (current_buffer->enable_multibyte_characters);
1178       unsigned char *base_pat;
1179       /* Set to positive if we find a non-ASCII char that needs
1180          translation.  Otherwise set to zero later.  */
1181       int charset_base = -1;
1182       int boyer_moore_ok = 1;
1183
1184       /* MULTIBYTE says whether the text to be searched is multibyte.
1185          We must convert PATTERN to match that, or we will not really
1186          find things right.  */
1187
1188       if (multibyte == STRING_MULTIBYTE (string))
1189         {
1190           raw_pattern = (unsigned char *) SDATA (string);
1191           raw_pattern_size = SCHARS (string);
1192           raw_pattern_size_byte = SBYTES (string);
1193         }
1194       else if (multibyte)
1195         {
1196           raw_pattern_size = SCHARS (string);
1197           raw_pattern_size_byte
1198             = count_size_as_multibyte (SDATA (string),
1199                                        raw_pattern_size);
1200           raw_pattern = (unsigned char *) alloca (raw_pattern_size_byte + 1);
1201           copy_text (SDATA (string), raw_pattern,
1202                      SCHARS (string), 0, 1);
1203         }
1204       else
1205         {
1206           /* Converting multibyte to single-byte.
1207
1208              ??? Perhaps this conversion should be done in a special way
1209              by subtracting nonascii-insert-offset from each non-ASCII char,
1210              so that only the multibyte chars which really correspond to
1211              the chosen single-byte character set can possibly match.  */
1212           raw_pattern_size = SCHARS (string);
1213           raw_pattern_size_byte = SCHARS (string);
1214           raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
1215           copy_text (SDATA (string), raw_pattern,
1216                      SBYTES (string), 1, 0);
1217         }
1218       /* Copy and optionally translate the pattern.  Translation may
1219          lengthen a character, so size PATBUF for the worst case.  */
1220       len = raw_pattern_size;
1221       len_byte = raw_pattern_size_byte;
1222       patbuf = (unsigned char *) alloca (len * MAX_MULTIBYTE_LENGTH);
1223       pat = patbuf;
1224       base_pat = raw_pattern;
1225       if (multibyte)
1226         {
1227           /* Fill patbuf by translated characters in STRING while
1228              checking if we can use boyer-moore search.  If TRT is
1229              non-nil, we can use boyer-moore search only if TRT can be
1230              represented by the byte array of 256 elements.  For that,
1231              all non-ASCII case-equivalents of all case-sensitive
1232              characters in STRING must belong to the same charset and
1233              row.  */
1234
1235           while (--len >= 0)
1236             {
1237               unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str;
1238               int c, translated, inverse;
1239               int in_charlen, charlen;
1240
1241               /* If we got here and the RE flag is set, it's because we're
1242                  dealing with a regexp known to be trivial, so the backslash
1243                  just quotes the next character.  */
1244               if (RE && *base_pat == '\\')
1245                 {
1246                   len--;
1247                   raw_pattern_size--;
1248                   len_byte--;
1249                   base_pat++;
1250                 }
1251
1252               c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen);
1253
1254               if (NILP (trt))
1255                 {
1256                   str = base_pat;
1257                   charlen = in_charlen;
1258                 }
1259               else
1260                 {
1261                   /* Translate the character.  */
1262                   TRANSLATE (translated, trt, c);
1263                   charlen = CHAR_STRING (translated, str_base);
1264                   str = str_base;
1265
1266                   /* Check if C has any other case-equivalents.  */
1267                   TRANSLATE (inverse, inverse_trt, c);
1268                   /* If so, check if we can use boyer-moore.  */
1269                   if (c != inverse && boyer_moore_ok)
1270                     {
1271                       /* Check if all equivalents belong to the same
1272                          charset & row.  Note that the check of C
1273                          itself is done by the last iteration.  Note
1274                          also that we don't have to check ASCII
1275                          characters because boyer-moore search can
1276                          always handle their translation.  */
1277                       while (1)
1278                         {
1279                           if (ASCII_BYTE_P (inverse))
1280                             {
1281                               if (charset_base > 0)
1282                                 {
1283                                   boyer_moore_ok = 0;
1284                                   break;
1285                                 }
1286                               charset_base = 0;
1287                             }
1288                           else if (SINGLE_BYTE_CHAR_P (inverse))
1289                             {
1290                               /* Boyer-moore search can't handle a
1291                                  translation of an eight-bit
1292                                  character.  */
1293                               boyer_moore_ok = 0;
1294                               break;
1295                             }
1296                           else if (charset_base < 0)
1297                             charset_base = inverse & ~CHAR_FIELD3_MASK;
1298                           else if ((inverse & ~CHAR_FIELD3_MASK)
1299                                    != charset_base)
1300                             {
1301                               boyer_moore_ok = 0;
1302                               break;
1303                             }
1304                           if (c == inverse)
1305                             break;
1306                           TRANSLATE (inverse, inverse_trt, inverse);
1307                         }
1308                     }
1309                 }
1310               if (charset_base < 0)
1311                 charset_base = 0;
1312
1313               /* Store this character into the translated pattern.  */
1314               bcopy (str, pat, charlen);
1315               pat += charlen;
1316               base_pat += in_charlen;
1317               len_byte -= in_charlen;
1318             }
1319         }
1320       else
1321         {
1322           /* Unibyte buffer.  */
1323           charset_base = 0;
1324           while (--len >= 0)
1325             {
1326               int c, translated;
1327
1328               /* If we got here and the RE flag is set, it's because we're
1329                  dealing with a regexp known to be trivial, so the backslash
1330                  just quotes the next character.  */
1331               if (RE && *base_pat == '\\')
1332                 {
1333                   len--;
1334                   raw_pattern_size--;
1335                   base_pat++;
1336                 }
1337               c = *base_pat++;
1338               TRANSLATE (translated, trt, c);
1339               *pat++ = translated;
1340             }
1341         }
1342
1343       len_byte = pat - patbuf;
1344       len = raw_pattern_size;
1345       pat = base_pat = patbuf;
1346
1347       if (boyer_moore_ok)
1348         return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
1349                             pos, pos_byte, lim, lim_byte,
1350                             charset_base);
1351       else
1352         return simple_search (n, pat, len, len_byte, trt,
1353                               pos, pos_byte, lim, lim_byte);
1354     }
1355 }
1356 \f
1357 /* Do a simple string search N times for the string PAT (LEN chars,
1358    LEN_BYTE bytes) from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1359    TRT is the translation table; it is applied to buffer text only.
1360
1361    Return the character position where the match is found.
1362    Otherwise, if M matches remained to be found, return -M.
1363
1364    This kind of search works regardless of what is in PAT and
1365    regardless of what is in TRT.  It is used in cases where
1366    boyer_moore cannot work.  */
1367
1368 static int
1369 simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
1370      int n;
1371      unsigned char *pat;
1372      int len, len_byte;
1373      Lisp_Object trt;
1374      int pos, pos_byte;
1375      int lim, lim_byte;
1376 {
1377   int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1378   int forward = n > 0;
1379   /* Bytes of buffer text in the latest match; may differ from LEN_BYTE.  */
1380   int match_byte = 0;
1381
1382   if (lim > pos && multibyte)
1383     while (n > 0)
1384       {
1385         while (1)
1386           {
1387             /* Try matching at position POS.  */
1388             int this_pos = pos;
1389             int this_pos_byte = pos_byte;
1390             int this_len = len;
1391             int this_len_byte = len_byte;
1392             unsigned char *p = pat;
1393             if (pos + len > lim)
1394               goto stop;
1395
1396             while (this_len > 0)
1397               {
1398                 int charlen, buf_charlen;
1399                 int pat_ch, buf_ch;
1400
1401                 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1402                 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1403                                                  ZV_BYTE - this_pos_byte,
1404                                                  buf_charlen);
1405                 TRANSLATE (buf_ch, trt, buf_ch);
1406
1407                 if (buf_ch != pat_ch)
1408                   break;
1409
1410                 this_len_byte -= charlen;
1411                 this_len--;
1412                 p += charlen;
1413
1414                 this_pos_byte += buf_charlen;
1415                 this_pos++;
1416               }
1417
1418             if (this_len == 0)
1419               {
1420                 /* Advance by the bytes consumed in the buffer.  */
1421                 match_byte = this_pos_byte - pos_byte;
1422                 pos += len;
1423                 pos_byte = this_pos_byte;
1424                 break;
1425               }
1426
1427             INC_BOTH (pos, pos_byte);
1428           }
1429
1430         n--;
1431       }
1432   else if (lim > pos)
1433     while (n > 0)
1434       {
1435         while (1)
1436           {
1437             /* Try matching at position POS.  */
1438             int this_pos = pos;
1439             int this_len = len;
1440             unsigned char *p = pat;
1441
1442             if (pos + len > lim)
1443               goto stop;
1444
1445             while (this_len > 0)
1446               {
1447                 int pat_ch = *p++;
1448                 int buf_ch = FETCH_BYTE (this_pos);
1449                 TRANSLATE (buf_ch, trt, buf_ch);
1450
1451                 if (buf_ch != pat_ch)
1452                   break;
1453
1454                 this_len--;
1455                 this_pos++;
1456               }
1457
1458             if (this_len == 0)
1459               {
1460                 match_byte = len_byte;
1461                 pos += len;
1462                 break;
1463               }
1464
1465             pos++;
1466           }
1467
1468         n--;
1469       }
1470   /* Backwards search.  */
1471   else if (lim < pos && multibyte)
1472     while (n < 0)
1473       {
1474         while (1)
1475           {
1476             /* Try matching the LEN characters that end at POS, walking
1477                backward so no byte offset is derived from LEN_BYTE.  */
1478             int this_pos = pos;
1479             int this_pos_byte = pos_byte;
1480             int this_len = len;
1481             unsigned char *p = pat + len_byte;
1482
1483             if (pos - len < lim)
1484               goto stop;
1485
1486             while (this_len > 0)
1487               {
1488                 int pat_ch, buf_ch;
1489
1490                 DEC_BOTH (this_pos, this_pos_byte);
1491                 /* Back P up to the head byte of the previous character.  */
1492                 do
1493                   p--;
1494                 while (p > pat && ! CHAR_HEAD_P (*p));
1495                 pat_ch = STRING_CHAR (p, pat + len_byte - p);
1496                 buf_ch = STRING_CHAR (BYTE_POS_ADDR (this_pos_byte),
1497                                       ZV_BYTE - this_pos_byte);
1498                 TRANSLATE (buf_ch, trt, buf_ch);
1499
1500                 if (buf_ch != pat_ch)
1501                   break;
1502                 this_len--;
1503               }
1504
1505             if (this_len == 0)
1506               {
1507                 match_byte = pos_byte - this_pos_byte;
1508                 pos = this_pos;
1509                 pos_byte = this_pos_byte;
1510                 break;
1511               }
1512
1513             DEC_BOTH (pos, pos_byte);
1514           }
1515
1516         n++;
1517       }
1518   else if (lim < pos)
1519     while (n < 0)
1520       {
1521         while (1)
1522           {
1523             /* Try matching at position POS.  */
1524             int this_pos = pos - len;
1525             int this_len = len;
1526             unsigned char *p = pat;
1527
1528             if (pos - len < lim)
1529               goto stop;
1530
1531             while (this_len > 0)
1532               {
1533                 int pat_ch = *p++;
1534                 int buf_ch = FETCH_BYTE (this_pos);
1535                 TRANSLATE (buf_ch, trt, buf_ch);
1536
1537                 if (buf_ch != pat_ch)
1538                   break;
1539                 this_len--;
1540                 this_pos++;
1541               }
1542
1543             if (this_len == 0)
1544               {
1545                 match_byte = len_byte;
1546                 pos -= len;
1547                 break;
1548               }
1549
1550             pos--;
1551           }
1552
1553         n++;
1554       }
1555
1556  stop:
1557   if (n == 0)
1558     {
1559       if (forward)
1560         set_search_regs ((multibyte ? pos_byte : pos) - match_byte, match_byte);
1561       else
1562         set_search_regs (multibyte ? pos_byte : pos, match_byte);
1563       return pos;
1564     }
1565   return n > 0 ? -n : n;
1566 }
1567 \f
1568 /* Do Boyer-Moore search N times for the string BASE_PAT,
1569    whose length is LEN/LEN_BYTE,
1570    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1571    The sign of N says which direction we search in.
1572    TRT and INVERSE_TRT are translation tables.
1573    Characters in PAT are already translated by TRT.
1574
1575    This kind of search works if all the characters in BASE_PAT that
1576    have nontrivial translation are the same aside from the last byte.
1577    This makes it possible to translate just the last byte of a
1578    character, and do so after just a simple test of the context.
1579    CHARSET_BASE is nonzero iff there is such a non-ASCII character.
1580
1581    If that criterion is not satisfied, do not call this function.  */
1582
1583 static int
1584 boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
1585              pos, pos_byte, lim, lim_byte, charset_base)
1586      int n;
1587      unsigned char *base_pat;
1588      int len, len_byte;
1589      Lisp_Object trt;
1590      Lisp_Object inverse_trt;
1591      int pos, pos_byte;
1592      int lim, lim_byte;
1593      int charset_base;
1594 {
1595   int direction = ((n > 0) ? 1 : -1);
1596   register int dirlen;
1597   int infinity, limit, stride_for_teases = 0;
1598   register int *BM_tab;
1599   int *BM_tab_base;
1600   register unsigned char *cursor, *p_limit;
1601   register int i, j;
1602   unsigned char *pat, *pat_end;
1603   int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1604
1605   unsigned char simple_translate[0400];
1606   /* These are set to the preceding bytes of a byte to be translated
1607      if charset_base is nonzero.  As the maximum byte length of a
1608      multibyte character is 4, we have to check at most three previous
1609      bytes.  */
1610   int translate_prev_byte1 = 0;
1611   int translate_prev_byte2 = 0;
1612   int translate_prev_byte3 = 0;
1613
1614 #ifdef C_ALLOCA
1615   int BM_tab_space[0400];
1616   BM_tab = &BM_tab_space[0];
1617 #else
1618   BM_tab = (int *) alloca (0400 * sizeof (int));
1619 #endif
1620   /* The general approach is that we are going to maintain that we know */
1621   /* the first (closest to the present position, in whatever direction */
1622   /* we're searching) character that could possibly be the last */
1623   /* (furthest from present position) character of a valid match.  We */
1624   /* advance the state of our knowledge by looking at that character */
1625   /* and seeing whether it indeed matches the last character of the */
1626   /* pattern.  If it does, we take a closer look.  If it does not, we */
1627   /* move our pointer (to putative last characters) as far as is */
1628   /* logically possible.  This amount of movement, which I call a */
1629   /* stride, will be the length of the pattern if the actual character */
1630   /* appears nowhere in the pattern, otherwise it will be the distance */
1631   /* from the last occurrence of that character to the end of the */
1632   /* pattern. */
1633   /* As a coding trick, an enormous stride is coded into the table for */
1634   /* characters that match the last character.  This allows use of only */
1635   /* a single test, a test for having gone past the end of the */
1636   /* permissible match region, to test for both possible matches (when */
1637   /* the stride goes past the end immediately) and failure to */
1638   /* match (where you get nudged past the end one stride at a time). */
1639
1640   /* Here we make a "mickey mouse" BM table.  The stride of the search */
1641   /* is determined only by the last character of the putative match. */
1642   /* If that character does not match, we will stride the proper */
1643   /* distance to propose a match that superimposes it on the last */
1644   /* instance of a character that matches it (per trt), or misses */
1645   /* it entirely if there is none. */
1646
1647   dirlen = len_byte * direction;
1648   infinity = dirlen - (lim_byte + pos_byte + len_byte + len_byte) * direction;
1649
1650   /* Record position after the end of the pattern.  */
1651   pat_end = base_pat + len_byte;
1652   /* BASE_PAT points to a character that we start scanning from.
1653      It is the first character in a forward search,
1654      the last character in a backward search.  */
1655   if (direction < 0)
1656     base_pat = pat_end - 1;
1657
1658   BM_tab_base = BM_tab;
1659   BM_tab += 0400;
1660   j = dirlen;           /* to get it in a register */
1661   /* A character that does not appear in the pattern induces a */
1662   /* stride equal to the pattern length. */
1663   while (BM_tab_base != BM_tab)
1664     {
1665       *--BM_tab = j;
1666       *--BM_tab = j;
1667       *--BM_tab = j;
1668       *--BM_tab = j;
1669     }
1670
1671   /* We use this for translation, instead of TRT itself.
1672      We fill this in to handle the characters that actually
1673      occur in the pattern.  Others don't matter anyway!  */
1674   bzero (simple_translate, sizeof simple_translate);
1675   for (i = 0; i < 0400; i++)
1676     simple_translate[i] = i;
1677
1678   if (charset_base)
1679     {
1680       /* Setup translate_prev_byte1/2/3 from CHARSET_BASE.  Only a
1681          byte following them are the target of translation.  */
1682       int sample_char = charset_base | 0x20;
1683       unsigned char str[MAX_MULTIBYTE_LENGTH];
1684       int len = CHAR_STRING (sample_char, str);
1685
1686       translate_prev_byte1 = str[len - 2];
1687       if (len > 2)
1688         {
1689           translate_prev_byte2 = str[len - 3];
1690           if (len > 3)
1691             translate_prev_byte3 = str[len - 4];
1692         }
1693     }
1694
1695   i = 0;
1696   while (i != infinity)
1697     {
1698       unsigned char *ptr = base_pat + i;
1699       i += direction;
1700       if (i == dirlen)
1701         i = infinity;
1702       if (! NILP (trt))
1703         {
1704           /* If the byte currently looking at is the last of a
1705              character to check case-equivalents, set CH to that
1706              character.  An ASCII character and a non-ASCII character
1707              matching with CHARSET_BASE are to be checked.  */
1708           int ch = -1;
1709
1710           if (ASCII_BYTE_P (*ptr) || ! multibyte)
1711             ch = *ptr;
1712           else if (charset_base
1713                    && ((pat_end - ptr) == 1 || CHAR_HEAD_P (ptr[1])))
1714             {
1715               unsigned char *charstart = ptr - 1;
1716
1717               while (! (CHAR_HEAD_P (*charstart)))
1718                 charstart--;
1719               ch = STRING_CHAR (charstart, ptr - charstart + 1);
1720               if (charset_base != (ch & ~CHAR_FIELD3_MASK))
1721                 ch = -1;
1722             }
1723
1724           if (ch >= 0400)
1725             j = ((unsigned char) ch) | 0200;
1726           else
1727             j = *ptr;
1728
1729           if (i == infinity)
1730             stride_for_teases = BM_tab[j];
1731
1732           BM_tab[j] = dirlen - i;
1733           /* A translation table is accompanied by its inverse -- see */
1734           /* comment following downcase_table for details */
1735           if (ch >= 0)
1736             {
1737               int starting_ch = ch;
1738               int starting_j = j;
1739
1740               while (1)
1741                 {
1742                   TRANSLATE (ch, inverse_trt, ch);
1743                   if (ch >= 0400)
1744                     j = ((unsigned char) ch) | 0200;
1745                   else
1746                     j = (unsigned char) ch;
1747
1748                   /* For all the characters that map into CH,
1749                      set up simple_translate to map the last byte
1750                      into STARTING_J.  */
1751                   simple_translate[j] = starting_j;
1752                   if (ch == starting_ch)
1753                     break;
1754                   BM_tab[j] = dirlen - i;
1755                 }
1756             }
1757         }
1758       else
1759         {
1760           j = *ptr;
1761
1762           if (i == infinity)
1763             stride_for_teases = BM_tab[j];
1764           BM_tab[j] = dirlen - i;
1765         }
1766       /* stride_for_teases tells how much to stride if we get a */
1767       /* match on the far character but are subsequently */
1768       /* disappointed, by recording what the stride would have been */
1769       /* for that character if the last character had been */
1770       /* different. */
1771     }
1772   infinity = dirlen - infinity;
1773   pos_byte += dirlen - ((direction > 0) ? direction : 0);
1774   /* loop invariant - POS_BYTE points at where last char (first
1775      char if reverse) of pattern would align in a possible match.  */
1776   while (n != 0)
1777     {
1778       int tail_end;
1779       unsigned char *tail_end_ptr;
1780
1781       /* It's been reported that some (broken) compiler thinks that
1782          Boolean expressions in an arithmetic context are unsigned.
1783          Using an explicit ?1:0 prevents this.  */
1784       if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1785           < 0)
1786         return (n * (0 - direction));
1787       /* First we do the part we can by pointers (maybe nothing) */
1788       QUIT;
1789       pat = base_pat;
1790       limit = pos_byte - dirlen + direction;
1791       if (direction > 0)
1792         {
1793           limit = BUFFER_CEILING_OF (limit);
1794           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1795              can take on without hitting edge of buffer or the gap.  */
1796           limit = min (limit, pos_byte + 20000);
1797           limit = min (limit, lim_byte - 1);
1798         }
1799       else
1800         {
1801           limit = BUFFER_FLOOR_OF (limit);
1802           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1803              can take on without hitting edge of buffer or the gap.  */
1804           limit = max (limit, pos_byte - 20000);
1805           limit = max (limit, lim_byte);
1806         }
1807       tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1808       tail_end_ptr = BYTE_POS_ADDR (tail_end);
1809
1810       if ((limit - pos_byte) * direction > 20)
1811         {
1812           unsigned char *p2;
1813
1814           p_limit = BYTE_POS_ADDR (limit);
1815           p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1816           /* In this loop, pos + cursor - p2 is the surrogate for pos */
1817           while (1)             /* use one cursor setting as long as i can */
1818             {
1819               if (direction > 0) /* worth duplicating */
1820                 {
1821                   /* Use signed comparison if appropriate
1822                      to make cursor+infinity sure to be > p_limit.
1823                      Assuming that the buffer lies in a range of addresses
1824                      that are all "positive" (as ints) or all "negative",
1825                      either kind of comparison will work as long
1826                      as we don't step by infinity.  So pick the kind
1827                      that works when we do step by infinity.  */
1828                   if ((EMACS_INT) (p_limit + infinity) > (EMACS_INT) p_limit)
1829                     while ((EMACS_INT) cursor <= (EMACS_INT) p_limit)
1830                       cursor += BM_tab[*cursor];
1831                   else
1832                     while ((EMACS_UINT) cursor <= (EMACS_UINT) p_limit)
1833                       cursor += BM_tab[*cursor];
1834                 }
1835               else
1836                 {
1837                   if ((EMACS_INT) (p_limit + infinity) < (EMACS_INT) p_limit)
1838                     while ((EMACS_INT) cursor >= (EMACS_INT) p_limit)
1839                       cursor += BM_tab[*cursor];
1840                   else
1841                     while ((EMACS_UINT) cursor >= (EMACS_UINT) p_limit)
1842                       cursor += BM_tab[*cursor];
1843                 }
1844 /* If you are here, cursor is beyond the end of the searched region. */
1845 /* This can happen if you match on the far character of the pattern, */
1846 /* because the "stride" of that character is infinity, a number able */
1847 /* to throw you well beyond the end of the search.  It can also */
1848 /* happen if you fail to match within the permitted region and would */
1849 /* otherwise try a character beyond that region */
1850               if ((cursor - p_limit) * direction <= len_byte)
1851                 break;  /* a small overrun is genuine */
1852               cursor -= infinity; /* large overrun = hit */
1853               i = dirlen - direction;
1854               if (! NILP (trt))
1855                 {
1856                   while ((i -= direction) + direction != 0)
1857                     {
1858                       int ch;
1859                       cursor -= direction;
1860                       /* Translate only the last byte of a character.  */
1861                       if (! multibyte
1862                           || ((cursor == tail_end_ptr
1863                                || CHAR_HEAD_P (cursor[1]))
1864                               && (CHAR_HEAD_P (cursor[0])
1865                                   /* Check if this is the last byte of
1866                                      a translable character.  */
1867                                   || (translate_prev_byte1 == cursor[-1]
1868                                       && (CHAR_HEAD_P (translate_prev_byte1)
1869                                           || (translate_prev_byte2 == cursor[-2]
1870                                               && (CHAR_HEAD_P (translate_prev_byte2)
1871                                                   || (translate_prev_byte3 == cursor[-3]))))))))
1872                         ch = simple_translate[*cursor];
1873                       else
1874                         ch = *cursor;
1875                       if (pat[i] != ch)
1876                         break;
1877                     }
1878                 }
1879               else
1880                 {
1881                   while ((i -= direction) + direction != 0)
1882                     {
1883                       cursor -= direction;
1884                       if (pat[i] != *cursor)
1885                         break;
1886                     }
1887                 }
1888               cursor += dirlen - i - direction; /* fix cursor */
1889               if (i + direction == 0)
1890                 {
1891                   int position;
1892
1893                   cursor -= direction;
1894
1895                   position = pos_byte + cursor - p2 + ((direction > 0)
1896                                                        ? 1 - len_byte : 0);
1897                   set_search_regs (position, len_byte);
1898
1899                   if ((n -= direction) != 0)
1900                     cursor += dirlen; /* to resume search */
1901                   else
1902                     return ((direction > 0)
1903                             ? search_regs.end[0] : search_regs.start[0]);
1904                 }
1905               else
1906                 cursor += stride_for_teases; /* <sigh> we lose -  */
1907             }
1908           pos_byte += cursor - p2;
1909         }
1910       else
1911         /* Now we'll pick up a clump that has to be done the hard */
1912         /* way because it covers a discontinuity */
1913         {
1914           limit = ((direction > 0)
1915                    ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
1916                    : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
1917           limit = ((direction > 0)
1918                    ? min (limit + len_byte, lim_byte - 1)
1919                    : max (limit - len_byte, lim_byte));
1920           /* LIMIT is now the last value POS_BYTE can have
1921              and still be valid for a possible match.  */
1922           while (1)
1923             {
1924               /* This loop can be coded for space rather than */
1925               /* speed because it will usually run only once. */
1926               /* (the reach is at most len + 21, and typically */
1927               /* does not exceed len) */
1928               while ((limit - pos_byte) * direction >= 0)
1929                 pos_byte += BM_tab[FETCH_BYTE (pos_byte)];
1930               /* now run the same tests to distinguish going off the */
1931               /* end, a match or a phony match. */
1932               if ((pos_byte - limit) * direction <= len_byte)
1933                 break;  /* ran off the end */
1934               /* Found what might be a match.
1935                  Set POS_BYTE back to last (first if reverse) pos.  */
1936               pos_byte -= infinity;
1937               i = dirlen - direction;
1938               while ((i -= direction) + direction != 0)
1939                 {
1940                   int ch;
1941                   unsigned char *ptr;
1942                   pos_byte -= direction;
1943                   ptr = BYTE_POS_ADDR (pos_byte);
1944                   /* Translate only the last byte of a character.  */
1945                   if (! multibyte
1946                       || ((ptr == tail_end_ptr
1947                            || CHAR_HEAD_P (ptr[1]))
1948                           && (CHAR_HEAD_P (ptr[0])
1949                               /* Check if this is the last byte of a
1950                                  translable character.  */
1951                               || (translate_prev_byte1 == ptr[-1]
1952                                   && (CHAR_HEAD_P (translate_prev_byte1)
1953                                       || (translate_prev_byte2 == ptr[-2]
1954                                           && (CHAR_HEAD_P (translate_prev_byte2)
1955                                               || translate_prev_byte3 == ptr[-3])))))))
1956                     ch = simple_translate[*ptr];
1957                   else
1958                     ch = *ptr;
1959                   if (pat[i] != ch)
1960                     break;
1961                 }
1962               /* Above loop has moved POS_BYTE part or all the way
1963                  back to the first pos (last pos if reverse).
1964                  Set it once again at the last (first if reverse) char.  */
1965               pos_byte += dirlen - i- direction;
1966               if (i + direction == 0)
1967                 {
1968                   int position;
1969                   pos_byte -= direction;
1970
1971                   position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
1972
1973                   set_search_regs (position, len_byte);
1974
1975                   if ((n -= direction) != 0)
1976                     pos_byte += dirlen; /* to resume search */
1977                   else
1978                     return ((direction > 0)
1979                             ? search_regs.end[0] : search_regs.start[0]);
1980                 }
1981               else
1982                 pos_byte += stride_for_teases;
1983             }
1984           }
1985       /* We have done one clump.  Can we continue? */
1986       if ((lim_byte - pos_byte) * direction < 0)
1987         return ((0 - n) * direction);
1988     }
1989   return BYTE_TO_CHAR (pos_byte);
1990 }
1991
1992 /* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
1993    for the overall match just found in the current buffer.
1994    Also clear out the match data for registers 1 and up.  */
1995
1996 static void
1997 set_search_regs (beg_byte, nbytes)
1998      int beg_byte, nbytes;
1999 {
2000   int i;
2001
2002   /* Make sure we have registers in which to store
2003      the match position.  */
2004   if (search_regs.num_regs == 0)
2005     {
2006       search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
2007       search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
2008       search_regs.num_regs = 2;
2009     }
2010
2011   /* Clear out the other registers.  */
2012   for (i = 1; i < search_regs.num_regs; i++)
2013     {
2014       search_regs.start[i] = -1;
2015       search_regs.end[i] = -1;
2016     }
2017
2018   search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
2019   search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
2020   XSETBUFFER (last_thing_searched, current_buffer);
2021 }
2022 \f
2023 /* Given a string of words separated by word delimiters,
2024   compute a regexp that matches those exact words
2025   separated by arbitrary punctuation.  */
2026
2027 static Lisp_Object
2028 wordify (string)
2029      Lisp_Object string;
2030 {
2031   register unsigned char *p, *o;
2032   register int i, i_byte, len, punct_count = 0, word_count = 0;
2033   Lisp_Object val;
2034   int prev_c = 0;
2035   int adjust;
2036
2037   CHECK_STRING (string);
2038   p = SDATA (string);
2039   len = SCHARS (string);
2040
2041   for (i = 0, i_byte = 0; i < len; )
2042     {
2043       int c;
2044
2045       FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
2046
2047       if (SYNTAX (c) != Sword)
2048         {
2049           punct_count++;
2050           if (i > 0 && SYNTAX (prev_c) == Sword)
2051             word_count++;
2052         }
2053
2054       prev_c = c;
2055     }
2056
2057   if (SYNTAX (prev_c) == Sword)
2058     word_count++;
2059   if (!word_count)
2060     return empty_string;
2061
2062   adjust = - punct_count + 5 * (word_count - 1) + 4;
2063   if (STRING_MULTIBYTE (string))
2064     val = make_uninit_multibyte_string (len + adjust,
2065                                         SBYTES (string)
2066                                         + adjust);
2067   else
2068     val = make_uninit_string (len + adjust);
2069
2070   o = SDATA (val);
2071   *o++ = '\\';
2072   *o++ = 'b';
2073   prev_c = 0;
2074
2075   for (i = 0, i_byte = 0; i < len; )
2076     {
2077       int c;
2078       int i_byte_orig = i_byte;
2079
2080       FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
2081
2082       if (SYNTAX (c) == Sword)
2083         {
2084           bcopy (SDATA (string) + i_byte_orig, o,
2085                  i_byte - i_byte_orig);
2086           o += i_byte - i_byte_orig;
2087         }
2088       else if (i > 0 && SYNTAX (prev_c) == Sword && --word_count)
2089         {
2090           *o++ = '\\';
2091           *o++ = 'W';
2092           *o++ = '\\';
2093           *o++ = 'W';
2094           *o++ = '*';
2095         }
2096
2097       prev_c = c;
2098     }
2099
2100   *o++ = '\\';
2101   *o++ = 'b';
2102
2103   return val;
2104 }
2105 \f
2106 DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
2107        "MSearch backward: ",
2108        doc: /* Search backward from point for STRING.
2109 Set point to the beginning of the occurrence found, and return point.
2110 An optional second argument bounds the search; it is a buffer position.
2111 The match found must not extend before that position.
2112 Optional third argument, if t, means if fail just return nil (no error).
2113  If not nil and not t, position at limit of search and return nil.
2114 Optional fourth argument is repeat count--search for successive occurrences.
2115
2116 Search case-sensitivity is determined by the value of the variable
2117 `case-fold-search', which see.
2118
2119 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2120      (string, bound, noerror, count)
2121      Lisp_Object string, bound, noerror, count;
2122 {
2123   return search_command (string, bound, noerror, count, -1, 0, 0);
2124 }
2125
2126 DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
2127        doc: /* Search forward from point for STRING.
2128 Set point to the end of the occurrence found, and return point.
2129 An optional second argument bounds the search; it is a buffer position.
2130 The match found must not extend after that position.  nil is equivalent
2131   to (point-max).
2132 Optional third argument, if t, means if fail just return nil (no error).
2133   If not nil and not t, move to limit of search and return nil.
2134 Optional fourth argument is repeat count--search for successive occurrences.
2135
2136 Search case-sensitivity is determined by the value of the variable
2137 `case-fold-search', which see.
2138
2139 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2140      (string, bound, noerror, count)
2141      Lisp_Object string, bound, noerror, count;
2142 {
2143   return search_command (string, bound, noerror, count, 1, 0, 0);
2144 }
2145
2146 DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
2147        "sWord search backward: ",
2148        doc: /* Search backward from point for STRING, ignoring differences in punctuation.
2149 Set point to the beginning of the occurrence found, and return point.
2150 An optional second argument bounds the search; it is a buffer position.
2151 The match found must not extend before that position.
2152 Optional third argument, if t, means if fail just return nil (no error).
2153   If not nil and not t, move to limit of search and return nil.
2154 Optional fourth argument is repeat count--search for successive occurrences.  */)
2155      (string, bound, noerror, count)
2156      Lisp_Object string, bound, noerror, count;
2157 {
2158   return search_command (wordify (string), bound, noerror, count, -1, 1, 0);
2159 }
2160
2161 DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
2162        "sWord search: ",
2163        doc: /* Search forward from point for STRING, ignoring differences in punctuation.
2164 Set point to the end of the occurrence found, and return point.
2165 An optional second argument bounds the search; it is a buffer position.
2166 The match found must not extend after that position.
2167 Optional third argument, if t, means if fail just return nil (no error).
2168   If not nil and not t, move to limit of search and return nil.
2169 Optional fourth argument is repeat count--search for successive occurrences.  */)
2170      (string, bound, noerror, count)
2171      Lisp_Object string, bound, noerror, count;
2172 {
2173   return search_command (wordify (string), bound, noerror, count, 1, 1, 0);
2174 }
2175
2176 DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
2177        "sRE search backward: ",
2178        doc: /* Search backward from point for match for regular expression REGEXP.
2179 Set point to the beginning of the match, and return point.
2180 The match found is the one starting last in the buffer
2181 and yet ending before the origin of the search.
2182 An optional second argument bounds the search; it is a buffer position.
2183 The match found must start at or after that position.
2184 Optional third argument, if t, means if fail just return nil (no error).
2185   If not nil and not t, move to limit of search and return nil.
2186 Optional fourth argument is repeat count--search for successive occurrences.
2187 See also the functions `match-beginning', `match-end', `match-string',
2188 and `replace-match'.  */)
2189      (regexp, bound, noerror, count)
2190      Lisp_Object regexp, bound, noerror, count;
2191 {
2192   return search_command (regexp, bound, noerror, count, -1, 1, 0);
2193 }
2194
2195 DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
2196        "sRE search: ",
2197        doc: /* Search forward from point for regular expression REGEXP.
2198 Set point to the end of the occurrence found, and return point.
2199 An optional second argument bounds the search; it is a buffer position.
2200 The match found must not extend after that position.
2201 Optional third argument, if t, means if fail just return nil (no error).
2202   If not nil and not t, move to limit of search and return nil.
2203 Optional fourth argument is repeat count--search for successive occurrences.
2204 See also the functions `match-beginning', `match-end', `match-string',
2205 and `replace-match'.  */)
2206      (regexp, bound, noerror, count)
2207      Lisp_Object regexp, bound, noerror, count;
2208 {
2209   return search_command (regexp, bound, noerror, count, 1, 1, 0);
2210 }
2211
2212 DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
2213        "sPosix search backward: ",
2214        doc: /* Search backward from point for match for regular expression REGEXP.
2215 Find the longest match in accord with Posix regular expression rules.
2216 Set point to the beginning of the match, and return point.
2217 The match found is the one starting last in the buffer
2218 and yet ending before the origin of the search.
2219 An optional second argument bounds the search; it is a buffer position.
2220 The match found must start at or after that position.
2221 Optional third argument, if t, means if fail just return nil (no error).
2222   If not nil and not t, move to limit of search and return nil.
2223 Optional fourth argument is repeat count--search for successive occurrences.
2224 See also the functions `match-beginning', `match-end', `match-string',
2225 and `replace-match'.  */)
2226      (regexp, bound, noerror, count)
2227      Lisp_Object regexp, bound, noerror, count;
2228 {
2229   return search_command (regexp, bound, noerror, count, -1, 1, 1);
2230 }
2231
2232 DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
2233        "sPosix search: ",
2234        doc: /* Search forward from point for regular expression REGEXP.
2235 Find the longest match in accord with Posix regular expression rules.
2236 Set point to the end of the occurrence found, and return point.
2237 An optional second argument bounds the search; it is a buffer position.
2238 The match found must not extend after that position.
2239 Optional third argument, if t, means if fail just return nil (no error).
2240   If not nil and not t, move to limit of search and return nil.
2241 Optional fourth argument is repeat count--search for successive occurrences.
2242 See also the functions `match-beginning', `match-end', `match-string',
2243 and `replace-match'.  */)
2244      (regexp, bound, noerror, count)
2245      Lisp_Object regexp, bound, noerror, count;
2246 {
2247   return search_command (regexp, bound, noerror, count, 1, 1, 1);
2248 }
2249 \f
2250 DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
2251        doc: /* Replace text matched by last search with NEWTEXT.
2252 Leave point at the end of the replacement text.
2253
2254 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
2255 Otherwise maybe capitalize the whole text, or maybe just word initials,
2256 based on the replaced text.
2257 If the replaced text has only capital letters
2258 and has at least one multiletter word, convert NEWTEXT to all caps.
2259 Otherwise if all words are capitalized in the replaced text,
2260 capitalize each word in NEWTEXT.
2261
2262 If third arg LITERAL is non-nil, insert NEWTEXT literally.
2263 Otherwise treat `\\' as special:
2264   `\\&' in NEWTEXT means substitute original matched text.
2265   `\\N' means substitute what matched the Nth `\\(...\\)'.
2266        If Nth parens didn't match, substitute nothing.
2267   `\\\\' means insert one `\\'.
2268 Case conversion does not apply to these substitutions.
2269
2270 FIXEDCASE and LITERAL are optional arguments.
2271
2272 The optional fourth argument STRING can be a string to modify.
2273 This is meaningful when the previous match was done against STRING,
2274 using `string-match'.  When used this way, `replace-match'
2275 creates and returns a new string made by copying STRING and replacing
2276 the part of STRING that was matched.
2277
2278 The optional fifth argument SUBEXP specifies a subexpression;
2279 it says to replace just that subexpression with NEWTEXT,
2280 rather than replacing the entire matched text.
2281 This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
2282 `\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
2283 NEWTEXT in place of subexp N.
2284 This is useful only after a regular expression search or match,
2285 since only regular expressions have distinguished subexpressions.  */)
2286      (newtext, fixedcase, literal, string, subexp)
2287      Lisp_Object newtext, fixedcase, literal, string, subexp;
2288 {
2289   enum { nochange, all_caps, cap_initial } case_action;
2290   register int pos, pos_byte;
2291   int some_multiletter_word;
2292   int some_lowercase;
2293   int some_uppercase;
2294   int some_nonuppercase_initial;
2295   register int c, prevc;
2296   int sub;
2297   int opoint, newpoint;
2298
2299   CHECK_STRING (newtext);
2300
2301   if (! NILP (string))
2302     CHECK_STRING (string);
2303
2304   case_action = nochange;       /* We tried an initialization */
2305                                 /* but some C compilers blew it */
2306
2307   if (search_regs.num_regs <= 0)
2308     error ("`replace-match' called before any match found");
2309
2310   if (NILP (subexp))
2311     sub = 0;
2312   else
2313     {
2314       CHECK_NUMBER (subexp);
2315       sub = XINT (subexp);
2316       if (sub < 0 || sub >= search_regs.num_regs)
2317         args_out_of_range (subexp, make_number (search_regs.num_regs));
2318     }
2319
2320   if (NILP (string))
2321     {
2322       if (search_regs.start[sub] < BEGV
2323           || search_regs.start[sub] > search_regs.end[sub]
2324           || search_regs.end[sub] > ZV)
2325         args_out_of_range (make_number (search_regs.start[sub]),
2326                            make_number (search_regs.end[sub]));
2327     }
2328   else
2329     {
2330       if (search_regs.start[sub] < 0
2331           || search_regs.start[sub] > search_regs.end[sub]
2332           || search_regs.end[sub] > SCHARS (string))
2333         args_out_of_range (make_number (search_regs.start[sub]),
2334                            make_number (search_regs.end[sub]));
2335     }
2336
2337   if (NILP (fixedcase))
2338     {
2339       /* Decide how to casify by examining the matched text. */
2340       int last;
2341
2342       pos = search_regs.start[sub];
2343       last = search_regs.end[sub];
2344
2345       if (NILP (string))
2346         pos_byte = CHAR_TO_BYTE (pos);
2347       else
2348         pos_byte = string_char_to_byte (string, pos);
2349
2350       prevc = '\n';
2351       case_action = all_caps;
2352
2353       /* some_multiletter_word is set nonzero if any original word
2354          is more than one letter long. */
2355       some_multiletter_word = 0;
2356       some_lowercase = 0;
2357       some_nonuppercase_initial = 0;
2358       some_uppercase = 0;
2359
2360       while (pos < last)
2361         {
2362           if (NILP (string))
2363             {
2364               c = FETCH_CHAR (pos_byte);
2365               INC_BOTH (pos, pos_byte);
2366             }
2367           else
2368             FETCH_STRING_CHAR_ADVANCE (c, string, pos, pos_byte);
2369
2370           if (LOWERCASEP (c))
2371             {
2372               /* Cannot be all caps if any original char is lower case */
2373
2374               some_lowercase = 1;
2375               if (SYNTAX (prevc) != Sword)
2376                 some_nonuppercase_initial = 1;
2377               else
2378                 some_multiletter_word = 1;
2379             }
2380           else if (UPPERCASEP (c))
2381             {
2382               some_uppercase = 1;
2383               if (SYNTAX (prevc) != Sword)
2384                 ;
2385               else
2386                 some_multiletter_word = 1;
2387             }
2388           else
2389             {
2390               /* If the initial is a caseless word constituent,
2391                  treat that like a lowercase initial.  */
2392               if (SYNTAX (prevc) != Sword)
2393                 some_nonuppercase_initial = 1;
2394             }
2395
2396           prevc = c;
2397         }
2398
2399       /* Convert to all caps if the old text is all caps
2400          and has at least one multiletter word.  */
2401       if (! some_lowercase && some_multiletter_word)
2402         case_action = all_caps;
2403       /* Capitalize each word, if the old text has all capitalized words.  */
2404       else if (!some_nonuppercase_initial && some_multiletter_word)
2405         case_action = cap_initial;
2406       else if (!some_nonuppercase_initial && some_uppercase)
2407         /* Should x -> yz, operating on X, give Yz or YZ?
2408            We'll assume the latter.  */
2409         case_action = all_caps;
2410       else
2411         case_action = nochange;
2412     }
2413
2414   /* Do replacement in a string.  */
2415   if (!NILP (string))
2416     {
2417       Lisp_Object before, after;
2418
2419       before = Fsubstring (string, make_number (0),
2420                            make_number (search_regs.start[sub]));
2421       after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
2422
2423       /* Substitute parts of the match into NEWTEXT
2424          if desired.  */
2425       if (NILP (literal))
2426         {
2427           int lastpos = 0;
2428           int lastpos_byte = 0;
2429           /* We build up the substituted string in ACCUM.  */
2430           Lisp_Object accum;
2431           Lisp_Object middle;
2432           int length = SBYTES (newtext);
2433
2434           accum = Qnil;
2435
2436           for (pos_byte = 0, pos = 0; pos_byte < length;)
2437             {
2438               int substart = -1;
2439               int subend = 0;
2440               int delbackslash = 0;
2441
2442               FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2443
2444               if (c == '\\')
2445                 {
2446                   FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2447
2448                   if (c == '&')
2449                     {
2450                       substart = search_regs.start[sub];
2451                       subend = search_regs.end[sub];
2452                     }
2453                   else if (c >= '1' && c <= '9')
2454                     {
2455                       if (search_regs.start[c - '0'] >= 0
2456                           && c <= search_regs.num_regs + '0')
2457                         {
2458                           substart = search_regs.start[c - '0'];
2459                           subend = search_regs.end[c - '0'];
2460                         }
2461                       else
2462                         {
2463                           /* If that subexp did not match,
2464                              replace \\N with nothing.  */
2465                           substart = 0;
2466                           subend = 0;
2467                         }
2468                     }
2469                   else if (c == '\\')
2470                     delbackslash = 1;
2471                   else
2472                     error ("Invalid use of `\\' in replacement text");
2473                 }
2474               if (substart >= 0)
2475                 {
2476                   if (pos - 2 != lastpos)
2477                     middle = substring_both (newtext, lastpos,
2478                                              lastpos_byte,
2479                                              pos - 2, pos_byte - 2);
2480                   else
2481                     middle = Qnil;
2482                   accum = concat3 (accum, middle,
2483                                    Fsubstring (string,
2484                                                make_number (substart),
2485                                                make_number (subend)));
2486                   lastpos = pos;
2487                   lastpos_byte = pos_byte;
2488                 }
2489               else if (delbackslash)
2490                 {
2491                   middle = substring_both (newtext, lastpos,
2492                                            lastpos_byte,
2493                                            pos - 1, pos_byte - 1);
2494
2495                   accum = concat2 (accum, middle);
2496                   lastpos = pos;
2497                   lastpos_byte = pos_byte;
2498                 }
2499             }
2500
2501           if (pos != lastpos)
2502             middle = substring_both (newtext, lastpos,
2503                                      lastpos_byte,
2504                                      pos, pos_byte);
2505           else
2506             middle = Qnil;
2507
2508           newtext = concat2 (accum, middle);
2509         }
2510
2511       /* Do case substitution in NEWTEXT if desired.  */
2512       if (case_action == all_caps)
2513         newtext = Fupcase (newtext);
2514       else if (case_action == cap_initial)
2515         newtext = Fupcase_initials (newtext);
2516
2517       return concat3 (before, newtext, after);
2518     }
2519
2520   /* Record point, then move (quietly) to the start of the match.  */
2521   if (PT >= search_regs.end[sub])
2522     opoint = PT - ZV;
2523   else if (PT > search_regs.start[sub])
2524     opoint = search_regs.end[sub] - ZV;
2525   else
2526     opoint = PT;
2527
2528   /* If we want non-literal replacement,
2529      perform substitution on the replacement string.  */
2530   if (NILP (literal))
2531     {
2532       int length = SBYTES (newtext);
2533       unsigned char *substed;
2534       int substed_alloc_size, substed_len;
2535       int buf_multibyte = !NILP (current_buffer->enable_multibyte_characters);
2536       int str_multibyte = STRING_MULTIBYTE (newtext);
2537       Lisp_Object rev_tbl;
2538       int really_changed = 0;
2539
2540       rev_tbl= (!buf_multibyte && CHAR_TABLE_P (Vnonascii_translation_table)
2541                 ? Fchar_table_extra_slot (Vnonascii_translation_table,
2542                                           make_number (0))
2543                 : Qnil);
2544
2545       substed_alloc_size = length * 2 + 100;
2546       substed = (unsigned char *) xmalloc (substed_alloc_size + 1);
2547       substed_len = 0;
2548
2549       /* Go thru NEWTEXT, producing the actual text to insert in
2550          SUBSTED while adjusting multibyteness to that of the current
2551          buffer.  */
2552
2553       for (pos_byte = 0, pos = 0; pos_byte < length;)
2554         {
2555           unsigned char str[MAX_MULTIBYTE_LENGTH];
2556           unsigned char *add_stuff = NULL;
2557           int add_len = 0;
2558           int idx = -1;
2559
2560           if (str_multibyte)
2561             {
2562               FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte);
2563               if (!buf_multibyte)
2564                 c = multibyte_char_to_unibyte (c, rev_tbl);
2565             }
2566           else
2567             {
2568               /* Note that we don't have to increment POS.  */
2569               c = SREF (newtext, pos_byte++);
2570               if (buf_multibyte)
2571                 c = unibyte_char_to_multibyte (c);
2572             }
2573
2574           /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2575              or set IDX to a match index, which means put that part
2576              of the buffer text into SUBSTED.  */
2577
2578           if (c == '\\')
2579             {
2580               really_changed = 1;
2581
2582               if (str_multibyte)
2583                 {
2584                   FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext,
2585                                                       pos, pos_byte);
2586                   if (!buf_multibyte && !SINGLE_BYTE_CHAR_P (c))
2587                     c = multibyte_char_to_unibyte (c, rev_tbl);
2588                 }
2589               else
2590                 {
2591                   c = SREF (newtext, pos_byte++);
2592                   if (buf_multibyte)
2593                     c = unibyte_char_to_multibyte (c);
2594                 }
2595
2596               if (c == '&')
2597                 idx = sub;
2598               else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
2599                 {
2600                   if (search_regs.start[c - '0'] >= 1)
2601                     idx = c - '0';
2602                 }
2603               else if (c == '\\')
2604                 add_len = 1, add_stuff = "\\";
2605               else
2606                 {
2607                   xfree (substed);
2608                   error ("Invalid use of `\\' in replacement text");
2609                 }
2610             }
2611           else
2612             {
2613               add_len = CHAR_STRING (c, str);
2614               add_stuff = str;
2615             }
2616
2617           /* If we want to copy part of a previous match,
2618              set up ADD_STUFF and ADD_LEN to point to it.  */
2619           if (idx >= 0)
2620             {
2621               int begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
2622               add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
2623               if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
2624                 move_gap (search_regs.start[idx]);
2625               add_stuff = BYTE_POS_ADDR (begbyte);
2626             }
2627
2628           /* Now the stuff we want to add to SUBSTED
2629              is invariably ADD_LEN bytes starting at ADD_STUFF.  */
2630
2631           /* Make sure SUBSTED is big enough.  */
2632           if (substed_len + add_len >= substed_alloc_size)
2633             {
2634               substed_alloc_size = substed_len + add_len + 500;
2635               substed = (unsigned char *) xrealloc (substed,
2636                                                     substed_alloc_size + 1);
2637             }
2638
2639           /* Now add to the end of SUBSTED.  */
2640           if (add_stuff)
2641             {
2642               bcopy (add_stuff, substed + substed_len, add_len);
2643               substed_len += add_len;
2644             }
2645         }
2646
2647       if (really_changed)
2648         {
2649           if (buf_multibyte)
2650             {
2651               int nchars = multibyte_chars_in_text (substed, substed_len);
2652
2653               newtext = make_multibyte_string (substed, nchars, substed_len);
2654             }
2655           else
2656             newtext = make_unibyte_string (substed, substed_len);
2657         }
2658       xfree (substed);
2659     }
2660
2661   /* Replace the old text with the new in the cleanest possible way.  */
2662   replace_range (search_regs.start[sub], search_regs.end[sub],
2663                  newtext, 1, 0, 1);
2664   newpoint = search_regs.start[sub] + SCHARS (newtext);
2665
2666   if (case_action == all_caps)
2667     Fupcase_region (make_number (search_regs.start[sub]),
2668                     make_number (newpoint));
2669   else if (case_action == cap_initial)
2670     Fupcase_initials_region (make_number (search_regs.start[sub]),
2671                              make_number (newpoint));
2672
2673   /* Adjust search data for this change.  */
2674   {
2675     int oldend = search_regs.end[sub];
2676     int oldstart = search_regs.start[sub];
2677     int change = newpoint - search_regs.end[sub];
2678     int i;
2679
2680     for (i = 0; i < search_regs.num_regs; i++)
2681       {
2682         if (search_regs.start[i] >= oldend)
2683           search_regs.start[i] += change;
2684         else if (search_regs.start[i] > oldstart)
2685           search_regs.start[i] = oldstart;
2686         if (search_regs.end[i] >= oldend)
2687           search_regs.end[i] += change;
2688         else if (search_regs.end[i] > oldstart)
2689           search_regs.end[i] = oldstart;
2690       }
2691   }
2692
2693   /* Put point back where it was in the text.  */
2694   if (opoint <= 0)
2695     TEMP_SET_PT (opoint + ZV);
2696   else
2697     TEMP_SET_PT (opoint);
2698
2699   /* Now move point "officially" to the start of the inserted replacement.  */
2700   move_if_not_intangible (newpoint);
2701
2702   return Qnil;
2703 }
2704 \f
2705 static Lisp_Object
2706 match_limit (num, beginningp)
2707      Lisp_Object num;
2708      int beginningp;
2709 {
2710   register int n;
2711
2712   CHECK_NUMBER (num);
2713   n = XINT (num);
2714   if (n < 0)
2715     args_out_of_range (num, make_number (0));
2716   if (search_regs.num_regs <= 0)
2717     error ("No match data, because no search succeeded");
2718   if (n >= search_regs.num_regs
2719       || search_regs.start[n] < 0)
2720     return Qnil;
2721   return (make_number ((beginningp) ? search_regs.start[n]
2722                                     : search_regs.end[n]));
2723 }
2724
2725 DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
2726        doc: /* Return position of start of text matched by last search.
2727 SUBEXP, a number, specifies which parenthesized expression in the last
2728   regexp.
2729 Value is nil if SUBEXPth pair didn't match, or there were less than
2730   SUBEXP pairs.
2731 Zero means the entire text matched by the whole regexp or whole string.  */)
2732      (subexp)
2733      Lisp_Object subexp;
2734 {
2735   return match_limit (subexp, 1);
2736 }
2737
2738 DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
2739        doc: /* Return position of end of text matched by last search.
2740 SUBEXP, a number, specifies which parenthesized expression in the last
2741   regexp.
2742 Value is nil if SUBEXPth pair didn't match, or there were less than
2743   SUBEXP pairs.
2744 Zero means the entire text matched by the whole regexp or whole string.  */)
2745      (subexp)
2746      Lisp_Object subexp;
2747 {
2748   return match_limit (subexp, 0);
2749 }
2750
2751 DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 3, 0,
2752        doc: /* Return a list containing all info on what the last search matched.
2753 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2754 All the elements are markers or nil (nil if the Nth pair didn't match)
2755 if the last match was on a buffer; integers or nil if a string was matched.
2756 Use `store-match-data' to reinstate the data in this list.
2757
2758 If INTEGERS (the optional first argument) is non-nil, always use
2759 integers \(rather than markers) to represent buffer positions.  In
2760 this case, and if the last match was in a buffer, the buffer will get
2761 stored as one additional element at the end of the list.
2762
2763 If REUSE is a list, reuse it as part of the value.  If REUSE is long
2764 enough to hold all the values, and if INTEGERS is non-nil, no consing
2765 is done.
2766
2767 If optional third arg RESEAT is non-nil, any previous markers on the
2768 REUSE list will be modified to point to nowhere.
2769
2770 Return value is undefined if the last search failed.  */)
2771   (integers, reuse, reseat)
2772      Lisp_Object integers, reuse, reseat;
2773 {
2774   Lisp_Object tail, prev;
2775   Lisp_Object *data;
2776   int i, len;
2777
2778   if (!NILP (reseat))
2779     for (tail = reuse; CONSP (tail); tail = XCDR (tail))
2780       if (MARKERP (XCAR (tail)))
2781         {
2782           unchain_marker (XMARKER (XCAR (tail)));
2783           XSETCAR (tail, Qnil);
2784         }
2785
2786   if (NILP (last_thing_searched))
2787     return Qnil;
2788
2789   prev = Qnil;
2790
2791   data = (Lisp_Object *) alloca ((2 * search_regs.num_regs + 1)
2792                                  * sizeof (Lisp_Object));
2793
2794   len = 0;
2795   for (i = 0; i < search_regs.num_regs; i++)
2796     {
2797       int start = search_regs.start[i];
2798       if (start >= 0)
2799         {
2800           if (EQ (last_thing_searched, Qt)
2801               || ! NILP (integers))
2802             {
2803               XSETFASTINT (data[2 * i], start);
2804               XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
2805             }
2806           else if (BUFFERP (last_thing_searched))
2807             {
2808               data[2 * i] = Fmake_marker ();
2809               Fset_marker (data[2 * i],
2810                            make_number (start),
2811                            last_thing_searched);
2812               data[2 * i + 1] = Fmake_marker ();
2813               Fset_marker (data[2 * i + 1],
2814                            make_number (search_regs.end[i]),
2815                            last_thing_searched);
2816             }
2817           else
2818             /* last_thing_searched must always be Qt, a buffer, or Qnil.  */
2819             abort ();
2820
2821           len = 2 * i + 2;
2822         }
2823       else
2824         data[2 * i] = data[2 * i + 1] = Qnil;
2825     }
2826
2827   if (BUFFERP (last_thing_searched) && !NILP (integers))
2828     {
2829       data[len] = last_thing_searched;
2830       len++;
2831     }
2832
2833   /* If REUSE is not usable, cons up the values and return them.  */
2834   if (! CONSP (reuse))
2835     return Flist (len, data);
2836
2837   /* If REUSE is a list, store as many value elements as will fit
2838      into the elements of REUSE.  */
2839   for (i = 0, tail = reuse; CONSP (tail);
2840        i++, tail = XCDR (tail))
2841     {
2842       if (i < len)
2843         XSETCAR (tail, data[i]);
2844       else
2845         XSETCAR (tail, Qnil);
2846       prev = tail;
2847     }
2848
2849   /* If we couldn't fit all value elements into REUSE,
2850      cons up the rest of them and add them to the end of REUSE.  */
2851   if (i < len)
2852     XSETCDR (prev, Flist (len - i, data + i));
2853
2854   return reuse;
2855 }
2856
2857 /* Internal usage only:
2858    If RESEAT is `evaporate', put the markers back on the free list
2859    immediately.  No other references to the markers must exist in this case,
2860    so it is used only internally on the unwind stack and save-match-data from
2861    Lisp.  */
2862
2863 DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 2, 0,
2864        doc: /* Set internal data on last search match from elements of LIST.
2865 LIST should have been created by calling `match-data' previously.
2866
2867 If optional arg RESEAT is non-nil, make markers on LIST point nowhere.  */)
2868     (list, reseat)
2869      register Lisp_Object list, reseat;
2870 {
2871   register int i;
2872   register Lisp_Object marker;
2873
2874   if (running_asynch_code)
2875     save_search_regs ();
2876
2877   if (!CONSP (list) && !NILP (list))
2878     list = wrong_type_argument (Qconsp, list);
2879
2880   /* Unless we find a marker with a buffer or an explicit buffer
2881      in LIST, assume that this match data came from a string.  */
2882   last_thing_searched = Qt;
2883
2884   /* Allocate registers if they don't already exist.  */
2885   {
2886     int length = XFASTINT (Flength (list)) / 2;
2887
2888     if (length > search_regs.num_regs)
2889       {
2890         if (search_regs.num_regs == 0)
2891           {
2892             search_regs.start
2893               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2894             search_regs.end
2895               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2896           }
2897         else
2898           {
2899             search_regs.start
2900               = (regoff_t *) xrealloc (search_regs.start,
2901                                        length * sizeof (regoff_t));
2902             search_regs.end
2903               = (regoff_t *) xrealloc (search_regs.end,
2904                                        length * sizeof (regoff_t));
2905           }
2906
2907         for (i = search_regs.num_regs; i < length; i++)
2908           search_regs.start[i] = -1;
2909
2910         search_regs.num_regs = length;
2911       }
2912
2913     for (i = 0; CONSP (list); i++)
2914       {
2915         marker = XCAR (list);
2916         if (BUFFERP (marker))
2917           {
2918             last_thing_searched = marker;
2919             break;
2920           }
2921         if (i >= length)
2922           break;
2923         if (NILP (marker))
2924           {
2925             search_regs.start[i] = -1;
2926             list = XCDR (list);
2927           }
2928         else
2929           {
2930             int from;
2931             Lisp_Object m;
2932
2933             m = marker;
2934             if (MARKERP (marker))
2935               {
2936                 if (XMARKER (marker)->buffer == 0)
2937                   XSETFASTINT (marker, 0);
2938                 else
2939                   XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
2940               }
2941
2942             CHECK_NUMBER_COERCE_MARKER (marker);
2943             from = XINT (marker);
2944
2945             if (!NILP (reseat) && MARKERP (m))
2946               {
2947                 if (EQ (reseat, Qevaporate))
2948                   free_marker (m);
2949                 else
2950                   unchain_marker (XMARKER (m));
2951                 XSETCAR (list, Qnil);
2952               }
2953
2954             if ((list = XCDR (list), !CONSP (list)))
2955               break;
2956
2957             m = marker = XCAR (list);
2958
2959             if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
2960               XSETFASTINT (marker, 0);
2961
2962             CHECK_NUMBER_COERCE_MARKER (marker);
2963             search_regs.start[i] = from;
2964             search_regs.end[i] = XINT (marker);
2965
2966             if (!NILP (reseat) && MARKERP (m))
2967               {
2968                 if (EQ (reseat, Qevaporate))
2969                   free_marker (m);
2970                 else
2971                   unchain_marker (XMARKER (m));
2972                 XSETCAR (list, Qnil);
2973               }
2974           }
2975         list = XCDR (list);
2976       }
2977
2978     for (; i < search_regs.num_regs; i++)
2979       search_regs.start[i] = -1;
2980   }
2981
2982   return Qnil;
2983 }
2984
2985 /* If non-zero the match data have been saved in saved_search_regs
2986    during the execution of a sentinel or filter. */
2987 static int search_regs_saved;
2988 static struct re_registers saved_search_regs;
2989 static Lisp_Object saved_last_thing_searched;
2990
2991 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
2992    if asynchronous code (filter or sentinel) is running. */
2993 static void
2994 save_search_regs ()
2995 {
2996   if (!search_regs_saved)
2997     {
2998       saved_search_regs.num_regs = search_regs.num_regs;
2999       saved_search_regs.start = search_regs.start;
3000       saved_search_regs.end = search_regs.end;
3001       saved_last_thing_searched = last_thing_searched;
3002       last_thing_searched = Qnil;
3003       search_regs.num_regs = 0;
3004       search_regs.start = 0;
3005       search_regs.end = 0;
3006
3007       search_regs_saved = 1;
3008     }
3009 }
3010
3011 /* Called upon exit from filters and sentinels. */
3012 void
3013 restore_search_regs ()
3014 {
3015   if (search_regs_saved)
3016     {
3017       if (search_regs.num_regs > 0)
3018         {
3019           xfree (search_regs.start);
3020           xfree (search_regs.end);
3021         }
3022       search_regs.num_regs = saved_search_regs.num_regs;
3023       search_regs.start = saved_search_regs.start;
3024       search_regs.end = saved_search_regs.end;
3025       last_thing_searched = saved_last_thing_searched;
3026       saved_last_thing_searched = Qnil;
3027       search_regs_saved = 0;
3028     }
3029 }
3030
3031 static Lisp_Object
3032 unwind_set_match_data (list)
3033      Lisp_Object list;
3034 {
3035   /* It is safe to free (evaporate) the markers immediately.  */
3036   return Fset_match_data (list, Qevaporate);
3037 }
3038
3039 /* Called to unwind protect the match data.  */
3040 void
3041 record_unwind_save_match_data ()
3042 {
3043   record_unwind_protect (unwind_set_match_data,
3044                          Fmatch_data (Qnil, Qnil, Qnil));
3045 }
3046
3047 /* Quote a string to inactivate reg-expr chars */
3048
3049 DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
3050        doc: /* Return a regexp string which matches exactly STRING and nothing else.  */)
3051      (string)
3052      Lisp_Object string;
3053 {
3054   register unsigned char *in, *out, *end;
3055   register unsigned char *temp;
3056   int backslashes_added = 0;
3057
3058   CHECK_STRING (string);
3059
3060   temp = (unsigned char *) alloca (SBYTES (string) * 2);
3061
3062   /* Now copy the data into the new string, inserting escapes. */
3063
3064   in = SDATA (string);
3065   end = in + SBYTES (string);
3066   out = temp;
3067
3068   for (; in != end; in++)
3069     {
3070       if (*in == '['
3071           || *in == '*' || *in == '.' || *in == '\\'
3072           || *in == '?' || *in == '+'
3073           || *in == '^' || *in == '$')
3074         *out++ = '\\', backslashes_added++;
3075       *out++ = *in;
3076     }
3077
3078   return make_specified_string (temp,
3079                                 SCHARS (string) + backslashes_added,
3080                                 out - temp,
3081                                 STRING_MULTIBYTE (string));
3082 }
3083 \f
3084 void
3085 syms_of_search ()
3086 {
3087   register int i;
3088
3089   for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
3090     {
3091       searchbufs[i].buf.allocated = 100;
3092       searchbufs[i].buf.buffer = (unsigned char *) xmalloc (100);
3093       searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
3094       searchbufs[i].regexp = Qnil;
3095       searchbufs[i].whitespace_regexp = Qnil;
3096       staticpro (&searchbufs[i].regexp);
3097       staticpro (&searchbufs[i].whitespace_regexp);
3098       searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
3099     }
3100   searchbuf_head = &searchbufs[0];
3101
3102   Qsearch_failed = intern ("search-failed");
3103   staticpro (&Qsearch_failed);
3104   Qinvalid_regexp = intern ("invalid-regexp");
3105   staticpro (&Qinvalid_regexp);
3106
3107   Fput (Qsearch_failed, Qerror_conditions,
3108         Fcons (Qsearch_failed, Fcons (Qerror, Qnil)));
3109   Fput (Qsearch_failed, Qerror_message,
3110         build_string ("Search failed"));
3111
3112   Fput (Qinvalid_regexp, Qerror_conditions,
3113         Fcons (Qinvalid_regexp, Fcons (Qerror, Qnil)));
3114   Fput (Qinvalid_regexp, Qerror_message,
3115         build_string ("Invalid regexp"));
3116
3117   last_thing_searched = Qnil;
3118   staticpro (&last_thing_searched);
3119
3120   saved_last_thing_searched = Qnil;
3121   staticpro (&saved_last_thing_searched);
3122
3123   DEFVAR_LISP ("search-spaces-regexp", &Vsearch_spaces_regexp,
3124       doc: /* Regexp to substitute for bunches of spaces in regexp search.
3125 Some commands use this for user-specified regexps.
3126 Spaces that occur inside character classes or repetition operators
3127 or other such regexp constructs are not replaced with this.
3128 A value of nil (which is the normal value) means treat spaces literally.  */);
3129   Vsearch_spaces_regexp = Qnil;
3130
3131   defsubr (&Slooking_at);
3132   defsubr (&Sposix_looking_at);
3133   defsubr (&Sstring_match);
3134   defsubr (&Sposix_string_match);
3135   defsubr (&Ssearch_forward);
3136   defsubr (&Ssearch_backward);
3137   defsubr (&Sword_search_forward);
3138   defsubr (&Sword_search_backward);
3139   defsubr (&Sre_search_forward);
3140   defsubr (&Sre_search_backward);
3141   defsubr (&Sposix_search_forward);
3142   defsubr (&Sposix_search_backward);
3143   defsubr (&Sreplace_match);
3144   defsubr (&Smatch_beginning);
3145   defsubr (&Smatch_end);
3146   defsubr (&Smatch_data);
3147   defsubr (&Sset_match_data);
3148   defsubr (&Sregexp_quote);
3149 }
3150
3151 /* arch-tag: a6059d79-0552-4f14-a2cb-d379a4e3c78f
3152    (do not change this comment) */