src/search.c

   1 /* String search routines for GNU Emacs.
   2    Copyright (C) 1985, 86,87,93,94,97,98, 1999 Free Software Foundation, Inc.
   3
   4 This file is part of GNU Emacs.
   5
   6 GNU Emacs is free software; you can redistribute it and/or modify
   7 it under the terms of the GNU General Public License as published by
   8 the Free Software Foundation; either version 2, or (at your option)
   9 any later version.
  10
  11 GNU Emacs is distributed in the hope that it will be useful,
  12 but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 GNU General Public License for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GNU Emacs; see the file COPYING.  If not, write to
  18 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  19 Boston, MA 02111-1307, USA.  */
  20
  21
  22 #include <config.h>
  23 #include "lisp.h"
  24 #include "syntax.h"
  25 #include "category.h"
  26 #include "buffer.h"
  27 #include "charset.h"
  28 #include "region-cache.h"
  29 #include "commands.h"
  30 #include "blockinput.h"
  31 #include "intervals.h"
  32
  33 #include <sys/types.h>
  34 #include "regex.h"
  35
  36 #define REGEXP_CACHE_SIZE 20
  37
  38 /* If the regexp is non-nil, then the buffer contains the compiled form
  39    of that regexp, suitable for searching.  */
  40 struct regexp_cache
  41 {
  42   struct regexp_cache *next;
  43   Lisp_Object regexp;
  44   struct re_pattern_buffer buf;
  45   char fastmap[0400];
  46   /* Nonzero means regexp was compiled to do full POSIX backtracking.  */
  47   char posix;
  48 };
  49
  50 /* The instances of that struct.  */
  51 struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
  52
  53 /* The head of the linked list; points to the most recently used buffer.  */
  54 struct regexp_cache *searchbuf_head;
  55
  56
  57 /* Every call to re_match, etc., must pass &search_regs as the regs
  58    argument unless you can show it is unnecessary (i.e., if re_match
  59    is certainly going to be called again before region-around-match
  60    can be called).
  61
  62    Since the registers are now dynamically allocated, we need to make
  63    sure not to refer to the Nth register before checking that it has
  64    been allocated by checking search_regs.num_regs.
  65
  66    The regex code keeps track of whether it has allocated the search
  67    buffer using bits in the re_pattern_buffer.  This means that whenever
  68    you compile a new pattern, it completely forgets whether it has
  69    allocated any registers, and will allocate new registers the next
  70    time you call a searching or matching function.  Therefore, we need
  71    to call re_set_registers after compiling a new pattern or after
  72    setting the match registers, so that the regex functions will be
  73    able to free or re-allocate it properly.  */
  74 static struct re_registers search_regs;
  75
  76 /* The buffer in which the last search was performed, or
  77    Qt if the last search was done in a string;
  78    Qnil if no searching has been done yet.  */
  79 static Lisp_Object last_thing_searched;
  80
  81 /* error condition signaled when regexp compile_pattern fails */
  82
  83 Lisp_Object Qinvalid_regexp;
  84
  85 static void set_search_regs ();
  86 static void save_search_regs ();
  87 static int simple_search ();
  88 static int boyer_moore ();
  89 static int search_buffer ();
  90
  91 static void
  92 matcher_overflow ()
  93 {
  94   error ("Stack overflow in regexp matcher");
  95 }
  96
  97 /* Compile a regexp and signal a Lisp error if anything goes wrong.
  98    PATTERN is the pattern to compile.
  99    CP is the place to put the result.
 100    TRANSLATE is a translation table for ignoring case, or nil for none.
 101    REGP is the structure that says where to store the "register"
 102    values that will result from matching this pattern.
 103    If it is 0, we should compile the pattern not to record any
 104    subexpression bounds.
 105    POSIX is nonzero if we want full backtracking (POSIX style)
 106    for this pattern.  0 means backtrack only enough to get a valid match.
 107    MULTIBYTE is nonzero if we want to handle multibyte characters in
 108    PATTERN.  0 means all multibyte characters are recognized just as
 109    sequences of binary data.  */
 110
 111 static void
 112 compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte)
 113      struct regexp_cache *cp;
 114      Lisp_Object pattern;
 115      Lisp_Object translate;
 116      struct re_registers *regp;
 117      int posix;
 118      int multibyte;
 119 {
 120   unsigned char *raw_pattern;
 121   int raw_pattern_size;
 122   char *val;
 123   reg_syntax_t old;
 124
 125   /* MULTIBYTE says whether the text to be searched is multibyte.
 126      We must convert PATTERN to match that, or we will not really
 127      find things right.  */
 128
 129   if (multibyte == STRING_MULTIBYTE (pattern))
 130     {
 131       raw_pattern = (unsigned char *) SDATA (pattern);
 132       raw_pattern_size = SBYTES (pattern);
 133     }
 134   else if (multibyte)
 135     {
 136       raw_pattern_size = count_size_as_multibyte (SDATA (pattern),
 137                                                   SCHARS (pattern));
 138       raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
 139       copy_text (SDATA (pattern), raw_pattern,
 140                  SCHARS (pattern), 0, 1);
 141     }
 142   else
 143     {
 144       /* Converting multibyte to single-byte.
 145
 146          ??? Perhaps this conversion should be done in a special way
 147          by subtracting nonascii-insert-offset from each non-ASCII char,
 148          so that only the multibyte chars which really correspond to
 149          the chosen single-byte character set can possibly match.  */
 150       raw_pattern_size = SCHARS (pattern);
 151       raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
 152       copy_text (SDATA (pattern), raw_pattern,
 153                  SBYTES (pattern), 1, 0);
 154     }
 155
 156   cp->regexp = Qnil;
 157   cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
 158   cp->posix = posix;
 159   cp->buf.multibyte = multibyte;
 160   BLOCK_INPUT;
 161   old = re_set_syntax (RE_SYNTAX_EMACS
 162                        | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
 163   val = (char *) re_compile_pattern ((char *)raw_pattern,
 164                                      raw_pattern_size, &cp->buf);
 165   re_set_syntax (old);
 166   UNBLOCK_INPUT;
 167   if (val)
 168     Fsignal (Qinvalid_regexp, Fcons (build_string (val), Qnil));
 169
 170   cp->regexp = Fcopy_sequence (pattern);
 171 }
 172
 173 /* Shrink each compiled regexp buffer in the cache
 174    to the size actually used right now.
 175    This is called from garbage collection.  */
 176
 177 void
 178 shrink_regexp_cache ()
 179 {
 180   struct regexp_cache *cp;
 181
 182   for (cp = searchbuf_head; cp != 0; cp = cp->next)
 183     {
 184       cp->buf.allocated = cp->buf.used;
 185       cp->buf.buffer
 186         = (unsigned char *) xrealloc (cp->buf.buffer, cp->buf.used);
 187     }
 188 }
 189
 190 /* Compile a regexp if necessary, but first check to see if there's one in
 191    the cache.
 192    PATTERN is the pattern to compile.
 193    TRANSLATE is a translation table for ignoring case, or nil for none.
 194    REGP is the structure that says where to store the "register"
 195    values that will result from matching this pattern.
 196    If it is 0, we should compile the pattern not to record any
 197    subexpression bounds.
 198    POSIX is nonzero if we want full backtracking (POSIX style)
 199    for this pattern.  0 means backtrack only enough to get a valid match.  */
 200
 201 struct re_pattern_buffer *
 202 compile_pattern (pattern, regp, translate, posix, multibyte)
 203      Lisp_Object pattern;
 204      struct re_registers *regp;
 205      Lisp_Object translate;
 206      int posix, multibyte;
 207 {
 208   struct regexp_cache *cp, **cpp;
 209
 210   for (cpp = &searchbuf_head; ; cpp = &cp->next)
 211     {
 212       cp = *cpp;
 213       /* Entries are initialized to nil, and may be set to nil by
 214          compile_pattern_1 if the pattern isn't valid.  Don't apply
 215          string accessors in those cases.  However, compile_pattern_1
 216          is only applied to the cache entry we pick here to reuse.  So
 217          nil should never appear before a non-nil entry.  */
 218       if (NILP (cp->regexp))
 219         goto compile_it;
 220       if (SCHARS (cp->regexp) == SCHARS (pattern)
 221           && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
 222           && !NILP (Fstring_equal (cp->regexp, pattern))
 223           && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
 224           && cp->posix == posix
 225           && cp->buf.multibyte == multibyte)
 226         break;
 227
 228       /* If we're at the end of the cache, compile into the nil cell
 229          we found, or the last (least recently used) cell with a
 230          string value.  */
 231       if (cp->next == 0)
 232         {
 233         compile_it:
 234           compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte);
 235           break;
 236         }
 237     }
 238
 239   /* When we get here, cp (aka *cpp) contains the compiled pattern,
 240      either because we found it in the cache or because we just compiled it.
 241      Move it to the front of the queue to mark it as most recently used.  */
 242   *cpp = cp->next;
 243   cp->next = searchbuf_head;
 244   searchbuf_head = cp;
 245
 246   /* Advise the searching functions about the space we have allocated
 247      for register data.  */
 248   if (regp)
 249     re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
 250
 251   return &cp->buf;
 252 }
 253
 254 /* Error condition used for failing searches */
 255 Lisp_Object Qsearch_failed;
 256
 257 Lisp_Object
 258 signal_failure (arg)
 259      Lisp_Object arg;
 260 {
 261   Fsignal (Qsearch_failed, Fcons (arg, Qnil));
 262   return Qnil;
 263 }
 264 \f
 265 static Lisp_Object
 266 looking_at_1 (string, posix)
 267      Lisp_Object string;
 268      int posix;
 269 {
 270   Lisp_Object val;
 271   unsigned char *p1, *p2;
 272   int s1, s2;
 273   register int i;
 274   struct re_pattern_buffer *bufp;
 275
 276   if (running_asynch_code)
 277     save_search_regs ();
 278
 279   CHECK_STRING (string);
 280   bufp = compile_pattern (string, &search_regs,
 281                           (!NILP (current_buffer->case_fold_search)
 282                            ? DOWNCASE_TABLE : Qnil),
 283                           posix,
 284                           !NILP (current_buffer->enable_multibyte_characters));
 285
 286   immediate_quit = 1;
 287   QUIT;                 /* Do a pending quit right away, to avoid paradoxical behavior */
 288
 289   /* Get pointers and sizes of the two strings
 290      that make up the visible portion of the buffer. */
 291
 292   p1 = BEGV_ADDR;
 293   s1 = GPT_BYTE - BEGV_BYTE;
 294   p2 = GAP_END_ADDR;
 295   s2 = ZV_BYTE - GPT_BYTE;
 296   if (s1 < 0)
 297     {
 298       p2 = p1;
 299       s2 = ZV_BYTE - BEGV_BYTE;
 300       s1 = 0;
 301     }
 302   if (s2 < 0)
 303     {
 304       s1 = ZV_BYTE - BEGV_BYTE;
 305       s2 = 0;
 306     }
 307
 308   re_match_object = Qnil;
 309
 310   i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
 311                   PT_BYTE - BEGV_BYTE, &search_regs,
 312                   ZV_BYTE - BEGV_BYTE);
 313   immediate_quit = 0;
 314
 315   if (i == -2)
 316     matcher_overflow ();
 317
 318   val = (0 <= i ? Qt : Qnil);
 319   if (i >= 0)
 320     for (i = 0; i < search_regs.num_regs; i++)
 321       if (search_regs.start[i] >= 0)
 322         {
 323           search_regs.start[i]
 324             = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
 325           search_regs.end[i]
 326             = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
 327         }
 328   XSETBUFFER (last_thing_searched, current_buffer);
 329   return val;
 330 }
 331
 332 DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
 333        doc: /* Return t if text after point matches regular expression REGEXP.
 334 This function modifies the match data that `match-beginning',
 335 `match-end' and `match-data' access; save and restore the match
 336 data if you want to preserve them.  */)
 337      (regexp)
 338      Lisp_Object regexp;
 339 {
 340   return looking_at_1 (regexp, 0);
 341 }
 342
 343 DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
 344        doc: /* Return t if text after point matches regular expression REGEXP.
 345 Find the longest match, in accord with Posix regular expression rules.
 346 This function modifies the match data that `match-beginning',
 347 `match-end' and `match-data' access; save and restore the match
 348 data if you want to preserve them.  */)
 349      (regexp)
 350      Lisp_Object regexp;
 351 {
 352   return looking_at_1 (regexp, 1);
 353 }
 354 \f
 355 static Lisp_Object
 356 string_match_1 (regexp, string, start, posix)
 357      Lisp_Object regexp, string, start;
 358      int posix;
 359 {
 360   int val;
 361   struct re_pattern_buffer *bufp;
 362   int pos, pos_byte;
 363   int i;
 364
 365   if (running_asynch_code)
 366     save_search_regs ();
 367
 368   CHECK_STRING (regexp);
 369   CHECK_STRING (string);
 370
 371   if (NILP (start))
 372     pos = 0, pos_byte = 0;
 373   else
 374     {
 375       int len = SCHARS (string);
 376
 377       CHECK_NUMBER (start);
 378       pos = XINT (start);
 379       if (pos < 0 && -pos <= len)
 380         pos = len + pos;
 381       else if (0 > pos || pos > len)
 382         args_out_of_range (string, start);
 383       pos_byte = string_char_to_byte (string, pos);
 384     }
 385
 386   bufp = compile_pattern (regexp, &search_regs,
 387                           (!NILP (current_buffer->case_fold_search)
 388                            ? DOWNCASE_TABLE : Qnil),
 389                           posix,
 390                           STRING_MULTIBYTE (string));
 391   immediate_quit = 1;
 392   re_match_object = string;
 393
 394   val = re_search (bufp, (char *) SDATA (string),
 395                    SBYTES (string), pos_byte,
 396                    SBYTES (string) - pos_byte,
 397                    &search_regs);
 398   immediate_quit = 0;
 399   last_thing_searched = Qt;
 400   if (val == -2)
 401     matcher_overflow ();
 402   if (val < 0) return Qnil;
 403
 404   for (i = 0; i < search_regs.num_regs; i++)
 405     if (search_regs.start[i] >= 0)
 406       {
 407         search_regs.start[i]
 408           = string_byte_to_char (string, search_regs.start[i]);
 409         search_regs.end[i]
 410           = string_byte_to_char (string, search_regs.end[i]);
 411       }
 412
 413   return make_number (string_byte_to_char (string, val));
 414 }
 415
 416 DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
 417        doc: /* Return index of start of first match for REGEXP in STRING, or nil.
 418 Case is ignored if `case-fold-search' is non-nil in the current buffer.
 419 If third arg START is non-nil, start search at that index in STRING.
 420 For index of first char beyond the match, do (match-end 0).
 421 `match-end' and `match-beginning' also give indices of substrings
 422 matched by parenthesis constructs in the pattern.
 423
 424 You can use the function `match-string' to extract the substrings
 425 matched by the parenthesis constructions in REGEXP. */)
 426      (regexp, string, start)
 427      Lisp_Object regexp, string, start;
 428 {
 429   return string_match_1 (regexp, string, start, 0);
 430 }
 431
 432 DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
 433        doc: /* Return index of start of first match for REGEXP in STRING, or nil.
 434 Find the longest match, in accord with Posix regular expression rules.
 435 Case is ignored if `case-fold-search' is non-nil in the current buffer.
 436 If third arg START is non-nil, start search at that index in STRING.
 437 For index of first char beyond the match, do (match-end 0).
 438 `match-end' and `match-beginning' also give indices of substrings
 439 matched by parenthesis constructs in the pattern.  */)
 440      (regexp, string, start)
 441      Lisp_Object regexp, string, start;
 442 {
 443   return string_match_1 (regexp, string, start, 1);
 444 }
 445
 446 /* Match REGEXP against STRING, searching all of STRING,
 447    and return the index of the match, or negative on failure.
 448    This does not clobber the match data.  */
 449
 450 int
 451 fast_string_match (regexp, string)
 452      Lisp_Object regexp, string;
 453 {
 454   int val;
 455   struct re_pattern_buffer *bufp;
 456
 457   bufp = compile_pattern (regexp, 0, Qnil,
 458                           0, STRING_MULTIBYTE (string));
 459   immediate_quit = 1;
 460   re_match_object = string;
 461
 462   val = re_search (bufp, (char *) SDATA (string),
 463                    SBYTES (string), 0,
 464                    SBYTES (string), 0);
 465   immediate_quit = 0;
 466   return val;
 467 }
 468
 469 /* Match REGEXP against STRING, searching all of STRING ignoring case,
 470    and return the index of the match, or negative on failure.
 471    This does not clobber the match data.
 472    We assume that STRING contains single-byte characters.  */
 473
 474 extern Lisp_Object Vascii_downcase_table;
 475
 476 int
 477 fast_c_string_match_ignore_case (regexp, string)
 478      Lisp_Object regexp;
 479      const char *string;
 480 {
 481   int val;
 482   struct re_pattern_buffer *bufp;
 483   int len = strlen (string);
 484
 485   regexp = string_make_unibyte (regexp);
 486   re_match_object = Qt;
 487   bufp = compile_pattern (regexp, 0,
 488                           Vascii_downcase_table, 0,
 489                           0);
 490   immediate_quit = 1;
 491   val = re_search (bufp, string, len, 0, len, 0);
 492   immediate_quit = 0;
 493   return val;
 494 }
 495 \f
 496 /* The newline cache: remembering which sections of text have no newlines.  */
 497
 498 /* If the user has requested newline caching, make sure it's on.
 499    Otherwise, make sure it's off.
 500    This is our cheezy way of associating an action with the change of
 501    state of a buffer-local variable.  */
 502 static void
 503 newline_cache_on_off (buf)
 504      struct buffer *buf;
 505 {
 506   if (NILP (buf->cache_long_line_scans))
 507     {
 508       /* It should be off.  */
 509       if (buf->newline_cache)
 510         {
 511           free_region_cache (buf->newline_cache);
 512           buf->newline_cache = 0;
 513         }
 514     }
 515   else
 516     {
 517       /* It should be on.  */
 518       if (buf->newline_cache == 0)
 519         buf->newline_cache = new_region_cache ();
 520     }
 521 }
 522
 523 \f
 524 /* Search for COUNT instances of the character TARGET between START and END.
 525
 526    If COUNT is positive, search forwards; END must be >= START.
 527    If COUNT is negative, search backwards for the -COUNTth instance;
 528       END must be <= START.
 529    If COUNT is zero, do anything you please; run rogue, for all I care.
 530
 531    If END is zero, use BEGV or ZV instead, as appropriate for the
 532    direction indicated by COUNT.
 533
 534    If we find COUNT instances, set *SHORTAGE to zero, and return the
 535    position after the COUNTth match.  Note that for reverse motion
 536    this is not the same as the usual convention for Emacs motion commands.
 537
 538    If we don't find COUNT instances before reaching END, set *SHORTAGE
 539    to the number of TARGETs left unfound, and return END.
 540
 541    If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
 542    except when inside redisplay.  */
 543
 544 int
 545 scan_buffer (target, start, end, count, shortage, allow_quit)
 546      register int target;
 547      int start, end;
 548      int count;
 549      int *shortage;
 550      int allow_quit;
 551 {
 552   struct region_cache *newline_cache;
 553   int direction;
 554
 555   if (count > 0)
 556     {
 557       direction = 1;
 558       if (! end) end = ZV;
 559     }
 560   else
 561     {
 562       direction = -1;
 563       if (! end) end = BEGV;
 564     }
 565
 566   newline_cache_on_off (current_buffer);
 567   newline_cache = current_buffer->newline_cache;
 568
 569   if (shortage != 0)
 570     *shortage = 0;
 571
 572   immediate_quit = allow_quit;
 573
 574   if (count > 0)
 575     while (start != end)
 576       {
 577         /* Our innermost scanning loop is very simple; it doesn't know
 578            about gaps, buffer ends, or the newline cache.  ceiling is
 579            the position of the last character before the next such
 580            obstacle --- the last character the dumb search loop should
 581            examine.  */
 582         int ceiling_byte = CHAR_TO_BYTE (end) - 1;
 583         int start_byte = CHAR_TO_BYTE (start);
 584         int tem;
 585
 586         /* If we're looking for a newline, consult the newline cache
 587            to see where we can avoid some scanning.  */
 588         if (target == '\n' && newline_cache)
 589           {
 590             int next_change;
 591             immediate_quit = 0;
 592             while (region_cache_forward
 593                    (current_buffer, newline_cache, start_byte, &next_change))
 594               start_byte = next_change;
 595             immediate_quit = allow_quit;
 596
 597             /* START should never be after END.  */
 598             if (start_byte > ceiling_byte)
 599               start_byte = ceiling_byte;
 600
 601             /* Now the text after start is an unknown region, and
 602                next_change is the position of the next known region. */
 603             ceiling_byte = min (next_change - 1, ceiling_byte);
 604           }
 605
 606         /* The dumb loop can only scan text stored in contiguous
 607            bytes. BUFFER_CEILING_OF returns the last character
 608            position that is contiguous, so the ceiling is the
 609            position after that.  */
 610         tem = BUFFER_CEILING_OF (start_byte);
 611         ceiling_byte = min (tem, ceiling_byte);
 612
 613         {
 614           /* The termination address of the dumb loop.  */
 615           register unsigned char *ceiling_addr
 616             = BYTE_POS_ADDR (ceiling_byte) + 1;
 617           register unsigned char *cursor
 618             = BYTE_POS_ADDR (start_byte);
 619           unsigned char *base = cursor;
 620
 621           while (cursor < ceiling_addr)
 622             {
 623               unsigned char *scan_start = cursor;
 624
 625               /* The dumb loop.  */
 626               while (*cursor != target && ++cursor < ceiling_addr)
 627                 ;
 628
 629               /* If we're looking for newlines, cache the fact that
 630                  the region from start to cursor is free of them. */
 631               if (target == '\n' && newline_cache)
 632                 know_region_cache (current_buffer, newline_cache,
 633                                    start_byte + scan_start - base,
 634                                    start_byte + cursor - base);
 635
 636               /* Did we find the target character?  */
 637               if (cursor < ceiling_addr)
 638                 {
 639                   if (--count == 0)
 640                     {
 641                       immediate_quit = 0;
 642                       return BYTE_TO_CHAR (start_byte + cursor - base + 1);
 643                     }
 644                   cursor++;
 645                 }
 646             }
 647
 648           start = BYTE_TO_CHAR (start_byte + cursor - base);
 649         }
 650       }
 651   else
 652     while (start > end)
 653       {
 654         /* The last character to check before the next obstacle.  */
 655         int ceiling_byte = CHAR_TO_BYTE (end);
 656         int start_byte = CHAR_TO_BYTE (start);
 657         int tem;
 658
 659         /* Consult the newline cache, if appropriate.  */
 660         if (target == '\n' && newline_cache)
 661           {
 662             int next_change;
 663             immediate_quit = 0;
 664             while (region_cache_backward
 665                    (current_buffer, newline_cache, start_byte, &next_change))
 666               start_byte = next_change;
 667             immediate_quit = allow_quit;
 668
 669             /* Start should never be at or before end.  */
 670             if (start_byte <= ceiling_byte)
 671               start_byte = ceiling_byte + 1;
 672
 673             /* Now the text before start is an unknown region, and
 674                next_change is the position of the next known region. */
 675             ceiling_byte = max (next_change, ceiling_byte);
 676           }
 677
 678         /* Stop scanning before the gap.  */
 679         tem = BUFFER_FLOOR_OF (start_byte - 1);
 680         ceiling_byte = max (tem, ceiling_byte);
 681
 682         {
 683           /* The termination address of the dumb loop.  */
 684           register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
 685           register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
 686           unsigned char *base = cursor;
 687
 688           while (cursor >= ceiling_addr)
 689             {
 690               unsigned char *scan_start = cursor;
 691
 692               while (*cursor != target && --cursor >= ceiling_addr)
 693                 ;
 694
 695               /* If we're looking for newlines, cache the fact that
 696                  the region from after the cursor to start is free of them.  */
 697               if (target == '\n' && newline_cache)
 698                 know_region_cache (current_buffer, newline_cache,
 699                                    start_byte + cursor - base,
 700                                    start_byte + scan_start - base);
 701
 702               /* Did we find the target character?  */
 703               if (cursor >= ceiling_addr)
 704                 {
 705                   if (++count >= 0)
 706                     {
 707                       immediate_quit = 0;
 708                       return BYTE_TO_CHAR (start_byte + cursor - base);
 709                     }
 710                   cursor--;
 711                 }
 712             }
 713
 714           start = BYTE_TO_CHAR (start_byte + cursor - base);
 715         }
 716       }
 717
 718   immediate_quit = 0;
 719   if (shortage != 0)
 720     *shortage = count * direction;
 721   return start;
 722 }
 723 \f
 724 /* Search for COUNT instances of a line boundary, which means either a
 725    newline or (if selective display enabled) a carriage return.
 726    Start at START.  If COUNT is negative, search backwards.
 727
 728    We report the resulting position by calling TEMP_SET_PT_BOTH.
 729
 730    If we find COUNT instances. we position after (always after,
 731    even if scanning backwards) the COUNTth match, and return 0.
 732
 733    If we don't find COUNT instances before reaching the end of the
 734    buffer (or the beginning, if scanning backwards), we return
 735    the number of line boundaries left unfound, and position at
 736    the limit we bumped up against.
 737
 738    If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
 739    except in special cases.  */
 740
 741 int
 742 scan_newline (start, start_byte, limit, limit_byte, count, allow_quit)
 743      int start, start_byte;
 744      int limit, limit_byte;
 745      register int count;
 746      int allow_quit;
 747 {
 748   int direction = ((count > 0) ? 1 : -1);
 749
 750   register unsigned char *cursor;
 751   unsigned char *base;
 752
 753   register int ceiling;
 754   register unsigned char *ceiling_addr;
 755
 756   int old_immediate_quit = immediate_quit;
 757
 758   /* The code that follows is like scan_buffer
 759      but checks for either newline or carriage return.  */
 760
 761   if (allow_quit)
 762     immediate_quit++;
 763
 764   start_byte = CHAR_TO_BYTE (start);
 765
 766   if (count > 0)
 767     {
 768       while (start_byte < limit_byte)
 769         {
 770           ceiling =  BUFFER_CEILING_OF (start_byte);
 771           ceiling = min (limit_byte - 1, ceiling);
 772           ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
 773           base = (cursor = BYTE_POS_ADDR (start_byte));
 774           while (1)
 775             {
 776               while (*cursor != '\n' && ++cursor != ceiling_addr)
 777                 ;
 778
 779               if (cursor != ceiling_addr)
 780                 {
 781                   if (--count == 0)
 782                     {
 783                       immediate_quit = old_immediate_quit;
 784                       start_byte = start_byte + cursor - base + 1;
 785                       start = BYTE_TO_CHAR (start_byte);
 786                       TEMP_SET_PT_BOTH (start, start_byte);
 787                       return 0;
 788                     }
 789                   else
 790                     if (++cursor == ceiling_addr)
 791                       break;
 792                 }
 793               else
 794                 break;
 795             }
 796           start_byte += cursor - base;
 797         }
 798     }
 799   else
 800     {
 801       while (start_byte > limit_byte)
 802         {
 803           ceiling = BUFFER_FLOOR_OF (start_byte - 1);
 804           ceiling = max (limit_byte, ceiling);
 805           ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
 806           base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
 807           while (1)
 808             {
 809               while (--cursor != ceiling_addr && *cursor != '\n')
 810                 ;
 811
 812               if (cursor != ceiling_addr)
 813                 {
 814                   if (++count == 0)
 815                     {
 816                       immediate_quit = old_immediate_quit;
 817                       /* Return the position AFTER the match we found.  */
 818                       start_byte = start_byte + cursor - base + 1;
 819                       start = BYTE_TO_CHAR (start_byte);
 820                       TEMP_SET_PT_BOTH (start, start_byte);
 821                       return 0;
 822                     }
 823                 }
 824               else
 825                 break;
 826             }
 827           /* Here we add 1 to compensate for the last decrement
 828              of CURSOR, which took it past the valid range.  */
 829           start_byte += cursor - base + 1;
 830         }
 831     }
 832
 833   TEMP_SET_PT_BOTH (limit, limit_byte);
 834   immediate_quit = old_immediate_quit;
 835
 836   return count * direction;
 837 }
 838
 839 int
 840 find_next_newline_no_quit (from, cnt)
 841      register int from, cnt;
 842 {
 843   return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0);
 844 }
 845
 846 /* Like find_next_newline, but returns position before the newline,
 847    not after, and only search up to TO.  This isn't just
 848    find_next_newline (...)-1, because you might hit TO.  */
 849
 850 int
 851 find_before_next_newline (from, to, cnt)
 852      int from, to, cnt;
 853 {
 854   int shortage;
 855   int pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);
 856
 857   if (shortage == 0)
 858     pos--;
 859
 860   return pos;
 861 }
 862 \f
 863 /* Subroutines of Lisp buffer search functions. */
 864
 865 static Lisp_Object
 866 search_command (string, bound, noerror, count, direction, RE, posix)
 867      Lisp_Object string, bound, noerror, count;
 868      int direction;
 869      int RE;
 870      int posix;
 871 {
 872   register int np;
 873   int lim, lim_byte;
 874   int n = direction;
 875
 876   if (!NILP (count))
 877     {
 878       CHECK_NUMBER (count);
 879       n *= XINT (count);
 880     }
 881
 882   CHECK_STRING (string);
 883   if (NILP (bound))
 884     {
 885       if (n > 0)
 886         lim = ZV, lim_byte = ZV_BYTE;
 887       else
 888         lim = BEGV, lim_byte = BEGV_BYTE;
 889     }
 890   else
 891     {
 892       CHECK_NUMBER_COERCE_MARKER (bound);
 893       lim = XINT (bound);
 894       if (n > 0 ? lim < PT : lim > PT)
 895         error ("Invalid search bound (wrong side of point)");
 896       if (lim > ZV)
 897         lim = ZV, lim_byte = ZV_BYTE;
 898       else if (lim < BEGV)
 899         lim = BEGV, lim_byte = BEGV_BYTE;
 900       else
 901         lim_byte = CHAR_TO_BYTE (lim);
 902     }
 903
 904   np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
 905                       (!NILP (current_buffer->case_fold_search)
 906                        ? current_buffer->case_canon_table
 907                        : Qnil),
 908                       (!NILP (current_buffer->case_fold_search)
 909                        ? current_buffer->case_eqv_table
 910                        : Qnil),
 911                       posix);
 912   if (np <= 0)
 913     {
 914       if (NILP (noerror))
 915         return signal_failure (string);
 916       if (!EQ (noerror, Qt))
 917         {
 918           if (lim < BEGV || lim > ZV)
 919             abort ();
 920           SET_PT_BOTH (lim, lim_byte);
 921           return Qnil;
 922 #if 0 /* This would be clean, but maybe programs depend on
 923          a value of nil here.  */
 924           np = lim;
 925 #endif
 926         }
 927       else
 928         return Qnil;
 929     }
 930
 931   if (np < BEGV || np > ZV)
 932     abort ();
 933
 934   SET_PT (np);
 935
 936   return make_number (np);
 937 }
 938 \f
 939 /* Return 1 if REGEXP it matches just one constant string.  */
 940
 941 static int
 942 trivial_regexp_p (regexp)
 943      Lisp_Object regexp;
 944 {
 945   int len = SBYTES (regexp);
 946   unsigned char *s = SDATA (regexp);
 947   while (--len >= 0)
 948     {
 949       switch (*s++)
 950         {
 951         case '.': case '*': case '+': case '?': case '[': case '^': case '$':
 952           return 0;
 953         case '\\':
 954           if (--len < 0)
 955             return 0;
 956           switch (*s++)
 957             {
 958             case '|': case '(': case ')': case '`': case '\'': case 'b':
 959             case 'B': case '<': case '>': case 'w': case 'W': case 's':
 960             case 'S': case '=': case '{': case '}': case '_':
 961             case 'c': case 'C': /* for categoryspec and notcategoryspec */
 962             case '1': case '2': case '3': case '4': case '5':
 963             case '6': case '7': case '8': case '9':
 964               return 0;
 965             }
 966         }
 967     }
 968   return 1;
 969 }
 970
 971 /* Search for the n'th occurrence of STRING in the current buffer,
 972    starting at position POS and stopping at position LIM,
 973    treating STRING as a literal string if RE is false or as
 974    a regular expression if RE is true.
 975
 976    If N is positive, searching is forward and LIM must be greater than POS.
 977    If N is negative, searching is backward and LIM must be less than POS.
 978
 979    Returns -x if x occurrences remain to be found (x > 0),
 980    or else the position at the beginning of the Nth occurrence
 981    (if searching backward) or the end (if searching forward).
 982
 983    POSIX is nonzero if we want full backtracking (POSIX style)
 984    for this pattern.  0 means backtrack only enough to get a valid match.  */
 985
 /* Store into OUT the translation of character code D under the
    table TRT.

    If TRT is nil, or the element Faref fetches for D is not an
    integer, OUT receives D itself; otherwise OUT receives the
    integer value of that element.

    D is evaluated exactly once, so an argument with side effects is
    safe, and OUT may be the same variable as D.  The locals carry a
    `translate_' prefix so they cannot capture a caller's variable.  */

 #define TRANSLATE(out, trt, d)                                  \
 do                                                              \
   {                                                             \
     int translate_in_ = (d);                                    \
     if (! NILP (trt))                                           \
       {                                                         \
         Lisp_Object translate_tem_;                             \
         translate_tem_ = Faref ((trt),                          \
                                 make_number (translate_in_));   \
         if (INTEGERP (translate_tem_))                          \
           (out) = XINT (translate_tem_);                        \
         else                                                    \
           (out) = translate_in_;                                \
       }                                                         \
     else                                                        \
       (out) = translate_in_;                                    \
   }                                                             \
 while (0)
1002
1003 static int
1004 search_buffer (string, pos, pos_byte, lim, lim_byte, n,
1005                RE, trt, inverse_trt, posix)
1006      Lisp_Object string;
1007      int pos;
1008      int pos_byte;
1009      int lim;
1010      int lim_byte;
1011      int n;
1012      int RE;
1013      Lisp_Object trt;
1014      Lisp_Object inverse_trt;
1015      int posix;
1016 {
1017   int len = SCHARS (string);
1018   int len_byte = SBYTES (string);
1019   register int i;
1020
1021   if (running_asynch_code)
1022     save_search_regs ();
1023
1024   /* Searching 0 times means don't move.  */
1025   /* Null string is found at starting position.  */
1026   if (len == 0 || n == 0)
1027     {
1028       set_search_regs (pos_byte, 0);
1029       return pos;
1030     }
1031
1032   if (RE && !trivial_regexp_p (string))
1033     {
1034       unsigned char *p1, *p2;
1035       int s1, s2;
1036       struct re_pattern_buffer *bufp;
1037
1038       bufp = compile_pattern (string, &search_regs, trt, posix,
1039                               !NILP (current_buffer->enable_multibyte_characters));
1040
1041       immediate_quit = 1;       /* Quit immediately if user types ^G,
1042                                    because letting this function finish
1043                                    can take too long. */
1044       QUIT;                     /* Do a pending quit right away,
1045                                    to avoid paradoxical behavior */
1046       /* Get pointers and sizes of the two strings
1047          that make up the visible portion of the buffer. */
1048
1049       p1 = BEGV_ADDR;
1050       s1 = GPT_BYTE - BEGV_BYTE;
1051       p2 = GAP_END_ADDR;
1052       s2 = ZV_BYTE - GPT_BYTE;
1053       if (s1 < 0)
1054         {
1055           p2 = p1;
1056           s2 = ZV_BYTE - BEGV_BYTE;
1057           s1 = 0;
1058         }
1059       if (s2 < 0)
1060         {
1061           s1 = ZV_BYTE - BEGV_BYTE;
1062           s2 = 0;
1063         }
1064       re_match_object = Qnil;
1065
1066       while (n < 0)
1067         {
1068           int val;
1069           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1070                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1071                              &search_regs,
1072                              /* Don't allow match past current point */
1073                              pos_byte - BEGV_BYTE);
1074           if (val == -2)
1075             {
1076               matcher_overflow ();
1077             }
1078           if (val >= 0)
1079             {
1080               pos_byte = search_regs.start[0] + BEGV_BYTE;
1081               for (i = 0; i < search_regs.num_regs; i++)
1082                 if (search_regs.start[i] >= 0)
1083                   {
1084                     search_regs.start[i]
1085                       = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1086                     search_regs.end[i]
1087                       = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1088                   }
1089               XSETBUFFER (last_thing_searched, current_buffer);
1090               /* Set pos to the new position. */
1091               pos = search_regs.start[0];
1092             }
1093           else
1094             {
1095               immediate_quit = 0;
1096               return (n);
1097             }
1098           n++;
1099         }
1100       while (n > 0)
1101         {
1102           int val;
1103           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1104                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1105                              &search_regs,
1106                              lim_byte - BEGV_BYTE);
1107           if (val == -2)
1108             {
1109               matcher_overflow ();
1110             }
1111           if (val >= 0)
1112             {
1113               pos_byte = search_regs.end[0] + BEGV_BYTE;
1114               for (i = 0; i < search_regs.num_regs; i++)
1115                 if (search_regs.start[i] >= 0)
1116                   {
1117                     search_regs.start[i]
1118                       = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1119                     search_regs.end[i]
1120                       = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1121                   }
1122               XSETBUFFER (last_thing_searched, current_buffer);
1123               pos = search_regs.end[0];
1124             }
1125           else
1126             {
1127               immediate_quit = 0;
1128               return (0 - n);
1129             }
1130           n--;
1131         }
1132       immediate_quit = 0;
1133       return (pos);
1134     }
1135   else                          /* non-RE case */
1136     {
1137       unsigned char *raw_pattern, *pat;
1138       int raw_pattern_size;
1139       int raw_pattern_size_byte;
1140       unsigned char *patbuf;
1141       int multibyte = !NILP (current_buffer->enable_multibyte_characters);
1142       unsigned char *base_pat = SDATA (string);
1143       int charset_base = -1;
1144       int boyer_moore_ok = 1;
1145
1146       /* MULTIBYTE says whether the text to be searched is multibyte.
1147          We must convert PATTERN to match that, or we will not really
1148          find things right.  */
1149
1150       if (multibyte == STRING_MULTIBYTE (string))
1151         {
1152           raw_pattern = (unsigned char *) SDATA (string);
1153           raw_pattern_size = SCHARS (string);
1154           raw_pattern_size_byte = SBYTES (string);
1155         }
1156       else if (multibyte)
1157         {
1158           raw_pattern_size = SCHARS (string);
1159           raw_pattern_size_byte
1160             = count_size_as_multibyte (SDATA (string),
1161                                        raw_pattern_size);
1162           raw_pattern = (unsigned char *) alloca (raw_pattern_size_byte + 1);
1163           copy_text (SDATA (string), raw_pattern,
1164                      SCHARS (string), 0, 1);
1165         }
1166       else
1167         {
1168           /* Converting multibyte to single-byte.
1169
1170              ??? Perhaps this conversion should be done in a special way
1171              by subtracting nonascii-insert-offset from each non-ASCII char,
1172              so that only the multibyte chars which really correspond to
1173              the chosen single-byte character set can possibly match.  */
1174           raw_pattern_size = SCHARS (string);
1175           raw_pattern_size_byte = SCHARS (string);
1176           raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
1177           copy_text (SDATA (string), raw_pattern,
1178                      SBYTES (string), 1, 0);
1179         }
1180
1181       /* Copy and optionally translate the pattern.  */
1182       len = raw_pattern_size;
1183       len_byte = raw_pattern_size_byte;
1184       patbuf = (unsigned char *) alloca (len_byte);
1185       pat = patbuf;
1186       base_pat = raw_pattern;
1187       if (multibyte)
1188         {
1189           while (--len >= 0)
1190             {
1191               unsigned char str[MAX_MULTIBYTE_LENGTH];
1192               int c, translated, inverse;
1193               int in_charlen, charlen;
1194
1195               /* If we got here and the RE flag is set, it's because we're
1196                  dealing with a regexp known to be trivial, so the backslash
1197                  just quotes the next character.  */
1198               if (RE && *base_pat == '\\')
1199                 {
1200                   len--;
1201                   len_byte--;
1202                   base_pat++;
1203                 }
1204
1205               c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen);
1206
1207               /* Translate the character, if requested.  */
1208               TRANSLATE (translated, trt, c);
1209               /* If translation changed the byte-length, go back
1210                  to the original character.  */
1211               charlen = CHAR_STRING (translated, str);
1212               if (in_charlen != charlen)
1213                 {
1214                   translated = c;
1215                   charlen = CHAR_STRING (c, str);
1216                 }
1217
1218               /* If we are searching for something strange,
1219                  an invalid multibyte code, don't use boyer-moore.  */
1220               if (! ASCII_BYTE_P (translated)
1221                   && (charlen == 1 /* 8bit code */
1222                       || charlen != in_charlen /* invalid multibyte code */
1223                       ))
1224                 boyer_moore_ok = 0;
1225
1226               TRANSLATE (inverse, inverse_trt, c);
1227
1228               /* Did this char actually get translated?
1229                  Would any other char get translated into it?  */
1230               if (translated != c || inverse != c)
1231                 {
1232                   /* Keep track of which character set row
1233                      contains the characters that need translation.  */
1234                   int charset_base_code = c & ~CHAR_FIELD3_MASK;
1235                   int inverse_charset_base = inverse & ~CHAR_FIELD3_MASK;
1236
1237                   if (charset_base_code != inverse_charset_base)
1238                     boyer_moore_ok = 0;
1239                   else if (charset_base == -1)
1240                     charset_base = charset_base_code;
1241                   else if (charset_base != charset_base_code)
1242                     /* If two different rows appear, needing translation,
1243                        then we cannot use boyer_moore search.  */
1244                     boyer_moore_ok = 0;
1245                 }
1246
1247               /* Store this character into the translated pattern.  */
1248               bcopy (str, pat, charlen);
1249               pat += charlen;
1250               base_pat += in_charlen;
1251               len_byte -= in_charlen;
1252             }
1253         }
1254       else
1255         {
1256           /* Unibyte buffer.  */
1257           charset_base = 0;
1258           while (--len >= 0)
1259             {
1260               int c, translated;
1261
1262               /* If we got here and the RE flag is set, it's because we're
1263                  dealing with a regexp known to be trivial, so the backslash
1264                  just quotes the next character.  */
1265               if (RE && *base_pat == '\\')
1266                 {
1267                   len--;
1268                   base_pat++;
1269                 }
1270               c = *base_pat++;
1271               TRANSLATE (translated, trt, c);
1272               *pat++ = translated;
1273             }
1274         }
1275
1276       len_byte = pat - patbuf;
1277       len = raw_pattern_size;
1278       pat = base_pat = patbuf;
1279
1280       if (boyer_moore_ok)
1281         return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
1282                             pos, pos_byte, lim, lim_byte,
1283                             charset_base);
1284       else
1285         return simple_search (n, pat, len, len_byte, trt,
1286                               pos, pos_byte, lim, lim_byte);
1287     }
1288 }
1289 \f
1290 /* Do a simple string search N times for the string PAT,
1291    whose length is LEN/LEN_BYTE,
1292    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1293    TRT is the translation table.
1294
1295    Return the character position where the match is found.
1296    Otherwise, if M matches remained to be found, return -M.
1297
1298    This kind of search works regardless of what is in PAT and
1299    regardless of what is in TRT.  It is used in cases where
1300    boyer_moore cannot work.  */
1301
1302 static int
1303 simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
1304      int n;
1305      unsigned char *pat;
1306      int len, len_byte;
1307      Lisp_Object trt;
1308      int pos, pos_byte;
1309      int lim, lim_byte;
1310 {
1311   int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1312   int forward = n > 0;
1313
1314   if (lim > pos && multibyte)
1315     while (n > 0)
1316       {
1317         while (1)
1318           {
1319             /* Try matching at position POS.  */
1320             int this_pos = pos;
1321             int this_pos_byte = pos_byte;
1322             int this_len = len;
1323             int this_len_byte = len_byte;
1324             unsigned char *p = pat;
1325             if (pos + len > lim)
1326               goto stop;
1327
1328             while (this_len > 0)
1329               {
1330                 int charlen, buf_charlen;
1331                 int pat_ch, buf_ch;
1332
1333                 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1334                 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1335                                                  ZV_BYTE - this_pos_byte,
1336                                                  buf_charlen);
1337                 TRANSLATE (buf_ch, trt, buf_ch);
1338
1339                 if (buf_ch != pat_ch)
1340                   break;
1341
1342                 this_len_byte -= charlen;
1343                 this_len--;
1344                 p += charlen;
1345
1346                 this_pos_byte += buf_charlen;
1347                 this_pos++;
1348               }
1349
1350             if (this_len == 0)
1351               {
1352                 pos += len;
1353                 pos_byte += len_byte;
1354                 break;
1355               }
1356
1357             INC_BOTH (pos, pos_byte);
1358           }
1359
1360         n--;
1361       }
1362   else if (lim > pos)
1363     while (n > 0)
1364       {
1365         while (1)
1366           {
1367             /* Try matching at position POS.  */
1368             int this_pos = pos;
1369             int this_len = len;
1370             unsigned char *p = pat;
1371
1372             if (pos + len > lim)
1373               goto stop;
1374
1375             while (this_len > 0)
1376               {
1377                 int pat_ch = *p++;
1378                 int buf_ch = FETCH_BYTE (this_pos);
1379                 TRANSLATE (buf_ch, trt, buf_ch);
1380
1381                 if (buf_ch != pat_ch)
1382                   break;
1383
1384                 this_len--;
1385                 this_pos++;
1386               }
1387
1388             if (this_len == 0)
1389               {
1390                 pos += len;
1391                 break;
1392               }
1393
1394             pos++;
1395           }
1396
1397         n--;
1398       }
1399   /* Backwards search.  */
1400   else if (lim < pos && multibyte)
1401     while (n < 0)
1402       {
1403         while (1)
1404           {
1405             /* Try matching at position POS.  */
1406             int this_pos = pos - len;
1407             int this_pos_byte = pos_byte - len_byte;
1408             int this_len = len;
1409             int this_len_byte = len_byte;
1410             unsigned char *p = pat;
1411
1412             if (pos - len < lim)
1413               goto stop;
1414
1415             while (this_len > 0)
1416               {
1417                 int charlen, buf_charlen;
1418                 int pat_ch, buf_ch;
1419
1420                 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1421                 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1422                                                  ZV_BYTE - this_pos_byte,
1423                                                  buf_charlen);
1424                 TRANSLATE (buf_ch, trt, buf_ch);
1425
1426                 if (buf_ch != pat_ch)
1427                   break;
1428
1429                 this_len_byte -= charlen;
1430                 this_len--;
1431                 p += charlen;
1432                 this_pos_byte += buf_charlen;
1433                 this_pos++;
1434               }
1435
1436             if (this_len == 0)
1437               {
1438                 pos -= len;
1439                 pos_byte -= len_byte;
1440                 break;
1441               }
1442
1443             DEC_BOTH (pos, pos_byte);
1444           }
1445
1446         n++;
1447       }
1448   else if (lim < pos)
1449     while (n < 0)
1450       {
1451         while (1)
1452           {
1453             /* Try matching at position POS.  */
1454             int this_pos = pos - len;
1455             int this_len = len;
1456             unsigned char *p = pat;
1457
1458             if (pos - len < lim)
1459               goto stop;
1460
1461             while (this_len > 0)
1462               {
1463                 int pat_ch = *p++;
1464                 int buf_ch = FETCH_BYTE (this_pos);
1465                 TRANSLATE (buf_ch, trt, buf_ch);
1466
1467                 if (buf_ch != pat_ch)
1468                   break;
1469                 this_len--;
1470                 this_pos++;
1471               }
1472
1473             if (this_len == 0)
1474               {
1475                 pos -= len;
1476                 break;
1477               }
1478
1479             pos--;
1480           }
1481
1482         n++;
1483       }
1484
1485  stop:
1486   if (n == 0)
1487     {
1488       if (forward)
1489         set_search_regs ((multibyte ? pos_byte : pos) - len_byte, len_byte);
1490       else
1491         set_search_regs (multibyte ? pos_byte : pos, len_byte);
1492
1493       return pos;
1494     }
1495   else if (n > 0)
1496     return -n;
1497   else
1498     return n;
1499 }
1500 \f
1501 /* Do Boyer-Moore search N times for the string PAT,
1502    whose length is LEN/LEN_BYTE,
1503    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1504    The sign of N says which direction we search in.
1505    TRT and INVERSE_TRT are translation tables.
1506
1507    This kind of search works if all the characters in PAT that have
1508    nontrivial translation are the same aside from the last byte.  This
1509    makes it possible to translate just the last byte of a character,
1510    and do so after just a simple test of the context.
1511
1512    If that criterion is not satisfied, do not call this function.  */
1513
1514 static int
1515 boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
1516              pos, pos_byte, lim, lim_byte, charset_base)
1517      int n;
1518      unsigned char *base_pat;
1519      int len, len_byte;
1520      Lisp_Object trt;
1521      Lisp_Object inverse_trt;
1522      int pos, pos_byte;
1523      int lim, lim_byte;
1524      int charset_base;
1525 {
1526   int direction = ((n > 0) ? 1 : -1);
1527   register int dirlen;
1528   int infinity, limit, stride_for_teases = 0;
1529   register int *BM_tab;
1530   int *BM_tab_base;
1531   register unsigned char *cursor, *p_limit;
1532   register int i, j;
1533   unsigned char *pat, *pat_end;
1534   int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1535
1536   unsigned char simple_translate[0400];
1537   int translate_prev_byte = 0;
1538   int translate_anteprev_byte = 0;
1539
1540 #ifdef C_ALLOCA
1541   int BM_tab_space[0400];
1542   BM_tab = &BM_tab_space[0];
1543 #else
1544   BM_tab = (int *) alloca (0400 * sizeof (int));
1545 #endif
1546   /* The general approach is that we are going to maintain that we know */
1547   /* the first (closest to the present position, in whatever direction */
1548   /* we're searching) character that could possibly be the last */
1549   /* (furthest from present position) character of a valid match.  We */
1550   /* advance the state of our knowledge by looking at that character */
1551   /* and seeing whether it indeed matches the last character of the */
1552   /* pattern.  If it does, we take a closer look.  If it does not, we */
1553   /* move our pointer (to putative last characters) as far as is */
1554   /* logically possible.  This amount of movement, which I call a */
1555   /* stride, will be the length of the pattern if the actual character */
1556   /* appears nowhere in the pattern, otherwise it will be the distance */
1557   /* from the last occurrence of that character to the end of the */
1558   /* pattern. */
1559   /* As a coding trick, an enormous stride is coded into the table for */
1560   /* characters that match the last character.  This allows use of only */
1561   /* a single test, a test for having gone past the end of the */
1562   /* permissible match region, to test for both possible matches (when */
1563   /* the stride goes past the end immediately) and failure to */
1564   /* match (where you get nudged past the end one stride at a time). */
1565
1566   /* Here we make a "mickey mouse" BM table.  The stride of the search */
1567   /* is determined only by the last character of the putative match. */
1568   /* If that character does not match, we will stride the proper */
1569   /* distance to propose a match that superimposes it on the last */
1570   /* instance of a character that matches it (per trt), or misses */
1571   /* it entirely if there is none. */
1572
1573   dirlen = len_byte * direction;
1574   infinity = dirlen - (lim_byte + pos_byte + len_byte + len_byte) * direction;
1575
1576   /* Record position after the end of the pattern.  */
1577   pat_end = base_pat + len_byte;
1578   /* BASE_PAT points to a character that we start scanning from.
1579      It is the first character in a forward search,
1580      the last character in a backward search.  */
1581   if (direction < 0)
1582     base_pat = pat_end - 1;
1583
1584   BM_tab_base = BM_tab;
1585   BM_tab += 0400;
1586   j = dirlen;           /* to get it in a register */
1587   /* A character that does not appear in the pattern induces a */
1588   /* stride equal to the pattern length. */
1589   while (BM_tab_base != BM_tab)
1590     {
1591       *--BM_tab = j;
1592       *--BM_tab = j;
1593       *--BM_tab = j;
1594       *--BM_tab = j;
1595     }
1596
1597   /* We use this for translation, instead of TRT itself.
1598      We fill this in to handle the characters that actually
1599      occur in the pattern.  Others don't matter anyway!  */
1600   bzero (simple_translate, sizeof simple_translate);
1601   for (i = 0; i < 0400; i++)
1602     simple_translate[i] = i;
1603
1604   i = 0;
1605   while (i != infinity)
1606     {
1607       unsigned char *ptr = base_pat + i;
1608       i += direction;
1609       if (i == dirlen)
1610         i = infinity;
1611       if (! NILP (trt))
1612         {
1613           int ch;
1614           int untranslated;
1615           int this_translated = 1;
1616
1617           if (multibyte
1618               /* Is *PTR the last byte of a character?  */
1619               && (pat_end - ptr == 1 || CHAR_HEAD_P (ptr[1])))
1620             {
1621               unsigned char *charstart = ptr;
1622               while (! CHAR_HEAD_P (*charstart))
1623                 charstart--;
1624               untranslated = STRING_CHAR (charstart, ptr - charstart + 1);
1625               if (charset_base == (untranslated & ~CHAR_FIELD3_MASK))
1626                 {
1627                   TRANSLATE (ch, trt, untranslated);
1628                   if (! CHAR_HEAD_P (*ptr))
1629                     {
1630                       translate_prev_byte = ptr[-1];
1631                       if (! CHAR_HEAD_P (translate_prev_byte))
1632                         translate_anteprev_byte = ptr[-2];
1633                     }
1634                 }
1635               else
1636                 {
1637                   this_translated = 0;
1638                   ch = *ptr;
1639                 }
1640             }
1641           else if (!multibyte)
1642             TRANSLATE (ch, trt, *ptr);
1643           else
1644             {
1645               ch = *ptr;
1646               this_translated = 0;
1647             }
1648
1649           if (ch > 0400)
1650             j = ((unsigned char) ch) | 0200;
1651           else
1652             j = (unsigned char) ch;
1653
1654           if (i == infinity)
1655             stride_for_teases = BM_tab[j];
1656
1657           BM_tab[j] = dirlen - i;
1658           /* A translation table is accompanied by its inverse -- see */
1659           /* comment following downcase_table for details */
1660           if (this_translated)
1661             {
1662               int starting_ch = ch;
1663               int starting_j = j;
1664               while (1)
1665                 {
1666                   TRANSLATE (ch, inverse_trt, ch);
1667                   if (ch > 0400)
1668                     j = ((unsigned char) ch) | 0200;
1669                   else
1670                     j = (unsigned char) ch;
1671
1672                   /* For all the characters that map into CH,
1673                      set up simple_translate to map the last byte
1674                      into STARTING_J.  */
1675                   simple_translate[j] = starting_j;
1676                   if (ch == starting_ch)
1677                     break;
1678                   BM_tab[j] = dirlen - i;
1679                 }
1680             }
1681         }
1682       else
1683         {
1684           j = *ptr;
1685
1686           if (i == infinity)
1687             stride_for_teases = BM_tab[j];
1688           BM_tab[j] = dirlen - i;
1689         }
1690       /* stride_for_teases tells how much to stride if we get a */
1691       /* match on the far character but are subsequently */
1692       /* disappointed, by recording what the stride would have been */
1693       /* for that character if the last character had been */
1694       /* different. */
1695     }
1696   infinity = dirlen - infinity;
1697   pos_byte += dirlen - ((direction > 0) ? direction : 0);
1698   /* loop invariant - POS_BYTE points at where last char (first
1699      char if reverse) of pattern would align in a possible match.  */
1700   while (n != 0)
1701     {
1702       int tail_end;
1703       unsigned char *tail_end_ptr;
1704
1705       /* It's been reported that some (broken) compiler thinks that
1706          Boolean expressions in an arithmetic context are unsigned.
1707          Using an explicit ?1:0 prevents this.  */
1708       if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1709           < 0)
1710         return (n * (0 - direction));
1711       /* First we do the part we can by pointers (maybe nothing) */
1712       QUIT;
1713       pat = base_pat;
1714       limit = pos_byte - dirlen + direction;
1715       if (direction > 0)
1716         {
1717           limit = BUFFER_CEILING_OF (limit);
1718           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1719              can take on without hitting edge of buffer or the gap.  */
1720           limit = min (limit, pos_byte + 20000);
1721           limit = min (limit, lim_byte - 1);
1722         }
1723       else
1724         {
1725           limit = BUFFER_FLOOR_OF (limit);
1726           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1727              can take on without hitting edge of buffer or the gap.  */
1728           limit = max (limit, pos_byte - 20000);
1729           limit = max (limit, lim_byte);
1730         }
1731       tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1732       tail_end_ptr = BYTE_POS_ADDR (tail_end);
1733
1734       if ((limit - pos_byte) * direction > 20)
1735         {
1736           unsigned char *p2;
1737
1738           p_limit = BYTE_POS_ADDR (limit);
1739           p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1740           /* In this loop, pos + cursor - p2 is the surrogate for pos */
1741           while (1)             /* use one cursor setting as long as i can */
1742             {
1743               if (direction > 0) /* worth duplicating */
1744                 {
1745                   /* Use signed comparison if appropriate
1746                      to make cursor+infinity sure to be > p_limit.
1747                      Assuming that the buffer lies in a range of addresses
1748                      that are all "positive" (as ints) or all "negative",
1749                      either kind of comparison will work as long
1750                      as we don't step by infinity.  So pick the kind
1751                      that works when we do step by infinity.  */
1752                   if ((EMACS_INT) (p_limit + infinity) > (EMACS_INT) p_limit)
1753                     while ((EMACS_INT) cursor <= (EMACS_INT) p_limit)
1754                       cursor += BM_tab[*cursor];
1755                   else
1756                     while ((EMACS_UINT) cursor <= (EMACS_UINT) p_limit)
1757                       cursor += BM_tab[*cursor];
1758                 }
1759               else
1760                 {
1761                   if ((EMACS_INT) (p_limit + infinity) < (EMACS_INT) p_limit)
1762                     while ((EMACS_INT) cursor >= (EMACS_INT) p_limit)
1763                       cursor += BM_tab[*cursor];
1764                   else
1765                     while ((EMACS_UINT) cursor >= (EMACS_UINT) p_limit)
1766                       cursor += BM_tab[*cursor];
1767                 }
1768 /* If you are here, cursor is beyond the end of the searched region. */
1769 /* This can happen if you match on the far character of the pattern, */
1770 /* because the "stride" of that character is infinity, a number able */
1771 /* to throw you well beyond the end of the search.  It can also */
1772 /* happen if you fail to match within the permitted region and would */
1773 /* otherwise try a character beyond that region */
1774               if ((cursor - p_limit) * direction <= len_byte)
1775                 break;  /* a small overrun is genuine */
1776               cursor -= infinity; /* large overrun = hit */
1777               i = dirlen - direction;
1778               if (! NILP (trt))
1779                 {
1780                   while ((i -= direction) + direction != 0)
1781                     {
1782                       int ch;
1783                       cursor -= direction;
1784                       /* Translate only the last byte of a character.  */
1785                       if (! multibyte
1786                           || ((cursor == tail_end_ptr
1787                                || CHAR_HEAD_P (cursor[1]))
1788                               && (CHAR_HEAD_P (cursor[0])
1789                                   || (translate_prev_byte == cursor[-1]
1790                                       && (CHAR_HEAD_P (translate_prev_byte)
1791                                           || translate_anteprev_byte == cursor[-2])))))
1792                         ch = simple_translate[*cursor];
1793                       else
1794                         ch = *cursor;
1795                       if (pat[i] != ch)
1796                         break;
1797                     }
1798                 }
1799               else
1800                 {
1801                   while ((i -= direction) + direction != 0)
1802                     {
1803                       cursor -= direction;
1804                       if (pat[i] != *cursor)
1805                         break;
1806                     }
1807                 }
1808               cursor += dirlen - i - direction; /* fix cursor */
1809               if (i + direction == 0)
1810                 {
1811                   int position;
1812
1813                   cursor -= direction;
1814
1815                   position = pos_byte + cursor - p2 + ((direction > 0)
1816                                                        ? 1 - len_byte : 0);
1817                   set_search_regs (position, len_byte);
1818
1819                   if ((n -= direction) != 0)
1820                     cursor += dirlen; /* to resume search */
1821                   else
1822                     return ((direction > 0)
1823                             ? search_regs.end[0] : search_regs.start[0]);
1824                 }
1825               else
1826                 cursor += stride_for_teases; /* <sigh> we lose -  */
1827             }
1828           pos_byte += cursor - p2;
1829         }
1830       else
1831         /* Now we'll pick up a clump that has to be done the hard */
1832         /* way because it covers a discontinuity */
1833         {
1834           limit = ((direction > 0)
1835                    ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
1836                    : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
1837           limit = ((direction > 0)
1838                    ? min (limit + len_byte, lim_byte - 1)
1839                    : max (limit - len_byte, lim_byte));
1840           /* LIMIT is now the last value POS_BYTE can have
1841              and still be valid for a possible match.  */
1842           while (1)
1843             {
1844               /* This loop can be coded for space rather than */
1845               /* speed because it will usually run only once. */
1846               /* (the reach is at most len + 21, and typically */
1847               /* does not exceed len) */
1848               while ((limit - pos_byte) * direction >= 0)
1849                 pos_byte += BM_tab[FETCH_BYTE (pos_byte)];
1850               /* now run the same tests to distinguish going off the */
1851               /* end, a match or a phony match. */
1852               if ((pos_byte - limit) * direction <= len_byte)
1853                 break;  /* ran off the end */
1854               /* Found what might be a match.
1855                  Set POS_BYTE back to last (first if reverse) pos.  */
1856               pos_byte -= infinity;
1857               i = dirlen - direction;
1858               while ((i -= direction) + direction != 0)
1859                 {
1860                   int ch;
1861                   unsigned char *ptr;
1862                   pos_byte -= direction;
1863                   ptr = BYTE_POS_ADDR (pos_byte);
1864                   /* Translate only the last byte of a character.  */
1865                   if (! multibyte
1866                       || ((ptr == tail_end_ptr
1867                            || CHAR_HEAD_P (ptr[1]))
1868                           && (CHAR_HEAD_P (ptr[0])
1869                               || (translate_prev_byte == ptr[-1]
1870                                   && (CHAR_HEAD_P (translate_prev_byte)
1871                                       || translate_anteprev_byte == ptr[-2])))))
1872                     ch = simple_translate[*ptr];
1873                   else
1874                     ch = *ptr;
1875                   if (pat[i] != ch)
1876                     break;
1877                 }
1878               /* Above loop has moved POS_BYTE part or all the way
1879                  back to the first pos (last pos if reverse).
1880                  Set it once again at the last (first if reverse) char.  */
1881               pos_byte += dirlen - i- direction;
1882               if (i + direction == 0)
1883                 {
1884                   int position;
1885                   pos_byte -= direction;
1886
1887                   position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
1888
1889                   set_search_regs (position, len_byte);
1890
1891                   if ((n -= direction) != 0)
1892                     pos_byte += dirlen; /* to resume search */
1893                   else
1894                     return ((direction > 0)
1895                             ? search_regs.end[0] : search_regs.start[0]);
1896                 }
1897               else
1898                 pos_byte += stride_for_teases;
1899             }
1900           }
1901       /* We have done one clump.  Can we continue? */
1902       if ((lim_byte - pos_byte) * direction < 0)
1903         return ((0 - n) * direction);
1904     }
1905   return BYTE_TO_CHAR (pos_byte);
1906 }
1907
1908 /* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
1909    for the overall match just found in the current buffer.
1910    Also clear out the match data for registers 1 and up.  */
1911
1912 static void
1913 set_search_regs (beg_byte, nbytes)
1914      int beg_byte, nbytes;
1915 {
1916   int i;
1917
1918   /* Make sure we have registers in which to store
1919      the match position.  */
1920   if (search_regs.num_regs == 0)
1921     {
1922       search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
1923       search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
1924       search_regs.num_regs = 2;
1925     }
1926
1927   /* Clear out the other registers.  */
1928   for (i = 1; i < search_regs.num_regs; i++)
1929     {
1930       search_regs.start[i] = -1;
1931       search_regs.end[i] = -1;
1932     }
1933
1934   search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
1935   search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
1936   XSETBUFFER (last_thing_searched, current_buffer);
1937 }
1938 \f
1939 /* Given a string of words separated by word delimiters,
1940   compute a regexp that matches those exact words
1941   separated by arbitrary punctuation.  */
1942
1943 static Lisp_Object
1944 wordify (string)
1945      Lisp_Object string;
1946 {
1947   register unsigned char *p, *o;
1948   register int i, i_byte, len, punct_count = 0, word_count = 0;
1949   Lisp_Object val;
1950   int prev_c = 0;
1951   int adjust;
1952
1953   CHECK_STRING (string);
1954   p = SDATA (string);
1955   len = SCHARS (string);
1956
1957   for (i = 0, i_byte = 0; i < len; )
1958     {
1959       int c;
1960
1961       FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
1962
1963       if (SYNTAX (c) != Sword)
1964         {
1965           punct_count++;
1966           if (i > 0 && SYNTAX (prev_c) == Sword)
1967             word_count++;
1968         }
1969
1970       prev_c = c;
1971     }
1972
1973   if (SYNTAX (prev_c) == Sword)
1974     word_count++;
1975   if (!word_count)
1976     return empty_string;
1977
1978   adjust = - punct_count + 5 * (word_count - 1) + 4;
1979   if (STRING_MULTIBYTE (string))
1980     val = make_uninit_multibyte_string (len + adjust,
1981                                         SBYTES (string)
1982                                         + adjust);
1983   else
1984     val = make_uninit_string (len + adjust);
1985
1986   o = SDATA (val);
1987   *o++ = '\\';
1988   *o++ = 'b';
1989   prev_c = 0;
1990
1991   for (i = 0, i_byte = 0; i < len; )
1992     {
1993       int c;
1994       int i_byte_orig = i_byte;
1995
1996       FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
1997
1998       if (SYNTAX (c) == Sword)
1999         {
2000           bcopy (SDATA (string) + i_byte_orig, o,
2001                  i_byte - i_byte_orig);
2002           o += i_byte - i_byte_orig;
2003         }
2004       else if (i > 0 && SYNTAX (prev_c) == Sword && --word_count)
2005         {
2006           *o++ = '\\';
2007           *o++ = 'W';
2008           *o++ = '\\';
2009           *o++ = 'W';
2010           *o++ = '*';
2011         }
2012
2013       prev_c = c;
2014     }
2015
2016   *o++ = '\\';
2017   *o++ = 'b';
2018
2019   return val;
2020 }
2021 \f
2022 DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
2023        "MSearch backward: ",
2024        doc: /* Search backward from point for STRING.
2025 Set point to the beginning of the occurrence found, and return point.
2026 An optional second argument bounds the search; it is a buffer position.
2027 The match found must not extend before that position.
2028 Optional third argument, if t, means if fail just return nil (no error).
2029  If not nil and not t, position at limit of search and return nil.
2030 Optional fourth argument is repeat count--search for successive occurrences.
2031
2032 Search case-sensitivity is determined by the value of the variable
2033 `case-fold-search', which see.
2034
2035 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2036      (string, bound, noerror, count)
2037      Lisp_Object string, bound, noerror, count;
2038 {
2039   return search_command (string, bound, noerror, count, -1, 0, 0);
2040 }
2041
2042 DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
2043        doc: /* Search forward from point for STRING.
2044 Set point to the end of the occurrence found, and return point.
2045 An optional second argument bounds the search; it is a buffer position.
2046 The match found must not extend after that position.  nil is equivalent
2047   to (point-max).
2048 Optional third argument, if t, means if fail just return nil (no error).
2049   If not nil and not t, move to limit of search and return nil.
2050 Optional fourth argument is repeat count--search for successive occurrences.
2051
2052 Search case-sensitivity is determined by the value of the variable
2053 `case-fold-search', which see.
2054
2055 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2056      (string, bound, noerror, count)
2057      Lisp_Object string, bound, noerror, count;
2058 {
2059   return search_command (string, bound, noerror, count, 1, 0, 0);
2060 }
2061
2062 DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
2063        "sWord search backward: ",
2064        doc: /* Search backward from point for STRING, ignoring differences in punctuation.
2065 Set point to the beginning of the occurrence found, and return point.
2066 An optional second argument bounds the search; it is a buffer position.
2067 The match found must not extend before that position.
2068 Optional third argument, if t, means if fail just return nil (no error).
2069   If not nil and not t, move to limit of search and return nil.
2070 Optional fourth argument is repeat count--search for successive occurrences.  */)
2071      (string, bound, noerror, count)
2072      Lisp_Object string, bound, noerror, count;
2073 {
2074   return search_command (wordify (string), bound, noerror, count, -1, 1, 0);
2075 }
2076
2077 DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
2078        "sWord search: ",
2079        doc: /* Search forward from point for STRING, ignoring differences in punctuation.
2080 Set point to the end of the occurrence found, and return point.
2081 An optional second argument bounds the search; it is a buffer position.
2082 The match found must not extend after that position.
2083 Optional third argument, if t, means if fail just return nil (no error).
2084   If not nil and not t, move to limit of search and return nil.
2085 Optional fourth argument is repeat count--search for successive occurrences.  */)
2086      (string, bound, noerror, count)
2087      Lisp_Object string, bound, noerror, count;
2088 {
2089   return search_command (wordify (string), bound, noerror, count, 1, 1, 0);
2090 }
2091
2092 DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
2093        "sRE search backward: ",
2094        doc: /* Search backward from point for match for regular expression REGEXP.
2095 Set point to the beginning of the match, and return point.
2096 The match found is the one starting last in the buffer
2097 and yet ending before the origin of the search.
2098 An optional second argument bounds the search; it is a buffer position.
2099 The match found must start at or after that position.
2100 Optional third argument, if t, means if fail just return nil (no error).
2101   If not nil and not t, move to limit of search and return nil.
2102 Optional fourth argument is repeat count--search for successive occurrences.
2103 See also the functions `match-beginning', `match-end', `match-string',
2104 and `replace-match'.  */)
2105      (regexp, bound, noerror, count)
2106      Lisp_Object regexp, bound, noerror, count;
2107 {
2108   return search_command (regexp, bound, noerror, count, -1, 1, 0);
2109 }
2110
2111 DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
2112        "sRE search: ",
2113        doc: /* Search forward from point for regular expression REGEXP.
2114 Set point to the end of the occurrence found, and return point.
2115 An optional second argument bounds the search; it is a buffer position.
2116 The match found must not extend after that position.
2117 Optional third argument, if t, means if fail just return nil (no error).
2118   If not nil and not t, move to limit of search and return nil.
2119 Optional fourth argument is repeat count--search for successive occurrences.
2120 See also the functions `match-beginning', `match-end', `match-string',
2121 and `replace-match'.  */)
2122      (regexp, bound, noerror, count)
2123      Lisp_Object regexp, bound, noerror, count;
2124 {
2125   return search_command (regexp, bound, noerror, count, 1, 1, 0);
2126 }
2127
2128 DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
2129        "sPosix search backward: ",
2130        doc: /* Search backward from point for match for regular expression REGEXP.
2131 Find the longest match in accord with Posix regular expression rules.
2132 Set point to the beginning of the match, and return point.
2133 The match found is the one starting last in the buffer
2134 and yet ending before the origin of the search.
2135 An optional second argument bounds the search; it is a buffer position.
2136 The match found must start at or after that position.
2137 Optional third argument, if t, means if fail just return nil (no error).
2138   If not nil and not t, move to limit of search and return nil.
2139 Optional fourth argument is repeat count--search for successive occurrences.
2140 See also the functions `match-beginning', `match-end', `match-string',
2141 and `replace-match'.  */)
2142      (regexp, bound, noerror, count)
2143      Lisp_Object regexp, bound, noerror, count;
2144 {
2145   return search_command (regexp, bound, noerror, count, -1, 1, 1);
2146 }
2147
2148 DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
2149        "sPosix search: ",
2150        doc: /* Search forward from point for regular expression REGEXP.
2151 Find the longest match in accord with Posix regular expression rules.
2152 Set point to the end of the occurrence found, and return point.
2153 An optional second argument bounds the search; it is a buffer position.
2154 The match found must not extend after that position.
2155 Optional third argument, if t, means if fail just return nil (no error).
2156   If not nil and not t, move to limit of search and return nil.
2157 Optional fourth argument is repeat count--search for successive occurrences.
2158 See also the functions `match-beginning', `match-end', `match-string',
2159 and `replace-match'.  */)
2160      (regexp, bound, noerror, count)
2161      Lisp_Object regexp, bound, noerror, count;
2162 {
2163   return search_command (regexp, bound, noerror, count, 1, 1, 1);
2164 }
2165 \f
2166 DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
2167        doc: /* Replace text matched by last search with NEWTEXT.
2168 Leave point at the end of the replacement text.
2169
2170 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
2171 Otherwise maybe capitalize the whole text, or maybe just word initials,
2172 based on the replaced text.
2173 If the replaced text has only capital letters
2174 and has at least one multiletter word, convert NEWTEXT to all caps.
2175 Otherwise if all words are capitalized in the replaced text,
2176 capitalize each word in NEWTEXT.
2177
2178 If third arg LITERAL is non-nil, insert NEWTEXT literally.
2179 Otherwise treat `\\' as special:
2180   `\\&' in NEWTEXT means substitute original matched text.
2181   `\\N' means substitute what matched the Nth `\\(...\\)'.
2182        If Nth parens didn't match, substitute nothing.
2183   `\\\\' means insert one `\\'.
2184 Case conversion does not apply to these substitutions.
2185
2186 FIXEDCASE and LITERAL are optional arguments.
2187
2188 The optional fourth argument STRING can be a string to modify.
2189 This is meaningful when the previous match was done against STRING,
2190 using `string-match'.  When used this way, `replace-match'
2191 creates and returns a new string made by copying STRING and replacing
2192 the part of STRING that was matched.
2193
2194 The optional fifth argument SUBEXP specifies a subexpression;
2195 it says to replace just that subexpression with NEWTEXT,
2196 rather than replacing the entire matched text.
2197 This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
2198 `\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
2199 NEWTEXT in place of subexp N.
2200 This is useful only after a regular expression search or match,
2201 since only regular expressions have distinguished subexpressions.  */)
2202      (newtext, fixedcase, literal, string, subexp)
2203      Lisp_Object newtext, fixedcase, literal, string, subexp;
2204 {
2205   enum { nochange, all_caps, cap_initial } case_action;
2206   register int pos, pos_byte;
2207   int some_multiletter_word;
2208   int some_lowercase;
2209   int some_uppercase;
2210   int some_nonuppercase_initial;
2211   register int c, prevc;
2212   int sub;
2213   int opoint, newpoint;
2214
2215   CHECK_STRING (newtext);
2216
2217   if (! NILP (string))
2218     CHECK_STRING (string);
2219
2220   case_action = nochange;       /* We tried an initialization */
2221                                 /* but some C compilers blew it */
2222
2223   if (search_regs.num_regs <= 0)
2224     error ("replace-match called before any match found");
2225
2226   if (NILP (subexp))
2227     sub = 0;
2228   else
2229     {
2230       CHECK_NUMBER (subexp);
2231       sub = XINT (subexp);
2232       if (sub < 0 || sub >= search_regs.num_regs)
2233         args_out_of_range (subexp, make_number (search_regs.num_regs));
2234     }
2235
2236   if (NILP (string))
2237     {
2238       if (search_regs.start[sub] < BEGV
2239           || search_regs.start[sub] > search_regs.end[sub]
2240           || search_regs.end[sub] > ZV)
2241         args_out_of_range (make_number (search_regs.start[sub]),
2242                            make_number (search_regs.end[sub]));
2243     }
2244   else
2245     {
2246       if (search_regs.start[sub] < 0
2247           || search_regs.start[sub] > search_regs.end[sub]
2248           || search_regs.end[sub] > SCHARS (string))
2249         args_out_of_range (make_number (search_regs.start[sub]),
2250                            make_number (search_regs.end[sub]));
2251     }
2252
2253   if (NILP (fixedcase))
2254     {
2255       /* Decide how to casify by examining the matched text. */
2256       int last;
2257
2258       pos = search_regs.start[sub];
2259       last = search_regs.end[sub];
2260
2261       if (NILP (string))
2262         pos_byte = CHAR_TO_BYTE (pos);
2263       else
2264         pos_byte = string_char_to_byte (string, pos);
2265
2266       prevc = '\n';
2267       case_action = all_caps;
2268
2269       /* some_multiletter_word is set nonzero if any original word
2270          is more than one letter long. */
2271       some_multiletter_word = 0;
2272       some_lowercase = 0;
2273       some_nonuppercase_initial = 0;
2274       some_uppercase = 0;
2275
2276       while (pos < last)
2277         {
2278           if (NILP (string))
2279             {
2280               c = FETCH_CHAR (pos_byte);
2281               INC_BOTH (pos, pos_byte);
2282             }
2283           else
2284             FETCH_STRING_CHAR_ADVANCE (c, string, pos, pos_byte);
2285
2286           if (LOWERCASEP (c))
2287             {
2288               /* Cannot be all caps if any original char is lower case */
2289
2290               some_lowercase = 1;
2291               if (SYNTAX (prevc) != Sword)
2292                 some_nonuppercase_initial = 1;
2293               else
2294                 some_multiletter_word = 1;
2295             }
2296           else if (!NOCASEP (c))
2297             {
2298               some_uppercase = 1;
2299               if (SYNTAX (prevc) != Sword)
2300                 ;
2301               else
2302                 some_multiletter_word = 1;
2303             }
2304           else
2305             {
2306               /* If the initial is a caseless word constituent,
2307                  treat that like a lowercase initial.  */
2308               if (SYNTAX (prevc) != Sword)
2309                 some_nonuppercase_initial = 1;
2310             }
2311
2312           prevc = c;
2313         }
2314
2315       /* Convert to all caps if the old text is all caps
2316          and has at least one multiletter word.  */
2317       if (! some_lowercase && some_multiletter_word)
2318         case_action = all_caps;
2319       /* Capitalize each word, if the old text has all capitalized words.  */
2320       else if (!some_nonuppercase_initial && some_multiletter_word)
2321         case_action = cap_initial;
2322       else if (!some_nonuppercase_initial && some_uppercase)
2323         /* Should x -> yz, operating on X, give Yz or YZ?
2324            We'll assume the latter.  */
2325         case_action = all_caps;
2326       else
2327         case_action = nochange;
2328     }
2329
2330   /* Do replacement in a string.  */
2331   if (!NILP (string))
2332     {
2333       Lisp_Object before, after;
2334
2335       before = Fsubstring (string, make_number (0),
2336                            make_number (search_regs.start[sub]));
2337       after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
2338
2339       /* Substitute parts of the match into NEWTEXT
2340          if desired.  */
2341       if (NILP (literal))
2342         {
2343           int lastpos = 0;
2344           int lastpos_byte = 0;
2345           /* We build up the substituted string in ACCUM.  */
2346           Lisp_Object accum;
2347           Lisp_Object middle;
2348           int length = SBYTES (newtext);
2349
2350           accum = Qnil;
2351
2352           for (pos_byte = 0, pos = 0; pos_byte < length;)
2353             {
2354               int substart = -1;
2355               int subend = 0;
2356               int delbackslash = 0;
2357
2358               FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2359
2360               if (c == '\\')
2361                 {
2362                   FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2363
2364                   if (c == '&')
2365                     {
2366                       substart = search_regs.start[sub];
2367                       subend = search_regs.end[sub];
2368                     }
2369                   else if (c >= '1' && c <= '9')
2370                     {
2371                       if (search_regs.start[c - '0'] >= 0
2372                           && c <= search_regs.num_regs + '0')
2373                         {
2374                           substart = search_regs.start[c - '0'];
2375                           subend = search_regs.end[c - '0'];
2376                         }
2377                       else
2378                         {
2379                           /* If that subexp did not match,
2380                              replace \\N with nothing.  */
2381                           substart = 0;
2382                           subend = 0;
2383                         }
2384                     }
2385                   else if (c == '\\')
2386                     delbackslash = 1;
2387                   else
2388                     error ("Invalid use of `\\' in replacement text");
2389                 }
2390               if (substart >= 0)
2391                 {
2392                   if (pos - 2 != lastpos)
2393                     middle = substring_both (newtext, lastpos,
2394                                              lastpos_byte,
2395                                              pos - 2, pos_byte - 2);
2396                   else
2397                     middle = Qnil;
2398                   accum = concat3 (accum, middle,
2399                                    Fsubstring (string,
2400                                                make_number (substart),
2401                                                make_number (subend)));
2402                   lastpos = pos;
2403                   lastpos_byte = pos_byte;
2404                 }
2405               else if (delbackslash)
2406                 {
2407                   middle = substring_both (newtext, lastpos,
2408                                            lastpos_byte,
2409                                            pos - 1, pos_byte - 1);
2410
2411                   accum = concat2 (accum, middle);
2412                   lastpos = pos;
2413                   lastpos_byte = pos_byte;
2414                 }
2415             }
2416
2417           if (pos != lastpos)
2418             middle = substring_both (newtext, lastpos,
2419                                      lastpos_byte,
2420                                      pos, pos_byte);
2421           else
2422             middle = Qnil;
2423
2424           newtext = concat2 (accum, middle);
2425         }
2426
2427       /* Do case substitution in NEWTEXT if desired.  */
2428       if (case_action == all_caps)
2429         newtext = Fupcase (newtext);
2430       else if (case_action == cap_initial)
2431         newtext = Fupcase_initials (newtext);
2432
2433       return concat3 (before, newtext, after);
2434     }
2435
2436   /* Record point, then move (quietly) to the start of the match.  */
2437   if (PT >= search_regs.end[sub])
2438     opoint = PT - ZV;
2439   else if (PT > search_regs.start[sub])
2440     opoint = search_regs.end[sub] - ZV;
2441   else
2442     opoint = PT;
2443
2444   /* If we want non-literal replacement,
2445      perform substitution on the replacement string.  */
2446   if (NILP (literal))
2447     {
2448       int length = SBYTES (newtext);
2449       unsigned char *substed;
2450       int substed_alloc_size, substed_len;
2451       int buf_multibyte = !NILP (current_buffer->enable_multibyte_characters);
2452       int str_multibyte = STRING_MULTIBYTE (newtext);
2453       Lisp_Object rev_tbl;
2454       int really_changed = 0;
2455
2456       rev_tbl= (!buf_multibyte && CHAR_TABLE_P (Vnonascii_translation_table)
2457                 ? Fchar_table_extra_slot (Vnonascii_translation_table,
2458                                           make_number (0))
2459                 : Qnil);
2460
2461       substed_alloc_size = length * 2 + 100;
2462       substed = (unsigned char *) xmalloc (substed_alloc_size + 1);
2463       substed_len = 0;
2464
2465       /* Go thru NEWTEXT, producing the actual text to insert in
2466          SUBSTED while adjusting multibyteness to that of the current
2467          buffer.  */
2468
2469       for (pos_byte = 0, pos = 0; pos_byte < length;)
2470         {
2471           unsigned char str[MAX_MULTIBYTE_LENGTH];
2472           unsigned char *add_stuff = NULL;
2473           int add_len = 0;
2474           int idx = -1;
2475
2476           if (str_multibyte)
2477             {
2478               FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte);
2479               if (!buf_multibyte)
2480                 c = multibyte_char_to_unibyte (c, rev_tbl);
2481             }
2482           else
2483             {
2484               /* Note that we don't have to increment POS.  */
2485               c = SREF (newtext, pos_byte++);
2486               if (buf_multibyte)
2487                 c = unibyte_char_to_multibyte (c);
2488             }
2489
2490           /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2491              or set IDX to a match index, which means put that part
2492              of the buffer text into SUBSTED.  */
2493
2494           if (c == '\\')
2495             {
2496               really_changed = 1;
2497
2498               if (str_multibyte)
2499                 {
2500                   FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext,
2501                                                       pos, pos_byte);
2502                   if (!buf_multibyte && !SINGLE_BYTE_CHAR_P (c))
2503                     c = multibyte_char_to_unibyte (c, rev_tbl);
2504                 }
2505               else
2506                 {
2507                   c = SREF (newtext, pos_byte++);
2508                   if (buf_multibyte)
2509                     c = unibyte_char_to_multibyte (c);
2510                 }
2511
2512               if (c == '&')
2513                 idx = sub;
2514               else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
2515                 {
2516                   if (search_regs.start[c - '0'] >= 1)
2517                     idx = c - '0';
2518                 }
2519               else if (c == '\\')
2520                 add_len = 1, add_stuff = "\\";
2521               else
2522                 {
2523                   xfree (substed);
2524                   error ("Invalid use of `\\' in replacement text");
2525                 }
2526             }
2527           else
2528             {
2529               add_len = CHAR_STRING (c, str);
2530               add_stuff = str;
2531             }
2532
2533           /* If we want to copy part of a previous match,
2534              set up ADD_STUFF and ADD_LEN to point to it.  */
2535           if (idx >= 0)
2536             {
2537               int begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
2538               add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
2539               if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
2540                 move_gap (search_regs.start[idx]);
2541               add_stuff = BYTE_POS_ADDR (begbyte);
2542             }
2543
2544           /* Now the stuff we want to add to SUBSTED
2545              is invariably ADD_LEN bytes starting at ADD_STUFF.  */
2546
2547           /* Make sure SUBSTED is big enough.  */
2548           if (substed_len + add_len >= substed_alloc_size)
2549             {
2550               substed_alloc_size = substed_len + add_len + 500;
2551               substed = (unsigned char *) xrealloc (substed,
2552                                                     substed_alloc_size + 1);
2553             }
2554
2555           /* Now add to the end of SUBSTED.  */
2556           if (add_stuff)
2557             {
2558               bcopy (add_stuff, substed + substed_len, add_len);
2559               substed_len += add_len;
2560             }
2561         }
2562
2563       if (really_changed)
2564         {
2565           if (buf_multibyte)
2566             {
2567               int nchars = multibyte_chars_in_text (substed, substed_len);
2568
2569               newtext = make_multibyte_string (substed, nchars, substed_len);
2570             }
2571           else
2572             newtext = make_unibyte_string (substed, substed_len);
2573         }
2574       xfree (substed);
2575     }
2576
2577   /* Replace the old text with the new in the cleanest possible way.  */
2578   replace_range (search_regs.start[sub], search_regs.end[sub],
2579                  newtext, 1, 0, 1);
2580   newpoint = search_regs.start[sub] + SCHARS (newtext);
2581
2582   if (case_action == all_caps)
2583     Fupcase_region (make_number (search_regs.start[sub]),
2584                     make_number (newpoint));
2585   else if (case_action == cap_initial)
2586     Fupcase_initials_region (make_number (search_regs.start[sub]),
2587                              make_number (newpoint));
2588
2589   /* Adjust search data for this change.  */
2590   {
2591     int oldend = search_regs.end[sub];
2592     int change = newpoint - search_regs.end[sub];
2593     int i;
2594
2595     for (i = 0; i < search_regs.num_regs; i++)
2596       {
2597         if (search_regs.start[i] > oldend)
2598           search_regs.start[i] += change;
2599         if (search_regs.end[i] > oldend)
2600           search_regs.end[i] += change;
2601       }
2602   }
2603
2604   /* Put point back where it was in the text.  */
2605   if (opoint <= 0)
2606     TEMP_SET_PT (opoint + ZV);
2607   else
2608     TEMP_SET_PT (opoint);
2609
2610   /* Now move point "officially" to the start of the inserted replacement.  */
2611   move_if_not_intangible (newpoint);
2612
2613   return Qnil;
2614 }
2615 \f
2616 static Lisp_Object
2617 match_limit (num, beginningp)
2618      Lisp_Object num;
2619      int beginningp;
2620 {
2621   register int n;
2622
2623   CHECK_NUMBER (num);
2624   n = XINT (num);
2625   if (n < 0)
2626     args_out_of_range (num, 0);
2627   if (search_regs.num_regs <= 0)
2628     error ("No match data, because no search succeeded");
2629   if (n >= search_regs.num_regs
2630       || search_regs.start[n] < 0)
2631     return Qnil;
2632   return (make_number ((beginningp) ? search_regs.start[n]
2633                                     : search_regs.end[n]));
2634 }
2635
2636 DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
2637        doc: /* Return position of start of text matched by last search.
2638 SUBEXP, a number, specifies which parenthesized expression in the last
2639   regexp.
2640 Value is nil if SUBEXPth pair didn't match, or there were less than
2641   SUBEXP pairs.
2642 Zero means the entire text matched by the whole regexp or whole string.  */)
2643      (subexp)
2644      Lisp_Object subexp;
2645 {
2646   return match_limit (subexp, 1);
2647 }
2648
2649 DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
2650        doc: /* Return position of end of text matched by last search.
2651 SUBEXP, a number, specifies which parenthesized expression in the last
2652   regexp.
2653 Value is nil if SUBEXPth pair didn't match, or there were less than
2654   SUBEXP pairs.
2655 Zero means the entire text matched by the whole regexp or whole string.  */)
2656      (subexp)
2657      Lisp_Object subexp;
2658 {
2659   return match_limit (subexp, 0);
2660 }
2661
2662 DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 2, 0,
2663        doc: /* Return a list containing all info on what the last search matched.
2664 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2665 All the elements are markers or nil (nil if the Nth pair didn't match)
2666 if the last match was on a buffer; integers or nil if a string was matched.
2667 Use `store-match-data' to reinstate the data in this list.
2668
2669 If INTEGERS (the optional first argument) is non-nil, always use integers
2670 \(rather than markers) to represent buffer positions.
2671 If REUSE is a list, reuse it as part of the value.  If REUSE is long enough
2672 to hold all the values, and if INTEGERS is non-nil, no consing is done.
2673
2674 Return value is undefined if the last search failed.  */)
2675      (integers, reuse)
2676      Lisp_Object integers, reuse;
2677 {
2678   Lisp_Object tail, prev;
2679   Lisp_Object *data;
2680   int i, len;
2681
2682   if (NILP (last_thing_searched))
2683     return Qnil;
2684
2685   prev = Qnil;
2686
2687   data = (Lisp_Object *) alloca ((2 * search_regs.num_regs)
2688                                  * sizeof (Lisp_Object));
2689
2690   len = -1;
2691   for (i = 0; i < search_regs.num_regs; i++)
2692     {
2693       int start = search_regs.start[i];
2694       if (start >= 0)
2695         {
2696           if (EQ (last_thing_searched, Qt)
2697               || ! NILP (integers))
2698             {
2699               XSETFASTINT (data[2 * i], start);
2700               XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
2701             }
2702           else if (BUFFERP (last_thing_searched))
2703             {
2704               data[2 * i] = Fmake_marker ();
2705               Fset_marker (data[2 * i],
2706                            make_number (start),
2707                            last_thing_searched);
2708               data[2 * i + 1] = Fmake_marker ();
2709               Fset_marker (data[2 * i + 1],
2710                            make_number (search_regs.end[i]),
2711                            last_thing_searched);
2712             }
2713           else
2714             /* last_thing_searched must always be Qt, a buffer, or Qnil.  */
2715             abort ();
2716
2717           len = i;
2718         }
2719       else
2720         data[2 * i] = data [2 * i + 1] = Qnil;
2721     }
2722
2723   /* If REUSE is not usable, cons up the values and return them.  */
2724   if (! CONSP (reuse))
2725     return Flist (2 * len + 2, data);
2726
2727   /* If REUSE is a list, store as many value elements as will fit
2728      into the elements of REUSE.  */
2729   for (i = 0, tail = reuse; CONSP (tail);
2730        i++, tail = XCDR (tail))
2731     {
2732       if (i < 2 * len + 2)
2733         XSETCAR (tail, data[i]);
2734       else
2735         XSETCAR (tail, Qnil);
2736       prev = tail;
2737     }
2738
2739   /* If we couldn't fit all value elements into REUSE,
2740      cons up the rest of them and add them to the end of REUSE.  */
2741   if (i < 2 * len + 2)
2742     XSETCDR (prev, Flist (2 * len + 2 - i, data + i));
2743
2744   return reuse;
2745 }
2746
2747
2748 DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 1, 0,
2749        doc: /* Set internal data on last search match from elements of LIST.
2750 LIST should have been created by calling `match-data' previously.  */)
2751      (list)
2752      register Lisp_Object list;
2753 {
2754   register int i;
2755   register Lisp_Object marker;
2756
2757   if (running_asynch_code)
2758     save_search_regs ();
2759
2760   if (!CONSP (list) && !NILP (list))
2761     list = wrong_type_argument (Qconsp, list);
2762
2763   /* Unless we find a marker with a buffer in LIST, assume that this
2764      match data came from a string.  */
2765   last_thing_searched = Qt;
2766
2767   /* Allocate registers if they don't already exist.  */
2768   {
2769     int length = XFASTINT (Flength (list)) / 2;
2770
2771     if (length > search_regs.num_regs)
2772       {
2773         if (search_regs.num_regs == 0)
2774           {
2775             search_regs.start
2776               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2777             search_regs.end
2778               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2779           }
2780         else
2781           {
2782             search_regs.start
2783               = (regoff_t *) xrealloc (search_regs.start,
2784                                        length * sizeof (regoff_t));
2785             search_regs.end
2786               = (regoff_t *) xrealloc (search_regs.end,
2787                                        length * sizeof (regoff_t));
2788           }
2789
2790         for (i = search_regs.num_regs; i < length; i++)
2791           search_regs.start[i] = -1;
2792
2793         search_regs.num_regs = length;
2794       }
2795   }
2796
2797   for (i = 0; i < search_regs.num_regs; i++)
2798     {
2799       marker = Fcar (list);
2800       if (NILP (marker))
2801         {
2802           search_regs.start[i] = -1;
2803           list = Fcdr (list);
2804         }
2805       else
2806         {
2807           int from;
2808
2809           if (MARKERP (marker))
2810             {
2811               if (XMARKER (marker)->buffer == 0)
2812                 XSETFASTINT (marker, 0);
2813               else
2814                 XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
2815             }
2816
2817           CHECK_NUMBER_COERCE_MARKER (marker);
2818           from = XINT (marker);
2819           list = Fcdr (list);
2820
2821           marker = Fcar (list);
2822           if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
2823             XSETFASTINT (marker, 0);
2824
2825           CHECK_NUMBER_COERCE_MARKER (marker);
2826           search_regs.start[i] = from;
2827           search_regs.end[i] = XINT (marker);
2828         }
2829       list = Fcdr (list);
2830     }
2831
2832   return Qnil;
2833 }
2834
2835 /* If non-zero the match data have been saved in saved_search_regs
2836    during the execution of a sentinel or filter. */
2837 static int search_regs_saved;
2838 static struct re_registers saved_search_regs;
2839
2840 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
2841    if asynchronous code (filter or sentinel) is running. */
2842 static void
2843 save_search_regs ()
2844 {
2845   if (!search_regs_saved)
2846     {
2847       saved_search_regs.num_regs = search_regs.num_regs;
2848       saved_search_regs.start = search_regs.start;
2849       saved_search_regs.end = search_regs.end;
2850       search_regs.num_regs = 0;
2851       search_regs.start = 0;
2852       search_regs.end = 0;
2853
2854       search_regs_saved = 1;
2855     }
2856 }
2857
2858 /* Called upon exit from filters and sentinels. */
2859 void
2860 restore_match_data ()
2861 {
2862   if (search_regs_saved)
2863     {
2864       if (search_regs.num_regs > 0)
2865         {
2866           xfree (search_regs.start);
2867           xfree (search_regs.end);
2868         }
2869       search_regs.num_regs = saved_search_regs.num_regs;
2870       search_regs.start = saved_search_regs.start;
2871       search_regs.end = saved_search_regs.end;
2872
2873       search_regs_saved = 0;
2874     }
2875 }
2876
2877 /* Quote a string to inactivate reg-expr chars */
2878
2879 DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
2880        doc: /* Return a regexp string which matches exactly STRING and nothing else.  */)
2881      (string)
2882      Lisp_Object string;
2883 {
2884   register unsigned char *in, *out, *end;
2885   register unsigned char *temp;
2886   int backslashes_added = 0;
2887
2888   CHECK_STRING (string);
2889
2890   temp = (unsigned char *) alloca (SBYTES (string) * 2);
2891
2892   /* Now copy the data into the new string, inserting escapes. */
2893
2894   in = SDATA (string);
2895   end = in + SBYTES (string);
2896   out = temp;
2897
2898   for (; in != end; in++)
2899     {
2900       if (*in == '[' || *in == ']'
2901           || *in == '*' || *in == '.' || *in == '\\'
2902           || *in == '?' || *in == '+'
2903           || *in == '^' || *in == '$')
2904         *out++ = '\\', backslashes_added++;
2905       *out++ = *in;
2906     }
2907
2908   return make_specified_string (temp,
2909                                 SCHARS (string) + backslashes_added,
2910                                 out - temp,
2911                                 STRING_MULTIBYTE (string));
2912 }
2913 \f
2914 void
2915 syms_of_search ()
2916 {
2917   register int i;
2918
2919   for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
2920     {
2921       searchbufs[i].buf.allocated = 100;
2922       searchbufs[i].buf.buffer = (unsigned char *) xmalloc (100);
2923       searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
2924       searchbufs[i].regexp = Qnil;
2925       staticpro (&searchbufs[i].regexp);
2926       searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
2927     }
2928   searchbuf_head = &searchbufs[0];
2929
2930   Qsearch_failed = intern ("search-failed");
2931   staticpro (&Qsearch_failed);
2932   Qinvalid_regexp = intern ("invalid-regexp");
2933   staticpro (&Qinvalid_regexp);
2934
2935   Fput (Qsearch_failed, Qerror_conditions,
2936         Fcons (Qsearch_failed, Fcons (Qerror, Qnil)));
2937   Fput (Qsearch_failed, Qerror_message,
2938         build_string ("Search failed"));
2939
2940   Fput (Qinvalid_regexp, Qerror_conditions,
2941         Fcons (Qinvalid_regexp, Fcons (Qerror, Qnil)));
2942   Fput (Qinvalid_regexp, Qerror_message,
2943         build_string ("Invalid regexp"));
2944
2945   last_thing_searched = Qnil;
2946   staticpro (&last_thing_searched);
2947
2948   defsubr (&Slooking_at);
2949   defsubr (&Sposix_looking_at);
2950   defsubr (&Sstring_match);
2951   defsubr (&Sposix_string_match);
2952   defsubr (&Ssearch_forward);
2953   defsubr (&Ssearch_backward);
2954   defsubr (&Sword_search_forward);
2955   defsubr (&Sword_search_backward);
2956   defsubr (&Sre_search_forward);
2957   defsubr (&Sre_search_backward);
2958   defsubr (&Sposix_search_forward);
2959   defsubr (&Sposix_search_backward);
2960   defsubr (&Sreplace_match);
2961   defsubr (&Smatch_beginning);
2962   defsubr (&Smatch_end);
2963   defsubr (&Smatch_data);
2964   defsubr (&Sset_match_data);
2965   defsubr (&Sregexp_quote);
2966 }
2967
2968 /* arch-tag: a6059d79-0552-4f14-a2cb-d379a4e3c78f
2969    (do not change this comment) */