src/search.c

   1 /* String search routines for GNU Emacs.
   2    Copyright (C) 1985, 86,87,93,94,97,98, 1999, 2004
   3              Free Software Foundation, Inc.
   4
   5 This file is part of GNU Emacs.
   6
   7 GNU Emacs is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2, or (at your option)
  10 any later version.
  11
  12 GNU Emacs is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU Emacs; see the file COPYING.  If not, write to
  19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  20 Boston, MA 02111-1307, USA.  */
  21
  22
  23 #include <config.h>
  24 #include "lisp.h"
  25 #include "syntax.h"
  26 #include "category.h"
  27 #include "buffer.h"
  28 #include "charset.h"
  29 #include "region-cache.h"
  30 #include "commands.h"
  31 #include "blockinput.h"
  32 #include "intervals.h"
  33
  34 #include <sys/types.h>
  35 #include "regex.h"
  36
  37 #define REGEXP_CACHE_SIZE 20
  38
  39 /* If the regexp is non-nil, then the buffer contains the compiled form
  40    of that regexp, suitable for searching.  */
  41 struct regexp_cache
  42 {
  43   struct regexp_cache *next;
  44   Lisp_Object regexp;
  45   struct re_pattern_buffer buf;
  46   char fastmap[0400];
  47   /* Nonzero means regexp was compiled to do full POSIX backtracking.  */
  48   char posix;
  49 };
  50
  51 /* The instances of that struct.  */
  52 struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
  53
  54 /* The head of the linked list; points to the most recently used buffer.  */
  55 struct regexp_cache *searchbuf_head;
  56
  57
  58 /* Every call to re_match, etc., must pass &search_regs as the regs
  59    argument unless you can show it is unnecessary (i.e., if re_match
  60    is certainly going to be called again before region-around-match
  61    can be called).
  62
  63    Since the registers are now dynamically allocated, we need to make
  64    sure not to refer to the Nth register before checking that it has
  65    been allocated by checking search_regs.num_regs.
  66
  67    The regex code keeps track of whether it has allocated the search
  68    buffer using bits in the re_pattern_buffer.  This means that whenever
  69    you compile a new pattern, it completely forgets whether it has
  70    allocated any registers, and will allocate new registers the next
  71    time you call a searching or matching function.  Therefore, we need
  72    to call re_set_registers after compiling a new pattern or after
  73    setting the match registers, so that the regex functions will be
  74    able to free or re-allocate it properly.  */
  75 static struct re_registers search_regs;
  76
  77 /* The buffer in which the last search was performed, or
  78    Qt if the last search was done in a string;
  79    Qnil if no searching has been done yet.  */
  80 static Lisp_Object last_thing_searched;
  81
  82 /* error condition signaled when regexp compile_pattern fails */
  83
  84 Lisp_Object Qinvalid_regexp;
  85
  86 static void set_search_regs ();
  87 static void save_search_regs ();
  88 static int simple_search ();
  89 static int boyer_moore ();
  90 static int search_buffer ();
  91
  92 static void
  93 matcher_overflow ()
  94 {
  95   error ("Stack overflow in regexp matcher");
  96 }
  97
  98 /* Compile a regexp and signal a Lisp error if anything goes wrong.
  99    PATTERN is the pattern to compile.
 100    CP is the place to put the result.
 101    TRANSLATE is a translation table for ignoring case, or nil for none.
 102    REGP is the structure that says where to store the "register"
 103    values that will result from matching this pattern.
 104    If it is 0, we should compile the pattern not to record any
 105    subexpression bounds.
 106    POSIX is nonzero if we want full backtracking (POSIX style)
 107    for this pattern.  0 means backtrack only enough to get a valid match.
 108    MULTIBYTE is nonzero if we want to handle multibyte characters in
 109    PATTERN.  0 means all multibyte characters are recognized just as
 110    sequences of binary data.  */
 111
 112 static void
 113 compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte)
 114      struct regexp_cache *cp;
 115      Lisp_Object pattern;
 116      Lisp_Object translate;
 117      struct re_registers *regp;
 118      int posix;
 119      int multibyte;
 120 {
 121   unsigned char *raw_pattern;
 122   int raw_pattern_size;
 123   char *val;
 124   reg_syntax_t old;
 125
 126   /* MULTIBYTE says whether the text to be searched is multibyte.
 127      We must convert PATTERN to match that, or we will not really
 128      find things right.  */
 129
 130   if (multibyte == STRING_MULTIBYTE (pattern))
 131     {
 132       raw_pattern = (unsigned char *) SDATA (pattern);
 133       raw_pattern_size = SBYTES (pattern);
 134     }
 135   else if (multibyte)
 136     {
 137       raw_pattern_size = count_size_as_multibyte (SDATA (pattern),
 138                                                   SCHARS (pattern));
 139       raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
 140       copy_text (SDATA (pattern), raw_pattern,
 141                  SCHARS (pattern), 0, 1);
 142     }
 143   else
 144     {
 145       /* Converting multibyte to single-byte.
 146
 147          ??? Perhaps this conversion should be done in a special way
 148          by subtracting nonascii-insert-offset from each non-ASCII char,
 149          so that only the multibyte chars which really correspond to
 150          the chosen single-byte character set can possibly match.  */
 151       raw_pattern_size = SCHARS (pattern);
 152       raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
 153       copy_text (SDATA (pattern), raw_pattern,
 154                  SBYTES (pattern), 1, 0);
 155     }
 156
 157   cp->regexp = Qnil;
 158   cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
 159   cp->posix = posix;
 160   cp->buf.multibyte = multibyte;
 161   BLOCK_INPUT;
 162   old = re_set_syntax (RE_SYNTAX_EMACS
 163                        | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
 164   val = (char *) re_compile_pattern ((char *)raw_pattern,
 165                                      raw_pattern_size, &cp->buf);
 166   re_set_syntax (old);
 167   UNBLOCK_INPUT;
 168   if (val)
 169     Fsignal (Qinvalid_regexp, Fcons (build_string (val), Qnil));
 170
 171   cp->regexp = Fcopy_sequence (pattern);
 172 }
 173
 174 /* Shrink each compiled regexp buffer in the cache
 175    to the size actually used right now.
 176    This is called from garbage collection.  */
 177
 178 void
 179 shrink_regexp_cache ()
 180 {
 181   struct regexp_cache *cp;
 182
 183   for (cp = searchbuf_head; cp != 0; cp = cp->next)
 184     {
 185       cp->buf.allocated = cp->buf.used;
 186       cp->buf.buffer
 187         = (unsigned char *) xrealloc (cp->buf.buffer, cp->buf.used);
 188     }
 189 }
 190
 191 /* Compile a regexp if necessary, but first check to see if there's one in
 192    the cache.
 193    PATTERN is the pattern to compile.
 194    TRANSLATE is a translation table for ignoring case, or nil for none.
 195    REGP is the structure that says where to store the "register"
 196    values that will result from matching this pattern.
 197    If it is 0, we should compile the pattern not to record any
 198    subexpression bounds.
 199    POSIX is nonzero if we want full backtracking (POSIX style)
 200    for this pattern.  0 means backtrack only enough to get a valid match.  */
 201
 202 struct re_pattern_buffer *
 203 compile_pattern (pattern, regp, translate, posix, multibyte)
 204      Lisp_Object pattern;
 205      struct re_registers *regp;
 206      Lisp_Object translate;
 207      int posix, multibyte;
 208 {
 209   struct regexp_cache *cp, **cpp;
 210
 211   for (cpp = &searchbuf_head; ; cpp = &cp->next)
 212     {
 213       cp = *cpp;
 214       /* Entries are initialized to nil, and may be set to nil by
 215          compile_pattern_1 if the pattern isn't valid.  Don't apply
 216          string accessors in those cases.  However, compile_pattern_1
 217          is only applied to the cache entry we pick here to reuse.  So
 218          nil should never appear before a non-nil entry.  */
 219       if (NILP (cp->regexp))
 220         goto compile_it;
 221       if (SCHARS (cp->regexp) == SCHARS (pattern)
 222           && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
 223           && !NILP (Fstring_equal (cp->regexp, pattern))
 224           && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
 225           && cp->posix == posix
 226           && cp->buf.multibyte == multibyte)
 227         break;
 228
 229       /* If we're at the end of the cache, compile into the nil cell
 230          we found, or the last (least recently used) cell with a
 231          string value.  */
 232       if (cp->next == 0)
 233         {
 234         compile_it:
 235           compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte);
 236           break;
 237         }
 238     }
 239
 240   /* When we get here, cp (aka *cpp) contains the compiled pattern,
 241      either because we found it in the cache or because we just compiled it.
 242      Move it to the front of the queue to mark it as most recently used.  */
 243   *cpp = cp->next;
 244   cp->next = searchbuf_head;
 245   searchbuf_head = cp;
 246
 247   /* Advise the searching functions about the space we have allocated
 248      for register data.  */
 249   if (regp)
 250     re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
 251
 252   return &cp->buf;
 253 }
 254
 255 /* Error condition used for failing searches */
 256 Lisp_Object Qsearch_failed;
 257
 258 Lisp_Object
 259 signal_failure (arg)
 260      Lisp_Object arg;
 261 {
 262   Fsignal (Qsearch_failed, Fcons (arg, Qnil));
 263   return Qnil;
 264 }
 265 \f
 266 static Lisp_Object
 267 looking_at_1 (string, posix)
 268      Lisp_Object string;
 269      int posix;
 270 {
 271   Lisp_Object val;
 272   unsigned char *p1, *p2;
 273   int s1, s2;
 274   register int i;
 275   struct re_pattern_buffer *bufp;
 276
 277   if (running_asynch_code)
 278     save_search_regs ();
 279
 280   CHECK_STRING (string);
 281   bufp = compile_pattern (string, &search_regs,
 282                           (!NILP (current_buffer->case_fold_search)
 283                            ? DOWNCASE_TABLE : Qnil),
 284                           posix,
 285                           !NILP (current_buffer->enable_multibyte_characters));
 286
 287   immediate_quit = 1;
 288   QUIT;                 /* Do a pending quit right away, to avoid paradoxical behavior */
 289
 290   /* Get pointers and sizes of the two strings
 291      that make up the visible portion of the buffer. */
 292
 293   p1 = BEGV_ADDR;
 294   s1 = GPT_BYTE - BEGV_BYTE;
 295   p2 = GAP_END_ADDR;
 296   s2 = ZV_BYTE - GPT_BYTE;
 297   if (s1 < 0)
 298     {
 299       p2 = p1;
 300       s2 = ZV_BYTE - BEGV_BYTE;
 301       s1 = 0;
 302     }
 303   if (s2 < 0)
 304     {
 305       s1 = ZV_BYTE - BEGV_BYTE;
 306       s2 = 0;
 307     }
 308
 309   re_match_object = Qnil;
 310
 311   i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
 312                   PT_BYTE - BEGV_BYTE, &search_regs,
 313                   ZV_BYTE - BEGV_BYTE);
 314   immediate_quit = 0;
 315
 316   if (i == -2)
 317     matcher_overflow ();
 318
 319   val = (0 <= i ? Qt : Qnil);
 320   if (i >= 0)
 321     for (i = 0; i < search_regs.num_regs; i++)
 322       if (search_regs.start[i] >= 0)
 323         {
 324           search_regs.start[i]
 325             = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
 326           search_regs.end[i]
 327             = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
 328         }
 329   XSETBUFFER (last_thing_searched, current_buffer);
 330   return val;
 331 }
 332
 333 DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
 334        doc: /* Return t if text after point matches regular expression REGEXP.
 335 This function modifies the match data that `match-beginning',
 336 `match-end' and `match-data' access; save and restore the match
 337 data if you want to preserve them.  */)
 338      (regexp)
 339      Lisp_Object regexp;
 340 {
 341   return looking_at_1 (regexp, 0);
 342 }
 343
 344 DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
 345        doc: /* Return t if text after point matches regular expression REGEXP.
 346 Find the longest match, in accord with Posix regular expression rules.
 347 This function modifies the match data that `match-beginning',
 348 `match-end' and `match-data' access; save and restore the match
 349 data if you want to preserve them.  */)
 350      (regexp)
 351      Lisp_Object regexp;
 352 {
 353   return looking_at_1 (regexp, 1);
 354 }
 355 \f
 356 static Lisp_Object
 357 string_match_1 (regexp, string, start, posix)
 358      Lisp_Object regexp, string, start;
 359      int posix;
 360 {
 361   int val;
 362   struct re_pattern_buffer *bufp;
 363   int pos, pos_byte;
 364   int i;
 365
 366   if (running_asynch_code)
 367     save_search_regs ();
 368
 369   CHECK_STRING (regexp);
 370   CHECK_STRING (string);
 371
 372   if (NILP (start))
 373     pos = 0, pos_byte = 0;
 374   else
 375     {
 376       int len = SCHARS (string);
 377
 378       CHECK_NUMBER (start);
 379       pos = XINT (start);
 380       if (pos < 0 && -pos <= len)
 381         pos = len + pos;
 382       else if (0 > pos || pos > len)
 383         args_out_of_range (string, start);
 384       pos_byte = string_char_to_byte (string, pos);
 385     }
 386
 387   bufp = compile_pattern (regexp, &search_regs,
 388                           (!NILP (current_buffer->case_fold_search)
 389                            ? DOWNCASE_TABLE : Qnil),
 390                           posix,
 391                           STRING_MULTIBYTE (string));
 392   immediate_quit = 1;
 393   re_match_object = string;
 394
 395   val = re_search (bufp, (char *) SDATA (string),
 396                    SBYTES (string), pos_byte,
 397                    SBYTES (string) - pos_byte,
 398                    &search_regs);
 399   immediate_quit = 0;
 400   last_thing_searched = Qt;
 401   if (val == -2)
 402     matcher_overflow ();
 403   if (val < 0) return Qnil;
 404
 405   for (i = 0; i < search_regs.num_regs; i++)
 406     if (search_regs.start[i] >= 0)
 407       {
 408         search_regs.start[i]
 409           = string_byte_to_char (string, search_regs.start[i]);
 410         search_regs.end[i]
 411           = string_byte_to_char (string, search_regs.end[i]);
 412       }
 413
 414   return make_number (string_byte_to_char (string, val));
 415 }
 416
 417 DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
 418        doc: /* Return index of start of first match for REGEXP in STRING, or nil.
 419 Case is ignored if `case-fold-search' is non-nil in the current buffer.
 420 If third arg START is non-nil, start search at that index in STRING.
 421 For index of first char beyond the match, do (match-end 0).
 422 `match-end' and `match-beginning' also give indices of substrings
 423 matched by parenthesis constructs in the pattern.
 424
 425 You can use the function `match-string' to extract the substrings
 426 matched by the parenthesis constructions in REGEXP. */)
 427      (regexp, string, start)
 428      Lisp_Object regexp, string, start;
 429 {
 430   return string_match_1 (regexp, string, start, 0);
 431 }
 432
 433 DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
 434        doc: /* Return index of start of first match for REGEXP in STRING, or nil.
 435 Find the longest match, in accord with Posix regular expression rules.
 436 Case is ignored if `case-fold-search' is non-nil in the current buffer.
 437 If third arg START is non-nil, start search at that index in STRING.
 438 For index of first char beyond the match, do (match-end 0).
 439 `match-end' and `match-beginning' also give indices of substrings
 440 matched by parenthesis constructs in the pattern.  */)
 441      (regexp, string, start)
 442      Lisp_Object regexp, string, start;
 443 {
 444   return string_match_1 (regexp, string, start, 1);
 445 }
 446
 447 /* Match REGEXP against STRING, searching all of STRING,
 448    and return the index of the match, or negative on failure.
 449    This does not clobber the match data.  */
 450
 451 int
 452 fast_string_match (regexp, string)
 453      Lisp_Object regexp, string;
 454 {
 455   int val;
 456   struct re_pattern_buffer *bufp;
 457
 458   bufp = compile_pattern (regexp, 0, Qnil,
 459                           0, STRING_MULTIBYTE (string));
 460   immediate_quit = 1;
 461   re_match_object = string;
 462
 463   val = re_search (bufp, (char *) SDATA (string),
 464                    SBYTES (string), 0,
 465                    SBYTES (string), 0);
 466   immediate_quit = 0;
 467   return val;
 468 }
 469
 470 /* Match REGEXP against STRING, searching all of STRING ignoring case,
 471    and return the index of the match, or negative on failure.
 472    This does not clobber the match data.
 473    We assume that STRING contains single-byte characters.  */
 474
 475 extern Lisp_Object Vascii_downcase_table;
 476
 477 int
 478 fast_c_string_match_ignore_case (regexp, string)
 479      Lisp_Object regexp;
 480      const char *string;
 481 {
 482   int val;
 483   struct re_pattern_buffer *bufp;
 484   int len = strlen (string);
 485
 486   regexp = string_make_unibyte (regexp);
 487   re_match_object = Qt;
 488   bufp = compile_pattern (regexp, 0,
 489                           Vascii_downcase_table, 0,
 490                           0);
 491   immediate_quit = 1;
 492   val = re_search (bufp, string, len, 0, len, 0);
 493   immediate_quit = 0;
 494   return val;
 495 }
 496
 497 /* Like fast_string_match but ignore case.  */
 498
 499 int
 500 fast_string_match_ignore_case (regexp, string)
 501      Lisp_Object regexp, string;
 502 {
 503   int val;
 504   struct re_pattern_buffer *bufp;
 505
 506   bufp = compile_pattern (regexp, 0, Vascii_downcase_table,
 507                           0, STRING_MULTIBYTE (string));
 508   immediate_quit = 1;
 509   re_match_object = string;
 510
 511   val = re_search (bufp, (char *) SDATA (string),
 512                    SBYTES (string), 0,
 513                    SBYTES (string), 0);
 514   immediate_quit = 0;
 515   return val;
 516 }
 517 \f
 518 /* The newline cache: remembering which sections of text have no newlines.  */
 519
 520 /* If the user has requested newline caching, make sure it's on.
 521    Otherwise, make sure it's off.
 522    This is our cheezy way of associating an action with the change of
 523    state of a buffer-local variable.  */
 524 static void
 525 newline_cache_on_off (buf)
 526      struct buffer *buf;
 527 {
 528   if (NILP (buf->cache_long_line_scans))
 529     {
 530       /* It should be off.  */
 531       if (buf->newline_cache)
 532         {
 533           free_region_cache (buf->newline_cache);
 534           buf->newline_cache = 0;
 535         }
 536     }
 537   else
 538     {
 539       /* It should be on.  */
 540       if (buf->newline_cache == 0)
 541         buf->newline_cache = new_region_cache ();
 542     }
 543 }
 544
 545 \f
 546 /* Search for COUNT instances of the character TARGET between START and END.
 547
 548    If COUNT is positive, search forwards; END must be >= START.
 549    If COUNT is negative, search backwards for the -COUNTth instance;
 550       END must be <= START.
 551    If COUNT is zero, do anything you please; run rogue, for all I care.
 552
 553    If END is zero, use BEGV or ZV instead, as appropriate for the
 554    direction indicated by COUNT.
 555
 556    If we find COUNT instances, set *SHORTAGE to zero, and return the
 557    position past the COUNTth match.  Note that for reverse motion
 558    this is not the same as the usual convention for Emacs motion commands.
 559
 560    If we don't find COUNT instances before reaching END, set *SHORTAGE
 561    to the number of TARGETs left unfound, and return END.
 562
 563    If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
 564    except when inside redisplay.  */
 565
 566 int
 567 scan_buffer (target, start, end, count, shortage, allow_quit)
 568      register int target;
 569      int start, end;
 570      int count;
 571      int *shortage;
 572      int allow_quit;
 573 {
 574   struct region_cache *newline_cache;
 575   int direction;
 576
 577   if (count > 0)
 578     {
 579       direction = 1;
 580       if (! end) end = ZV;
 581     }
 582   else
 583     {
 584       direction = -1;
 585       if (! end) end = BEGV;
 586     }
 587
 588   newline_cache_on_off (current_buffer);
 589   newline_cache = current_buffer->newline_cache;
 590
 591   if (shortage != 0)
 592     *shortage = 0;
 593
 594   immediate_quit = allow_quit;
 595
 596   if (count > 0)
 597     while (start != end)
 598       {
 599         /* Our innermost scanning loop is very simple; it doesn't know
 600            about gaps, buffer ends, or the newline cache.  ceiling is
 601            the position of the last character before the next such
 602            obstacle --- the last character the dumb search loop should
 603            examine.  */
 604         int ceiling_byte = CHAR_TO_BYTE (end) - 1;
 605         int start_byte = CHAR_TO_BYTE (start);
 606         int tem;
 607
 608         /* If we're looking for a newline, consult the newline cache
 609            to see where we can avoid some scanning.  */
 610         if (target == '\n' && newline_cache)
 611           {
 612             int next_change;
 613             immediate_quit = 0;
 614             while (region_cache_forward
 615                    (current_buffer, newline_cache, start_byte, &next_change))
 616               start_byte = next_change;
 617             immediate_quit = allow_quit;
 618
 619             /* START should never be after END.  */
 620             if (start_byte > ceiling_byte)
 621               start_byte = ceiling_byte;
 622
 623             /* Now the text after start is an unknown region, and
 624                next_change is the position of the next known region. */
 625             ceiling_byte = min (next_change - 1, ceiling_byte);
 626           }
 627
 628         /* The dumb loop can only scan text stored in contiguous
 629            bytes. BUFFER_CEILING_OF returns the last character
 630            position that is contiguous, so the ceiling is the
 631            position after that.  */
 632         tem = BUFFER_CEILING_OF (start_byte);
 633         ceiling_byte = min (tem, ceiling_byte);
 634
 635         {
 636           /* The termination address of the dumb loop.  */
 637           register unsigned char *ceiling_addr
 638             = BYTE_POS_ADDR (ceiling_byte) + 1;
 639           register unsigned char *cursor
 640             = BYTE_POS_ADDR (start_byte);
 641           unsigned char *base = cursor;
 642
 643           while (cursor < ceiling_addr)
 644             {
 645               unsigned char *scan_start = cursor;
 646
 647               /* The dumb loop.  */
 648               while (*cursor != target && ++cursor < ceiling_addr)
 649                 ;
 650
 651               /* If we're looking for newlines, cache the fact that
 652                  the region from start to cursor is free of them. */
 653               if (target == '\n' && newline_cache)
 654                 know_region_cache (current_buffer, newline_cache,
 655                                    start_byte + scan_start - base,
 656                                    start_byte + cursor - base);
 657
 658               /* Did we find the target character?  */
 659               if (cursor < ceiling_addr)
 660                 {
 661                   if (--count == 0)
 662                     {
 663                       immediate_quit = 0;
 664                       return BYTE_TO_CHAR (start_byte + cursor - base + 1);
 665                     }
 666                   cursor++;
 667                 }
 668             }
 669
 670           start = BYTE_TO_CHAR (start_byte + cursor - base);
 671         }
 672       }
 673   else
 674     while (start > end)
 675       {
 676         /* The last character to check before the next obstacle.  */
 677         int ceiling_byte = CHAR_TO_BYTE (end);
 678         int start_byte = CHAR_TO_BYTE (start);
 679         int tem;
 680
 681         /* Consult the newline cache, if appropriate.  */
 682         if (target == '\n' && newline_cache)
 683           {
 684             int next_change;
 685             immediate_quit = 0;
 686             while (region_cache_backward
 687                    (current_buffer, newline_cache, start_byte, &next_change))
 688               start_byte = next_change;
 689             immediate_quit = allow_quit;
 690
 691             /* Start should never be at or before end.  */
 692             if (start_byte <= ceiling_byte)
 693               start_byte = ceiling_byte + 1;
 694
 695             /* Now the text before start is an unknown region, and
 696                next_change is the position of the next known region. */
 697             ceiling_byte = max (next_change, ceiling_byte);
 698           }
 699
 700         /* Stop scanning before the gap.  */
 701         tem = BUFFER_FLOOR_OF (start_byte - 1);
 702         ceiling_byte = max (tem, ceiling_byte);
 703
 704         {
 705           /* The termination address of the dumb loop.  */
 706           register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
 707           register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
 708           unsigned char *base = cursor;
 709
 710           while (cursor >= ceiling_addr)
 711             {
 712               unsigned char *scan_start = cursor;
 713
 714               while (*cursor != target && --cursor >= ceiling_addr)
 715                 ;
 716
 717               /* If we're looking for newlines, cache the fact that
 718                  the region from after the cursor to start is free of them.  */
 719               if (target == '\n' && newline_cache)
 720                 know_region_cache (current_buffer, newline_cache,
 721                                    start_byte + cursor - base,
 722                                    start_byte + scan_start - base);
 723
 724               /* Did we find the target character?  */
 725               if (cursor >= ceiling_addr)
 726                 {
 727                   if (++count >= 0)
 728                     {
 729                       immediate_quit = 0;
 730                       return BYTE_TO_CHAR (start_byte + cursor - base);
 731                     }
 732                   cursor--;
 733                 }
 734             }
 735
 736           start = BYTE_TO_CHAR (start_byte + cursor - base);
 737         }
 738       }
 739
 740   immediate_quit = 0;
 741   if (shortage != 0)
 742     *shortage = count * direction;
 743   return start;
 744 }
 745 \f
 746 /* Search for COUNT instances of a line boundary, which means either a
 747    newline or (if selective display enabled) a carriage return.
 748    Start at START.  If COUNT is negative, search backwards.
 749
 750    We report the resulting position by calling TEMP_SET_PT_BOTH.
 751
 752    If we find COUNT instances. we position after (always after,
 753    even if scanning backwards) the COUNTth match, and return 0.
 754
 755    If we don't find COUNT instances before reaching the end of the
 756    buffer (or the beginning, if scanning backwards), we return
 757    the number of line boundaries left unfound, and position at
 758    the limit we bumped up against.
 759
 760    If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
 761    except in special cases.  */
 762
 763 int
 764 scan_newline (start, start_byte, limit, limit_byte, count, allow_quit)
 765      int start, start_byte;
 766      int limit, limit_byte;
 767      register int count;
 768      int allow_quit;
 769 {
 770   int direction = ((count > 0) ? 1 : -1);
 771
 772   register unsigned char *cursor;
 773   unsigned char *base;
 774
 775   register int ceiling;
 776   register unsigned char *ceiling_addr;
 777
 778   int old_immediate_quit = immediate_quit;
 779
 780   /* The code that follows is like scan_buffer
 781      but checks for either newline or carriage return.  */
 782
 783   if (allow_quit)
 784     immediate_quit++;
 785
 786   start_byte = CHAR_TO_BYTE (start);
 787
 788   if (count > 0)
 789     {
 790       while (start_byte < limit_byte)
 791         {
 792           ceiling =  BUFFER_CEILING_OF (start_byte);
 793           ceiling = min (limit_byte - 1, ceiling);
 794           ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
 795           base = (cursor = BYTE_POS_ADDR (start_byte));
 796           while (1)
 797             {
 798               while (*cursor != '\n' && ++cursor != ceiling_addr)
 799                 ;
 800
 801               if (cursor != ceiling_addr)
 802                 {
 803                   if (--count == 0)
 804                     {
 805                       immediate_quit = old_immediate_quit;
 806                       start_byte = start_byte + cursor - base + 1;
 807                       start = BYTE_TO_CHAR (start_byte);
 808                       TEMP_SET_PT_BOTH (start, start_byte);
 809                       return 0;
 810                     }
 811                   else
 812                     if (++cursor == ceiling_addr)
 813                       break;
 814                 }
 815               else
 816                 break;
 817             }
 818           start_byte += cursor - base;
 819         }
 820     }
 821   else
 822     {
 823       while (start_byte > limit_byte)
 824         {
 825           ceiling = BUFFER_FLOOR_OF (start_byte - 1);
 826           ceiling = max (limit_byte, ceiling);
 827           ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
 828           base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
 829           while (1)
 830             {
 831               while (--cursor != ceiling_addr && *cursor != '\n')
 832                 ;
 833
 834               if (cursor != ceiling_addr)
 835                 {
 836                   if (++count == 0)
 837                     {
 838                       immediate_quit = old_immediate_quit;
 839                       /* Return the position AFTER the match we found.  */
 840                       start_byte = start_byte + cursor - base + 1;
 841                       start = BYTE_TO_CHAR (start_byte);
 842                       TEMP_SET_PT_BOTH (start, start_byte);
 843                       return 0;
 844                     }
 845                 }
 846               else
 847                 break;
 848             }
 849           /* Here we add 1 to compensate for the last decrement
 850              of CURSOR, which took it past the valid range.  */
 851           start_byte += cursor - base + 1;
 852         }
 853     }
 854
 855   TEMP_SET_PT_BOTH (limit, limit_byte);
 856   immediate_quit = old_immediate_quit;
 857
 858   return count * direction;
 859 }
 860
 861 int
 862 find_next_newline_no_quit (from, cnt)
 863      register int from, cnt;
 864 {
 865   return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0);
 866 }
 867
 868 /* Like find_next_newline, but returns position before the newline,
 869    not after, and only search up to TO.  This isn't just
 870    find_next_newline (...)-1, because you might hit TO.  */
 871
 872 int
 873 find_before_next_newline (from, to, cnt)
 874      int from, to, cnt;
 875 {
 876   int shortage;
 877   int pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);
 878
 879   if (shortage == 0)
 880     pos--;
 881
 882   return pos;
 883 }
 884 \f
 885 /* Subroutines of Lisp buffer search functions. */
 886
 887 static Lisp_Object
 888 search_command (string, bound, noerror, count, direction, RE, posix)
 889      Lisp_Object string, bound, noerror, count;
 890      int direction;
 891      int RE;
 892      int posix;
 893 {
 894   register int np;
 895   int lim, lim_byte;
 896   int n = direction;
 897
 898   if (!NILP (count))
 899     {
 900       CHECK_NUMBER (count);
 901       n *= XINT (count);
 902     }
 903
 904   CHECK_STRING (string);
 905   if (NILP (bound))
 906     {
 907       if (n > 0)
 908         lim = ZV, lim_byte = ZV_BYTE;
 909       else
 910         lim = BEGV, lim_byte = BEGV_BYTE;
 911     }
 912   else
 913     {
 914       CHECK_NUMBER_COERCE_MARKER (bound);
 915       lim = XINT (bound);
 916       if (n > 0 ? lim < PT : lim > PT)
 917         error ("Invalid search bound (wrong side of point)");
 918       if (lim > ZV)
 919         lim = ZV, lim_byte = ZV_BYTE;
 920       else if (lim < BEGV)
 921         lim = BEGV, lim_byte = BEGV_BYTE;
 922       else
 923         lim_byte = CHAR_TO_BYTE (lim);
 924     }
 925
 926   np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
 927                       (!NILP (current_buffer->case_fold_search)
 928                        ? current_buffer->case_canon_table
 929                        : Qnil),
 930                       (!NILP (current_buffer->case_fold_search)
 931                        ? current_buffer->case_eqv_table
 932                        : Qnil),
 933                       posix);
 934   if (np <= 0)
 935     {
 936       if (NILP (noerror))
 937         return signal_failure (string);
 938       if (!EQ (noerror, Qt))
 939         {
 940           if (lim < BEGV || lim > ZV)
 941             abort ();
 942           SET_PT_BOTH (lim, lim_byte);
 943           return Qnil;
 944 #if 0 /* This would be clean, but maybe programs depend on
 945          a value of nil here.  */
 946           np = lim;
 947 #endif
 948         }
 949       else
 950         return Qnil;
 951     }
 952
 953   if (np < BEGV || np > ZV)
 954     abort ();
 955
 956   SET_PT (np);
 957
 958   return make_number (np);
 959 }
 960 \f
 961 /* Return 1 if REGEXP it matches just one constant string.  */
 962
 963 static int
 964 trivial_regexp_p (regexp)
 965      Lisp_Object regexp;
 966 {
 967   int len = SBYTES (regexp);
 968   unsigned char *s = SDATA (regexp);
 969   while (--len >= 0)
 970     {
 971       switch (*s++)
 972         {
 973         case '.': case '*': case '+': case '?': case '[': case '^': case '$':
 974           return 0;
 975         case '\\':
 976           if (--len < 0)
 977             return 0;
 978           switch (*s++)
 979             {
 980             case '|': case '(': case ')': case '`': case '\'': case 'b':
 981             case 'B': case '<': case '>': case 'w': case 'W': case 's':
 982             case 'S': case '=': case '{': case '}': case '_':
 983             case 'c': case 'C': /* for categoryspec and notcategoryspec */
 984             case '1': case '2': case '3': case '4': case '5':
 985             case '6': case '7': case '8': case '9':
 986               return 0;
 987             }
 988         }
 989     }
 990   return 1;
 991 }
 992
 993 /* Search for the n'th occurrence of STRING in the current buffer,
 994    starting at position POS and stopping at position LIM,
 995    treating STRING as a literal string if RE is false or as
 996    a regular expression if RE is true.
 997
 998    If N is positive, searching is forward and LIM must be greater than POS.
 999    If N is negative, searching is backward and LIM must be less than POS.
1000
1001    Returns -x if x occurrences remain to be found (x > 0),
1002    or else the position at the beginning of the Nth occurrence
1003    (if searching backward) or the end (if searching forward).
1004
1005    POSIX is nonzero if we want full backtracking (POSIX style)
1006    for this pattern.  0 means backtrack only enough to get a valid match.  */
1007
/* Store into OUT the translation of character D under table TRT.

   If TRT is nil, or if its entry for D is not an integer, OUT is set
   to D itself.  OUT and D may name the same variable.

   D is evaluated exactly once and all parameters are parenthesized,
   so arbitrary expressions are safe as arguments.  The locals carry
   a `translate_' prefix so they cannot capture an identifier used in
   an argument expression.  */

#define TRANSLATE(out, trt, d)                                  \
do                                                              \
  {                                                             \
    int translate_ch_ = (d);                                    \
    if (! NILP (trt))                                           \
      {                                                         \
        Lisp_Object translate_val_;                             \
        translate_val_ = Faref ((trt),                          \
                                make_number (translate_ch_));   \
        if (INTEGERP (translate_val_))                          \
          (out) = XINT (translate_val_);                        \
        else                                                    \
          (out) = translate_ch_;                                \
      }                                                         \
    else                                                        \
      (out) = translate_ch_;                                    \
  }                                                             \
while (0)
1024
1025 static int
1026 search_buffer (string, pos, pos_byte, lim, lim_byte, n,
1027                RE, trt, inverse_trt, posix)
1028      Lisp_Object string;
1029      int pos;
1030      int pos_byte;
1031      int lim;
1032      int lim_byte;
1033      int n;
1034      int RE;
1035      Lisp_Object trt;
1036      Lisp_Object inverse_trt;
1037      int posix;
1038 {
1039   int len = SCHARS (string);
1040   int len_byte = SBYTES (string);
1041   register int i;
1042
1043   if (running_asynch_code)
1044     save_search_regs ();
1045
1046   /* Searching 0 times means don't move.  */
1047   /* Null string is found at starting position.  */
1048   if (len == 0 || n == 0)
1049     {
1050       set_search_regs (pos_byte, 0);
1051       return pos;
1052     }
1053
1054   if (RE && !trivial_regexp_p (string))
1055     {
1056       unsigned char *p1, *p2;
1057       int s1, s2;
1058       struct re_pattern_buffer *bufp;
1059
1060       bufp = compile_pattern (string, &search_regs, trt, posix,
1061                               !NILP (current_buffer->enable_multibyte_characters));
1062
1063       immediate_quit = 1;       /* Quit immediately if user types ^G,
1064                                    because letting this function finish
1065                                    can take too long. */
1066       QUIT;                     /* Do a pending quit right away,
1067                                    to avoid paradoxical behavior */
1068       /* Get pointers and sizes of the two strings
1069          that make up the visible portion of the buffer. */
1070
1071       p1 = BEGV_ADDR;
1072       s1 = GPT_BYTE - BEGV_BYTE;
1073       p2 = GAP_END_ADDR;
1074       s2 = ZV_BYTE - GPT_BYTE;
1075       if (s1 < 0)
1076         {
1077           p2 = p1;
1078           s2 = ZV_BYTE - BEGV_BYTE;
1079           s1 = 0;
1080         }
1081       if (s2 < 0)
1082         {
1083           s1 = ZV_BYTE - BEGV_BYTE;
1084           s2 = 0;
1085         }
1086       re_match_object = Qnil;
1087
1088       while (n < 0)
1089         {
1090           int val;
1091           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1092                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1093                              &search_regs,
1094                              /* Don't allow match past current point */
1095                              pos_byte - BEGV_BYTE);
1096           if (val == -2)
1097             {
1098               matcher_overflow ();
1099             }
1100           if (val >= 0)
1101             {
1102               pos_byte = search_regs.start[0] + BEGV_BYTE;
1103               for (i = 0; i < search_regs.num_regs; i++)
1104                 if (search_regs.start[i] >= 0)
1105                   {
1106                     search_regs.start[i]
1107                       = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1108                     search_regs.end[i]
1109                       = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1110                   }
1111               XSETBUFFER (last_thing_searched, current_buffer);
1112               /* Set pos to the new position. */
1113               pos = search_regs.start[0];
1114             }
1115           else
1116             {
1117               immediate_quit = 0;
1118               return (n);
1119             }
1120           n++;
1121         }
1122       while (n > 0)
1123         {
1124           int val;
1125           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1126                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1127                              &search_regs,
1128                              lim_byte - BEGV_BYTE);
1129           if (val == -2)
1130             {
1131               matcher_overflow ();
1132             }
1133           if (val >= 0)
1134             {
1135               pos_byte = search_regs.end[0] + BEGV_BYTE;
1136               for (i = 0; i < search_regs.num_regs; i++)
1137                 if (search_regs.start[i] >= 0)
1138                   {
1139                     search_regs.start[i]
1140                       = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1141                     search_regs.end[i]
1142                       = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1143                   }
1144               XSETBUFFER (last_thing_searched, current_buffer);
1145               pos = search_regs.end[0];
1146             }
1147           else
1148             {
1149               immediate_quit = 0;
1150               return (0 - n);
1151             }
1152           n--;
1153         }
1154       immediate_quit = 0;
1155       return (pos);
1156     }
1157   else                          /* non-RE case */
1158     {
1159       unsigned char *raw_pattern, *pat;
1160       int raw_pattern_size;
1161       int raw_pattern_size_byte;
1162       unsigned char *patbuf;
1163       int multibyte = !NILP (current_buffer->enable_multibyte_characters);
1164       unsigned char *base_pat = SDATA (string);
1165       int charset_base = -1;
1166       int boyer_moore_ok = 1;
1167
1168       /* MULTIBYTE says whether the text to be searched is multibyte.
1169          We must convert PATTERN to match that, or we will not really
1170          find things right.  */
1171
1172       if (multibyte == STRING_MULTIBYTE (string))
1173         {
1174           raw_pattern = (unsigned char *) SDATA (string);
1175           raw_pattern_size = SCHARS (string);
1176           raw_pattern_size_byte = SBYTES (string);
1177         }
1178       else if (multibyte)
1179         {
1180           raw_pattern_size = SCHARS (string);
1181           raw_pattern_size_byte
1182             = count_size_as_multibyte (SDATA (string),
1183                                        raw_pattern_size);
1184           raw_pattern = (unsigned char *) alloca (raw_pattern_size_byte + 1);
1185           copy_text (SDATA (string), raw_pattern,
1186                      SCHARS (string), 0, 1);
1187         }
1188       else
1189         {
1190           /* Converting multibyte to single-byte.
1191
1192              ??? Perhaps this conversion should be done in a special way
1193              by subtracting nonascii-insert-offset from each non-ASCII char,
1194              so that only the multibyte chars which really correspond to
1195              the chosen single-byte character set can possibly match.  */
1196           raw_pattern_size = SCHARS (string);
1197           raw_pattern_size_byte = SCHARS (string);
1198           raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
1199           copy_text (SDATA (string), raw_pattern,
1200                      SBYTES (string), 1, 0);
1201         }
1202
1203       /* Copy and optionally translate the pattern.  */
1204       len = raw_pattern_size;
1205       len_byte = raw_pattern_size_byte;
1206       patbuf = (unsigned char *) alloca (len_byte);
1207       pat = patbuf;
1208       base_pat = raw_pattern;
1209       if (multibyte)
1210         {
1211           while (--len >= 0)
1212             {
1213               unsigned char str[MAX_MULTIBYTE_LENGTH];
1214               int c, translated, inverse;
1215               int in_charlen, charlen;
1216
1217               /* If we got here and the RE flag is set, it's because we're
1218                  dealing with a regexp known to be trivial, so the backslash
1219                  just quotes the next character.  */
1220               if (RE && *base_pat == '\\')
1221                 {
1222                   len--;
1223                   len_byte--;
1224                   base_pat++;
1225                 }
1226
1227               c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen);
1228
1229               /* Translate the character, if requested.  */
1230               TRANSLATE (translated, trt, c);
1231               /* If translation changed the byte-length, go back
1232                  to the original character.  */
1233               charlen = CHAR_STRING (translated, str);
1234               if (in_charlen != charlen)
1235                 {
1236                   translated = c;
1237                   charlen = CHAR_STRING (c, str);
1238                 }
1239
1240               /* If we are searching for something strange,
1241                  an invalid multibyte code, don't use boyer-moore.  */
1242               if (! ASCII_BYTE_P (translated)
1243                   && (charlen == 1 /* 8bit code */
1244                       || charlen != in_charlen /* invalid multibyte code */
1245                       ))
1246                 boyer_moore_ok = 0;
1247
1248               TRANSLATE (inverse, inverse_trt, c);
1249
1250               /* Did this char actually get translated?
1251                  Would any other char get translated into it?  */
1252               if (translated != c || inverse != c)
1253                 {
1254                   /* Keep track of which character set row
1255                      contains the characters that need translation.  */
1256                   int charset_base_code = c & ~CHAR_FIELD3_MASK;
1257                   int inverse_charset_base = inverse & ~CHAR_FIELD3_MASK;
1258
1259                   if (charset_base_code != inverse_charset_base)
1260                     boyer_moore_ok = 0;
1261                   else if (charset_base == -1)
1262                     charset_base = charset_base_code;
1263                   else if (charset_base != charset_base_code)
1264                     /* If two different rows appear, needing translation,
1265                        then we cannot use boyer_moore search.  */
1266                     boyer_moore_ok = 0;
1267                 }
1268
1269               /* Store this character into the translated pattern.  */
1270               bcopy (str, pat, charlen);
1271               pat += charlen;
1272               base_pat += in_charlen;
1273               len_byte -= in_charlen;
1274             }
1275         }
1276       else
1277         {
1278           /* Unibyte buffer.  */
1279           charset_base = 0;
1280           while (--len >= 0)
1281             {
1282               int c, translated;
1283
1284               /* If we got here and the RE flag is set, it's because we're
1285                  dealing with a regexp known to be trivial, so the backslash
1286                  just quotes the next character.  */
1287               if (RE && *base_pat == '\\')
1288                 {
1289                   len--;
1290                   base_pat++;
1291                 }
1292               c = *base_pat++;
1293               TRANSLATE (translated, trt, c);
1294               *pat++ = translated;
1295             }
1296         }
1297
1298       len_byte = pat - patbuf;
1299       len = raw_pattern_size;
1300       pat = base_pat = patbuf;
1301
1302       if (boyer_moore_ok)
1303         return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
1304                             pos, pos_byte, lim, lim_byte,
1305                             charset_base);
1306       else
1307         return simple_search (n, pat, len, len_byte, trt,
1308                               pos, pos_byte, lim, lim_byte);
1309     }
1310 }
1311 \f
1312 /* Do a simple string search N times for the string PAT,
1313    whose length is LEN/LEN_BYTE,
1314    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1315    TRT is the translation table.
1316
1317    Return the character position where the match is found.
1318    Otherwise, if M matches remained to be found, return -M.
1319
1320    This kind of search works regardless of what is in PAT and
1321    regardless of what is in TRT.  It is used in cases where
1322    boyer_moore cannot work.  */
1323
1324 static int
1325 simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
1326      int n;
1327      unsigned char *pat;
1328      int len, len_byte;
1329      Lisp_Object trt;
1330      int pos, pos_byte;
1331      int lim, lim_byte;
1332 {
1333   int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1334   int forward = n > 0;
1335
1336   if (lim > pos && multibyte)
1337     while (n > 0)
1338       {
1339         while (1)
1340           {
1341             /* Try matching at position POS.  */
1342             int this_pos = pos;
1343             int this_pos_byte = pos_byte;
1344             int this_len = len;
1345             int this_len_byte = len_byte;
1346             unsigned char *p = pat;
1347             if (pos + len > lim)
1348               goto stop;
1349
1350             while (this_len > 0)
1351               {
1352                 int charlen, buf_charlen;
1353                 int pat_ch, buf_ch;
1354
1355                 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1356                 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1357                                                  ZV_BYTE - this_pos_byte,
1358                                                  buf_charlen);
1359                 TRANSLATE (buf_ch, trt, buf_ch);
1360
1361                 if (buf_ch != pat_ch)
1362                   break;
1363
1364                 this_len_byte -= charlen;
1365                 this_len--;
1366                 p += charlen;
1367
1368                 this_pos_byte += buf_charlen;
1369                 this_pos++;
1370               }
1371
1372             if (this_len == 0)
1373               {
1374                 pos += len;
1375                 pos_byte += len_byte;
1376                 break;
1377               }
1378
1379             INC_BOTH (pos, pos_byte);
1380           }
1381
1382         n--;
1383       }
1384   else if (lim > pos)
1385     while (n > 0)
1386       {
1387         while (1)
1388           {
1389             /* Try matching at position POS.  */
1390             int this_pos = pos;
1391             int this_len = len;
1392             unsigned char *p = pat;
1393
1394             if (pos + len > lim)
1395               goto stop;
1396
1397             while (this_len > 0)
1398               {
1399                 int pat_ch = *p++;
1400                 int buf_ch = FETCH_BYTE (this_pos);
1401                 TRANSLATE (buf_ch, trt, buf_ch);
1402
1403                 if (buf_ch != pat_ch)
1404                   break;
1405
1406                 this_len--;
1407                 this_pos++;
1408               }
1409
1410             if (this_len == 0)
1411               {
1412                 pos += len;
1413                 break;
1414               }
1415
1416             pos++;
1417           }
1418
1419         n--;
1420       }
1421   /* Backwards search.  */
1422   else if (lim < pos && multibyte)
1423     while (n < 0)
1424       {
1425         while (1)
1426           {
1427             /* Try matching at position POS.  */
1428             int this_pos = pos - len;
1429             int this_pos_byte = pos_byte - len_byte;
1430             int this_len = len;
1431             int this_len_byte = len_byte;
1432             unsigned char *p = pat;
1433
1434             if (pos - len < lim)
1435               goto stop;
1436
1437             while (this_len > 0)
1438               {
1439                 int charlen, buf_charlen;
1440                 int pat_ch, buf_ch;
1441
1442                 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1443                 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1444                                                  ZV_BYTE - this_pos_byte,
1445                                                  buf_charlen);
1446                 TRANSLATE (buf_ch, trt, buf_ch);
1447
1448                 if (buf_ch != pat_ch)
1449                   break;
1450
1451                 this_len_byte -= charlen;
1452                 this_len--;
1453                 p += charlen;
1454                 this_pos_byte += buf_charlen;
1455                 this_pos++;
1456               }
1457
1458             if (this_len == 0)
1459               {
1460                 pos -= len;
1461                 pos_byte -= len_byte;
1462                 break;
1463               }
1464
1465             DEC_BOTH (pos, pos_byte);
1466           }
1467
1468         n++;
1469       }
1470   else if (lim < pos)
1471     while (n < 0)
1472       {
1473         while (1)
1474           {
1475             /* Try matching at position POS.  */
1476             int this_pos = pos - len;
1477             int this_len = len;
1478             unsigned char *p = pat;
1479
1480             if (pos - len < lim)
1481               goto stop;
1482
1483             while (this_len > 0)
1484               {
1485                 int pat_ch = *p++;
1486                 int buf_ch = FETCH_BYTE (this_pos);
1487                 TRANSLATE (buf_ch, trt, buf_ch);
1488
1489                 if (buf_ch != pat_ch)
1490                   break;
1491                 this_len--;
1492                 this_pos++;
1493               }
1494
1495             if (this_len == 0)
1496               {
1497                 pos -= len;
1498                 break;
1499               }
1500
1501             pos--;
1502           }
1503
1504         n++;
1505       }
1506
1507  stop:
1508   if (n == 0)
1509     {
1510       if (forward)
1511         set_search_regs ((multibyte ? pos_byte : pos) - len_byte, len_byte);
1512       else
1513         set_search_regs (multibyte ? pos_byte : pos, len_byte);
1514
1515       return pos;
1516     }
1517   else if (n > 0)
1518     return -n;
1519   else
1520     return n;
1521 }
1522 \f
1523 /* Do Boyer-Moore search N times for the string PAT,
1524    whose length is LEN/LEN_BYTE,
1525    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1526    DIRECTION says which direction we search in.
1527    TRT and INVERSE_TRT are translation tables.
1528
1529    This kind of search works if all the characters in PAT that have
1530    nontrivial translation are the same aside from the last byte.  This
1531    makes it possible to translate just the last byte of a character,
1532    and do so after just a simple test of the context.
1533
1534    If that criterion is not satisfied, do not call this function.  */
1535
1536 static int
1537 boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
1538              pos, pos_byte, lim, lim_byte, charset_base)
1539      int n;
1540      unsigned char *base_pat;
1541      int len, len_byte;
1542      Lisp_Object trt;
1543      Lisp_Object inverse_trt;
1544      int pos, pos_byte;
1545      int lim, lim_byte;
1546      int charset_base;
1547 {
1548   int direction = ((n > 0) ? 1 : -1);
1549   register int dirlen;
1550   int infinity, limit, stride_for_teases = 0;
1551   register int *BM_tab;
1552   int *BM_tab_base;
1553   register unsigned char *cursor, *p_limit;
1554   register int i, j;
1555   unsigned char *pat, *pat_end;
1556   int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1557
1558   unsigned char simple_translate[0400];
1559   int translate_prev_byte = 0;
1560   int translate_anteprev_byte = 0;
1561
1562 #ifdef C_ALLOCA
1563   int BM_tab_space[0400];
1564   BM_tab = &BM_tab_space[0];
1565 #else
1566   BM_tab = (int *) alloca (0400 * sizeof (int));
1567 #endif
1568   /* The general approach is that we are going to maintain that we know */
1569   /* the first (closest to the present position, in whatever direction */
1570   /* we're searching) character that could possibly be the last */
1571   /* (furthest from present position) character of a valid match.  We */
1572   /* advance the state of our knowledge by looking at that character */
1573   /* and seeing whether it indeed matches the last character of the */
1574   /* pattern.  If it does, we take a closer look.  If it does not, we */
1575   /* move our pointer (to putative last characters) as far as is */
1576   /* logically possible.  This amount of movement, which I call a */
1577   /* stride, will be the length of the pattern if the actual character */
1578   /* appears nowhere in the pattern, otherwise it will be the distance */
1579   /* from the last occurrence of that character to the end of the */
1580   /* pattern. */
1581   /* As a coding trick, an enormous stride is coded into the table for */
1582   /* characters that match the last character.  This allows use of only */
1583   /* a single test, a test for having gone past the end of the */
1584   /* permissible match region, to test for both possible matches (when */
1585   /* the stride goes past the end immediately) and failure to */
1586   /* match (where you get nudged past the end one stride at a time). */
1587
1588   /* Here we make a "mickey mouse" BM table.  The stride of the search */
1589   /* is determined only by the last character of the putative match. */
1590   /* If that character does not match, we will stride the proper */
1591   /* distance to propose a match that superimposes it on the last */
1592   /* instance of a character that matches it (per trt), or misses */
1593   /* it entirely if there is none. */
1594
1595   dirlen = len_byte * direction;
1596   infinity = dirlen - (lim_byte + pos_byte + len_byte + len_byte) * direction;
1597
1598   /* Record position after the end of the pattern.  */
1599   pat_end = base_pat + len_byte;
1600   /* BASE_PAT points to a character that we start scanning from.
1601      It is the first character in a forward search,
1602      the last character in a backward search.  */
1603   if (direction < 0)
1604     base_pat = pat_end - 1;
1605
1606   BM_tab_base = BM_tab;
1607   BM_tab += 0400;
1608   j = dirlen;           /* to get it in a register */
1609   /* A character that does not appear in the pattern induces a */
1610   /* stride equal to the pattern length. */
1611   while (BM_tab_base != BM_tab)
1612     {
1613       *--BM_tab = j;
1614       *--BM_tab = j;
1615       *--BM_tab = j;
1616       *--BM_tab = j;
1617     }
1618
1619   /* We use this for translation, instead of TRT itself.
1620      We fill this in to handle the characters that actually
1621      occur in the pattern.  Others don't matter anyway!  */
1622   bzero (simple_translate, sizeof simple_translate);
1623   for (i = 0; i < 0400; i++)
1624     simple_translate[i] = i;
1625
1626   i = 0;
1627   while (i != infinity)
1628     {
1629       unsigned char *ptr = base_pat + i;
1630       i += direction;
1631       if (i == dirlen)
1632         i = infinity;
1633       if (! NILP (trt))
1634         {
1635           int ch;
1636           int untranslated;
1637           int this_translated = 1;
1638
1639           if (multibyte
1640               /* Is *PTR the last byte of a character?  */
1641               && (pat_end - ptr == 1 || CHAR_HEAD_P (ptr[1])))
1642             {
1643               unsigned char *charstart = ptr;
1644               while (! CHAR_HEAD_P (*charstart))
1645                 charstart--;
1646               untranslated = STRING_CHAR (charstart, ptr - charstart + 1);
1647               if (charset_base == (untranslated & ~CHAR_FIELD3_MASK))
1648                 {
1649                   TRANSLATE (ch, trt, untranslated);
1650                   if (! CHAR_HEAD_P (*ptr))
1651                     {
1652                       translate_prev_byte = ptr[-1];
1653                       if (! CHAR_HEAD_P (translate_prev_byte))
1654                         translate_anteprev_byte = ptr[-2];
1655                     }
1656                 }
1657               else
1658                 {
1659                   this_translated = 0;
1660                   ch = *ptr;
1661                 }
1662             }
1663           else if (!multibyte)
1664             TRANSLATE (ch, trt, *ptr);
1665           else
1666             {
1667               ch = *ptr;
1668               this_translated = 0;
1669             }
1670
1671           if (ch > 0400)
1672             j = ((unsigned char) ch) | 0200;
1673           else
1674             j = (unsigned char) ch;
1675
1676           if (i == infinity)
1677             stride_for_teases = BM_tab[j];
1678
1679           BM_tab[j] = dirlen - i;
1680           /* A translation table is accompanied by its inverse -- see */
1681           /* comment following downcase_table for details */
1682           if (this_translated)
1683             {
1684               int starting_ch = ch;
1685               int starting_j = j;
1686               while (1)
1687                 {
1688                   TRANSLATE (ch, inverse_trt, ch);
1689                   if (ch > 0400)
1690                     j = ((unsigned char) ch) | 0200;
1691                   else
1692                     j = (unsigned char) ch;
1693
1694                   /* For all the characters that map into CH,
1695                      set up simple_translate to map the last byte
1696                      into STARTING_J.  */
1697                   simple_translate[j] = starting_j;
1698                   if (ch == starting_ch)
1699                     break;
1700                   BM_tab[j] = dirlen - i;
1701                 }
1702             }
1703         }
1704       else
1705         {
1706           j = *ptr;
1707
1708           if (i == infinity)
1709             stride_for_teases = BM_tab[j];
1710           BM_tab[j] = dirlen - i;
1711         }
1712       /* stride_for_teases tells how much to stride if we get a */
1713       /* match on the far character but are subsequently */
1714       /* disappointed, by recording what the stride would have been */
1715       /* for that character if the last character had been */
1716       /* different. */
1717     }
1718   infinity = dirlen - infinity;
1719   pos_byte += dirlen - ((direction > 0) ? direction : 0);
1720   /* loop invariant - POS_BYTE points at where last char (first
1721      char if reverse) of pattern would align in a possible match.  */
1722   while (n != 0)
1723     {
1724       int tail_end;
1725       unsigned char *tail_end_ptr;
1726
1727       /* It's been reported that some (broken) compiler thinks that
1728          Boolean expressions in an arithmetic context are unsigned.
1729          Using an explicit ?1:0 prevents this.  */
1730       if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1731           < 0)
1732         return (n * (0 - direction));
1733       /* First we do the part we can by pointers (maybe nothing) */
1734       QUIT;
1735       pat = base_pat;
1736       limit = pos_byte - dirlen + direction;
1737       if (direction > 0)
1738         {
1739           limit = BUFFER_CEILING_OF (limit);
1740           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1741              can take on without hitting edge of buffer or the gap.  */
1742           limit = min (limit, pos_byte + 20000);
1743           limit = min (limit, lim_byte - 1);
1744         }
1745       else
1746         {
1747           limit = BUFFER_FLOOR_OF (limit);
1748           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1749              can take on without hitting edge of buffer or the gap.  */
1750           limit = max (limit, pos_byte - 20000);
1751           limit = max (limit, lim_byte);
1752         }
1753       tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1754       tail_end_ptr = BYTE_POS_ADDR (tail_end);
1755
1756       if ((limit - pos_byte) * direction > 20)
1757         {
1758           unsigned char *p2;
1759
1760           p_limit = BYTE_POS_ADDR (limit);
1761           p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1762           /* In this loop, pos + cursor - p2 is the surrogate for pos */
1763           while (1)             /* use one cursor setting as long as i can */
1764             {
1765               if (direction > 0) /* worth duplicating */
1766                 {
1767                   /* Use signed comparison if appropriate
1768                      to make cursor+infinity sure to be > p_limit.
1769                      Assuming that the buffer lies in a range of addresses
1770                      that are all "positive" (as ints) or all "negative",
1771                      either kind of comparison will work as long
1772                      as we don't step by infinity.  So pick the kind
1773                      that works when we do step by infinity.  */
1774                   if ((EMACS_INT) (p_limit + infinity) > (EMACS_INT) p_limit)
1775                     while ((EMACS_INT) cursor <= (EMACS_INT) p_limit)
1776                       cursor += BM_tab[*cursor];
1777                   else
1778                     while ((EMACS_UINT) cursor <= (EMACS_UINT) p_limit)
1779                       cursor += BM_tab[*cursor];
1780                 }
1781               else
1782                 {
1783                   if ((EMACS_INT) (p_limit + infinity) < (EMACS_INT) p_limit)
1784                     while ((EMACS_INT) cursor >= (EMACS_INT) p_limit)
1785                       cursor += BM_tab[*cursor];
1786                   else
1787                     while ((EMACS_UINT) cursor >= (EMACS_UINT) p_limit)
1788                       cursor += BM_tab[*cursor];
1789                 }
1790 /* If you are here, cursor is beyond the end of the searched region. */
1791 /* This can happen if you match on the far character of the pattern, */
1792 /* because the "stride" of that character is infinity, a number able */
1793 /* to throw you well beyond the end of the search.  It can also */
1794 /* happen if you fail to match within the permitted region and would */
1795 /* otherwise try a character beyond that region */
1796               if ((cursor - p_limit) * direction <= len_byte)
1797                 break;  /* a small overrun is genuine */
1798               cursor -= infinity; /* large overrun = hit */
1799               i = dirlen - direction;
1800               if (! NILP (trt))
1801                 {
1802                   while ((i -= direction) + direction != 0)
1803                     {
1804                       int ch;
1805                       cursor -= direction;
1806                       /* Translate only the last byte of a character.  */
1807                       if (! multibyte
1808                           || ((cursor == tail_end_ptr
1809                                || CHAR_HEAD_P (cursor[1]))
1810                               && (CHAR_HEAD_P (cursor[0])
1811                                   || (translate_prev_byte == cursor[-1]
1812                                       && (CHAR_HEAD_P (translate_prev_byte)
1813                                           || translate_anteprev_byte == cursor[-2])))))
1814                         ch = simple_translate[*cursor];
1815                       else
1816                         ch = *cursor;
1817                       if (pat[i] != ch)
1818                         break;
1819                     }
1820                 }
1821               else
1822                 {
1823                   while ((i -= direction) + direction != 0)
1824                     {
1825                       cursor -= direction;
1826                       if (pat[i] != *cursor)
1827                         break;
1828                     }
1829                 }
1830               cursor += dirlen - i - direction; /* fix cursor */
1831               if (i + direction == 0)
1832                 {
1833                   int position;
1834
1835                   cursor -= direction;
1836
1837                   position = pos_byte + cursor - p2 + ((direction > 0)
1838                                                        ? 1 - len_byte : 0);
1839                   set_search_regs (position, len_byte);
1840
1841                   if ((n -= direction) != 0)
1842                     cursor += dirlen; /* to resume search */
1843                   else
1844                     return ((direction > 0)
1845                             ? search_regs.end[0] : search_regs.start[0]);
1846                 }
1847               else
1848                 cursor += stride_for_teases; /* <sigh> we lose -  */
1849             }
1850           pos_byte += cursor - p2;
1851         }
1852       else
1853         /* Now we'll pick up a clump that has to be done the hard */
1854         /* way because it covers a discontinuity */
1855         {
1856           limit = ((direction > 0)
1857                    ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
1858                    : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
1859           limit = ((direction > 0)
1860                    ? min (limit + len_byte, lim_byte - 1)
1861                    : max (limit - len_byte, lim_byte));
1862           /* LIMIT is now the last value POS_BYTE can have
1863              and still be valid for a possible match.  */
1864           while (1)
1865             {
1866               /* This loop can be coded for space rather than */
1867               /* speed because it will usually run only once. */
1868               /* (the reach is at most len + 21, and typically */
1869               /* does not exceed len) */
1870               while ((limit - pos_byte) * direction >= 0)
1871                 pos_byte += BM_tab[FETCH_BYTE (pos_byte)];
1872               /* now run the same tests to distinguish going off the */
1873               /* end, a match or a phony match. */
1874               if ((pos_byte - limit) * direction <= len_byte)
1875                 break;  /* ran off the end */
1876               /* Found what might be a match.
1877                  Set POS_BYTE back to last (first if reverse) pos.  */
1878               pos_byte -= infinity;
1879               i = dirlen - direction;
1880               while ((i -= direction) + direction != 0)
1881                 {
1882                   int ch;
1883                   unsigned char *ptr;
1884                   pos_byte -= direction;
1885                   ptr = BYTE_POS_ADDR (pos_byte);
1886                   /* Translate only the last byte of a character.  */
1887                   if (! multibyte
1888                       || ((ptr == tail_end_ptr
1889                            || CHAR_HEAD_P (ptr[1]))
1890                           && (CHAR_HEAD_P (ptr[0])
1891                               || (translate_prev_byte == ptr[-1]
1892                                   && (CHAR_HEAD_P (translate_prev_byte)
1893                                       || translate_anteprev_byte == ptr[-2])))))
1894                     ch = simple_translate[*ptr];
1895                   else
1896                     ch = *ptr;
1897                   if (pat[i] != ch)
1898                     break;
1899                 }
1900               /* Above loop has moved POS_BYTE part or all the way
1901                  back to the first pos (last pos if reverse).
1902                  Set it once again at the last (first if reverse) char.  */
1903               pos_byte += dirlen - i- direction;
1904               if (i + direction == 0)
1905                 {
1906                   int position;
1907                   pos_byte -= direction;
1908
1909                   position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
1910
1911                   set_search_regs (position, len_byte);
1912
1913                   if ((n -= direction) != 0)
1914                     pos_byte += dirlen; /* to resume search */
1915                   else
1916                     return ((direction > 0)
1917                             ? search_regs.end[0] : search_regs.start[0]);
1918                 }
1919               else
1920                 pos_byte += stride_for_teases;
1921             }
1922           }
1923       /* We have done one clump.  Can we continue? */
1924       if ((lim_byte - pos_byte) * direction < 0)
1925         return ((0 - n) * direction);
1926     }
1927   return BYTE_TO_CHAR (pos_byte);
1928 }
1929
1930 /* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
1931    for the overall match just found in the current buffer.
1932    Also clear out the match data for registers 1 and up.  */
1933
1934 static void
1935 set_search_regs (beg_byte, nbytes)
1936      int beg_byte, nbytes;
1937 {
1938   int i;
1939
1940   /* Make sure we have registers in which to store
1941      the match position.  */
1942   if (search_regs.num_regs == 0)
1943     {
1944       search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
1945       search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
1946       search_regs.num_regs = 2;
1947     }
1948
1949   /* Clear out the other registers.  */
1950   for (i = 1; i < search_regs.num_regs; i++)
1951     {
1952       search_regs.start[i] = -1;
1953       search_regs.end[i] = -1;
1954     }
1955
1956   search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
1957   search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
1958   XSETBUFFER (last_thing_searched, current_buffer);
1959 }
1960 \f
1961 /* Given a string of words separated by word delimiters,
1962   compute a regexp that matches those exact words
1963   separated by arbitrary punctuation.  */
1964
1965 static Lisp_Object
1966 wordify (string)
1967      Lisp_Object string;
1968 {
1969   register unsigned char *p, *o;
1970   register int i, i_byte, len, punct_count = 0, word_count = 0;
1971   Lisp_Object val;
1972   int prev_c = 0;
1973   int adjust;
1974
1975   CHECK_STRING (string);
1976   p = SDATA (string);
1977   len = SCHARS (string);
1978
1979   for (i = 0, i_byte = 0; i < len; )
1980     {
1981       int c;
1982
1983       FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
1984
1985       if (SYNTAX (c) != Sword)
1986         {
1987           punct_count++;
1988           if (i > 0 && SYNTAX (prev_c) == Sword)
1989             word_count++;
1990         }
1991
1992       prev_c = c;
1993     }
1994
1995   if (SYNTAX (prev_c) == Sword)
1996     word_count++;
1997   if (!word_count)
1998     return empty_string;
1999
2000   adjust = - punct_count + 5 * (word_count - 1) + 4;
2001   if (STRING_MULTIBYTE (string))
2002     val = make_uninit_multibyte_string (len + adjust,
2003                                         SBYTES (string)
2004                                         + adjust);
2005   else
2006     val = make_uninit_string (len + adjust);
2007
2008   o = SDATA (val);
2009   *o++ = '\\';
2010   *o++ = 'b';
2011   prev_c = 0;
2012
2013   for (i = 0, i_byte = 0; i < len; )
2014     {
2015       int c;
2016       int i_byte_orig = i_byte;
2017
2018       FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
2019
2020       if (SYNTAX (c) == Sword)
2021         {
2022           bcopy (SDATA (string) + i_byte_orig, o,
2023                  i_byte - i_byte_orig);
2024           o += i_byte - i_byte_orig;
2025         }
2026       else if (i > 0 && SYNTAX (prev_c) == Sword && --word_count)
2027         {
2028           *o++ = '\\';
2029           *o++ = 'W';
2030           *o++ = '\\';
2031           *o++ = 'W';
2032           *o++ = '*';
2033         }
2034
2035       prev_c = c;
2036     }
2037
2038   *o++ = '\\';
2039   *o++ = 'b';
2040
2041   return val;
2042 }
2043 \f
2044 DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
2045        "MSearch backward: ",
2046        doc: /* Search backward from point for STRING.
2047 Set point to the beginning of the occurrence found, and return point.
2048 An optional second argument bounds the search; it is a buffer position.
2049 The match found must not extend before that position.
2050 Optional third argument, if t, means if fail just return nil (no error).
2051  If not nil and not t, position at limit of search and return nil.
2052 Optional fourth argument is repeat count--search for successive occurrences.
2053
2054 Search case-sensitivity is determined by the value of the variable
2055 `case-fold-search', which see.
2056
2057 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2058      (string, bound, noerror, count)
2059      Lisp_Object string, bound, noerror, count;
2060 {
2061   return search_command (string, bound, noerror, count, -1, 0, 0);
2062 }
2063
2064 DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
2065        doc: /* Search forward from point for STRING.
2066 Set point to the end of the occurrence found, and return point.
2067 An optional second argument bounds the search; it is a buffer position.
2068 The match found must not extend after that position.  nil is equivalent
2069   to (point-max).
2070 Optional third argument, if t, means if fail just return nil (no error).
2071   If not nil and not t, move to limit of search and return nil.
2072 Optional fourth argument is repeat count--search for successive occurrences.
2073
2074 Search case-sensitivity is determined by the value of the variable
2075 `case-fold-search', which see.
2076
2077 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2078      (string, bound, noerror, count)
2079      Lisp_Object string, bound, noerror, count;
2080 {
2081   return search_command (string, bound, noerror, count, 1, 0, 0);
2082 }
2083
2084 DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
2085        "sWord search backward: ",
2086        doc: /* Search backward from point for STRING, ignoring differences in punctuation.
2087 Set point to the beginning of the occurrence found, and return point.
2088 An optional second argument bounds the search; it is a buffer position.
2089 The match found must not extend before that position.
2090 Optional third argument, if t, means if fail just return nil (no error).
2091   If not nil and not t, move to limit of search and return nil.
2092 Optional fourth argument is repeat count--search for successive occurrences.  */)
2093      (string, bound, noerror, count)
2094      Lisp_Object string, bound, noerror, count;
2095 {
2096   return search_command (wordify (string), bound, noerror, count, -1, 1, 0);
2097 }
2098
2099 DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
2100        "sWord search: ",
2101        doc: /* Search forward from point for STRING, ignoring differences in punctuation.
2102 Set point to the end of the occurrence found, and return point.
2103 An optional second argument bounds the search; it is a buffer position.
2104 The match found must not extend after that position.
2105 Optional third argument, if t, means if fail just return nil (no error).
2106   If not nil and not t, move to limit of search and return nil.
2107 Optional fourth argument is repeat count--search for successive occurrences.  */)
2108      (string, bound, noerror, count)
2109      Lisp_Object string, bound, noerror, count;
2110 {
2111   return search_command (wordify (string), bound, noerror, count, 1, 1, 0);
2112 }
2113
2114 DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
2115        "sRE search backward: ",
2116        doc: /* Search backward from point for match for regular expression REGEXP.
2117 Set point to the beginning of the match, and return point.
2118 The match found is the one starting last in the buffer
2119 and yet ending before the origin of the search.
2120 An optional second argument bounds the search; it is a buffer position.
2121 The match found must start at or after that position.
2122 Optional third argument, if t, means if fail just return nil (no error).
2123   If not nil and not t, move to limit of search and return nil.
2124 Optional fourth argument is repeat count--search for successive occurrences.
2125 See also the functions `match-beginning', `match-end', `match-string',
2126 and `replace-match'.  */)
2127      (regexp, bound, noerror, count)
2128      Lisp_Object regexp, bound, noerror, count;
2129 {
2130   return search_command (regexp, bound, noerror, count, -1, 1, 0);
2131 }
2132
2133 DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
2134        "sRE search: ",
2135        doc: /* Search forward from point for regular expression REGEXP.
2136 Set point to the end of the occurrence found, and return point.
2137 An optional second argument bounds the search; it is a buffer position.
2138 The match found must not extend after that position.
2139 Optional third argument, if t, means if fail just return nil (no error).
2140   If not nil and not t, move to limit of search and return nil.
2141 Optional fourth argument is repeat count--search for successive occurrences.
2142 See also the functions `match-beginning', `match-end', `match-string',
2143 and `replace-match'.  */)
2144      (regexp, bound, noerror, count)
2145      Lisp_Object regexp, bound, noerror, count;
2146 {
2147   return search_command (regexp, bound, noerror, count, 1, 1, 0);
2148 }
2149
2150 DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
2151        "sPosix search backward: ",
2152        doc: /* Search backward from point for match for regular expression REGEXP.
2153 Find the longest match in accord with Posix regular expression rules.
2154 Set point to the beginning of the match, and return point.
2155 The match found is the one starting last in the buffer
2156 and yet ending before the origin of the search.
2157 An optional second argument bounds the search; it is a buffer position.
2158 The match found must start at or after that position.
2159 Optional third argument, if t, means if fail just return nil (no error).
2160   If not nil and not t, move to limit of search and return nil.
2161 Optional fourth argument is repeat count--search for successive occurrences.
2162 See also the functions `match-beginning', `match-end', `match-string',
2163 and `replace-match'.  */)
2164      (regexp, bound, noerror, count)
2165      Lisp_Object regexp, bound, noerror, count;
2166 {
2167   return search_command (regexp, bound, noerror, count, -1, 1, 1);
2168 }
2169
2170 DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
2171        "sPosix search: ",
2172        doc: /* Search forward from point for regular expression REGEXP.
2173 Find the longest match in accord with Posix regular expression rules.
2174 Set point to the end of the occurrence found, and return point.
2175 An optional second argument bounds the search; it is a buffer position.
2176 The match found must not extend after that position.
2177 Optional third argument, if t, means if fail just return nil (no error).
2178   If not nil and not t, move to limit of search and return nil.
2179 Optional fourth argument is repeat count--search for successive occurrences.
2180 See also the functions `match-beginning', `match-end', `match-string',
2181 and `replace-match'.  */)
2182      (regexp, bound, noerror, count)
2183      Lisp_Object regexp, bound, noerror, count;
2184 {
2185   return search_command (regexp, bound, noerror, count, 1, 1, 1);
2186 }
2187 \f
2188 DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
2189        doc: /* Replace text matched by last search with NEWTEXT.
2190 Leave point at the end of the replacement text.
2191
2192 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
2193 Otherwise maybe capitalize the whole text, or maybe just word initials,
2194 based on the replaced text.
2195 If the replaced text has only capital letters
2196 and has at least one multiletter word, convert NEWTEXT to all caps.
2197 Otherwise if all words are capitalized in the replaced text,
2198 capitalize each word in NEWTEXT.
2199
2200 If third arg LITERAL is non-nil, insert NEWTEXT literally.
2201 Otherwise treat `\\' as special:
2202   `\\&' in NEWTEXT means substitute original matched text.
2203   `\\N' means substitute what matched the Nth `\\(...\\)'.
2204        If Nth parens didn't match, substitute nothing.
2205   `\\\\' means insert one `\\'.
2206 Case conversion does not apply to these substitutions.
2207
2208 FIXEDCASE and LITERAL are optional arguments.
2209
2210 The optional fourth argument STRING can be a string to modify.
2211 This is meaningful when the previous match was done against STRING,
2212 using `string-match'.  When used this way, `replace-match'
2213 creates and returns a new string made by copying STRING and replacing
2214 the part of STRING that was matched.
2215
2216 The optional fifth argument SUBEXP specifies a subexpression;
2217 it says to replace just that subexpression with NEWTEXT,
2218 rather than replacing the entire matched text.
2219 This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
2220 `\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
2221 NEWTEXT in place of subexp N.
2222 This is useful only after a regular expression search or match,
2223 since only regular expressions have distinguished subexpressions.  */)
2224      (newtext, fixedcase, literal, string, subexp)
2225      Lisp_Object newtext, fixedcase, literal, string, subexp;
2226 {
2227   enum { nochange, all_caps, cap_initial } case_action;
2228   register int pos, pos_byte;
2229   int some_multiletter_word;
2230   int some_lowercase;
2231   int some_uppercase;
2232   int some_nonuppercase_initial;
2233   register int c, prevc;
2234   int sub;
2235   int opoint, newpoint;
2236
2237   CHECK_STRING (newtext);
2238
2239   if (! NILP (string))
2240     CHECK_STRING (string);
2241
2242   case_action = nochange;       /* We tried an initialization */
2243                                 /* but some C compilers blew it */
2244
2245   if (search_regs.num_regs <= 0)
2246     error ("replace-match called before any match found");
2247
2248   if (NILP (subexp))
2249     sub = 0;
2250   else
2251     {
2252       CHECK_NUMBER (subexp);
2253       sub = XINT (subexp);
2254       if (sub < 0 || sub >= search_regs.num_regs)
2255         args_out_of_range (subexp, make_number (search_regs.num_regs));
2256     }
2257
2258   if (NILP (string))
2259     {
2260       if (search_regs.start[sub] < BEGV
2261           || search_regs.start[sub] > search_regs.end[sub]
2262           || search_regs.end[sub] > ZV)
2263         args_out_of_range (make_number (search_regs.start[sub]),
2264                            make_number (search_regs.end[sub]));
2265     }
2266   else
2267     {
2268       if (search_regs.start[sub] < 0
2269           || search_regs.start[sub] > search_regs.end[sub]
2270           || search_regs.end[sub] > SCHARS (string))
2271         args_out_of_range (make_number (search_regs.start[sub]),
2272                            make_number (search_regs.end[sub]));
2273     }
2274
2275   if (NILP (fixedcase))
2276     {
2277       /* Decide how to casify by examining the matched text. */
2278       int last;
2279
2280       pos = search_regs.start[sub];
2281       last = search_regs.end[sub];
2282
2283       if (NILP (string))
2284         pos_byte = CHAR_TO_BYTE (pos);
2285       else
2286         pos_byte = string_char_to_byte (string, pos);
2287
2288       prevc = '\n';
2289       case_action = all_caps;
2290
2291       /* some_multiletter_word is set nonzero if any original word
2292          is more than one letter long. */
2293       some_multiletter_word = 0;
2294       some_lowercase = 0;
2295       some_nonuppercase_initial = 0;
2296       some_uppercase = 0;
2297
2298       while (pos < last)
2299         {
2300           if (NILP (string))
2301             {
2302               c = FETCH_CHAR (pos_byte);
2303               INC_BOTH (pos, pos_byte);
2304             }
2305           else
2306             FETCH_STRING_CHAR_ADVANCE (c, string, pos, pos_byte);
2307
2308           if (LOWERCASEP (c))
2309             {
2310               /* Cannot be all caps if any original char is lower case */
2311
2312               some_lowercase = 1;
2313               if (SYNTAX (prevc) != Sword)
2314                 some_nonuppercase_initial = 1;
2315               else
2316                 some_multiletter_word = 1;
2317             }
2318           else if (!NOCASEP (c))
2319             {
2320               some_uppercase = 1;
2321               if (SYNTAX (prevc) != Sword)
2322                 ;
2323               else
2324                 some_multiletter_word = 1;
2325             }
2326           else
2327             {
2328               /* If the initial is a caseless word constituent,
2329                  treat that like a lowercase initial.  */
2330               if (SYNTAX (prevc) != Sword)
2331                 some_nonuppercase_initial = 1;
2332             }
2333
2334           prevc = c;
2335         }
2336
2337       /* Convert to all caps if the old text is all caps
2338          and has at least one multiletter word.  */
2339       if (! some_lowercase && some_multiletter_word)
2340         case_action = all_caps;
2341       /* Capitalize each word, if the old text has all capitalized words.  */
2342       else if (!some_nonuppercase_initial && some_multiletter_word)
2343         case_action = cap_initial;
2344       else if (!some_nonuppercase_initial && some_uppercase)
2345         /* Should x -> yz, operating on X, give Yz or YZ?
2346            We'll assume the latter.  */
2347         case_action = all_caps;
2348       else
2349         case_action = nochange;
2350     }
2351
2352   /* Do replacement in a string.  */
2353   if (!NILP (string))
2354     {
2355       Lisp_Object before, after;
2356
2357       before = Fsubstring (string, make_number (0),
2358                            make_number (search_regs.start[sub]));
2359       after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
2360
2361       /* Substitute parts of the match into NEWTEXT
2362          if desired.  */
2363       if (NILP (literal))
2364         {
2365           int lastpos = 0;
2366           int lastpos_byte = 0;
2367           /* We build up the substituted string in ACCUM.  */
2368           Lisp_Object accum;
2369           Lisp_Object middle;
2370           int length = SBYTES (newtext);
2371
2372           accum = Qnil;
2373
2374           for (pos_byte = 0, pos = 0; pos_byte < length;)
2375             {
2376               int substart = -1;
2377               int subend = 0;
2378               int delbackslash = 0;
2379
2380               FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2381
2382               if (c == '\\')
2383                 {
2384                   FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2385
2386                   if (c == '&')
2387                     {
2388                       substart = search_regs.start[sub];
2389                       subend = search_regs.end[sub];
2390                     }
2391                   else if (c >= '1' && c <= '9')
2392                     {
2393                       if (search_regs.start[c - '0'] >= 0
2394                           && c <= search_regs.num_regs + '0')
2395                         {
2396                           substart = search_regs.start[c - '0'];
2397                           subend = search_regs.end[c - '0'];
2398                         }
2399                       else
2400                         {
2401                           /* If that subexp did not match,
2402                              replace \\N with nothing.  */
2403                           substart = 0;
2404                           subend = 0;
2405                         }
2406                     }
2407                   else if (c == '\\')
2408                     delbackslash = 1;
2409                   else
2410                     error ("Invalid use of `\\' in replacement text");
2411                 }
2412               if (substart >= 0)
2413                 {
2414                   if (pos - 2 != lastpos)
2415                     middle = substring_both (newtext, lastpos,
2416                                              lastpos_byte,
2417                                              pos - 2, pos_byte - 2);
2418                   else
2419                     middle = Qnil;
2420                   accum = concat3 (accum, middle,
2421                                    Fsubstring (string,
2422                                                make_number (substart),
2423                                                make_number (subend)));
2424                   lastpos = pos;
2425                   lastpos_byte = pos_byte;
2426                 }
2427               else if (delbackslash)
2428                 {
2429                   middle = substring_both (newtext, lastpos,
2430                                            lastpos_byte,
2431                                            pos - 1, pos_byte - 1);
2432
2433                   accum = concat2 (accum, middle);
2434                   lastpos = pos;
2435                   lastpos_byte = pos_byte;
2436                 }
2437             }
2438
2439           if (pos != lastpos)
2440             middle = substring_both (newtext, lastpos,
2441                                      lastpos_byte,
2442                                      pos, pos_byte);
2443           else
2444             middle = Qnil;
2445
2446           newtext = concat2 (accum, middle);
2447         }
2448
2449       /* Do case substitution in NEWTEXT if desired.  */
2450       if (case_action == all_caps)
2451         newtext = Fupcase (newtext);
2452       else if (case_action == cap_initial)
2453         newtext = Fupcase_initials (newtext);
2454
2455       return concat3 (before, newtext, after);
2456     }
2457
2458   /* Record point, then move (quietly) to the start of the match.  */
2459   if (PT >= search_regs.end[sub])
2460     opoint = PT - ZV;
2461   else if (PT > search_regs.start[sub])
2462     opoint = search_regs.end[sub] - ZV;
2463   else
2464     opoint = PT;
2465
2466   /* If we want non-literal replacement,
2467      perform substitution on the replacement string.  */
2468   if (NILP (literal))
2469     {
2470       int length = SBYTES (newtext);
2471       unsigned char *substed;
2472       int substed_alloc_size, substed_len;
2473       int buf_multibyte = !NILP (current_buffer->enable_multibyte_characters);
2474       int str_multibyte = STRING_MULTIBYTE (newtext);
2475       Lisp_Object rev_tbl;
2476       int really_changed = 0;
2477
2478       rev_tbl= (!buf_multibyte && CHAR_TABLE_P (Vnonascii_translation_table)
2479                 ? Fchar_table_extra_slot (Vnonascii_translation_table,
2480                                           make_number (0))
2481                 : Qnil);
2482
2483       substed_alloc_size = length * 2 + 100;
2484       substed = (unsigned char *) xmalloc (substed_alloc_size + 1);
2485       substed_len = 0;
2486
2487       /* Go thru NEWTEXT, producing the actual text to insert in
2488          SUBSTED while adjusting multibyteness to that of the current
2489          buffer.  */
2490
2491       for (pos_byte = 0, pos = 0; pos_byte < length;)
2492         {
2493           unsigned char str[MAX_MULTIBYTE_LENGTH];
2494           unsigned char *add_stuff = NULL;
2495           int add_len = 0;
2496           int idx = -1;
2497
2498           if (str_multibyte)
2499             {
2500               FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte);
2501               if (!buf_multibyte)
2502                 c = multibyte_char_to_unibyte (c, rev_tbl);
2503             }
2504           else
2505             {
2506               /* Note that we don't have to increment POS.  */
2507               c = SREF (newtext, pos_byte++);
2508               if (buf_multibyte)
2509                 c = unibyte_char_to_multibyte (c);
2510             }
2511
2512           /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2513              or set IDX to a match index, which means put that part
2514              of the buffer text into SUBSTED.  */
2515
2516           if (c == '\\')
2517             {
2518               really_changed = 1;
2519
2520               if (str_multibyte)
2521                 {
2522                   FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext,
2523                                                       pos, pos_byte);
2524                   if (!buf_multibyte && !SINGLE_BYTE_CHAR_P (c))
2525                     c = multibyte_char_to_unibyte (c, rev_tbl);
2526                 }
2527               else
2528                 {
2529                   c = SREF (newtext, pos_byte++);
2530                   if (buf_multibyte)
2531                     c = unibyte_char_to_multibyte (c);
2532                 }
2533
2534               if (c == '&')
2535                 idx = sub;
2536               else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
2537                 {
2538                   if (search_regs.start[c - '0'] >= 1)
2539                     idx = c - '0';
2540                 }
2541               else if (c == '\\')
2542                 add_len = 1, add_stuff = "\\";
2543               else
2544                 {
2545                   xfree (substed);
2546                   error ("Invalid use of `\\' in replacement text");
2547                 }
2548             }
2549           else
2550             {
2551               add_len = CHAR_STRING (c, str);
2552               add_stuff = str;
2553             }
2554
2555           /* If we want to copy part of a previous match,
2556              set up ADD_STUFF and ADD_LEN to point to it.  */
2557           if (idx >= 0)
2558             {
2559               int begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
2560               add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
2561               if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
2562                 move_gap (search_regs.start[idx]);
2563               add_stuff = BYTE_POS_ADDR (begbyte);
2564             }
2565
2566           /* Now the stuff we want to add to SUBSTED
2567              is invariably ADD_LEN bytes starting at ADD_STUFF.  */
2568
2569           /* Make sure SUBSTED is big enough.  */
2570           if (substed_len + add_len >= substed_alloc_size)
2571             {
2572               substed_alloc_size = substed_len + add_len + 500;
2573               substed = (unsigned char *) xrealloc (substed,
2574                                                     substed_alloc_size + 1);
2575             }
2576
2577           /* Now add to the end of SUBSTED.  */
2578           if (add_stuff)
2579             {
2580               bcopy (add_stuff, substed + substed_len, add_len);
2581               substed_len += add_len;
2582             }
2583         }
2584
2585       if (really_changed)
2586         {
2587           if (buf_multibyte)
2588             {
2589               int nchars = multibyte_chars_in_text (substed, substed_len);
2590
2591               newtext = make_multibyte_string (substed, nchars, substed_len);
2592             }
2593           else
2594             newtext = make_unibyte_string (substed, substed_len);
2595         }
2596       xfree (substed);
2597     }
2598
2599   /* Replace the old text with the new in the cleanest possible way.  */
2600   replace_range (search_regs.start[sub], search_regs.end[sub],
2601                  newtext, 1, 0, 1);
2602   newpoint = search_regs.start[sub] + SCHARS (newtext);
2603
2604   if (case_action == all_caps)
2605     Fupcase_region (make_number (search_regs.start[sub]),
2606                     make_number (newpoint));
2607   else if (case_action == cap_initial)
2608     Fupcase_initials_region (make_number (search_regs.start[sub]),
2609                              make_number (newpoint));
2610
2611   /* Adjust search data for this change.  */
2612   {
2613     int oldend = search_regs.end[sub];
2614     int oldstart = search_regs.start[sub];
2615     int change = newpoint - search_regs.end[sub];
2616     int i;
2617
2618     for (i = 0; i < search_regs.num_regs; i++)
2619       {
2620         if (search_regs.start[i] >= oldend)
2621           search_regs.start[i] += change;
2622         else if (search_regs.start[i] > oldstart)
2623           search_regs.start[i] = oldstart;
2624         if (search_regs.end[i] >= oldend)
2625           search_regs.end[i] += change;
2626         else if (search_regs.end[i] > oldstart)
2627           search_regs.end[i] = oldstart;
2628       }
2629   }
2630
2631   /* Put point back where it was in the text.  */
2632   if (opoint <= 0)
2633     TEMP_SET_PT (opoint + ZV);
2634   else
2635     TEMP_SET_PT (opoint);
2636
2637   /* Now move point "officially" to the start of the inserted replacement.  */
2638   move_if_not_intangible (newpoint);
2639
2640   return Qnil;
2641 }
2642 \f
2643 static Lisp_Object
2644 match_limit (num, beginningp)
2645      Lisp_Object num;
2646      int beginningp;
2647 {
2648   register int n;
2649
2650   CHECK_NUMBER (num);
2651   n = XINT (num);
2652   if (n < 0)
2653     args_out_of_range (num, make_number (0));
2654   if (search_regs.num_regs <= 0)
2655     error ("No match data, because no search succeeded");
2656   if (n >= search_regs.num_regs
2657       || search_regs.start[n] < 0)
2658     return Qnil;
2659   return (make_number ((beginningp) ? search_regs.start[n]
2660                                     : search_regs.end[n]));
2661 }
2662
2663 DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
2664        doc: /* Return position of start of text matched by last search.
2665 SUBEXP, a number, specifies which parenthesized expression in the last
2666   regexp.
2667 Value is nil if SUBEXPth pair didn't match, or there were less than
2668   SUBEXP pairs.
2669 Zero means the entire text matched by the whole regexp or whole string.  */)
2670      (subexp)
2671      Lisp_Object subexp;
2672 {
2673   return match_limit (subexp, 1);
2674 }
2675
2676 DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
2677        doc: /* Return position of end of text matched by last search.
2678 SUBEXP, a number, specifies which parenthesized expression in the last
2679   regexp.
2680 Value is nil if SUBEXPth pair didn't match, or there were less than
2681   SUBEXP pairs.
2682 Zero means the entire text matched by the whole regexp or whole string.  */)
2683      (subexp)
2684      Lisp_Object subexp;
2685 {
2686   return match_limit (subexp, 0);
2687 }
2688
2689 DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 2, 0,
2690        doc: /* Return a list containing all info on what the last search matched.
2691 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2692 All the elements are markers or nil (nil if the Nth pair didn't match)
2693 if the last match was on a buffer; integers or nil if a string was matched.
2694 Use `store-match-data' to reinstate the data in this list.
2695
2696 If INTEGERS (the optional first argument) is non-nil, always use
2697 integers \(rather than markers) to represent buffer positions.  In
2698 this case, and if the last match was in a buffer, the buffer will get
2699 stored as one additional element at the end of the list.
2700
2701 If REUSE is a list, reuse it as part of the value.  If REUSE is long enough
2702 to hold all the values, and if INTEGERS is non-nil, no consing is done.
2703
2704 Return value is undefined if the last search failed.  */)
2705      (integers, reuse)
2706      Lisp_Object integers, reuse;
2707 {
2708   Lisp_Object tail, prev;
2709   Lisp_Object *data;
2710   int i, len;
2711
2712   if (NILP (last_thing_searched))
2713     return Qnil;
2714
2715   prev = Qnil;
2716
2717   data = (Lisp_Object *) alloca ((2 * search_regs.num_regs + 1)
2718                                  * sizeof (Lisp_Object));
2719
2720   len = 0;
2721   for (i = 0; i < search_regs.num_regs; i++)
2722     {
2723       int start = search_regs.start[i];
2724       if (start >= 0)
2725         {
2726           if (EQ (last_thing_searched, Qt)
2727               || ! NILP (integers))
2728             {
2729               XSETFASTINT (data[2 * i], start);
2730               XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
2731             }
2732           else if (BUFFERP (last_thing_searched))
2733             {
2734               data[2 * i] = Fmake_marker ();
2735               Fset_marker (data[2 * i],
2736                            make_number (start),
2737                            last_thing_searched);
2738               data[2 * i + 1] = Fmake_marker ();
2739               Fset_marker (data[2 * i + 1],
2740                            make_number (search_regs.end[i]),
2741                            last_thing_searched);
2742             }
2743           else
2744             /* last_thing_searched must always be Qt, a buffer, or Qnil.  */
2745             abort ();
2746
2747           len = 2*(i+1);
2748         }
2749       else
2750         data[2 * i] = data [2 * i + 1] = Qnil;
2751     }
2752
2753   if (BUFFERP (last_thing_searched) && !NILP (integers))
2754     {
2755       data[len] = last_thing_searched;
2756       len++;
2757     }
2758
2759   /* If REUSE is not usable, cons up the values and return them.  */
2760   if (! CONSP (reuse))
2761     return Flist (len, data);
2762
2763   /* If REUSE is a list, store as many value elements as will fit
2764      into the elements of REUSE.  */
2765   for (i = 0, tail = reuse; CONSP (tail);
2766        i++, tail = XCDR (tail))
2767     {
2768       if (i < len)
2769         XSETCAR (tail, data[i]);
2770       else
2771         XSETCAR (tail, Qnil);
2772       prev = tail;
2773     }
2774
2775   /* If we couldn't fit all value elements into REUSE,
2776      cons up the rest of them and add them to the end of REUSE.  */
2777   if (i < len)
2778     XSETCDR (prev, Flist (len - i, data + i));
2779
2780   return reuse;
2781 }
2782
2783
2784 DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 1, 0,
2785        doc: /* Set internal data on last search match from elements of LIST.
2786 LIST should have been created by calling `match-data' previously.  */)
2787      (list)
2788      register Lisp_Object list;
2789 {
2790   register int i;
2791   register Lisp_Object marker;
2792
2793   if (running_asynch_code)
2794     save_search_regs ();
2795
2796   if (!CONSP (list) && !NILP (list))
2797     list = wrong_type_argument (Qconsp, list);
2798
2799   /* Unless we find a marker with a buffer or an explicit buffer
2800      in LIST, assume that this match data came from a string.  */
2801   last_thing_searched = Qt;
2802
2803   /* Allocate registers if they don't already exist.  */
2804   {
2805     int length = XFASTINT (Flength (list)) / 2;
2806
2807     if (length > search_regs.num_regs)
2808       {
2809         if (search_regs.num_regs == 0)
2810           {
2811             search_regs.start
2812               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2813             search_regs.end
2814               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2815           }
2816         else
2817           {
2818             search_regs.start
2819               = (regoff_t *) xrealloc (search_regs.start,
2820                                        length * sizeof (regoff_t));
2821             search_regs.end
2822               = (regoff_t *) xrealloc (search_regs.end,
2823                                        length * sizeof (regoff_t));
2824           }
2825
2826         for (i = search_regs.num_regs; i < length; i++)
2827           search_regs.start[i] = -1;
2828
2829         search_regs.num_regs = length;
2830       }
2831
2832     for (i = 0;; i++)
2833       {
2834         marker = Fcar (list);
2835         if (BUFFERP (marker))
2836           {
2837             last_thing_searched = marker;
2838             break;
2839           }
2840         if (i >= length)
2841           break;
2842         if (NILP (marker))
2843           {
2844             search_regs.start[i] = -1;
2845             list = Fcdr (list);
2846           }
2847         else
2848           {
2849             int from;
2850
2851             if (MARKERP (marker))
2852               {
2853                 if (XMARKER (marker)->buffer == 0)
2854                   XSETFASTINT (marker, 0);
2855                 else
2856                   XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
2857               }
2858
2859             CHECK_NUMBER_COERCE_MARKER (marker);
2860             from = XINT (marker);
2861             list = Fcdr (list);
2862
2863             marker = Fcar (list);
2864             if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
2865               XSETFASTINT (marker, 0);
2866
2867             CHECK_NUMBER_COERCE_MARKER (marker);
2868             search_regs.start[i] = from;
2869             search_regs.end[i] = XINT (marker);
2870           }
2871         list = Fcdr (list);
2872       }
2873
2874     for (; i < search_regs.num_regs; i++)
2875       search_regs.start[i] = -1;
2876   }
2877
2878   return Qnil;
2879 }
2880
2881 /* If non-zero the match data have been saved in saved_search_regs
2882    during the execution of a sentinel or filter. */
2883 static int search_regs_saved;
2884 static struct re_registers saved_search_regs;
2885 static Lisp_Object saved_last_thing_searched;
2886
2887 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
2888    if asynchronous code (filter or sentinel) is running. */
2889 static void
2890 save_search_regs ()
2891 {
2892   if (!search_regs_saved)
2893     {
2894       saved_search_regs.num_regs = search_regs.num_regs;
2895       saved_search_regs.start = search_regs.start;
2896       saved_search_regs.end = search_regs.end;
2897       saved_last_thing_searched = last_thing_searched;
2898       last_thing_searched = Qnil;
2899       search_regs.num_regs = 0;
2900       search_regs.start = 0;
2901       search_regs.end = 0;
2902
2903       search_regs_saved = 1;
2904     }
2905 }
2906
2907 /* Called upon exit from filters and sentinels. */
2908 void
2909 restore_match_data ()
2910 {
2911   if (search_regs_saved)
2912     {
2913       if (search_regs.num_regs > 0)
2914         {
2915           xfree (search_regs.start);
2916           xfree (search_regs.end);
2917         }
2918       search_regs.num_regs = saved_search_regs.num_regs;
2919       search_regs.start = saved_search_regs.start;
2920       search_regs.end = saved_search_regs.end;
2921       last_thing_searched = saved_last_thing_searched;
2922       saved_last_thing_searched = Qnil;
2923       search_regs_saved = 0;
2924     }
2925 }
2926
2927 /* Quote a string to inactivate reg-expr chars */
2928
2929 DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
2930        doc: /* Return a regexp string which matches exactly STRING and nothing else.  */)
2931      (string)
2932      Lisp_Object string;
2933 {
2934   register unsigned char *in, *out, *end;
2935   register unsigned char *temp;
2936   int backslashes_added = 0;
2937
2938   CHECK_STRING (string);
2939
2940   temp = (unsigned char *) alloca (SBYTES (string) * 2);
2941
2942   /* Now copy the data into the new string, inserting escapes. */
2943
2944   in = SDATA (string);
2945   end = in + SBYTES (string);
2946   out = temp;
2947
2948   for (; in != end; in++)
2949     {
2950       if (*in == '[' || *in == ']'
2951           || *in == '*' || *in == '.' || *in == '\\'
2952           || *in == '?' || *in == '+'
2953           || *in == '^' || *in == '$')
2954         *out++ = '\\', backslashes_added++;
2955       *out++ = *in;
2956     }
2957
2958   return make_specified_string (temp,
2959                                 SCHARS (string) + backslashes_added,
2960                                 out - temp,
2961                                 STRING_MULTIBYTE (string));
2962 }
2963 \f
2964 void
2965 syms_of_search ()
2966 {
2967   register int i;
2968
2969   for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
2970     {
2971       searchbufs[i].buf.allocated = 100;
2972       searchbufs[i].buf.buffer = (unsigned char *) xmalloc (100);
2973       searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
2974       searchbufs[i].regexp = Qnil;
2975       staticpro (&searchbufs[i].regexp);
2976       searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
2977     }
2978   searchbuf_head = &searchbufs[0];
2979
2980   Qsearch_failed = intern ("search-failed");
2981   staticpro (&Qsearch_failed);
2982   Qinvalid_regexp = intern ("invalid-regexp");
2983   staticpro (&Qinvalid_regexp);
2984
2985   Fput (Qsearch_failed, Qerror_conditions,
2986         Fcons (Qsearch_failed, Fcons (Qerror, Qnil)));
2987   Fput (Qsearch_failed, Qerror_message,
2988         build_string ("Search failed"));
2989
2990   Fput (Qinvalid_regexp, Qerror_conditions,
2991         Fcons (Qinvalid_regexp, Fcons (Qerror, Qnil)));
2992   Fput (Qinvalid_regexp, Qerror_message,
2993         build_string ("Invalid regexp"));
2994
2995   last_thing_searched = Qnil;
2996   staticpro (&last_thing_searched);
2997
2998   saved_last_thing_searched = Qnil;
2999   staticpro (&saved_last_thing_searched);
3000
3001   defsubr (&Slooking_at);
3002   defsubr (&Sposix_looking_at);
3003   defsubr (&Sstring_match);
3004   defsubr (&Sposix_string_match);
3005   defsubr (&Ssearch_forward);
3006   defsubr (&Ssearch_backward);
3007   defsubr (&Sword_search_forward);
3008   defsubr (&Sword_search_backward);
3009   defsubr (&Sre_search_forward);
3010   defsubr (&Sre_search_backward);
3011   defsubr (&Sposix_search_forward);
3012   defsubr (&Sposix_search_backward);
3013   defsubr (&Sreplace_match);
3014   defsubr (&Smatch_beginning);
3015   defsubr (&Smatch_end);
3016   defsubr (&Smatch_data);
3017   defsubr (&Sset_match_data);
3018   defsubr (&Sregexp_quote);
3019 }
3020
3021 /* arch-tag: a6059d79-0552-4f14-a2cb-d379a4e3c78f
3022    (do not change this comment) */