src/search.c

   1 /* String search routines for GNU Emacs.
   2    Copyright (C) 1985, 86,87,93,94,97,98, 1999 Free Software Foundation, Inc.
   3
   4 This file is part of GNU Emacs.
   5
   6 GNU Emacs is free software; you can redistribute it and/or modify
   7 it under the terms of the GNU General Public License as published by
   8 the Free Software Foundation; either version 2, or (at your option)
   9 any later version.
  10
  11 GNU Emacs is distributed in the hope that it will be useful,
  12 but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 GNU General Public License for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GNU Emacs; see the file COPYING.  If not, write to
  18 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  19 Boston, MA 02111-1307, USA.  */
  20
  21
  22 #include <config.h>
  23 #include "lisp.h"
  24 #include "syntax.h"
  25 #include "category.h"
  26 #include "buffer.h"
  27 #include "charset.h"
  28 #include "region-cache.h"
  29 #include "commands.h"
  30 #include "blockinput.h"
  31 #include "intervals.h"
  32
  33 #include <sys/types.h>
  34 #include "regex.h"
  35
  36 #define REGEXP_CACHE_SIZE 20
  37
  38 /* One entry of the compiled-regexp cache.  If the regexp is non-nil, then
  39    the buffer contains the compiled form of that regexp, suitable for searching.  */
  40 struct regexp_cache
  41 {
  42   struct regexp_cache *next;        /* next entry in the list headed by searchbuf_head */
  43   Lisp_Object regexp;               /* pattern string this entry was compiled from, or nil */
  44   struct re_pattern_buffer buf;     /* the compiled pattern */
  45   char fastmap[0400];               /* 0400 octal = 256 bytes; NOTE(review): presumably installed as buf.fastmap elsewhere -- confirm */
  46   /* Nonzero means regexp was compiled to do full POSIX backtracking.  */
  47   char posix;
  48 };
  49
  50 /* The cache entries themselves: REGEXP_CACHE_SIZE instances of struct regexp_cache.  */
  51 struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
  52
  53 /* The head of the linked list; points to the most recently used entry (see compile_pattern).  */
  54 struct regexp_cache *searchbuf_head;
  55
  56
  57 /* Every call to re_match, etc., must pass &search_regs as the regs
  58    argument unless you can show it is unnecessary (i.e., if re_match
  59    is certainly going to be called again before region-around-match
  60    can be called).
  61
  62    Since the registers are now dynamically allocated, we need to make
  63    sure not to refer to the Nth register before checking that it has
  64    been allocated by checking search_regs.num_regs.
  65
  66    The regex code keeps track of whether it has allocated the search
  67    buffer using bits in the re_pattern_buffer.  This means that whenever
  68    you compile a new pattern, it completely forgets whether it has
  69    allocated any registers, and will allocate new registers the next
  70    time you call a searching or matching function.  Therefore, we need
  71    to call re_set_registers after compiling a new pattern or after
  72    setting the match registers, so that the regex functions will be
  73    able to free or re-allocate it properly.  */
  74 static struct re_registers search_regs;
  75
  76 /* The buffer in which the last search was performed, or
  77    Qt if the last search was done in a string;
  78    Qnil if no searching has been done yet.  */
  79 static Lisp_Object last_thing_searched;
  80
  81 /* Error condition signaled by compile_pattern_1 when a regexp fails to compile.  */
  82
  83 Lisp_Object Qinvalid_regexp;
  84 /* Forward declarations of file-local helper functions.  */
  85 static void set_search_regs ();
  86 static void save_search_regs ();
  87 static int simple_search ();
  88 static int boyer_moore ();
  89 static int search_buffer ();
  90
  91 static void
  92 matcher_overflow ()
  93 {  /* Report a matcher stack overflow; callers invoke this when re_match_2 / re_search return -2.  */
  94   error ("Stack overflow in regexp matcher");
  95 }
  96
  97 /* Compile PATTERN into cache entry CP; signal a Lisp error if anything goes wrong.
  98    PATTERN is the pattern to compile.
  99    CP is the place to put the result.
 100    TRANSLATE is a translation table for ignoring case, or nil for none.
 101    REGP is the structure that says where to store the "register"
 102    values that will result from matching this pattern.
 103    If it is 0, we should compile the pattern not to record any
 104    subexpression bounds.  (REGP is not referenced in this function's body.)
 105    POSIX is nonzero if we want full backtracking (POSIX style)
 106    for this pattern.  0 means backtrack only enough to get a valid match.
 107    MULTIBYTE is nonzero if we want to handle multibyte characters in
 108    PATTERN.  0 means all multibyte characters are recognized just as
 109    sequences of binary data.  */
 110
 111 static void
 112 compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte)
 113      struct regexp_cache *cp;
 114      Lisp_Object pattern;
 115      Lisp_Object translate;
 116      struct re_registers *regp;
 117      int posix;
 118      int multibyte;
 119 {
 120   unsigned char *raw_pattern;
 121   int raw_pattern_size;
 122   char *val;
 123   reg_syntax_t old;
 124
 125   /* MULTIBYTE says whether the text to be searched is multibyte.
 126      We must convert PATTERN to match that, or we will not really
 127      find things right.  */
 128
 129   if (multibyte == STRING_MULTIBYTE (pattern))  /* representations agree: use PATTERN's bytes directly */
 130     {
 131       raw_pattern = (unsigned char *) XSTRING (pattern)->data;
 132       raw_pattern_size = STRING_BYTES (XSTRING (pattern));
 133     }
 134   else if (multibyte)  /* unibyte PATTERN, multibyte text: convert into a stack buffer */
 135     {
 136       raw_pattern_size = count_size_as_multibyte (XSTRING (pattern)->data,
 137                                                   XSTRING (pattern)->size);
 138       raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
 139       copy_text (XSTRING (pattern)->data, raw_pattern,
 140                  XSTRING (pattern)->size, 0, 1);
 141     }
 142   else
 143     {
 144       /* Converting multibyte to single-byte.
 145
 146          ??? Perhaps this conversion should be done in a special way
 147          by subtracting nonascii-insert-offset from each non-ASCII char,
 148          so that only the multibyte chars which really correspond to
 149          the chosen single-byte character set can possibly match.  */
 150       raw_pattern_size = XSTRING (pattern)->size;
 151       raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
 152       copy_text (XSTRING (pattern)->data, raw_pattern,
 153                  STRING_BYTES (XSTRING (pattern)), 1, 0);
 154     }
 155   /* Record the compilation parameters in the cache entry, then compile.  */
 156   cp->regexp = Qnil;  /* stays nil if the compilation below fails */
 157   cp->buf.translate = (! NILP (translate) ? translate : make_number (0));  /* nil is stored as the number 0 */
 158   cp->posix = posix;
 159   cp->buf.multibyte = multibyte;
 160   BLOCK_INPUT;
 161   old = re_set_syntax (RE_SYNTAX_EMACS
 162                        | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
 163   val = (char *) re_compile_pattern ((char *)raw_pattern,
 164                                      raw_pattern_size, &cp->buf);
 165   re_set_syntax (old);  /* put back the syntax value saved in OLD */
 166   UNBLOCK_INPUT;
 167   if (val)  /* a non-null VAL is used as the error message text */
 168     Fsignal (Qinvalid_regexp, Fcons (build_string (val), Qnil));
 169   /* Success: store a copy of PATTERN; compile_pattern compares it on later lookups.  */
 170   cp->regexp = Fcopy_sequence (pattern);
 171 }
 172
 173 /* Shrink each compiled regexp buffer in the cache
 174    to the size actually used right now.
 175    This is called from garbage collection.  */
 176
 177 void
 178 shrink_regexp_cache ()
 179 {
 180   struct regexp_cache *cp;
 181   /* Visit every entry reachable from searchbuf_head.  */
 182   for (cp = searchbuf_head; cp != 0; cp = cp->next)
 183     {
 184       cp->buf.allocated = cp->buf.used;
 185       cp->buf.buffer
 186         = (unsigned char *) realloc (cp->buf.buffer, cp->buf.used);  /* NOTE(review): result stored unchecked -- confirm a failed or zero-size realloc is harmless here */
 187     }
 188 }
 189
 190 /* Return the compiled form of PATTERN, taken from the regexp cache if an
 191    entry with the same pattern, TRANSLATE, POSIX and MULTIBYTE exists.
 192    Otherwise compile PATTERN into an unused or the least recently used entry.
 193    TRANSLATE is a translation table for ignoring case, or nil for none.
 194    REGP is the structure that says where to store the "register"
 195    values that will result from matching this pattern.
 196    If it is 0, re_set_registers is not called for the returned buffer.
 197    POSIX is nonzero if we want full backtracking (POSIX style)
 198    for this pattern.  0 means backtrack only enough to get a valid match.
 199    MULTIBYTE is nonzero if the text to be searched is multibyte.  */
 200
 201 struct re_pattern_buffer *
 202 compile_pattern (pattern, regp, translate, posix, multibyte)
 203      Lisp_Object pattern;
 204      struct re_registers *regp;
 205      Lisp_Object translate;
 206      int posix, multibyte;
 207 {
 208   struct regexp_cache *cp, **cpp;
 209   /* CPP always addresses the link that points at CP, so CP can be unlinked below.  */
 210   for (cpp = &searchbuf_head; ; cpp = &cp->next)
 211     {
 212       cp = *cpp;
 213       /* Entries are initialized to nil, and may be set to nil by
 214          compile_pattern_1 if the pattern isn't valid.  Don't apply
 215          XSTRING in those cases.  However, compile_pattern_1 is only
 216          applied to the cache entry we pick here to reuse.  So nil
 217          should never appear before a non-nil entry.  */
 218       if (NILP (cp->regexp))
 219         goto compile_it;
 220       if (XSTRING (cp->regexp)->size == XSTRING (pattern)->size
 221           && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
 222           && !NILP (Fstring_equal (cp->regexp, pattern))
 223           && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
 224           && cp->posix == posix
 225           && cp->buf.multibyte == multibyte)
 226         break;  /* cache hit: same pattern text and same compilation parameters */
 227
 228       /* If we're at the end of the cache, compile into the nil cell
 229          we found, or the last (least recently used) cell with a
 230          string value.  */
 231       if (cp->next == 0)
 232         {
 233         compile_it:
 234           compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte);
 235           break;
 236         }
 237     }
 238
 239   /* When we get here, cp (aka *cpp) contains the compiled pattern,
 240      either because we found it in the cache or because we just compiled it.
 241      Move it to the front of the queue to mark it as most recently used.  */
 242   *cpp = cp->next;            /* unlink CP from its current place */
 243   cp->next = searchbuf_head;  /* and push it on the front of the list */
 244   searchbuf_head = cp;
 245
 246   /* Advise the searching functions about the space we have allocated
 247      for register data.  */
 248   if (regp)
 249     re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
 250   /* The returned buffer lives inside the cache entry, which a later call may recompile.  */
 251   return &cp->buf;
 252 }
 253
 254 /* Error condition used for failing searches.  */
 255 Lisp_Object Qsearch_failed;
 256 /* Signal Qsearch_failed with the one-element data list (ARG).  */
 257 Lisp_Object
 258 signal_failure (arg)
 259      Lisp_Object arg;
 260 {
 261   Fsignal (Qsearch_failed, Fcons (arg, Qnil));
 262   return Qnil;  /* reached only if Fsignal returns */
 263 }
 264 \f
 265 static Lisp_Object
 266 looking_at_1 (string, posix)
 267      Lisp_Object string;
 268      int posix;
 269 {
 270   Lisp_Object val;
 271   unsigned char *p1, *p2;
 272   int s1, s2;
 273   register int i;
 274   struct re_pattern_buffer *bufp;
 275   /* Match regexp STRING against the current buffer text at point; return Qt or Qnil.  */
 276   if (running_asynch_code)
 277     save_search_regs ();
 278
 279   CHECK_STRING (string);
 280   bufp = compile_pattern (string, &search_regs,
 281                           (!NILP (current_buffer->case_fold_search)
 282                            ? DOWNCASE_TABLE : Qnil),
 283                           posix,
 284                           !NILP (current_buffer->enable_multibyte_characters));
 285
 286   immediate_quit = 1;
 287   QUIT;                 /* Do a pending quit right away, to avoid paradoxical behavior */
 288
 289   /* Get pointers and sizes of the two strings
 290      that make up the visible portion of the buffer. */
 291
 292   p1 = BEGV_ADDR;
 293   s1 = GPT_BYTE - BEGV_BYTE;
 294   p2 = GAP_END_ADDR;
 295   s2 = ZV_BYTE - GPT_BYTE;
 296   if (s1 < 0)  /* GPT_BYTE is before BEGV_BYTE: all visible text goes in the second string */
 297     {
 298       p2 = p1;
 299       s2 = ZV_BYTE - BEGV_BYTE;
 300       s1 = 0;
 301     }
 302   if (s2 < 0)  /* GPT_BYTE is after ZV_BYTE: all visible text goes in the first string */
 303     {
 304       s1 = ZV_BYTE - BEGV_BYTE;
 305       s2 = 0;
 306     }
 307
 308   re_match_object = Qnil;
 309   /* Match at point; start and stop are passed as byte offsets from BEGV_BYTE.  */
 310   i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
 311                   PT_BYTE - BEGV_BYTE, &search_regs,
 312                   ZV_BYTE - BEGV_BYTE);
 313   immediate_quit = 0;
 314
 315   if (i == -2)  /* -2 is reported as a matcher stack overflow */
 316     matcher_overflow ();
 317   /* On success, convert each recorded register from a byte offset to a character position.  */
 318   val = (0 <= i ? Qt : Qnil);
 319   if (i >= 0)
 320     for (i = 0; i < search_regs.num_regs; i++)
 321       if (search_regs.start[i] >= 0)
 322         {
 323           search_regs.start[i]
 324             = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
 325           search_regs.end[i]
 326             = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
 327         }
 328   XSETBUFFER (last_thing_searched, current_buffer);  /* recorded whether or not the match succeeded */
 329   return val;
 330 }
 331
 332 DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
 333        doc: /* Return t if text after point matches regular expression REGEXP.
 334 This function modifies the match data that `match-beginning',
 335 `match-end' and `match-data' access; save and restore the match
 336 data if you want to preserve them.  */)
 337      (regexp)
 338      Lisp_Object regexp;
 339 {
 340   return looking_at_1 (regexp, 0);  /* POSIX argument 0: no full POSIX backtracking */
 341 }
 342
 343 DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
 344        doc: /* Return t if text after point matches regular expression REGEXP.
 345 Find the longest match, in accord with Posix regular expression rules.
 346 This function modifies the match data that `match-beginning',
 347 `match-end' and `match-data' access; save and restore the match
 348 data if you want to preserve them.  */)
 349      (regexp)
 350      Lisp_Object regexp;
 351 {
 352   return looking_at_1 (regexp, 1);  /* POSIX argument 1: full POSIX backtracking */
 353 }
 354 \f
 355 static Lisp_Object
 356 string_match_1 (regexp, string, start, posix)
 357      Lisp_Object regexp, string, start;
 358      int posix;
 359 {
 360   int val;
 361   struct re_pattern_buffer *bufp;
 362   int pos, pos_byte;
 363   int i;
 364   /* Search STRING for REGEXP beginning at START; return the match index, or Qnil.  */
 365   if (running_asynch_code)
 366     save_search_regs ();
 367
 368   CHECK_STRING (regexp);
 369   CHECK_STRING (string);
 370   /* Turn START (nil means 0, negative counts from the end) into char and byte offsets.  */
 371   if (NILP (start))
 372     pos = 0, pos_byte = 0;
 373   else
 374     {
 375       int len = XSTRING (string)->size;
 376
 377       CHECK_NUMBER (start);
 378       pos = XINT (start);
 379       if (pos < 0 && -pos <= len)
 380         pos = len + pos;
 381       else if (0 > pos || pos > len)
 382         args_out_of_range (string, start);
 383       pos_byte = string_char_to_byte (string, pos);
 384     }
 385
 386   bufp = compile_pattern (regexp, &search_regs,
 387                           (!NILP (current_buffer->case_fold_search)
 388                            ? DOWNCASE_TABLE : Qnil),
 389                           posix,
 390                           STRING_MULTIBYTE (string));  /* compile for STRING's own representation */
 391   immediate_quit = 1;
 392   re_match_object = string;
 393   /* Search from POS_BYTE over the remaining bytes of STRING.  */
 394   val = re_search (bufp, (char *) XSTRING (string)->data,
 395                    STRING_BYTES (XSTRING (string)), pos_byte,
 396                    STRING_BYTES (XSTRING (string)) - pos_byte,
 397                    &search_regs);
 398   immediate_quit = 0;
 399   last_thing_searched = Qt;  /* Qt: the last search was done in a string */
 400   if (val == -2)
 401     matcher_overflow ();
 402   if (val < 0) return Qnil;
 403   /* Convert the match registers from byte offsets to character indices.  */
 404   for (i = 0; i < search_regs.num_regs; i++)
 405     if (search_regs.start[i] >= 0)
 406       {
 407         search_regs.start[i]
 408           = string_byte_to_char (string, search_regs.start[i]);
 409         search_regs.end[i]
 410           = string_byte_to_char (string, search_regs.end[i]);
 411       }
 412
 413   return make_number (string_byte_to_char (string, val));
 414 }
 415
 416 DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
 417        doc: /* Return index of start of first match for REGEXP in STRING, or nil.
 418 Case is ignored if `case-fold-search' is non-nil in the current buffer.
 419 If third arg START is non-nil, start search at that index in STRING.
 420 For index of first char beyond the match, do (match-end 0).
 421 `match-end' and `match-beginning' also give indices of substrings
 422 matched by parenthesis constructs in the pattern.  */)
 423      (regexp, string, start)
 424      Lisp_Object regexp, string, start;
 425 {
 426   return string_match_1 (regexp, string, start, 0);  /* POSIX argument 0: no full POSIX backtracking */
 427 }
 428
 429 DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
 430        doc: /* Return index of start of first match for REGEXP in STRING, or nil.
 431 Find the longest match, in accord with Posix regular expression rules.
 432 Case is ignored if `case-fold-search' is non-nil in the current buffer.
 433 If third arg START is non-nil, start search at that index in STRING.
 434 For index of first char beyond the match, do (match-end 0).
 435 `match-end' and `match-beginning' also give indices of substrings
 436 matched by parenthesis constructs in the pattern.  */)
 437      (regexp, string, start)
 438      Lisp_Object regexp, string, start;
 439 {
 440   return string_match_1 (regexp, string, start, 1);  /* POSIX argument 1: full POSIX backtracking */
 441 }
 442
 443 /* Match REGEXP against STRING, searching all of STRING without case folding,
 444    and return re_search's result: the match position, or negative on failure.
 445    This does not clobber the match data (no registers are passed).  */
 446
 447 int
 448 fast_string_match (regexp, string)
 449      Lisp_Object regexp, string;
 450 {
 451   int val;
 452   struct re_pattern_buffer *bufp;
 453   /* No register storage, no translation table, not POSIX.  */
 454   bufp = compile_pattern (regexp, 0, Qnil,
 455                           0, STRING_MULTIBYTE (string));
 456   immediate_quit = 1;
 457   re_match_object = string;
 458   /* NOTE(review): VAL is returned as re_search gives it, presumably a byte offset -- confirm callers expect that.  */
 459   val = re_search (bufp, (char *) XSTRING (string)->data,
 460                    STRING_BYTES (XSTRING (string)), 0,
 461                    STRING_BYTES (XSTRING (string)), 0);
 462   immediate_quit = 0;
 463   return val;
 464 }
 465
 466 /* Match REGEXP against the C string STRING, searching all of STRING ignoring case
 467    (via Vascii_downcase_table), and return the index of the match, or negative on failure.
 468    This does not clobber the match data.
 469    We assume that STRING contains single-byte characters.  */
 470
 471 extern Lisp_Object Vascii_downcase_table;
 472
 473 int
 474 fast_c_string_match_ignore_case (regexp, string)
 475      Lisp_Object regexp;
 476      char *string;
 477 {
 478   int val;
 479   struct re_pattern_buffer *bufp;
 480   int len = strlen (string);  /* STRING must be NUL-terminated */
 481
 482   regexp = string_make_unibyte (regexp);  /* the pattern is compiled with MULTIBYTE 0 below */
 483   re_match_object = Qt;
 484   bufp = compile_pattern (regexp, 0,
 485                           Vascii_downcase_table, 0,
 486                           0);
 487   immediate_quit = 1;
 488   val = re_search (bufp, string, len, 0, len, 0);  /* last argument 0: no match registers */
 489   immediate_quit = 0;
 490   return val;
 491 }
 492 \f
 493 /* The newline cache: remembering which sections of text have no newlines.  */
 494
 495 /* If the user has requested newline caching for BUF (its cache_long_line_scans
 496    is non-nil), make sure it's on.  Otherwise, make sure it's off.
 497    This is our cheesy way of associating an action with the change of
 498    state of a buffer-local variable.  */
 499 static void
 500 newline_cache_on_off (buf)
 501      struct buffer *buf;
 502 {
 503   if (NILP (buf->cache_long_line_scans))
 504     {
 505       /* It should be off.  */
 506       if (buf->newline_cache)
 507         {
 508           free_region_cache (buf->newline_cache);
 509           buf->newline_cache = 0;  /* a null pointer marks the cache as absent */
 510         }
 511     }
 512   else
 513     {
 514       /* It should be on.  */
 515       if (buf->newline_cache == 0)
 516         buf->newline_cache = new_region_cache ();
 517     }
 518 }
 519
 520 \f
 521 /* Search for COUNT instances of the character TARGET between START and END.
 522    START, END and the return value are character positions; the scan itself works on bytes.
 523    If COUNT is positive, search forwards; END must be >= START.
 524    If COUNT is negative, search backwards for the -COUNTth instance;
 525       END must be <= START.
 526    If COUNT is zero, do anything you please; run rogue, for all I care.
 527
 528    If END is zero, use BEGV or ZV instead, as appropriate for the
 529    direction indicated by COUNT.
 530
 531    If we find COUNT instances, set *SHORTAGE to zero, and return the
 532    position after the COUNTth match.  Note that for reverse motion
 533    this is not the same as the usual convention for Emacs motion commands.
 534
 535    If we don't find COUNT instances before reaching END, set *SHORTAGE
 536    to the number of TARGETs left unfound, and return END.  SHORTAGE may be null.
 537
 538    If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
 539    except when inside redisplay.  */
 540
 541 int
 542 scan_buffer (target, start, end, count, shortage, allow_quit)
 543      register int target;
 544      int start, end;
 545      int count;
 546      int *shortage;
 547      int allow_quit;
 548 {
 549   struct region_cache *newline_cache;
 550   int direction;
 551   /* Pick the direction, and default a zero END to the end of the text in that direction.  */
 552   if (count > 0)
 553     {
 554       direction = 1;
 555       if (! end) end = ZV;
 556     }
 557   else
 558     {
 559       direction = -1;
 560       if (! end) end = BEGV;
 561     }
 562   /* Create or free the buffer's newline cache as cache_long_line_scans dictates.  */
 563   newline_cache_on_off (current_buffer);
 564   newline_cache = current_buffer->newline_cache;
 565
 566   if (shortage != 0)
 567     *shortage = 0;
 568
 569   immediate_quit = allow_quit;
 570   /* Forward scan.  */
 571   if (count > 0)
 572     while (start != end)
 573       {
 574         /* Our innermost scanning loop is very simple; it doesn't know
 575            about gaps, buffer ends, or the newline cache.  ceiling is
 576            the position of the last character before the next such
 577            obstacle --- the last character the dumb search loop should
 578            examine.  */
 579         int ceiling_byte = CHAR_TO_BYTE (end) - 1;
 580         int start_byte = CHAR_TO_BYTE (start);
 581         int tem;
 582
 583         /* If we're looking for a newline, consult the newline cache
 584            to see where we can avoid some scanning.  */
 585         if (target == '\n' && newline_cache)
 586           {
 587             int next_change;
 588             immediate_quit = 0;  /* quitting is turned off while the cache is consulted */
 589             while (region_cache_forward
 590                    (current_buffer, newline_cache, start_byte, &next_change))
 591               start_byte = next_change;
 592             immediate_quit = allow_quit;
 593
 594             /* START should never be after END.  */
 595             if (start_byte > ceiling_byte)
 596               start_byte = ceiling_byte;
 597
 598             /* Now the text after start is an unknown region, and
 599                next_change is the position of the next known region. */
 600             ceiling_byte = min (next_change - 1, ceiling_byte);
 601           }
 602
 603         /* The dumb loop can only scan text stored in contiguous
 604            bytes. BUFFER_CEILING_OF returns the last character
 605            position that is contiguous, so the ceiling is the
 606            position after that.  */
 607         tem = BUFFER_CEILING_OF (start_byte);
 608         ceiling_byte = min (tem, ceiling_byte);
 609
 610         {
 611           /* The termination address of the dumb loop.  */
 612           register unsigned char *ceiling_addr
 613             = BYTE_POS_ADDR (ceiling_byte) + 1;
 614           register unsigned char *cursor
 615             = BYTE_POS_ADDR (start_byte);
 616           unsigned char *base = cursor;  /* address of START_BYTE; cursor - base is the offset from it */
 617
 618           while (cursor < ceiling_addr)
 619             {
 620               unsigned char *scan_start = cursor;
 621
 622               /* The dumb loop.  */
 623               while (*cursor != target && ++cursor < ceiling_addr)
 624                 ;
 625
 626               /* If we're looking for newlines, cache the fact that
 627                  the region from start to cursor is free of them. */
 628               if (target == '\n' && newline_cache)
 629                 know_region_cache (current_buffer, newline_cache,
 630                                    start_byte + scan_start - base,
 631                                    start_byte + cursor - base);
 632
 633               /* Did we find the target character?  */
 634               if (cursor < ceiling_addr)
 635                 {
 636                   if (--count == 0)
 637                     {
 638                       immediate_quit = 0;
 639                       return BYTE_TO_CHAR (start_byte + cursor - base + 1);  /* + 1: the position after the match */
 640                     }
 641                   cursor++;
 642                 }
 643             }
 644
 645           start = BYTE_TO_CHAR (start_byte + cursor - base);
 646         }
 647       }
 648   else  /* COUNT <= 0: backward scan */
 649     while (start > end)
 650       {
 651         /* The last character to check before the next obstacle.  */
 652         int ceiling_byte = CHAR_TO_BYTE (end);
 653         int start_byte = CHAR_TO_BYTE (start);
 654         int tem;
 655
 656         /* Consult the newline cache, if appropriate.  */
 657         if (target == '\n' && newline_cache)
 658           {
 659             int next_change;
 660             immediate_quit = 0;  /* quitting is turned off while the cache is consulted */
 661             while (region_cache_backward
 662                    (current_buffer, newline_cache, start_byte, &next_change))
 663               start_byte = next_change;
 664             immediate_quit = allow_quit;
 665
 666             /* Start should never be at or before end.  */
 667             if (start_byte <= ceiling_byte)
 668               start_byte = ceiling_byte + 1;
 669
 670             /* Now the text before start is an unknown region, and
 671                next_change is the position of the next known region. */
 672             ceiling_byte = max (next_change, ceiling_byte);
 673           }
 674
 675         /* Stop scanning before the gap.  */
 676         tem = BUFFER_FLOOR_OF (start_byte - 1);
 677         ceiling_byte = max (tem, ceiling_byte);
 678
 679         {
 680           /* The termination address of the dumb loop.  */
 681           register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
 682           register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
 683           unsigned char *base = cursor;  /* address of the byte just before START_BYTE */
 684
 685           while (cursor >= ceiling_addr)
 686             {
 687               unsigned char *scan_start = cursor;
 688
 689               while (*cursor != target && --cursor >= ceiling_addr)
 690                 ;
 691
 692               /* If we're looking for newlines, cache the fact that
 693                  the region from after the cursor to start is free of them.  */
 694               if (target == '\n' && newline_cache)
 695                 know_region_cache (current_buffer, newline_cache,
 696                                    start_byte + cursor - base,
 697                                    start_byte + scan_start - base);
 698
 699               /* Did we find the target character?  */
 700               if (cursor >= ceiling_addr)
 701                 {
 702                   if (++count >= 0)
 703                     {
 704                       immediate_quit = 0;
 705                       return BYTE_TO_CHAR (start_byte + cursor - base);
 706                     }
 707                   cursor--;
 708                 }
 709             }
 710
 711           start = BYTE_TO_CHAR (start_byte + cursor - base);
 712         }
 713       }
 714   /* The loop ended without finding COUNT instances: report how many are still missing.  */
 715   immediate_quit = 0;
 716   if (shortage != 0)
 717     *shortage = count * direction;
 718   return start;
 719 }
 720 \f
 721 /* Search for COUNT instances of a line boundary.  As written, the loops
 722    below test only for newline ('\n'); carriage return is not checked.
 723    Start at START.  If COUNT is negative, search backwards.
 724    START_BYTE is recomputed from START; LIMIT and LIMIT_BYTE bound the scan.
 725    We report the resulting position by calling TEMP_SET_PT_BOTH.
 726
 727    If we find COUNT instances, we position after (always after,
 728    even if scanning backwards) the COUNTth match, and return 0.
 729
 730    If we don't find COUNT instances before reaching the end of the
 731    buffer (or the beginning, if scanning backwards), we return
 732    the number of line boundaries left unfound, and position at
 733    the limit we bumped up against.
 734
 735    If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
 736    except in special cases.  */
 737
 738 int
 739 scan_newline (start, start_byte, limit, limit_byte, count, allow_quit)
 740      int start, start_byte;
 741      int limit, limit_byte;
 742      register int count;
 743      int allow_quit;
 744 {
 745   int direction = ((count > 0) ? 1 : -1);
 746
 747   register unsigned char *cursor;
 748   unsigned char *base;
 749
 750   register int ceiling;
 751   register unsigned char *ceiling_addr;
 752
 753   int old_immediate_quit = immediate_quit;  /* restored on every return path */
 754
 755   /* The code that follows is like scan_buffer without the newline cache.
 756      NOTE(review): only '\n' is tested below, not carriage return.  */
 757
 758   if (allow_quit)
 759     immediate_quit++;
 760   /* The START_BYTE argument is overwritten here; only START determines where the scan begins.  */
 761   start_byte = CHAR_TO_BYTE (start);
 762
 763   if (count > 0)
 764     {
 765       while (start_byte < limit_byte)
 766         {
 767           ceiling =  BUFFER_CEILING_OF (start_byte);
 768           ceiling = min (limit_byte - 1, ceiling);
 769           ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
 770           base = (cursor = BYTE_POS_ADDR (start_byte));
 771           while (1)
 772             {
 773               while (*cursor != '\n' && ++cursor != ceiling_addr)
 774                 ;
 775
 776               if (cursor != ceiling_addr)
 777                 {
 778                   if (--count == 0)
 779                     {
 780                       immediate_quit = old_immediate_quit;
 781                       start_byte = start_byte + cursor - base + 1;  /* + 1: position after the newline */
 782                       start = BYTE_TO_CHAR (start_byte);
 783                       TEMP_SET_PT_BOTH (start, start_byte);
 784                       return 0;
 785                     }
 786                   else
 787                     if (++cursor == ceiling_addr)
 788                       break;
 789                 }
 790               else
 791                 break;
 792             }
 793           start_byte += cursor - base;
 794         }
 795     }
 796   else
 797     {
 798       while (start_byte > limit_byte)
 799         {
 800           ceiling = BUFFER_FLOOR_OF (start_byte - 1);
 801           ceiling = max (limit_byte, ceiling);
 802           ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
 803           base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
 804           while (1)
 805             {
 806               while (--cursor != ceiling_addr && *cursor != '\n')
 807                 ;
 808
 809               if (cursor != ceiling_addr)
 810                 {
 811                   if (++count == 0)
 812                     {
 813                       immediate_quit = old_immediate_quit;
 814                       /* Return the position AFTER the match we found.  */
 815                       start_byte = start_byte + cursor - base + 1;
 816                       start = BYTE_TO_CHAR (start_byte);
 817                       TEMP_SET_PT_BOTH (start, start_byte);
 818                       return 0;
 819                     }
 820                 }
 821               else
 822                 break;
 823             }
 824           /* Here we add 1 to compensate for the last decrement
 825              of CURSOR, which took it past the valid range.  */
 826           start_byte += cursor - base + 1;
 827         }
 828     }
 829   /* Not enough line boundaries: position at the limit and return the shortfall.  */
 830   TEMP_SET_PT_BOTH (limit, limit_byte);
 831   immediate_quit = old_immediate_quit;
 832
 833   return count * direction;
 834 }
 835
 836 int
 837 find_next_newline_no_quit (from, cnt)
 838      register int from, cnt;
 839 {  /* Scan from FROM for CNT newlines with END 0, no SHORTAGE report and ALLOW_QUIT 0; return scan_buffer's result.  */
 840   return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0);
 841 }
 842
 843 /* Like find_next_newline, but returns the position before the newline,
 844    not after, and only searches up to TO.  This isn't just
 845    find_next_newline (...)-1, because you might hit TO.  */
 846
 847 int
 848 find_before_next_newline (from, to, cnt)
 849      int from, to, cnt;
 850 {
 851   int shortage;
 852   int pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);
 853   /* Zero shortage means all CNT newlines were found, so POS is just after one: step back.  */
 854   if (shortage == 0)
 855     pos--;
 856
 857   return pos;
 858 }
 859 \f
 860 /* Subroutines of Lisp buffer search functions. */
 861
 862 static Lisp_Object
 863 search_command (string, bound, noerror, count, direction, RE, posix)
 864      Lisp_Object string, bound, noerror, count;
 865      int direction;
 866      int RE;
 867      int posix;
 868 {  /* Search from point for STRING via search_buffer; on success move point and return the new position.  */
 869   register int np;
 870   int lim, lim_byte;
 871   int n = direction;
 872   /* N becomes the signed repeat count: DIRECTION times COUNT (a nil COUNT leaves it at DIRECTION).  */
 873   if (!NILP (count))
 874     {
 875       CHECK_NUMBER (count);
 876       n *= XINT (count);
 877     }
 878   /* Work out the search limit: BOUND clipped to [BEGV, ZV], or ZV / BEGV by direction when BOUND is nil.  */
 879   CHECK_STRING (string);
 880   if (NILP (bound))
 881     {
 882       if (n > 0)
 883         lim = ZV, lim_byte = ZV_BYTE;
 884       else
 885         lim = BEGV, lim_byte = BEGV_BYTE;
 886     }
 887   else
 888     {
 889       CHECK_NUMBER_COERCE_MARKER (bound);
 890       lim = XINT (bound);
 891       if (n > 0 ? lim < PT : lim > PT)
 892         error ("Invalid search bound (wrong side of point)");
 893       if (lim > ZV)
 894         lim = ZV, lim_byte = ZV_BYTE;
 895       else if (lim < BEGV)
 896         lim = BEGV, lim_byte = BEGV_BYTE;
 897       else
 898         lim_byte = CHAR_TO_BYTE (lim);
 899     }
 900   /* The two case tables are passed only when case-fold-search is non-nil; otherwise Qnil.  */
 901   np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
 902                       (!NILP (current_buffer->case_fold_search)
 903                        ? current_buffer->case_canon_table
 904                        : Qnil),
 905                       (!NILP (current_buffer->case_fold_search)
 906                        ? current_buffer->case_eqv_table
 907                        : Qnil),
 908                       posix);
 909   if (np <= 0)  /* a non-positive result is treated as failure */
 910     {
 911       if (NILP (noerror))
 912         return signal_failure (string);
 913       if (!EQ (noerror, Qt))  /* NOERROR is neither nil nor t: move point to the limit */
 914         {
 915           if (lim < BEGV || lim > ZV)
 916             abort ();
 917           SET_PT_BOTH (lim, lim_byte);
 918           return Qnil;
 919 #if 0 /* This would be clean, but maybe programs depend on
 920          a value of nil here.  */
 921           np = lim;
 922 #endif
 923         }
 924       else
 925         return Qnil;
 926     }
 927   /* Success: NP becomes the new value of point; it must lie within [BEGV, ZV].  */
 928   if (np < BEGV || np > ZV)
 929     abort ();
 930
 931   SET_PT (np);
 932
 933   return make_number (np);
 934 }
 935 \f
 936 /* Return 1 if REGEXP it matches just one constant string.  */
 937
 938 static int
 939 trivial_regexp_p (regexp)
 940      Lisp_Object regexp;
 941 {
 942   int len = STRING_BYTES (XSTRING (regexp));
 943   unsigned char *s = XSTRING (regexp)->data;
 944   while (--len >= 0)
 945     {
 946       switch (*s++)
 947         {
 948         case '.': case '*': case '+': case '?': case '[': case '^': case '$':
 949           return 0;
 950         case '\\':
 951           if (--len < 0)
 952             return 0;
 953           switch (*s++)
 954             {
 955             case '|': case '(': case ')': case '`': case '\'': case 'b':
 956             case 'B': case '<': case '>': case 'w': case 'W': case 's':
 957             case 'S': case '=': case '{': case '}':
 958             case 'c': case 'C': /* for categoryspec and notcategoryspec */
 959             case '1': case '2': case '3': case '4': case '5':
 960             case '6': case '7': case '8': case '9':
 961               return 0;
 962             }
 963         }
 964     }
 965   return 1;
 966 }
 967
 968 /* Search for the n'th occurrence of STRING in the current buffer,
 969    starting at position POS and stopping at position LIM,
 970    treating STRING as a literal string if RE is false or as
 971    a regular expression if RE is true.
 972
 973    If N is positive, searching is forward and LIM must be greater than POS.
 974    If N is negative, searching is backward and LIM must be less than POS.
 975
 976    Returns -x if x occurrences remain to be found (x > 0),
 977    or else the position at the beginning of the Nth occurrence
 978    (if searching backward) or the end (if searching forward).
 979
 980    POSIX is nonzero if we want full backtracking (POSIX style)
 981    for this pattern.  0 means backtrack only enough to get a valid match.  */
 982
 983 #define TRANSLATE(out, trt, d)                  \
 984 do                                              \
 985   {                                             \
 986     if (! NILP (trt))                           \
 987       {                                         \
 988         Lisp_Object temp;                       \
 989         temp = Faref (trt, make_number (d));    \
 990         if (INTEGERP (temp))                    \
 991           out = XINT (temp);                    \
 992         else                                    \
 993           out = d;                              \
 994       }                                         \
 995     else                                        \
 996       out = d;                                  \
 997   }                                             \
 998 while (0)
 999
1000 static int
1001 search_buffer (string, pos, pos_byte, lim, lim_byte, n,
1002                RE, trt, inverse_trt, posix)
1003      Lisp_Object string;
1004      int pos;
1005      int pos_byte;
1006      int lim;
1007      int lim_byte;
1008      int n;
1009      int RE;
1010      Lisp_Object trt;
1011      Lisp_Object inverse_trt;
1012      int posix;
1013 {
1014   int len = XSTRING (string)->size;
1015   int len_byte = STRING_BYTES (XSTRING (string));
1016   register int i;
1017
1018   if (running_asynch_code)
1019     save_search_regs ();
1020
1021   /* Searching 0 times means don't move.  */
1022   /* Null string is found at starting position.  */
1023   if (len == 0 || n == 0)
1024     {
1025       set_search_regs (pos_byte, 0);
1026       return pos;
1027     }
1028
1029   if (RE && !trivial_regexp_p (string))
1030     {
1031       unsigned char *p1, *p2;
1032       int s1, s2;
1033       struct re_pattern_buffer *bufp;
1034
1035       bufp = compile_pattern (string, &search_regs, trt, posix,
1036                               !NILP (current_buffer->enable_multibyte_characters));
1037
1038       immediate_quit = 1;       /* Quit immediately if user types ^G,
1039                                    because letting this function finish
1040                                    can take too long. */
1041       QUIT;                     /* Do a pending quit right away,
1042                                    to avoid paradoxical behavior */
1043       /* Get pointers and sizes of the two strings
1044          that make up the visible portion of the buffer. */
1045
1046       p1 = BEGV_ADDR;
1047       s1 = GPT_BYTE - BEGV_BYTE;
1048       p2 = GAP_END_ADDR;
1049       s2 = ZV_BYTE - GPT_BYTE;
1050       if (s1 < 0)
1051         {
1052           p2 = p1;
1053           s2 = ZV_BYTE - BEGV_BYTE;
1054           s1 = 0;
1055         }
1056       if (s2 < 0)
1057         {
1058           s1 = ZV_BYTE - BEGV_BYTE;
1059           s2 = 0;
1060         }
1061       re_match_object = Qnil;
1062
1063       while (n < 0)
1064         {
1065           int val;
1066           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1067                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1068                              &search_regs,
1069                              /* Don't allow match past current point */
1070                              pos_byte - BEGV_BYTE);
1071           if (val == -2)
1072             {
1073               matcher_overflow ();
1074             }
1075           if (val >= 0)
1076             {
1077               pos_byte = search_regs.start[0] + BEGV_BYTE;
1078               for (i = 0; i < search_regs.num_regs; i++)
1079                 if (search_regs.start[i] >= 0)
1080                   {
1081                     search_regs.start[i]
1082                       = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1083                     search_regs.end[i]
1084                       = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1085                   }
1086               XSETBUFFER (last_thing_searched, current_buffer);
1087               /* Set pos to the new position. */
1088               pos = search_regs.start[0];
1089             }
1090           else
1091             {
1092               immediate_quit = 0;
1093               return (n);
1094             }
1095           n++;
1096         }
1097       while (n > 0)
1098         {
1099           int val;
1100           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1101                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1102                              &search_regs,
1103                              lim_byte - BEGV_BYTE);
1104           if (val == -2)
1105             {
1106               matcher_overflow ();
1107             }
1108           if (val >= 0)
1109             {
1110               pos_byte = search_regs.end[0] + BEGV_BYTE;
1111               for (i = 0; i < search_regs.num_regs; i++)
1112                 if (search_regs.start[i] >= 0)
1113                   {
1114                     search_regs.start[i]
1115                       = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1116                     search_regs.end[i]
1117                       = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1118                   }
1119               XSETBUFFER (last_thing_searched, current_buffer);
1120               pos = search_regs.end[0];
1121             }
1122           else
1123             {
1124               immediate_quit = 0;
1125               return (0 - n);
1126             }
1127           n--;
1128         }
1129       immediate_quit = 0;
1130       return (pos);
1131     }
1132   else                          /* non-RE case */
1133     {
1134       unsigned char *raw_pattern, *pat;
1135       int raw_pattern_size;
1136       int raw_pattern_size_byte;
1137       unsigned char *patbuf;
1138       int multibyte = !NILP (current_buffer->enable_multibyte_characters);
1139       unsigned char *base_pat = XSTRING (string)->data;
1140       int charset_base = -1;
1141       int boyer_moore_ok = 1;
1142
1143       /* MULTIBYTE says whether the text to be searched is multibyte.
1144          We must convert PATTERN to match that, or we will not really
1145          find things right.  */
1146
1147       if (multibyte == STRING_MULTIBYTE (string))
1148         {
1149           raw_pattern = (unsigned char *) XSTRING (string)->data;
1150           raw_pattern_size = XSTRING (string)->size;
1151           raw_pattern_size_byte = STRING_BYTES (XSTRING (string));
1152         }
1153       else if (multibyte)
1154         {
1155           raw_pattern_size = XSTRING (string)->size;
1156           raw_pattern_size_byte
1157             = count_size_as_multibyte (XSTRING (string)->data,
1158                                        raw_pattern_size);
1159           raw_pattern = (unsigned char *) alloca (raw_pattern_size_byte + 1);
1160           copy_text (XSTRING (string)->data, raw_pattern,
1161                      XSTRING (string)->size, 0, 1);
1162         }
1163       else
1164         {
1165           /* Converting multibyte to single-byte.
1166
1167              ??? Perhaps this conversion should be done in a special way
1168              by subtracting nonascii-insert-offset from each non-ASCII char,
1169              so that only the multibyte chars which really correspond to
1170              the chosen single-byte character set can possibly match.  */
1171           raw_pattern_size = XSTRING (string)->size;
1172           raw_pattern_size_byte = XSTRING (string)->size;
1173           raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
1174           copy_text (XSTRING (string)->data, raw_pattern,
1175                      STRING_BYTES (XSTRING (string)), 1, 0);
1176         }
1177
1178       /* Copy and optionally translate the pattern.  */
1179       len = raw_pattern_size;
1180       len_byte = raw_pattern_size_byte;
1181       patbuf = (unsigned char *) alloca (len_byte);
1182       pat = patbuf;
1183       base_pat = raw_pattern;
1184       if (multibyte)
1185         {
1186           while (--len >= 0)
1187             {
1188               unsigned char str[MAX_MULTIBYTE_LENGTH];
1189               int c, translated, inverse;
1190               int in_charlen, charlen;
1191
1192               /* If we got here and the RE flag is set, it's because we're
1193                  dealing with a regexp known to be trivial, so the backslash
1194                  just quotes the next character.  */
1195               if (RE && *base_pat == '\\')
1196                 {
1197                   len--;
1198                   len_byte--;
1199                   base_pat++;
1200                 }
1201
1202               c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen);
1203
1204               /* Translate the character, if requested.  */
1205               TRANSLATE (translated, trt, c);
1206               /* If translation changed the byte-length, go back
1207                  to the original character.  */
1208               charlen = CHAR_STRING (translated, str);
1209               if (in_charlen != charlen)
1210                 {
1211                   translated = c;
1212                   charlen = CHAR_STRING (c, str);
1213                 }
1214
1215               /* If we are searching for something strange,
1216                  an invalid multibyte code, don't use boyer-moore.  */
1217               if (! ASCII_BYTE_P (translated)
1218                   && (charlen == 1 /* 8bit code */
1219                       || charlen != in_charlen /* invalid multibyte code */
1220                       ))
1221                 boyer_moore_ok = 0;
1222
1223               TRANSLATE (inverse, inverse_trt, c);
1224
1225               /* Did this char actually get translated?
1226                  Would any other char get translated into it?  */
1227               if (translated != c || inverse != c)
1228                 {
1229                   /* Keep track of which character set row
1230                      contains the characters that need translation.  */
1231                   int charset_base_code = c & ~CHAR_FIELD3_MASK;
1232                   if (charset_base == -1)
1233                     charset_base = charset_base_code;
1234                   else if (charset_base != charset_base_code)
1235                     /* If two different rows appear, needing translation,
1236                        then we cannot use boyer_moore search.  */
1237                     boyer_moore_ok = 0;
1238                 }
1239
1240               /* Store this character into the translated pattern.  */
1241               bcopy (str, pat, charlen);
1242               pat += charlen;
1243               base_pat += in_charlen;
1244               len_byte -= in_charlen;
1245             }
1246         }
1247       else
1248         {
1249           /* Unibyte buffer.  */
1250           charset_base = 0;
1251           while (--len >= 0)
1252             {
1253               int c, translated;
1254
1255               /* If we got here and the RE flag is set, it's because we're
1256                  dealing with a regexp known to be trivial, so the backslash
1257                  just quotes the next character.  */
1258               if (RE && *base_pat == '\\')
1259                 {
1260                   len--;
1261                   base_pat++;
1262                 }
1263               c = *base_pat++;
1264               TRANSLATE (translated, trt, c);
1265               *pat++ = translated;
1266             }
1267         }
1268
1269       len_byte = pat - patbuf;
1270       len = raw_pattern_size;
1271       pat = base_pat = patbuf;
1272
1273       if (boyer_moore_ok)
1274         return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
1275                             pos, pos_byte, lim, lim_byte,
1276                             charset_base);
1277       else
1278         return simple_search (n, pat, len, len_byte, trt,
1279                               pos, pos_byte, lim, lim_byte);
1280     }
1281 }
1282 \f
1283 /* Do a simple string search N times for the string PAT,
1284    whose length is LEN/LEN_BYTE,
1285    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1286    TRT is the translation table.
1287
1288    Return the character position where the match is found.
1289    Otherwise, if M matches remained to be found, return -M.
1290
1291    This kind of search works regardless of what is in PAT and
1292    regardless of what is in TRT.  It is used in cases where
1293    boyer_moore cannot work.  */
1294
1295 static int
1296 simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
1297      int n;
1298      unsigned char *pat;
1299      int len, len_byte;
1300      Lisp_Object trt;
1301      int pos, pos_byte;
1302      int lim, lim_byte;
1303 {
1304   int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1305   int forward = n > 0;
1306
1307   if (lim > pos && multibyte)
1308     while (n > 0)
1309       {
1310         while (1)
1311           {
1312             /* Try matching at position POS.  */
1313             int this_pos = pos;
1314             int this_pos_byte = pos_byte;
1315             int this_len = len;
1316             int this_len_byte = len_byte;
1317             unsigned char *p = pat;
1318             if (pos + len > lim)
1319               goto stop;
1320
1321             while (this_len > 0)
1322               {
1323                 int charlen, buf_charlen;
1324                 int pat_ch, buf_ch;
1325
1326                 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1327                 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1328                                                  ZV_BYTE - this_pos_byte,
1329                                                  buf_charlen);
1330                 TRANSLATE (buf_ch, trt, buf_ch);
1331
1332                 if (buf_ch != pat_ch)
1333                   break;
1334
1335                 this_len_byte -= charlen;
1336                 this_len--;
1337                 p += charlen;
1338
1339                 this_pos_byte += buf_charlen;
1340                 this_pos++;
1341               }
1342
1343             if (this_len == 0)
1344               {
1345                 pos += len;
1346                 pos_byte += len_byte;
1347                 break;
1348               }
1349
1350             INC_BOTH (pos, pos_byte);
1351           }
1352
1353         n--;
1354       }
1355   else if (lim > pos)
1356     while (n > 0)
1357       {
1358         while (1)
1359           {
1360             /* Try matching at position POS.  */
1361             int this_pos = pos;
1362             int this_len = len;
1363             unsigned char *p = pat;
1364
1365             if (pos + len > lim)
1366               goto stop;
1367
1368             while (this_len > 0)
1369               {
1370                 int pat_ch = *p++;
1371                 int buf_ch = FETCH_BYTE (this_pos);
1372                 TRANSLATE (buf_ch, trt, buf_ch);
1373
1374                 if (buf_ch != pat_ch)
1375                   break;
1376
1377                 this_len--;
1378                 this_pos++;
1379               }
1380
1381             if (this_len == 0)
1382               {
1383                 pos += len;
1384                 break;
1385               }
1386
1387             pos++;
1388           }
1389
1390         n--;
1391       }
1392   /* Backwards search.  */
1393   else if (lim < pos && multibyte)
1394     while (n < 0)
1395       {
1396         while (1)
1397           {
1398             /* Try matching at position POS.  */
1399             int this_pos = pos - len;
1400             int this_pos_byte = pos_byte - len_byte;
1401             int this_len = len;
1402             int this_len_byte = len_byte;
1403             unsigned char *p = pat;
1404
1405             if (pos - len < lim)
1406               goto stop;
1407
1408             while (this_len > 0)
1409               {
1410                 int charlen, buf_charlen;
1411                 int pat_ch, buf_ch;
1412
1413                 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1414                 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1415                                                  ZV_BYTE - this_pos_byte,
1416                                                  buf_charlen);
1417                 TRANSLATE (buf_ch, trt, buf_ch);
1418
1419                 if (buf_ch != pat_ch)
1420                   break;
1421
1422                 this_len_byte -= charlen;
1423                 this_len--;
1424                 p += charlen;
1425                 this_pos_byte += buf_charlen;
1426                 this_pos++;
1427               }
1428
1429             if (this_len == 0)
1430               {
1431                 pos -= len;
1432                 pos_byte -= len_byte;
1433                 break;
1434               }
1435
1436             DEC_BOTH (pos, pos_byte);
1437           }
1438
1439         n++;
1440       }
1441   else if (lim < pos)
1442     while (n < 0)
1443       {
1444         while (1)
1445           {
1446             /* Try matching at position POS.  */
1447             int this_pos = pos - len;
1448             int this_len = len;
1449             unsigned char *p = pat;
1450
1451             if (pos - len < lim)
1452               goto stop;
1453
1454             while (this_len > 0)
1455               {
1456                 int pat_ch = *p++;
1457                 int buf_ch = FETCH_BYTE (this_pos);
1458                 TRANSLATE (buf_ch, trt, buf_ch);
1459
1460                 if (buf_ch != pat_ch)
1461                   break;
1462                 this_len--;
1463                 this_pos++;
1464               }
1465
1466             if (this_len == 0)
1467               {
1468                 pos -= len;
1469                 break;
1470               }
1471
1472             pos--;
1473           }
1474
1475         n++;
1476       }
1477
1478  stop:
1479   if (n == 0)
1480     {
1481       if (forward)
1482         set_search_regs ((multibyte ? pos_byte : pos) - len_byte, len_byte);
1483       else
1484         set_search_regs (multibyte ? pos_byte : pos, len_byte);
1485
1486       return pos;
1487     }
1488   else if (n > 0)
1489     return -n;
1490   else
1491     return n;
1492 }
1493 \f
1494 /* Do Boyer-Moore search N times for the string PAT,
1495    whose length is LEN/LEN_BYTE,
1496    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1497    DIRECTION says which direction we search in.
1498    TRT and INVERSE_TRT are translation tables.
1499
1500    This kind of search works if all the characters in PAT that have
1501    nontrivial translation are the same aside from the last byte.  This
1502    makes it possible to translate just the last byte of a character,
1503    and do so after just a simple test of the context.
1504
1505    If that criterion is not satisfied, do not call this function.  */
1506
1507 static int
1508 boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
1509              pos, pos_byte, lim, lim_byte, charset_base)
1510      int n;
1511      unsigned char *base_pat;
1512      int len, len_byte;
1513      Lisp_Object trt;
1514      Lisp_Object inverse_trt;
1515      int pos, pos_byte;
1516      int lim, lim_byte;
1517      int charset_base;
1518 {
1519   int direction = ((n > 0) ? 1 : -1);
1520   register int dirlen;
1521   int infinity, limit, stride_for_teases = 0;
1522   register int *BM_tab;
1523   int *BM_tab_base;
1524   register unsigned char *cursor, *p_limit;
1525   register int i, j;
1526   unsigned char *pat, *pat_end;
1527   int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1528
1529   unsigned char simple_translate[0400];
1530   int translate_prev_byte = 0;
1531   int translate_anteprev_byte = 0;
1532
1533 #ifdef C_ALLOCA
1534   int BM_tab_space[0400];
1535   BM_tab = &BM_tab_space[0];
1536 #else
1537   BM_tab = (int *) alloca (0400 * sizeof (int));
1538 #endif
1539   /* The general approach is that we are going to maintain that we know */
1540   /* the first (closest to the present position, in whatever direction */
1541   /* we're searching) character that could possibly be the last */
1542   /* (furthest from present position) character of a valid match.  We */
1543   /* advance the state of our knowledge by looking at that character */
1544   /* and seeing whether it indeed matches the last character of the */
1545   /* pattern.  If it does, we take a closer look.  If it does not, we */
1546   /* move our pointer (to putative last characters) as far as is */
1547   /* logically possible.  This amount of movement, which I call a */
1548   /* stride, will be the length of the pattern if the actual character */
1549   /* appears nowhere in the pattern, otherwise it will be the distance */
1550   /* from the last occurrence of that character to the end of the */
1551   /* pattern. */
1552   /* As a coding trick, an enormous stride is coded into the table for */
1553   /* characters that match the last character.  This allows use of only */
1554   /* a single test, a test for having gone past the end of the */
1555   /* permissible match region, to test for both possible matches (when */
1556   /* the stride goes past the end immediately) and failure to */
1557   /* match (where you get nudged past the end one stride at a time). */
1558
1559   /* Here we make a "mickey mouse" BM table.  The stride of the search */
1560   /* is determined only by the last character of the putative match. */
1561   /* If that character does not match, we will stride the proper */
1562   /* distance to propose a match that superimposes it on the last */
1563   /* instance of a character that matches it (per trt), or misses */
1564   /* it entirely if there is none. */
1565
1566   dirlen = len_byte * direction;
1567   infinity = dirlen - (lim_byte + pos_byte + len_byte + len_byte) * direction;
1568
1569   /* Record position after the end of the pattern.  */
1570   pat_end = base_pat + len_byte;
1571   /* BASE_PAT points to a character that we start scanning from.
1572      It is the first character in a forward search,
1573      the last character in a backward search.  */
1574   if (direction < 0)
1575     base_pat = pat_end - 1;
1576
1577   BM_tab_base = BM_tab;
1578   BM_tab += 0400;
1579   j = dirlen;           /* to get it in a register */
1580   /* A character that does not appear in the pattern induces a */
1581   /* stride equal to the pattern length. */
1582   while (BM_tab_base != BM_tab)
1583     {
1584       *--BM_tab = j;
1585       *--BM_tab = j;
1586       *--BM_tab = j;
1587       *--BM_tab = j;
1588     }
1589
1590   /* We use this for translation, instead of TRT itself.
1591      We fill this in to handle the characters that actually
1592      occur in the pattern.  Others don't matter anyway!  */
1593   bzero (simple_translate, sizeof simple_translate);
1594   for (i = 0; i < 0400; i++)
1595     simple_translate[i] = i;
1596
1597   i = 0;
1598   while (i != infinity)
1599     {
1600       unsigned char *ptr = base_pat + i;
1601       i += direction;
1602       if (i == dirlen)
1603         i = infinity;
1604       if (! NILP (trt))
1605         {
1606           int ch;
1607           int untranslated;
1608           int this_translated = 1;
1609
1610           if (multibyte
1611               /* Is *PTR the last byte of a character?  */
1612               && (pat_end - ptr == 1 || CHAR_HEAD_P (ptr[1])))
1613             {
1614               unsigned char *charstart = ptr;
1615               while (! CHAR_HEAD_P (*charstart))
1616                 charstart--;
1617               untranslated = STRING_CHAR (charstart, ptr - charstart + 1);
1618               if (charset_base == (untranslated & ~CHAR_FIELD3_MASK))
1619                 {
1620                   TRANSLATE (ch, trt, untranslated);
1621                   if (! CHAR_HEAD_P (*ptr))
1622                     {
1623                       translate_prev_byte = ptr[-1];
1624                       if (! CHAR_HEAD_P (translate_prev_byte))
1625                         translate_anteprev_byte = ptr[-2];
1626                     }
1627                 }
1628               else
1629                 {
1630                   this_translated = 0;
1631                   ch = *ptr;
1632                 }
1633             }
1634           else if (!multibyte)
1635             TRANSLATE (ch, trt, *ptr);
1636           else
1637             {
1638               ch = *ptr;
1639               this_translated = 0;
1640             }
1641
1642           if (ch > 0400)
1643             j = ((unsigned char) ch) | 0200;
1644           else
1645             j = (unsigned char) ch;
1646
1647           if (i == infinity)
1648             stride_for_teases = BM_tab[j];
1649
1650           BM_tab[j] = dirlen - i;
1651           /* A translation table is accompanied by its inverse -- see */
1652           /* comment following downcase_table for details */
1653           if (this_translated)
1654             {
1655               int starting_ch = ch;
1656               int starting_j = j;
1657               while (1)
1658                 {
1659                   TRANSLATE (ch, inverse_trt, ch);
1660                   if (ch > 0400)
1661                     j = ((unsigned char) ch) | 0200;
1662                   else
1663                     j = (unsigned char) ch;
1664
1665                   /* For all the characters that map into CH,
1666                      set up simple_translate to map the last byte
1667                      into STARTING_J.  */
1668                   simple_translate[j] = starting_j;
1669                   if (ch == starting_ch)
1670                     break;
1671                   BM_tab[j] = dirlen - i;
1672                 }
1673             }
1674         }
1675       else
1676         {
1677           j = *ptr;
1678
1679           if (i == infinity)
1680             stride_for_teases = BM_tab[j];
1681           BM_tab[j] = dirlen - i;
1682         }
1683       /* stride_for_teases tells how much to stride if we get a */
1684       /* match on the far character but are subsequently */
1685       /* disappointed, by recording what the stride would have been */
1686       /* for that character if the last character had been */
1687       /* different. */
1688     }
1689   infinity = dirlen - infinity;
1690   pos_byte += dirlen - ((direction > 0) ? direction : 0);
1691   /* loop invariant - POS_BYTE points at where last char (first
1692      char if reverse) of pattern would align in a possible match.  */
1693   while (n != 0)
1694     {
1695       int tail_end;
1696       unsigned char *tail_end_ptr;
1697
1698       /* It's been reported that some (broken) compiler thinks that
1699          Boolean expressions in an arithmetic context are unsigned.
1700          Using an explicit ?1:0 prevents this.  */
1701       if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1702           < 0)
1703         return (n * (0 - direction));
1704       /* First we do the part we can by pointers (maybe nothing) */
1705       QUIT;
1706       pat = base_pat;
1707       limit = pos_byte - dirlen + direction;
1708       if (direction > 0)
1709         {
1710           limit = BUFFER_CEILING_OF (limit);
1711           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1712              can take on without hitting edge of buffer or the gap.  */
1713           limit = min (limit, pos_byte + 20000);
1714           limit = min (limit, lim_byte - 1);
1715         }
1716       else
1717         {
1718           limit = BUFFER_FLOOR_OF (limit);
1719           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1720              can take on without hitting edge of buffer or the gap.  */
1721           limit = max (limit, pos_byte - 20000);
1722           limit = max (limit, lim_byte);
1723         }
1724       tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1725       tail_end_ptr = BYTE_POS_ADDR (tail_end);
1726
1727       if ((limit - pos_byte) * direction > 20)
1728         {
1729           unsigned char *p2;
1730
1731           p_limit = BYTE_POS_ADDR (limit);
1732           p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1733           /* In this loop, pos + cursor - p2 is the surrogate for pos */
1734           while (1)             /* use one cursor setting as long as i can */
1735             {
1736               if (direction > 0) /* worth duplicating */
1737                 {
1738                   /* Use signed comparison if appropriate
1739                      to make cursor+infinity sure to be > p_limit.
1740                      Assuming that the buffer lies in a range of addresses
1741                      that are all "positive" (as ints) or all "negative",
1742                      either kind of comparison will work as long
1743                      as we don't step by infinity.  So pick the kind
1744                      that works when we do step by infinity.  */
1745                   if ((EMACS_INT) (p_limit + infinity) > (EMACS_INT) p_limit)
1746                     while ((EMACS_INT) cursor <= (EMACS_INT) p_limit)
1747                       cursor += BM_tab[*cursor];
1748                   else
1749                     while ((EMACS_UINT) cursor <= (EMACS_UINT) p_limit)
1750                       cursor += BM_tab[*cursor];
1751                 }
1752               else
1753                 {
1754                   if ((EMACS_INT) (p_limit + infinity) < (EMACS_INT) p_limit)
1755                     while ((EMACS_INT) cursor >= (EMACS_INT) p_limit)
1756                       cursor += BM_tab[*cursor];
1757                   else
1758                     while ((EMACS_UINT) cursor >= (EMACS_UINT) p_limit)
1759                       cursor += BM_tab[*cursor];
1760                 }
1761 /* If you are here, cursor is beyond the end of the searched region. */
1762 /* This can happen if you match on the far character of the pattern, */
1763 /* because the "stride" of that character is infinity, a number able */
1764 /* to throw you well beyond the end of the search.  It can also */
1765 /* happen if you fail to match within the permitted region and would */
1766 /* otherwise try a character beyond that region */
1767               if ((cursor - p_limit) * direction <= len_byte)
1768                 break;  /* a small overrun is genuine */
1769               cursor -= infinity; /* large overrun = hit */
1770               i = dirlen - direction;
1771               if (! NILP (trt))
1772                 {
1773                   while ((i -= direction) + direction != 0)
1774                     {
1775                       int ch;
1776                       cursor -= direction;
1777                       /* Translate only the last byte of a character.  */
1778                       if (! multibyte
1779                           || ((cursor == tail_end_ptr
1780                                || CHAR_HEAD_P (cursor[1]))
1781                               && (CHAR_HEAD_P (cursor[0])
1782                                   || (translate_prev_byte == cursor[-1]
1783                                       && (CHAR_HEAD_P (translate_prev_byte)
1784                                           || translate_anteprev_byte == cursor[-2])))))
1785                         ch = simple_translate[*cursor];
1786                       else
1787                         ch = *cursor;
1788                       if (pat[i] != ch)
1789                         break;
1790                     }
1791                 }
1792               else
1793                 {
1794                   while ((i -= direction) + direction != 0)
1795                     {
1796                       cursor -= direction;
1797                       if (pat[i] != *cursor)
1798                         break;
1799                     }
1800                 }
1801               cursor += dirlen - i - direction; /* fix cursor */
1802               if (i + direction == 0)
1803                 {
1804                   int position;
1805
1806                   cursor -= direction;
1807
1808                   position = pos_byte + cursor - p2 + ((direction > 0)
1809                                                        ? 1 - len_byte : 0);
1810                   set_search_regs (position, len_byte);
1811
1812                   if ((n -= direction) != 0)
1813                     cursor += dirlen; /* to resume search */
1814                   else
1815                     return ((direction > 0)
1816                             ? search_regs.end[0] : search_regs.start[0]);
1817                 }
1818               else
1819                 cursor += stride_for_teases; /* <sigh> we lose -  */
1820             }
1821           pos_byte += cursor - p2;
1822         }
1823       else
1824         /* Now we'll pick up a clump that has to be done the hard */
1825         /* way because it covers a discontinuity */
1826         {
1827           limit = ((direction > 0)
1828                    ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
1829                    : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
1830           limit = ((direction > 0)
1831                    ? min (limit + len_byte, lim_byte - 1)
1832                    : max (limit - len_byte, lim_byte));
1833           /* LIMIT is now the last value POS_BYTE can have
1834              and still be valid for a possible match.  */
1835           while (1)
1836             {
1837               /* This loop can be coded for space rather than */
1838               /* speed because it will usually run only once. */
1839               /* (the reach is at most len + 21, and typically */
1840               /* does not exceed len) */
1841               while ((limit - pos_byte) * direction >= 0)
1842                 pos_byte += BM_tab[FETCH_BYTE (pos_byte)];
1843               /* now run the same tests to distinguish going off the */
1844               /* end, a match or a phony match. */
1845               if ((pos_byte - limit) * direction <= len_byte)
1846                 break;  /* ran off the end */
1847               /* Found what might be a match.
1848                  Set POS_BYTE back to last (first if reverse) pos.  */
1849               pos_byte -= infinity;
1850               i = dirlen - direction;
1851               while ((i -= direction) + direction != 0)
1852                 {
1853                   int ch;
1854                   unsigned char *ptr;
1855                   pos_byte -= direction;
1856                   ptr = BYTE_POS_ADDR (pos_byte);
1857                   /* Translate only the last byte of a character.  */
1858                   if (! multibyte
1859                       || ((ptr == tail_end_ptr
1860                            || CHAR_HEAD_P (ptr[1]))
1861                           && (CHAR_HEAD_P (ptr[0])
1862                               || (translate_prev_byte == ptr[-1]
1863                                   && (CHAR_HEAD_P (translate_prev_byte)
1864                                       || translate_anteprev_byte == ptr[-2])))))
1865                     ch = simple_translate[*ptr];
1866                   else
1867                     ch = *ptr;
1868                   if (pat[i] != ch)
1869                     break;
1870                 }
1871               /* Above loop has moved POS_BYTE part or all the way
1872                  back to the first pos (last pos if reverse).
1873                  Set it once again at the last (first if reverse) char.  */
1874               pos_byte += dirlen - i- direction;
1875               if (i + direction == 0)
1876                 {
1877                   int position;
1878                   pos_byte -= direction;
1879
1880                   position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
1881
1882                   set_search_regs (position, len_byte);
1883
1884                   if ((n -= direction) != 0)
1885                     pos_byte += dirlen; /* to resume search */
1886                   else
1887                     return ((direction > 0)
1888                             ? search_regs.end[0] : search_regs.start[0]);
1889                 }
1890               else
1891                 pos_byte += stride_for_teases;
1892             }
1893           }
1894       /* We have done one clump.  Can we continue? */
1895       if ((lim_byte - pos_byte) * direction < 0)
1896         return ((0 - n) * direction);
1897     }
1898   return BYTE_TO_CHAR (pos_byte);
1899 }
1900
1901 /* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
1902    for the overall match just found in the current buffer.
1903    Also clear out the match data for registers 1 and up.  */
1904
1905 static void
1906 set_search_regs (beg_byte, nbytes)
1907      int beg_byte, nbytes;
1908 {
1909   int i;
1910
1911   /* Make sure we have registers in which to store
1912      the match position.  */
1913   if (search_regs.num_regs == 0)
1914     {
1915       search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
1916       search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
1917       search_regs.num_regs = 2;
1918     }
1919
1920   /* Clear out the other registers.  */
1921   for (i = 1; i < search_regs.num_regs; i++)
1922     {
1923       search_regs.start[i] = -1;
1924       search_regs.end[i] = -1;
1925     }
1926
1927   search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
1928   search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
1929   XSETBUFFER (last_thing_searched, current_buffer);
1930 }
1931 \f
1932 /* Given a string of words separated by word delimiters,
1933   compute a regexp that matches those exact words
1934   separated by arbitrary punctuation.  */
1935
1936 static Lisp_Object
1937 wordify (string)
1938      Lisp_Object string;
1939 {
1940   register unsigned char *p, *o;
1941   register int i, i_byte, len, punct_count = 0, word_count = 0;
1942   Lisp_Object val;
1943   int prev_c = 0;
1944   int adjust;
1945
1946   CHECK_STRING (string);
1947   p = XSTRING (string)->data;
1948   len = XSTRING (string)->size;
1949
1950   for (i = 0, i_byte = 0; i < len; )
1951     {
1952       int c;
1953
1954       FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
1955
1956       if (SYNTAX (c) != Sword)
1957         {
1958           punct_count++;
1959           if (i > 0 && SYNTAX (prev_c) == Sword)
1960             word_count++;
1961         }
1962
1963       prev_c = c;
1964     }
1965
1966   if (SYNTAX (prev_c) == Sword)
1967     word_count++;
1968   if (!word_count)
1969     return empty_string;
1970
1971   adjust = - punct_count + 5 * (word_count - 1) + 4;
1972   if (STRING_MULTIBYTE (string))
1973     val = make_uninit_multibyte_string (len + adjust,
1974                                         STRING_BYTES (XSTRING (string))
1975                                         + adjust);
1976   else
1977     val = make_uninit_string (len + adjust);
1978
1979   o = XSTRING (val)->data;
1980   *o++ = '\\';
1981   *o++ = 'b';
1982   prev_c = 0;
1983
1984   for (i = 0, i_byte = 0; i < len; )
1985     {
1986       int c;
1987       int i_byte_orig = i_byte;
1988
1989       FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
1990
1991       if (SYNTAX (c) == Sword)
1992         {
1993           bcopy (&XSTRING (string)->data[i_byte_orig], o,
1994                  i_byte - i_byte_orig);
1995           o += i_byte - i_byte_orig;
1996         }
1997       else if (i > 0 && SYNTAX (prev_c) == Sword && --word_count)
1998         {
1999           *o++ = '\\';
2000           *o++ = 'W';
2001           *o++ = '\\';
2002           *o++ = 'W';
2003           *o++ = '*';
2004         }
2005
2006       prev_c = c;
2007     }
2008
2009   *o++ = '\\';
2010   *o++ = 'b';
2011
2012   return val;
2013 }
2014 \f
2015 DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
2016        "MSearch backward: ",
2017        doc: /* Search backward from point for STRING.
2018 Set point to the beginning of the occurrence found, and return point.
2019 An optional second argument bounds the search; it is a buffer position.
2020 The match found must not extend before that position.
2021 Optional third argument, if t, means if fail just return nil (no error).
2022  If not nil and not t, position at limit of search and return nil.
2023 Optional fourth argument is repeat count--search for successive occurrences.
2024
2025 Search case-sensitivity is determined by the value of the variable
2026 `case-fold-search', which see.
2027
2028 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2029      (string, bound, noerror, count)
2030      Lisp_Object string, bound, noerror, count;
2031 {
2032   return search_command (string, bound, noerror, count, -1, 0, 0);
2033 }
2034
2035 DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
2036        doc: /* Search forward from point for STRING.
2037 Set point to the end of the occurrence found, and return point.
2038 An optional second argument bounds the search; it is a buffer position.
2039 The match found must not extend after that position.  nil is equivalent
2040   to (point-max).
2041 Optional third argument, if t, means if fail just return nil (no error).
2042   If not nil and not t, move to limit of search and return nil.
2043 Optional fourth argument is repeat count--search for successive occurrences.
2044
2045 Search case-sensitivity is determined by the value of the variable
2046 `case-fold-search', which see.
2047
2048 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2049      (string, bound, noerror, count)
2050      Lisp_Object string, bound, noerror, count;
2051 {
2052   return search_command (string, bound, noerror, count, 1, 0, 0);
2053 }
2054
2055 DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
2056        "sWord search backward: ",
2057        doc: /* Search backward from point for STRING, ignoring differences in punctuation.
2058 Set point to the beginning of the occurrence found, and return point.
2059 An optional second argument bounds the search; it is a buffer position.
2060 The match found must not extend before that position.
2061 Optional third argument, if t, means if fail just return nil (no error).
2062   If not nil and not t, move to limit of search and return nil.
2063 Optional fourth argument is repeat count--search for successive occurrences.  */)
2064      (string, bound, noerror, count)
2065      Lisp_Object string, bound, noerror, count;
2066 {
2067   return search_command (wordify (string), bound, noerror, count, -1, 1, 0);
2068 }
2069
2070 DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
2071        "sWord search: ",
2072        doc: /* Search forward from point for STRING, ignoring differences in punctuation.
2073 Set point to the end of the occurrence found, and return point.
2074 An optional second argument bounds the search; it is a buffer position.
2075 The match found must not extend after that position.
2076 Optional third argument, if t, means if fail just return nil (no error).
2077   If not nil and not t, move to limit of search and return nil.
2078 Optional fourth argument is repeat count--search for successive occurrences.  */)
2079      (string, bound, noerror, count)
2080      Lisp_Object string, bound, noerror, count;
2081 {
2082   return search_command (wordify (string), bound, noerror, count, 1, 1, 0);
2083 }
2084
2085 DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
2086        "sRE search backward: ",
2087        doc: /* Search backward from point for match for regular expression REGEXP.
2088 Set point to the beginning of the match, and return point.
2089 The match found is the one starting last in the buffer
2090 and yet ending before the origin of the search.
2091 An optional second argument bounds the search; it is a buffer position.
2092 The match found must start at or after that position.
2093 Optional third argument, if t, means if fail just return nil (no error).
2094   If not nil and not t, move to limit of search and return nil.
2095 Optional fourth argument is repeat count--search for successive occurrences.
2096 See also the functions `match-beginning', `match-end', `match-string',
2097 and `replace-match'.  */)
2098      (regexp, bound, noerror, count)
2099      Lisp_Object regexp, bound, noerror, count;
2100 {
2101   return search_command (regexp, bound, noerror, count, -1, 1, 0);
2102 }
2103
2104 DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
2105        "sRE search: ",
2106        doc: /* Search forward from point for regular expression REGEXP.
2107 Set point to the end of the occurrence found, and return point.
2108 An optional second argument bounds the search; it is a buffer position.
2109 The match found must not extend after that position.
2110 Optional third argument, if t, means if fail just return nil (no error).
2111   If not nil and not t, move to limit of search and return nil.
2112 Optional fourth argument is repeat count--search for successive occurrences.
2113 See also the functions `match-beginning', `match-end', `match-string',
2114 and `replace-match'.  */)
2115      (regexp, bound, noerror, count)
2116      Lisp_Object regexp, bound, noerror, count;
2117 {
2118   return search_command (regexp, bound, noerror, count, 1, 1, 0);
2119 }
2120
2121 DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
2122        "sPosix search backward: ",
2123        doc: /* Search backward from point for match for regular expression REGEXP.
2124 Find the longest match in accord with Posix regular expression rules.
2125 Set point to the beginning of the match, and return point.
2126 The match found is the one starting last in the buffer
2127 and yet ending before the origin of the search.
2128 An optional second argument bounds the search; it is a buffer position.
2129 The match found must start at or after that position.
2130 Optional third argument, if t, means if fail just return nil (no error).
2131   If not nil and not t, move to limit of search and return nil.
2132 Optional fourth argument is repeat count--search for successive occurrences.
2133 See also the functions `match-beginning', `match-end', `match-string',
2134 and `replace-match'.  */)
2135      (regexp, bound, noerror, count)
2136      Lisp_Object regexp, bound, noerror, count;
2137 {
2138   return search_command (regexp, bound, noerror, count, -1, 1, 1);
2139 }
2140
2141 DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
2142        "sPosix search: ",
2143        doc: /* Search forward from point for regular expression REGEXP.
2144 Find the longest match in accord with Posix regular expression rules.
2145 Set point to the end of the occurrence found, and return point.
2146 An optional second argument bounds the search; it is a buffer position.
2147 The match found must not extend after that position.
2148 Optional third argument, if t, means if fail just return nil (no error).
2149   If not nil and not t, move to limit of search and return nil.
2150 Optional fourth argument is repeat count--search for successive occurrences.
2151 See also the functions `match-beginning', `match-end', `match-string',
2152 and `replace-match'.  */)
2153      (regexp, bound, noerror, count)
2154      Lisp_Object regexp, bound, noerror, count;
2155 {
2156   return search_command (regexp, bound, noerror, count, 1, 1, 1);
2157 }
2158 \f
2159 DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
2160        doc: /* Replace text matched by last search with NEWTEXT.
2161 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
2162 Otherwise maybe capitalize the whole text, or maybe just word initials,
2163 based on the replaced text.
2164 If the replaced text has only capital letters
2165 and has at least one multiletter word, convert NEWTEXT to all caps.
2166 If the replaced text has at least one word starting with a capital letter,
2167 then capitalize each word in NEWTEXT.
2168
2169 If third arg LITERAL is non-nil, insert NEWTEXT literally.
2170 Otherwise treat `\\' as special:
2171   `\\&' in NEWTEXT means substitute original matched text.
2172   `\\N' means substitute what matched the Nth `\\(...\\)'.
2173        If Nth parens didn't match, substitute nothing.
2174   `\\\\' means insert one `\\'.
2175 FIXEDCASE and LITERAL are optional arguments.
2176 Leaves point at end of replacement text.
2177
2178 The optional fourth argument STRING can be a string to modify.
2179 This is meaningful when the previous match was done against STRING,
2180 using `string-match'.  When used this way, `replace-match'
2181 creates and returns a new string made by copying STRING and replacing
2182 the part of STRING that was matched.
2183
2184 The optional fifth argument SUBEXP specifies a subexpression;
2185 it says to replace just that subexpression with NEWTEXT,
2186 rather than replacing the entire matched text.
2187 This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
2188 `\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
2189 NEWTEXT in place of subexp N.
2190 This is useful only after a regular expression search or match,
2191 since only regular expressions have distinguished subexpressions.  */)
2192      (newtext, fixedcase, literal, string, subexp)
2193      Lisp_Object newtext, fixedcase, literal, string, subexp;
2194 {
2195   enum { nochange, all_caps, cap_initial } case_action;
2196   register int pos, pos_byte;
2197   int some_multiletter_word;
2198   int some_lowercase;
2199   int some_uppercase;
2200   int some_nonuppercase_initial;
2201   register int c, prevc;
2202   int sub;
2203   int opoint, newpoint;
2204
2205   CHECK_STRING (newtext);
2206
2207   if (! NILP (string))
2208     CHECK_STRING (string);
2209
2210   case_action = nochange;       /* We tried an initialization */
2211                                 /* but some C compilers blew it */
2212
2213   if (search_regs.num_regs <= 0)
2214     error ("replace-match called before any match found");
2215
2216   if (NILP (subexp))
2217     sub = 0;
2218   else
2219     {
2220       CHECK_NUMBER (subexp);
2221       sub = XINT (subexp);
2222       if (sub < 0 || sub >= search_regs.num_regs)
2223         args_out_of_range (subexp, make_number (search_regs.num_regs));
2224     }
2225
2226   if (NILP (string))
2227     {
2228       if (search_regs.start[sub] < BEGV
2229           || search_regs.start[sub] > search_regs.end[sub]
2230           || search_regs.end[sub] > ZV)
2231         args_out_of_range (make_number (search_regs.start[sub]),
2232                            make_number (search_regs.end[sub]));
2233     }
2234   else
2235     {
2236       if (search_regs.start[sub] < 0
2237           || search_regs.start[sub] > search_regs.end[sub]
2238           || search_regs.end[sub] > XSTRING (string)->size)
2239         args_out_of_range (make_number (search_regs.start[sub]),
2240                            make_number (search_regs.end[sub]));
2241     }
2242
2243   if (NILP (fixedcase))
2244     {
2245       /* Decide how to casify by examining the matched text. */
2246       int last;
2247
2248       pos = search_regs.start[sub];
2249       last = search_regs.end[sub];
2250
2251       if (NILP (string))
2252         pos_byte = CHAR_TO_BYTE (pos);
2253       else
2254         pos_byte = string_char_to_byte (string, pos);
2255
2256       prevc = '\n';
2257       case_action = all_caps;
2258
2259       /* some_multiletter_word is set nonzero if any original word
2260          is more than one letter long. */
2261       some_multiletter_word = 0;
2262       some_lowercase = 0;
2263       some_nonuppercase_initial = 0;
2264       some_uppercase = 0;
2265
2266       while (pos < last)
2267         {
2268           if (NILP (string))
2269             {
2270               c = FETCH_CHAR (pos_byte);
2271               INC_BOTH (pos, pos_byte);
2272             }
2273           else
2274             FETCH_STRING_CHAR_ADVANCE (c, string, pos, pos_byte);
2275
2276           if (LOWERCASEP (c))
2277             {
2278               /* Cannot be all caps if any original char is lower case */
2279
2280               some_lowercase = 1;
2281               if (SYNTAX (prevc) != Sword)
2282                 some_nonuppercase_initial = 1;
2283               else
2284                 some_multiletter_word = 1;
2285             }
2286           else if (!NOCASEP (c))
2287             {
2288               some_uppercase = 1;
2289               if (SYNTAX (prevc) != Sword)
2290                 ;
2291               else
2292                 some_multiletter_word = 1;
2293             }
2294           else
2295             {
2296               /* If the initial is a caseless word constituent,
2297                  treat that like a lowercase initial.  */
2298               if (SYNTAX (prevc) != Sword)
2299                 some_nonuppercase_initial = 1;
2300             }
2301
2302           prevc = c;
2303         }
2304
2305       /* Convert to all caps if the old text is all caps
2306          and has at least one multiletter word.  */
2307       if (! some_lowercase && some_multiletter_word)
2308         case_action = all_caps;
2309       /* Capitalize each word, if the old text has all capitalized words.  */
2310       else if (!some_nonuppercase_initial && some_multiletter_word)
2311         case_action = cap_initial;
2312       else if (!some_nonuppercase_initial && some_uppercase)
2313         /* Should x -> yz, operating on X, give Yz or YZ?
2314            We'll assume the latter.  */
2315         case_action = all_caps;
2316       else
2317         case_action = nochange;
2318     }
2319
2320   /* Do replacement in a string.  */
2321   if (!NILP (string))
2322     {
2323       Lisp_Object before, after;
2324
2325       before = Fsubstring (string, make_number (0),
2326                            make_number (search_regs.start[sub]));
2327       after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
2328
2329       /* Substitute parts of the match into NEWTEXT
2330          if desired.  */
2331       if (NILP (literal))
2332         {
2333           int lastpos = 0;
2334           int lastpos_byte = 0;
2335           /* We build up the substituted string in ACCUM.  */
2336           Lisp_Object accum;
2337           Lisp_Object middle;
2338           int length = STRING_BYTES (XSTRING (newtext));
2339
2340           accum = Qnil;
2341
2342           for (pos_byte = 0, pos = 0; pos_byte < length;)
2343             {
2344               int substart = -1;
2345               int subend = 0;
2346               int delbackslash = 0;
2347
2348               FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2349
2350               if (c == '\\')
2351                 {
2352                   FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2353
2354                   if (c == '&')
2355                     {
2356                       substart = search_regs.start[sub];
2357                       subend = search_regs.end[sub];
2358                     }
2359                   else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
2360                     {
2361                       if (search_regs.start[c - '0'] >= 0)
2362                         {
2363                           substart = search_regs.start[c - '0'];
2364                           subend = search_regs.end[c - '0'];
2365                         }
2366                     }
2367                   else if (c == '\\')
2368                     delbackslash = 1;
2369                   else
2370                     error ("Invalid use of `\\' in replacement text");
2371                 }
2372               if (substart >= 0)
2373                 {
2374                   if (pos - 2 != lastpos)
2375                     middle = substring_both (newtext, lastpos,
2376                                              lastpos_byte,
2377                                              pos - 2, pos_byte - 2);
2378                   else
2379                     middle = Qnil;
2380                   accum = concat3 (accum, middle,
2381                                    Fsubstring (string,
2382                                                make_number (substart),
2383                                                make_number (subend)));
2384                   lastpos = pos;
2385                   lastpos_byte = pos_byte;
2386                 }
2387               else if (delbackslash)
2388                 {
2389                   middle = substring_both (newtext, lastpos,
2390                                            lastpos_byte,
2391                                            pos - 1, pos_byte - 1);
2392
2393                   accum = concat2 (accum, middle);
2394                   lastpos = pos;
2395                   lastpos_byte = pos_byte;
2396                 }
2397             }
2398
2399           if (pos != lastpos)
2400             middle = substring_both (newtext, lastpos,
2401                                      lastpos_byte,
2402                                      pos, pos_byte);
2403           else
2404             middle = Qnil;
2405
2406           newtext = concat2 (accum, middle);
2407         }
2408
2409       /* Do case substitution in NEWTEXT if desired.  */
2410       if (case_action == all_caps)
2411         newtext = Fupcase (newtext);
2412       else if (case_action == cap_initial)
2413         newtext = Fupcase_initials (newtext);
2414
2415       return concat3 (before, newtext, after);
2416     }
2417
2418   /* Record point, then move (quietly) to the start of the match.  */
2419   if (PT >= search_regs.end[sub])
2420     opoint = PT - ZV;
2421   else if (PT > search_regs.start[sub])
2422     opoint = search_regs.end[sub] - ZV;
2423   else
2424     opoint = PT;
2425
2426   /* If we want non-literal replacement,
2427      perform substitution on the replacement string.  */
2428   if (NILP (literal))
2429     {
2430       int length = STRING_BYTES (XSTRING (newtext));
2431       unsigned char *substed;
2432       int substed_alloc_size, substed_len;
2433       int buf_multibyte = !NILP (current_buffer->enable_multibyte_characters);
2434       int str_multibyte = STRING_MULTIBYTE (newtext);
2435       Lisp_Object rev_tbl;
2436       int really_changed = 0;
2437
2438       rev_tbl= (!buf_multibyte && CHAR_TABLE_P (Vnonascii_translation_table)
2439                 ? Fchar_table_extra_slot (Vnonascii_translation_table,
2440                                           make_number (0))
2441                 : Qnil);
2442
2443       substed_alloc_size = length * 2 + 100;
2444       substed = (unsigned char *) xmalloc (substed_alloc_size + 1);
2445       substed_len = 0;
2446
2447       /* Go thru NEWTEXT, producing the actual text to insert in
2448          SUBSTED while adjusting multibyteness to that of the current
2449          buffer.  */
2450
2451       for (pos_byte = 0, pos = 0; pos_byte < length;)
2452         {
2453           unsigned char str[MAX_MULTIBYTE_LENGTH];
2454           unsigned char *add_stuff = NULL;
2455           int add_len = 0;
2456           int idx = -1;
2457
2458           if (str_multibyte)
2459             {
2460               FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte);
2461               if (!buf_multibyte)
2462                 c = multibyte_char_to_unibyte (c, rev_tbl);
2463             }
2464           else
2465             {
2466               /* Note that we don't have to increment POS.  */
2467               c = XSTRING (newtext)->data[pos_byte++];
2468               if (buf_multibyte)
2469                 c = unibyte_char_to_multibyte (c);
2470             }
2471
2472           /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2473              or set IDX to a match index, which means put that part
2474              of the buffer text into SUBSTED.  */
2475
2476           if (c == '\\')
2477             {
2478               really_changed = 1;
2479
2480               if (str_multibyte)
2481                 {
2482                   FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext,
2483                                                       pos, pos_byte);
2484                   if (!buf_multibyte && !SINGLE_BYTE_CHAR_P (c))
2485                     c = multibyte_char_to_unibyte (c, rev_tbl);
2486                 }
2487               else
2488                 {
2489                   c = XSTRING (newtext)->data[pos_byte++];
2490                   if (buf_multibyte)
2491                     c = unibyte_char_to_multibyte (c);
2492                 }
2493
2494               if (c == '&')
2495                 idx = sub;
2496               else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
2497                 {
2498                   if (search_regs.start[c - '0'] >= 1)
2499                     idx = c - '0';
2500                 }
2501               else if (c == '\\')
2502                 add_len = 1, add_stuff = "\\";
2503               else
2504                 {
2505                   xfree (substed);
2506                   error ("Invalid use of `\\' in replacement text");
2507                 }
2508             }
2509           else
2510             {
2511               add_len = CHAR_STRING (c, str);
2512               add_stuff = str;
2513             }
2514
2515           /* If we want to copy part of a previous match,
2516              set up ADD_STUFF and ADD_LEN to point to it.  */
2517           if (idx >= 0)
2518             {
2519               int begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
2520               add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
2521               if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
2522                 move_gap (search_regs.start[idx]);
2523               add_stuff = BYTE_POS_ADDR (begbyte);
2524             }
2525
2526           /* Now the stuff we want to add to SUBSTED
2527              is invariably ADD_LEN bytes starting at ADD_STUFF.  */
2528
2529           /* Make sure SUBSTED is big enough.  */
2530           if (substed_len + add_len >= substed_alloc_size)
2531             {
2532               substed_alloc_size = substed_len + add_len + 500;
2533               substed = (unsigned char *) xrealloc (substed,
2534                                                     substed_alloc_size + 1);
2535             }
2536
2537           /* Now add to the end of SUBSTED.  */
2538           if (add_stuff)
2539             {
2540               bcopy (add_stuff, substed + substed_len, add_len);
2541               substed_len += add_len;
2542             }
2543         }
2544
2545       if (really_changed)
2546         newtext = make_string (substed, substed_len);
2547
2548       xfree (substed);
2549     }
2550
2551   /* Replace the old text with the new in the cleanest possible way.  */
2552   replace_range (search_regs.start[sub], search_regs.end[sub],
2553                  newtext, 1, 0, 1);
2554   newpoint = search_regs.start[sub] + XSTRING (newtext)->size;
2555
2556   if (case_action == all_caps)
2557     Fupcase_region (make_number (search_regs.start[sub]),
2558                     make_number (newpoint));
2559   else if (case_action == cap_initial)
2560     Fupcase_initials_region (make_number (search_regs.start[sub]),
2561                              make_number (newpoint));
2562
2563   /* Put point back where it was in the text.  */
2564   if (opoint <= 0)
2565     TEMP_SET_PT (opoint + ZV);
2566   else
2567     TEMP_SET_PT (opoint);
2568
2569   /* Now move point "officially" to the start of the inserted replacement.  */
2570   move_if_not_intangible (newpoint);
2571
2572   return Qnil;
2573 }
2574 \f
2575 static Lisp_Object
2576 match_limit (num, beginningp)
2577      Lisp_Object num;
2578      int beginningp;
2579 {
2580   register int n;
2581
2582   CHECK_NUMBER (num);
2583   n = XINT (num);
2584   if (n < 0 || n >= search_regs.num_regs)
2585     args_out_of_range (num, make_number (search_regs.num_regs));
2586   if (search_regs.num_regs <= 0
2587       || search_regs.start[n] < 0)
2588     return Qnil;
2589   return (make_number ((beginningp) ? search_regs.start[n]
2590                                     : search_regs.end[n]));
2591 }
2592
2593 DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
2594        doc: /* Return position of start of text matched by last search.
2595 SUBEXP, a number, specifies which parenthesized expression in the last
2596   regexp.
2597 Value is nil if SUBEXPth pair didn't match, or there were less than
2598   SUBEXP pairs.
2599 Zero means the entire text matched by the whole regexp or whole string.  */)
2600      (subexp)
2601      Lisp_Object subexp;
2602 {
2603   return match_limit (subexp, 1);
2604 }
2605
2606 DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
2607        doc: /* Return position of end of text matched by last search.
2608 SUBEXP, a number, specifies which parenthesized expression in the last
2609   regexp.
2610 Value is nil if SUBEXPth pair didn't match, or there were less than
2611   SUBEXP pairs.
2612 Zero means the entire text matched by the whole regexp or whole string.  */)
2613      (subexp)
2614      Lisp_Object subexp;
2615 {
2616   return match_limit (subexp, 0);
2617 }
2618
2619 DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 2, 0,
2620        doc: /* Return a list containing all info on what the last search matched.
2621 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2622 All the elements are markers or nil (nil if the Nth pair didn't match)
2623 if the last match was on a buffer; integers or nil if a string was matched.
2624 Use `store-match-data' to reinstate the data in this list.
2625
2626 If INTEGERS (the optional first argument) is non-nil, always use integers
2627 \(rather than markers) to represent buffer positions.
2628 If REUSE is a list, reuse it as part of the value.  If REUSE is long enough
2629 to hold all the values, and if INTEGERS is non-nil, no consing is done.  */)
2630      (integers, reuse)
2631      Lisp_Object integers, reuse;
2632 {
2633   Lisp_Object tail, prev;
2634   Lisp_Object *data;
2635   int i, len;
2636
2637   if (NILP (last_thing_searched))
2638     return Qnil;
2639
2640   prev = Qnil;
2641
2642   data = (Lisp_Object *) alloca ((2 * search_regs.num_regs)
2643                                  * sizeof (Lisp_Object));
2644
2645   len = -1;
2646   for (i = 0; i < search_regs.num_regs; i++)
2647     {
2648       int start = search_regs.start[i];
2649       if (start >= 0)
2650         {
2651           if (EQ (last_thing_searched, Qt)
2652               || ! NILP (integers))
2653             {
2654               XSETFASTINT (data[2 * i], start);
2655               XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
2656             }
2657           else if (BUFFERP (last_thing_searched))
2658             {
2659               data[2 * i] = Fmake_marker ();
2660               Fset_marker (data[2 * i],
2661                            make_number (start),
2662                            last_thing_searched);
2663               data[2 * i + 1] = Fmake_marker ();
2664               Fset_marker (data[2 * i + 1],
2665                            make_number (search_regs.end[i]),
2666                            last_thing_searched);
2667             }
2668           else
2669             /* last_thing_searched must always be Qt, a buffer, or Qnil.  */
2670             abort ();
2671
2672           len = i;
2673         }
2674       else
2675         data[2 * i] = data [2 * i + 1] = Qnil;
2676     }
2677
2678   /* If REUSE is not usable, cons up the values and return them.  */
2679   if (! CONSP (reuse))
2680     return Flist (2 * len + 2, data);
2681
2682   /* If REUSE is a list, store as many value elements as will fit
2683      into the elements of REUSE.  */
2684   for (i = 0, tail = reuse; CONSP (tail);
2685        i++, tail = XCDR (tail))
2686     {
2687       if (i < 2 * len + 2)
2688         XSETCAR (tail, data[i]);
2689       else
2690         XSETCAR (tail, Qnil);
2691       prev = tail;
2692     }
2693
2694   /* If we couldn't fit all value elements into REUSE,
2695      cons up the rest of them and add them to the end of REUSE.  */
2696   if (i < 2 * len + 2)
2697     XSETCDR (prev, Flist (2 * len + 2 - i, data + i));
2698
2699   return reuse;
2700 }
2701
2702
2703 DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 1, 0,
2704        doc: /* Set internal data on last search match from elements of LIST.
2705 LIST should have been created by calling `match-data' previously.  */)
2706      (list)
2707      register Lisp_Object list;
2708 {
2709   register int i;
2710   register Lisp_Object marker;
2711
2712   if (running_asynch_code)
2713     save_search_regs ();
2714
2715   if (!CONSP (list) && !NILP (list))
2716     list = wrong_type_argument (Qconsp, list);
2717
2718   /* Unless we find a marker with a buffer in LIST, assume that this
2719      match data came from a string.  */
2720   last_thing_searched = Qt;
2721
2722   /* Allocate registers if they don't already exist.  */
2723   {
2724     int length = XFASTINT (Flength (list)) / 2;
2725
2726     if (length > search_regs.num_regs)
2727       {
2728         if (search_regs.num_regs == 0)
2729           {
2730             search_regs.start
2731               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2732             search_regs.end
2733               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2734           }
2735         else
2736           {
2737             search_regs.start
2738               = (regoff_t *) xrealloc (search_regs.start,
2739                                        length * sizeof (regoff_t));
2740             search_regs.end
2741               = (regoff_t *) xrealloc (search_regs.end,
2742                                        length * sizeof (regoff_t));
2743           }
2744
2745         for (i = search_regs.num_regs; i < length; i++)
2746           search_regs.start[i] = -1;
2747
2748         search_regs.num_regs = length;
2749       }
2750   }
2751
2752   for (i = 0; i < search_regs.num_regs; i++)
2753     {
2754       marker = Fcar (list);
2755       if (NILP (marker))
2756         {
2757           search_regs.start[i] = -1;
2758           list = Fcdr (list);
2759         }
2760       else
2761         {
2762           int from;
2763
2764           if (MARKERP (marker))
2765             {
2766               if (XMARKER (marker)->buffer == 0)
2767                 XSETFASTINT (marker, 0);
2768               else
2769                 XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
2770             }
2771
2772           CHECK_NUMBER_COERCE_MARKER (marker);
2773           from = XINT (marker);
2774           list = Fcdr (list);
2775
2776           marker = Fcar (list);
2777           if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
2778             XSETFASTINT (marker, 0);
2779
2780           CHECK_NUMBER_COERCE_MARKER (marker);
2781           search_regs.start[i] = from;
2782           search_regs.end[i] = XINT (marker);
2783         }
2784       list = Fcdr (list);
2785     }
2786
2787   return Qnil;
2788 }
2789
/* If non-zero, the match data have been saved in saved_search_regs
   during the execution of a sentinel or filter.  Set by
   save_search_regs and cleared again by restore_match_data.  */
static int search_regs_saved;

/* The registers stashed by save_search_regs; meaningful only while
   search_regs_saved is non-zero.  */
static struct re_registers saved_search_regs;
2794
2795 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
2796    if asynchronous code (filter or sentinel) is running. */
2797 static void
2798 save_search_regs ()
2799 {
2800   if (!search_regs_saved)
2801     {
2802       saved_search_regs.num_regs = search_regs.num_regs;
2803       saved_search_regs.start = search_regs.start;
2804       saved_search_regs.end = search_regs.end;
2805       search_regs.num_regs = 0;
2806       search_regs.start = 0;
2807       search_regs.end = 0;
2808
2809       search_regs_saved = 1;
2810     }
2811 }
2812
2813 /* Called upon exit from filters and sentinels. */
2814 void
2815 restore_match_data ()
2816 {
2817   if (search_regs_saved)
2818     {
2819       if (search_regs.num_regs > 0)
2820         {
2821           xfree (search_regs.start);
2822           xfree (search_regs.end);
2823         }
2824       search_regs.num_regs = saved_search_regs.num_regs;
2825       search_regs.start = saved_search_regs.start;
2826       search_regs.end = saved_search_regs.end;
2827
2828       search_regs_saved = 0;
2829     }
2830 }
2831
2832 /* Quote a string to inactivate reg-expr chars */
2833
2834 DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
2835        doc: /* Return a regexp string which matches exactly STRING and nothing else.  */)
2836      (string)
2837      Lisp_Object string;
2838 {
2839   register unsigned char *in, *out, *end;
2840   register unsigned char *temp;
2841   int backslashes_added = 0;
2842
2843   CHECK_STRING (string);
2844
2845   temp = (unsigned char *) alloca (STRING_BYTES (XSTRING (string)) * 2);
2846
2847   /* Now copy the data into the new string, inserting escapes. */
2848
2849   in = XSTRING (string)->data;
2850   end = in + STRING_BYTES (XSTRING (string));
2851   out = temp;
2852
2853   for (; in != end; in++)
2854     {
2855       if (*in == '[' || *in == ']'
2856           || *in == '*' || *in == '.' || *in == '\\'
2857           || *in == '?' || *in == '+'
2858           || *in == '^' || *in == '$')
2859         *out++ = '\\', backslashes_added++;
2860       *out++ = *in;
2861     }
2862
2863   return make_specified_string (temp,
2864                                 XSTRING (string)->size + backslashes_added,
2865                                 out - temp,
2866                                 STRING_MULTIBYTE (string));
2867 }
2868 \f
2869 void
2870 syms_of_search ()
2871 {
2872   register int i;
2873
2874   for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
2875     {
2876       searchbufs[i].buf.allocated = 100;
2877       searchbufs[i].buf.buffer = (unsigned char *) malloc (100);
2878       searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
2879       searchbufs[i].regexp = Qnil;
2880       staticpro (&searchbufs[i].regexp);
2881       searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
2882     }
2883   searchbuf_head = &searchbufs[0];
2884
2885   Qsearch_failed = intern ("search-failed");
2886   staticpro (&Qsearch_failed);
2887   Qinvalid_regexp = intern ("invalid-regexp");
2888   staticpro (&Qinvalid_regexp);
2889
2890   Fput (Qsearch_failed, Qerror_conditions,
2891         Fcons (Qsearch_failed, Fcons (Qerror, Qnil)));
2892   Fput (Qsearch_failed, Qerror_message,
2893         build_string ("Search failed"));
2894
2895   Fput (Qinvalid_regexp, Qerror_conditions,
2896         Fcons (Qinvalid_regexp, Fcons (Qerror, Qnil)));
2897   Fput (Qinvalid_regexp, Qerror_message,
2898         build_string ("Invalid regexp"));
2899
2900   last_thing_searched = Qnil;
2901   staticpro (&last_thing_searched);
2902
2903   defsubr (&Slooking_at);
2904   defsubr (&Sposix_looking_at);
2905   defsubr (&Sstring_match);
2906   defsubr (&Sposix_string_match);
2907   defsubr (&Ssearch_forward);
2908   defsubr (&Ssearch_backward);
2909   defsubr (&Sword_search_forward);
2910   defsubr (&Sword_search_backward);
2911   defsubr (&Sre_search_forward);
2912   defsubr (&Sre_search_backward);
2913   defsubr (&Sposix_search_forward);
2914   defsubr (&Sposix_search_backward);
2915   defsubr (&Sreplace_match);
2916   defsubr (&Smatch_beginning);
2917   defsubr (&Smatch_end);
2918   defsubr (&Smatch_data);
2919   defsubr (&Sset_match_data);
2920   defsubr (&Sregexp_quote);
2921 }