src/search.c

   1 /* String search routines for GNU Emacs.
   2    Copyright (C) 1985, 1986, 1987, 1993, 1994, 1997, 1998, 1999, 2002, 2003,
   3                  2004, 2005, 2006 Free Software Foundation, Inc.
   4
   5 This file is part of GNU Emacs.
   6
   7 GNU Emacs is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2, or (at your option)
  10 any later version.
  11
  12 GNU Emacs is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU Emacs; see the file COPYING.  If not, write to
  19 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
  20 Boston, MA 02110-1301, USA.  */
  21
  22
  23 #include <config.h>
  24 #include "lisp.h"
  25 #include "syntax.h"
  26 #include "category.h"
  27 #include "buffer.h"
  28 #include "charset.h"
  29 #include "region-cache.h"
  30 #include "commands.h"
  31 #include "blockinput.h"
  32 #include "intervals.h"
  33
  34 #include <sys/types.h>
  35 #include "regex.h"
  36
  37 #define REGEXP_CACHE_SIZE 20
  38
  39 /* If the regexp is non-nil, then the buffer contains the compiled form
  40    of that regexp, suitable for searching.  */
  41 struct regexp_cache
  42 {
  43   struct regexp_cache *next;
  44   Lisp_Object regexp, whitespace_regexp;
  45   struct re_pattern_buffer buf;
  46   char fastmap[0400];
  47   /* Nonzero means regexp was compiled to do full POSIX backtracking.  */
  48   char posix;
  49 };
  50
  51 /* The instances of that struct.  */
  52 struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
  53
  54 /* The head of the linked list; points to the most recently used buffer.  */
  55 struct regexp_cache *searchbuf_head;
  56
  57
  58 /* Every call to re_match, etc., must pass &search_regs as the regs
  59    argument unless you can show it is unnecessary (i.e., if re_match
  60    is certainly going to be called again before region-around-match
  61    can be called).
  62
  63    Since the registers are now dynamically allocated, we need to make
  64    sure not to refer to the Nth register before checking that it has
  65    been allocated by checking search_regs.num_regs.
  66
  67    The regex code keeps track of whether it has allocated the search
  68    buffer using bits in the re_pattern_buffer.  This means that whenever
  69    you compile a new pattern, it completely forgets whether it has
  70    allocated any registers, and will allocate new registers the next
  71    time you call a searching or matching function.  Therefore, we need
  72    to call re_set_registers after compiling a new pattern or after
  73    setting the match registers, so that the regex functions will be
  74    able to free or re-allocate it properly.  */
  75 static struct re_registers search_regs;
  76
  77 /* The buffer in which the last search was performed, or
  78    Qt if the last search was done in a string;
  79    Qnil if no searching has been done yet.  */
  80 static Lisp_Object last_thing_searched;
  81
  82 /* error condition signaled when regexp compile_pattern fails */
  83
  84 Lisp_Object Qinvalid_regexp;
  85
  86 /* Error condition used for failing searches */
  87 Lisp_Object Qsearch_failed;
  88
  89 Lisp_Object Vsearch_spaces_regexp;
  90
  91 static void set_search_regs ();
  92 static void save_search_regs ();
  93 static int simple_search ();
  94 static int boyer_moore ();
  95 static int search_buffer ();
  96 static void matcher_overflow () NO_RETURN;
  97
  98 static void
  99 matcher_overflow ()
 100 {
 101   error ("Stack overflow in regexp matcher");
 102 }
 103
 104 /* Compile a regexp and signal a Lisp error if anything goes wrong.
 105    PATTERN is the pattern to compile.
 106    CP is the place to put the result.
 107    TRANSLATE is a translation table for ignoring case, or nil for none.
 108    REGP is the structure that says where to store the "register"
 109    values that will result from matching this pattern.
 110    If it is 0, we should compile the pattern not to record any
 111    subexpression bounds.
 112    POSIX is nonzero if we want full backtracking (POSIX style)
 113    for this pattern.  0 means backtrack only enough to get a valid match.
 114    MULTIBYTE is nonzero if we want to handle multibyte characters in
 115    PATTERN.  0 means all multibyte characters are recognized just as
 116    sequences of binary data.
 117
 118    The behavior also depends on Vsearch_spaces_regexp.  */
 119
 120 static void
 121 compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte)
 122      struct regexp_cache *cp;
 123      Lisp_Object pattern;
 124      Lisp_Object translate;
 125      struct re_registers *regp;
 126      int posix;
 127      int multibyte;
 128 {
 129   unsigned char *raw_pattern;
 130   int raw_pattern_size;
 131   char *val;
 132   reg_syntax_t old;
 133
 134   /* MULTIBYTE says whether the text to be searched is multibyte.
 135      We must convert PATTERN to match that, or we will not really
 136      find things right.  */
 137
 138   if (multibyte == STRING_MULTIBYTE (pattern))
 139     {
 140       raw_pattern = (unsigned char *) SDATA (pattern);
 141       raw_pattern_size = SBYTES (pattern);
 142     }
 143   else if (multibyte)
 144     {
 145       raw_pattern_size = count_size_as_multibyte (SDATA (pattern),
 146                                                   SCHARS (pattern));
 147       raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
 148       copy_text (SDATA (pattern), raw_pattern,
 149                  SCHARS (pattern), 0, 1);
 150     }
 151   else
 152     {
 153       /* Converting multibyte to single-byte.
 154
 155          ??? Perhaps this conversion should be done in a special way
 156          by subtracting nonascii-insert-offset from each non-ASCII char,
 157          so that only the multibyte chars which really correspond to
 158          the chosen single-byte character set can possibly match.  */
 159       raw_pattern_size = SCHARS (pattern);
 160       raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
 161       copy_text (SDATA (pattern), raw_pattern,
 162                  SBYTES (pattern), 1, 0);
 163     }
 164
 165   cp->regexp = Qnil;
 166   cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
 167   cp->posix = posix;
 168   cp->buf.multibyte = multibyte;
 169   cp->whitespace_regexp = Vsearch_spaces_regexp;
 170   BLOCK_INPUT;
 171   old = re_set_syntax (RE_SYNTAX_EMACS
 172                        | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
 173
 174   re_set_whitespace_regexp (NILP (Vsearch_spaces_regexp) ? NULL
 175                             : SDATA (Vsearch_spaces_regexp));
 176
 177   val = (char *) re_compile_pattern ((char *)raw_pattern,
 178                                      raw_pattern_size, &cp->buf);
 179
 180   re_set_whitespace_regexp (NULL);
 181
 182   re_set_syntax (old);
 183   UNBLOCK_INPUT;
 184   if (val)
 185     xsignal1 (Qinvalid_regexp, build_string (val));
 186
 187   cp->regexp = Fcopy_sequence (pattern);
 188 }
 189
 190 /* Shrink each compiled regexp buffer in the cache
 191    to the size actually used right now.
 192    This is called from garbage collection.  */
 193
 194 void
 195 shrink_regexp_cache ()
 196 {
 197   struct regexp_cache *cp;
 198
 199   for (cp = searchbuf_head; cp != 0; cp = cp->next)
 200     {
 201       cp->buf.allocated = cp->buf.used;
 202       cp->buf.buffer
 203         = (unsigned char *) xrealloc (cp->buf.buffer, cp->buf.used);
 204     }
 205 }
 206
 207 /* Compile a regexp if necessary, but first check to see if there's one in
 208    the cache.
 209    PATTERN is the pattern to compile.
 210    TRANSLATE is a translation table for ignoring case, or nil for none.
 211    REGP is the structure that says where to store the "register"
 212    values that will result from matching this pattern.
 213    If it is 0, we should compile the pattern not to record any
 214    subexpression bounds.
 215    POSIX is nonzero if we want full backtracking (POSIX style)
 216    for this pattern.  0 means backtrack only enough to get a valid match.  */
 217
 218 struct re_pattern_buffer *
 219 compile_pattern (pattern, regp, translate, posix, multibyte)
 220      Lisp_Object pattern;
 221      struct re_registers *regp;
 222      Lisp_Object translate;
 223      int posix, multibyte;
 224 {
 225   struct regexp_cache *cp, **cpp;
 226
 227   for (cpp = &searchbuf_head; ; cpp = &cp->next)
 228     {
 229       cp = *cpp;
 230       /* Entries are initialized to nil, and may be set to nil by
 231          compile_pattern_1 if the pattern isn't valid.  Don't apply
 232          string accessors in those cases.  However, compile_pattern_1
 233          is only applied to the cache entry we pick here to reuse.  So
 234          nil should never appear before a non-nil entry.  */
 235       if (NILP (cp->regexp))
 236         goto compile_it;
 237       if (SCHARS (cp->regexp) == SCHARS (pattern)
 238           && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
 239           && !NILP (Fstring_equal (cp->regexp, pattern))
 240           && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
 241           && cp->posix == posix
 242           && cp->buf.multibyte == multibyte
 243           && !NILP (Fequal (cp->whitespace_regexp, Vsearch_spaces_regexp)))
 244         break;
 245
 246       /* If we're at the end of the cache, compile into the nil cell
 247          we found, or the last (least recently used) cell with a
 248          string value.  */
 249       if (cp->next == 0)
 250         {
 251         compile_it:
 252           compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte);
 253           break;
 254         }
 255     }
 256
 257   /* When we get here, cp (aka *cpp) contains the compiled pattern,
 258      either because we found it in the cache or because we just compiled it.
 259      Move it to the front of the queue to mark it as most recently used.  */
 260   *cpp = cp->next;
 261   cp->next = searchbuf_head;
 262   searchbuf_head = cp;
 263
 264   /* Advise the searching functions about the space we have allocated
 265      for register data.  */
 266   if (regp)
 267     re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
 268
 269   return &cp->buf;
 270 }
 271
 272 \f
 273 static Lisp_Object
 274 looking_at_1 (string, posix)
 275      Lisp_Object string;
 276      int posix;
 277 {
 278   Lisp_Object val;
 279   unsigned char *p1, *p2;
 280   int s1, s2;
 281   register int i;
 282   struct re_pattern_buffer *bufp;
 283
 284   if (running_asynch_code)
 285     save_search_regs ();
 286
 287   CHECK_STRING (string);
 288   bufp = compile_pattern (string, &search_regs,
 289                           (!NILP (current_buffer->case_fold_search)
 290                            ? current_buffer->case_canon_table : Qnil),
 291                           posix,
 292                           !NILP (current_buffer->enable_multibyte_characters));
 293
 294   immediate_quit = 1;
 295   QUIT;                 /* Do a pending quit right away, to avoid paradoxical behavior */
 296
 297   /* Get pointers and sizes of the two strings
 298      that make up the visible portion of the buffer. */
 299
 300   p1 = BEGV_ADDR;
 301   s1 = GPT_BYTE - BEGV_BYTE;
 302   p2 = GAP_END_ADDR;
 303   s2 = ZV_BYTE - GPT_BYTE;
 304   if (s1 < 0)
 305     {
 306       p2 = p1;
 307       s2 = ZV_BYTE - BEGV_BYTE;
 308       s1 = 0;
 309     }
 310   if (s2 < 0)
 311     {
 312       s1 = ZV_BYTE - BEGV_BYTE;
 313       s2 = 0;
 314     }
 315
 316   re_match_object = Qnil;
 317
 318   i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
 319                   PT_BYTE - BEGV_BYTE, &search_regs,
 320                   ZV_BYTE - BEGV_BYTE);
 321   immediate_quit = 0;
 322
 323   if (i == -2)
 324     matcher_overflow ();
 325
 326   val = (0 <= i ? Qt : Qnil);
 327   if (i >= 0)
 328     for (i = 0; i < search_regs.num_regs; i++)
 329       if (search_regs.start[i] >= 0)
 330         {
 331           search_regs.start[i]
 332             = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
 333           search_regs.end[i]
 334             = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
 335         }
 336   XSETBUFFER (last_thing_searched, current_buffer);
 337   return val;
 338 }
 339
 340 DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
 341        doc: /* Return t if text after point matches regular expression REGEXP.
 342 This function modifies the match data that `match-beginning',
 343 `match-end' and `match-data' access; save and restore the match
 344 data if you want to preserve them.  */)
 345      (regexp)
 346      Lisp_Object regexp;
 347 {
 348   return looking_at_1 (regexp, 0);
 349 }
 350
 351 DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
 352        doc: /* Return t if text after point matches regular expression REGEXP.
 353 Find the longest match, in accord with Posix regular expression rules.
 354 This function modifies the match data that `match-beginning',
 355 `match-end' and `match-data' access; save and restore the match
 356 data if you want to preserve them.  */)
 357      (regexp)
 358      Lisp_Object regexp;
 359 {
 360   return looking_at_1 (regexp, 1);
 361 }
 362 \f
 363 static Lisp_Object
 364 string_match_1 (regexp, string, start, posix)
 365      Lisp_Object regexp, string, start;
 366      int posix;
 367 {
 368   int val;
 369   struct re_pattern_buffer *bufp;
 370   int pos, pos_byte;
 371   int i;
 372
 373   if (running_asynch_code)
 374     save_search_regs ();
 375
 376   CHECK_STRING (regexp);
 377   CHECK_STRING (string);
 378
 379   if (NILP (start))
 380     pos = 0, pos_byte = 0;
 381   else
 382     {
 383       int len = SCHARS (string);
 384
 385       CHECK_NUMBER (start);
 386       pos = XINT (start);
 387       if (pos < 0 && -pos <= len)
 388         pos = len + pos;
 389       else if (0 > pos || pos > len)
 390         args_out_of_range (string, start);
 391       pos_byte = string_char_to_byte (string, pos);
 392     }
 393
 394   bufp = compile_pattern (regexp, &search_regs,
 395                           (!NILP (current_buffer->case_fold_search)
 396                            ? current_buffer->case_canon_table : Qnil),
 397                           posix,
 398                           STRING_MULTIBYTE (string));
 399   immediate_quit = 1;
 400   re_match_object = string;
 401
 402   val = re_search (bufp, (char *) SDATA (string),
 403                    SBYTES (string), pos_byte,
 404                    SBYTES (string) - pos_byte,
 405                    &search_regs);
 406   immediate_quit = 0;
 407   last_thing_searched = Qt;
 408   if (val == -2)
 409     matcher_overflow ();
 410   if (val < 0) return Qnil;
 411
 412   for (i = 0; i < search_regs.num_regs; i++)
 413     if (search_regs.start[i] >= 0)
 414       {
 415         search_regs.start[i]
 416           = string_byte_to_char (string, search_regs.start[i]);
 417         search_regs.end[i]
 418           = string_byte_to_char (string, search_regs.end[i]);
 419       }
 420
 421   return make_number (string_byte_to_char (string, val));
 422 }
 423
 424 DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
 425        doc: /* Return index of start of first match for REGEXP in STRING, or nil.
 426 Matching ignores case if `case-fold-search' is non-nil.
 427 If third arg START is non-nil, start search at that index in STRING.
 428 For index of first char beyond the match, do (match-end 0).
 429 `match-end' and `match-beginning' also give indices of substrings
 430 matched by parenthesis constructs in the pattern.
 431
 432 You can use the function `match-string' to extract the substrings
 433 matched by the parenthesis constructions in REGEXP. */)
 434      (regexp, string, start)
 435      Lisp_Object regexp, string, start;
 436 {
 437   return string_match_1 (regexp, string, start, 0);
 438 }
 439
 440 DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
 441        doc: /* Return index of start of first match for REGEXP in STRING, or nil.
 442 Find the longest match, in accord with Posix regular expression rules.
 443 Case is ignored if `case-fold-search' is non-nil in the current buffer.
 444 If third arg START is non-nil, start search at that index in STRING.
 445 For index of first char beyond the match, do (match-end 0).
 446 `match-end' and `match-beginning' also give indices of substrings
 447 matched by parenthesis constructs in the pattern.  */)
 448      (regexp, string, start)
 449      Lisp_Object regexp, string, start;
 450 {
 451   return string_match_1 (regexp, string, start, 1);
 452 }
 453
 454 /* Match REGEXP against STRING, searching all of STRING,
 455    and return the index of the match, or negative on failure.
 456    This does not clobber the match data.  */
 457
 458 int
 459 fast_string_match (regexp, string)
 460      Lisp_Object regexp, string;
 461 {
 462   int val;
 463   struct re_pattern_buffer *bufp;
 464
 465   bufp = compile_pattern (regexp, 0, Qnil,
 466                           0, STRING_MULTIBYTE (string));
 467   immediate_quit = 1;
 468   re_match_object = string;
 469
 470   val = re_search (bufp, (char *) SDATA (string),
 471                    SBYTES (string), 0,
 472                    SBYTES (string), 0);
 473   immediate_quit = 0;
 474   return val;
 475 }
 476
 477 /* Match REGEXP against STRING, searching all of STRING ignoring case,
 478    and return the index of the match, or negative on failure.
 479    This does not clobber the match data.
 480    We assume that STRING contains single-byte characters.  */
 481
 482 extern Lisp_Object Vascii_downcase_table;
 483
 484 int
 485 fast_c_string_match_ignore_case (regexp, string)
 486      Lisp_Object regexp;
 487      const char *string;
 488 {
 489   int val;
 490   struct re_pattern_buffer *bufp;
 491   int len = strlen (string);
 492
 493   regexp = string_make_unibyte (regexp);
 494   re_match_object = Qt;
 495   bufp = compile_pattern (regexp, 0,
 496                           Vascii_canon_table, 0,
 497                           0);
 498   immediate_quit = 1;
 499   val = re_search (bufp, string, len, 0, len, 0);
 500   immediate_quit = 0;
 501   return val;
 502 }
 503
 504 /* Like fast_string_match but ignore case.  */
 505
 506 int
 507 fast_string_match_ignore_case (regexp, string)
 508      Lisp_Object regexp, string;
 509 {
 510   int val;
 511   struct re_pattern_buffer *bufp;
 512
 513   bufp = compile_pattern (regexp, 0, Vascii_canon_table,
 514                           0, STRING_MULTIBYTE (string));
 515   immediate_quit = 1;
 516   re_match_object = string;
 517
 518   val = re_search (bufp, (char *) SDATA (string),
 519                    SBYTES (string), 0,
 520                    SBYTES (string), 0);
 521   immediate_quit = 0;
 522   return val;
 523 }
 524 \f
 525 /* The newline cache: remembering which sections of text have no newlines.  */
 526
 527 /* If the user has requested newline caching, make sure it's on.
 528    Otherwise, make sure it's off.
 529    This is our cheezy way of associating an action with the change of
 530    state of a buffer-local variable.  */
 531 static void
 532 newline_cache_on_off (buf)
 533      struct buffer *buf;
 534 {
 535   if (NILP (buf->cache_long_line_scans))
 536     {
 537       /* It should be off.  */
 538       if (buf->newline_cache)
 539         {
 540           free_region_cache (buf->newline_cache);
 541           buf->newline_cache = 0;
 542         }
 543     }
 544   else
 545     {
 546       /* It should be on.  */
 547       if (buf->newline_cache == 0)
 548         buf->newline_cache = new_region_cache ();
 549     }
 550 }
 551
 552 \f
 553 /* Search for COUNT instances of the character TARGET between START and END.
 554
 555    If COUNT is positive, search forwards; END must be >= START.
 556    If COUNT is negative, search backwards for the -COUNTth instance;
 557       END must be <= START.
 558    If COUNT is zero, do anything you please; run rogue, for all I care.
 559
 560    If END is zero, use BEGV or ZV instead, as appropriate for the
 561    direction indicated by COUNT.
 562
 563    If we find COUNT instances, set *SHORTAGE to zero, and return the
 564    position past the COUNTth match.  Note that for reverse motion
 565    this is not the same as the usual convention for Emacs motion commands.
 566
 567    If we don't find COUNT instances before reaching END, set *SHORTAGE
 568    to the number of TARGETs left unfound, and return END.
 569
 570    If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
 571    except when inside redisplay.  */
 572
 573 int
 574 scan_buffer (target, start, end, count, shortage, allow_quit)
 575      register int target;
 576      int start, end;
 577      int count;
 578      int *shortage;
 579      int allow_quit;
 580 {
 581   struct region_cache *newline_cache;
 582   int direction;
 583
 584   if (count > 0)
 585     {
 586       direction = 1;
 587       if (! end) end = ZV;
 588     }
 589   else
 590     {
 591       direction = -1;
 592       if (! end) end = BEGV;
 593     }
 594
 595   newline_cache_on_off (current_buffer);
 596   newline_cache = current_buffer->newline_cache;
 597
 598   if (shortage != 0)
 599     *shortage = 0;
 600
 601   immediate_quit = allow_quit;
 602
 603   if (count > 0)
 604     while (start != end)
 605       {
 606         /* Our innermost scanning loop is very simple; it doesn't know
 607            about gaps, buffer ends, or the newline cache.  ceiling is
 608            the position of the last character before the next such
 609            obstacle --- the last character the dumb search loop should
 610            examine.  */
 611         int ceiling_byte = CHAR_TO_BYTE (end) - 1;
 612         int start_byte = CHAR_TO_BYTE (start);
 613         int tem;
 614
 615         /* If we're looking for a newline, consult the newline cache
 616            to see where we can avoid some scanning.  */
 617         if (target == '\n' && newline_cache)
 618           {
 619             int next_change;
 620             immediate_quit = 0;
 621             while (region_cache_forward
 622                    (current_buffer, newline_cache, start_byte, &next_change))
 623               start_byte = next_change;
 624             immediate_quit = allow_quit;
 625
 626             /* START should never be after END.  */
 627             if (start_byte > ceiling_byte)
 628               start_byte = ceiling_byte;
 629
 630             /* Now the text after start is an unknown region, and
 631                next_change is the position of the next known region. */
 632             ceiling_byte = min (next_change - 1, ceiling_byte);
 633           }
 634
 635         /* The dumb loop can only scan text stored in contiguous
 636            bytes. BUFFER_CEILING_OF returns the last character
 637            position that is contiguous, so the ceiling is the
 638            position after that.  */
 639         tem = BUFFER_CEILING_OF (start_byte);
 640         ceiling_byte = min (tem, ceiling_byte);
 641
 642         {
 643           /* The termination address of the dumb loop.  */
 644           register unsigned char *ceiling_addr
 645             = BYTE_POS_ADDR (ceiling_byte) + 1;
 646           register unsigned char *cursor
 647             = BYTE_POS_ADDR (start_byte);
 648           unsigned char *base = cursor;
 649
 650           while (cursor < ceiling_addr)
 651             {
 652               unsigned char *scan_start = cursor;
 653
 654               /* The dumb loop.  */
 655               while (*cursor != target && ++cursor < ceiling_addr)
 656                 ;
 657
 658               /* If we're looking for newlines, cache the fact that
 659                  the region from start to cursor is free of them. */
 660               if (target == '\n' && newline_cache)
 661                 know_region_cache (current_buffer, newline_cache,
 662                                    start_byte + scan_start - base,
 663                                    start_byte + cursor - base);
 664
 665               /* Did we find the target character?  */
 666               if (cursor < ceiling_addr)
 667                 {
 668                   if (--count == 0)
 669                     {
 670                       immediate_quit = 0;
 671                       return BYTE_TO_CHAR (start_byte + cursor - base + 1);
 672                     }
 673                   cursor++;
 674                 }
 675             }
 676
 677           start = BYTE_TO_CHAR (start_byte + cursor - base);
 678         }
 679       }
 680   else
 681     while (start > end)
 682       {
 683         /* The last character to check before the next obstacle.  */
 684         int ceiling_byte = CHAR_TO_BYTE (end);
 685         int start_byte = CHAR_TO_BYTE (start);
 686         int tem;
 687
 688         /* Consult the newline cache, if appropriate.  */
 689         if (target == '\n' && newline_cache)
 690           {
 691             int next_change;
 692             immediate_quit = 0;
 693             while (region_cache_backward
 694                    (current_buffer, newline_cache, start_byte, &next_change))
 695               start_byte = next_change;
 696             immediate_quit = allow_quit;
 697
 698             /* Start should never be at or before end.  */
 699             if (start_byte <= ceiling_byte)
 700               start_byte = ceiling_byte + 1;
 701
 702             /* Now the text before start is an unknown region, and
 703                next_change is the position of the next known region. */
 704             ceiling_byte = max (next_change, ceiling_byte);
 705           }
 706
 707         /* Stop scanning before the gap.  */
 708         tem = BUFFER_FLOOR_OF (start_byte - 1);
 709         ceiling_byte = max (tem, ceiling_byte);
 710
 711         {
 712           /* The termination address of the dumb loop.  */
 713           register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
 714           register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
 715           unsigned char *base = cursor;
 716
 717           while (cursor >= ceiling_addr)
 718             {
 719               unsigned char *scan_start = cursor;
 720
 721               while (*cursor != target && --cursor >= ceiling_addr)
 722                 ;
 723
 724               /* If we're looking for newlines, cache the fact that
 725                  the region from after the cursor to start is free of them.  */
 726               if (target == '\n' && newline_cache)
 727                 know_region_cache (current_buffer, newline_cache,
 728                                    start_byte + cursor - base,
 729                                    start_byte + scan_start - base);
 730
 731               /* Did we find the target character?  */
 732               if (cursor >= ceiling_addr)
 733                 {
 734                   if (++count >= 0)
 735                     {
 736                       immediate_quit = 0;
 737                       return BYTE_TO_CHAR (start_byte + cursor - base);
 738                     }
 739                   cursor--;
 740                 }
 741             }
 742
 743           start = BYTE_TO_CHAR (start_byte + cursor - base);
 744         }
 745       }
 746
 747   immediate_quit = 0;
 748   if (shortage != 0)
 749     *shortage = count * direction;
 750   return start;
 751 }
 752 \f
 753 /* Search for COUNT instances of a line boundary, which means either a
 754    newline or (if selective display enabled) a carriage return.
 755    Start at START.  If COUNT is negative, search backwards.
 756
 757    We report the resulting position by calling TEMP_SET_PT_BOTH.
 758
 759    If we find COUNT instances. we position after (always after,
 760    even if scanning backwards) the COUNTth match, and return 0.
 761
 762    If we don't find COUNT instances before reaching the end of the
 763    buffer (or the beginning, if scanning backwards), we return
 764    the number of line boundaries left unfound, and position at
 765    the limit we bumped up against.
 766
 767    If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
 768    except in special cases.  */
 769
 770 int
 771 scan_newline (start, start_byte, limit, limit_byte, count, allow_quit)
 772      int start, start_byte;
 773      int limit, limit_byte;
 774      register int count;
 775      int allow_quit;
 776 {
 777   int direction = ((count > 0) ? 1 : -1);
 778
 779   register unsigned char *cursor;
 780   unsigned char *base;
 781
 782   register int ceiling;
 783   register unsigned char *ceiling_addr;
 784
 785   int old_immediate_quit = immediate_quit;
 786
 787   /* The code that follows is like scan_buffer
 788      but checks for either newline or carriage return.  */
 789
 790   if (allow_quit)
 791     immediate_quit++;
 792
 793   start_byte = CHAR_TO_BYTE (start);
 794
 795   if (count > 0)
 796     {
 797       while (start_byte < limit_byte)
 798         {
 799           ceiling =  BUFFER_CEILING_OF (start_byte);
 800           ceiling = min (limit_byte - 1, ceiling);
 801           ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
 802           base = (cursor = BYTE_POS_ADDR (start_byte));
 803           while (1)
 804             {
 805               while (*cursor != '\n' && ++cursor != ceiling_addr)
 806                 ;
 807
 808               if (cursor != ceiling_addr)
 809                 {
 810                   if (--count == 0)
 811                     {
 812                       immediate_quit = old_immediate_quit;
 813                       start_byte = start_byte + cursor - base + 1;
 814                       start = BYTE_TO_CHAR (start_byte);
 815                       TEMP_SET_PT_BOTH (start, start_byte);
 816                       return 0;
 817                     }
 818                   else
 819                     if (++cursor == ceiling_addr)
 820                       break;
 821                 }
 822               else
 823                 break;
 824             }
 825           start_byte += cursor - base;
 826         }
 827     }
 828   else
 829     {
 830       while (start_byte > limit_byte)
 831         {
 832           ceiling = BUFFER_FLOOR_OF (start_byte - 1);
 833           ceiling = max (limit_byte, ceiling);
 834           ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
 835           base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
 836           while (1)
 837             {
 838               while (--cursor != ceiling_addr && *cursor != '\n')
 839                 ;
 840
 841               if (cursor != ceiling_addr)
 842                 {
 843                   if (++count == 0)
 844                     {
 845                       immediate_quit = old_immediate_quit;
 846                       /* Return the position AFTER the match we found.  */
 847                       start_byte = start_byte + cursor - base + 1;
 848                       start = BYTE_TO_CHAR (start_byte);
 849                       TEMP_SET_PT_BOTH (start, start_byte);
 850                       return 0;
 851                     }
 852                 }
 853               else
 854                 break;
 855             }
 856           /* Here we add 1 to compensate for the last decrement
 857              of CURSOR, which took it past the valid range.  */
 858           start_byte += cursor - base + 1;
 859         }
 860     }
 861
 862   TEMP_SET_PT_BOTH (limit, limit_byte);
 863   immediate_quit = old_immediate_quit;
 864
 865   return count * direction;
 866 }
 867
 868 int
 869 find_next_newline_no_quit (from, cnt)
 870      register int from, cnt;
 871 {
 872   return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0);
 873 }
 874
 875 /* Like find_next_newline, but returns position before the newline,
 876    not after, and only search up to TO.  This isn't just
 877    find_next_newline (...)-1, because you might hit TO.  */
 878
 879 int
 880 find_before_next_newline (from, to, cnt)
 881      int from, to, cnt;
 882 {
 883   int shortage;
 884   int pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);
 885
 886   if (shortage == 0)
 887     pos--;
 888
 889   return pos;
 890 }
 891 \f
 892 /* Subroutines of Lisp buffer search functions. */
 893
 894 static Lisp_Object
 895 search_command (string, bound, noerror, count, direction, RE, posix)
 896      Lisp_Object string, bound, noerror, count;
 897      int direction;
 898      int RE;
 899      int posix;
 900 {
 901   register int np;
 902   int lim, lim_byte;
 903   int n = direction;
 904
 905   if (!NILP (count))
 906     {
 907       CHECK_NUMBER (count);
 908       n *= XINT (count);
 909     }
 910
 911   CHECK_STRING (string);
 912   if (NILP (bound))
 913     {
 914       if (n > 0)
 915         lim = ZV, lim_byte = ZV_BYTE;
 916       else
 917         lim = BEGV, lim_byte = BEGV_BYTE;
 918     }
 919   else
 920     {
 921       CHECK_NUMBER_COERCE_MARKER (bound);
 922       lim = XINT (bound);
 923       if (n > 0 ? lim < PT : lim > PT)
 924         error ("Invalid search bound (wrong side of point)");
 925       if (lim > ZV)
 926         lim = ZV, lim_byte = ZV_BYTE;
 927       else if (lim < BEGV)
 928         lim = BEGV, lim_byte = BEGV_BYTE;
 929       else
 930         lim_byte = CHAR_TO_BYTE (lim);
 931     }
 932
 933   np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
 934                       (!NILP (current_buffer->case_fold_search)
 935                        ? current_buffer->case_canon_table
 936                        : Qnil),
 937                       (!NILP (current_buffer->case_fold_search)
 938                        ? current_buffer->case_eqv_table
 939                        : Qnil),
 940                       posix);
 941   if (np <= 0)
 942     {
 943       if (NILP (noerror))
 944         xsignal1 (Qsearch_failed, string);
 945
 946       if (!EQ (noerror, Qt))
 947         {
 948           if (lim < BEGV || lim > ZV)
 949             abort ();
 950           SET_PT_BOTH (lim, lim_byte);
 951           return Qnil;
 952 #if 0 /* This would be clean, but maybe programs depend on
 953          a value of nil here.  */
 954           np = lim;
 955 #endif
 956         }
 957       else
 958         return Qnil;
 959     }
 960
 961   if (np < BEGV || np > ZV)
 962     abort ();
 963
 964   SET_PT (np);
 965
 966   return make_number (np);
 967 }
 968 \f
 969 /* Return 1 if REGEXP it matches just one constant string.  */
 970
 971 static int
 972 trivial_regexp_p (regexp)
 973      Lisp_Object regexp;
 974 {
 975   int len = SBYTES (regexp);
 976   unsigned char *s = SDATA (regexp);
 977   while (--len >= 0)
 978     {
 979       switch (*s++)
 980         {
 981         case '.': case '*': case '+': case '?': case '[': case '^': case '$':
 982           return 0;
 983         case '\\':
 984           if (--len < 0)
 985             return 0;
 986           switch (*s++)
 987             {
 988             case '|': case '(': case ')': case '`': case '\'': case 'b':
 989             case 'B': case '<': case '>': case 'w': case 'W': case 's':
 990             case 'S': case '=': case '{': case '}': case '_':
 991             case 'c': case 'C': /* for categoryspec and notcategoryspec */
 992             case '1': case '2': case '3': case '4': case '5':
 993             case '6': case '7': case '8': case '9':
 994               return 0;
 995             }
 996         }
 997     }
 998   return 1;
 999 }
1000
1001 /* Search for the n'th occurrence of STRING in the current buffer,
1002    starting at position POS and stopping at position LIM,
1003    treating STRING as a literal string if RE is false or as
1004    a regular expression if RE is true.
1005
1006    If N is positive, searching is forward and LIM must be greater than POS.
1007    If N is negative, searching is backward and LIM must be less than POS.
1008
1009    Returns -x if x occurrences remain to be found (x > 0),
1010    or else the position at the beginning of the Nth occurrence
1011    (if searching backward) or the end (if searching forward).
1012
1013    POSIX is nonzero if we want full backtracking (POSIX style)
1014    for this pattern.  0 means backtrack only enough to get a valid match.  */
1015
1016 #define TRANSLATE(out, trt, d)                  \
1017 do                                              \
1018   {                                             \
1019     if (! NILP (trt))                           \
1020       {                                         \
1021         Lisp_Object temp;                       \
1022         temp = Faref (trt, make_number (d));    \
1023         if (INTEGERP (temp))                    \
1024           out = XINT (temp);                    \
1025         else                                    \
1026           out = d;                              \
1027       }                                         \
1028     else                                        \
1029       out = d;                                  \
1030   }                                             \
1031 while (0)
1032
1033 static int
1034 search_buffer (string, pos, pos_byte, lim, lim_byte, n,
1035                RE, trt, inverse_trt, posix)
1036      Lisp_Object string;
1037      int pos;
1038      int pos_byte;
1039      int lim;
1040      int lim_byte;
1041      int n;
1042      int RE;
1043      Lisp_Object trt;
1044      Lisp_Object inverse_trt;
1045      int posix;
1046 {
1047   int len = SCHARS (string);
1048   int len_byte = SBYTES (string);
1049   register int i;
1050
1051   if (running_asynch_code)
1052     save_search_regs ();
1053
1054   /* Searching 0 times means don't move.  */
1055   /* Null string is found at starting position.  */
1056   if (len == 0 || n == 0)
1057     {
1058       set_search_regs (pos_byte, 0);
1059       return pos;
1060     }
1061
1062   if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp)))
1063     {
1064       unsigned char *p1, *p2;
1065       int s1, s2;
1066       struct re_pattern_buffer *bufp;
1067
1068       bufp = compile_pattern (string, &search_regs, trt, posix,
1069                               !NILP (current_buffer->enable_multibyte_characters));
1070
1071       immediate_quit = 1;       /* Quit immediately if user types ^G,
1072                                    because letting this function finish
1073                                    can take too long. */
1074       QUIT;                     /* Do a pending quit right away,
1075                                    to avoid paradoxical behavior */
1076       /* Get pointers and sizes of the two strings
1077          that make up the visible portion of the buffer. */
1078
1079       p1 = BEGV_ADDR;
1080       s1 = GPT_BYTE - BEGV_BYTE;
1081       p2 = GAP_END_ADDR;
1082       s2 = ZV_BYTE - GPT_BYTE;
1083       if (s1 < 0)
1084         {
1085           p2 = p1;
1086           s2 = ZV_BYTE - BEGV_BYTE;
1087           s1 = 0;
1088         }
1089       if (s2 < 0)
1090         {
1091           s1 = ZV_BYTE - BEGV_BYTE;
1092           s2 = 0;
1093         }
1094       re_match_object = Qnil;
1095
1096       while (n < 0)
1097         {
1098           int val;
1099           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1100                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1101                              &search_regs,
1102                              /* Don't allow match past current point */
1103                              pos_byte - BEGV_BYTE);
1104           if (val == -2)
1105             {
1106               matcher_overflow ();
1107             }
1108           if (val >= 0)
1109             {
1110               pos_byte = search_regs.start[0] + BEGV_BYTE;
1111               for (i = 0; i < search_regs.num_regs; i++)
1112                 if (search_regs.start[i] >= 0)
1113                   {
1114                     search_regs.start[i]
1115                       = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1116                     search_regs.end[i]
1117                       = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1118                   }
1119               XSETBUFFER (last_thing_searched, current_buffer);
1120               /* Set pos to the new position. */
1121               pos = search_regs.start[0];
1122             }
1123           else
1124             {
1125               immediate_quit = 0;
1126               return (n);
1127             }
1128           n++;
1129         }
1130       while (n > 0)
1131         {
1132           int val;
1133           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1134                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1135                              &search_regs,
1136                              lim_byte - BEGV_BYTE);
1137           if (val == -2)
1138             {
1139               matcher_overflow ();
1140             }
1141           if (val >= 0)
1142             {
1143               pos_byte = search_regs.end[0] + BEGV_BYTE;
1144               for (i = 0; i < search_regs.num_regs; i++)
1145                 if (search_regs.start[i] >= 0)
1146                   {
1147                     search_regs.start[i]
1148                       = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1149                     search_regs.end[i]
1150                       = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1151                   }
1152               XSETBUFFER (last_thing_searched, current_buffer);
1153               pos = search_regs.end[0];
1154             }
1155           else
1156             {
1157               immediate_quit = 0;
1158               return (0 - n);
1159             }
1160           n--;
1161         }
1162       immediate_quit = 0;
1163       return (pos);
1164     }
1165   else                          /* non-RE case */
1166     {
1167       unsigned char *raw_pattern, *pat;
1168       int raw_pattern_size;
1169       int raw_pattern_size_byte;
1170       unsigned char *patbuf;
1171       int multibyte = !NILP (current_buffer->enable_multibyte_characters);
1172       unsigned char *base_pat;
1173       /* Set to positive if we find a non-ASCII char that need
1174          translation.  Otherwise set to zero later.  */
1175       int charset_base = -1;
1176       int boyer_moore_ok = 1;
1177
1178       /* MULTIBYTE says whether the text to be searched is multibyte.
1179          We must convert PATTERN to match that, or we will not really
1180          find things right.  */
1181
1182       if (multibyte == STRING_MULTIBYTE (string))
1183         {
1184           raw_pattern = (unsigned char *) SDATA (string);
1185           raw_pattern_size = SCHARS (string);
1186           raw_pattern_size_byte = SBYTES (string);
1187         }
1188       else if (multibyte)
1189         {
1190           raw_pattern_size = SCHARS (string);
1191           raw_pattern_size_byte
1192             = count_size_as_multibyte (SDATA (string),
1193                                        raw_pattern_size);
1194           raw_pattern = (unsigned char *) alloca (raw_pattern_size_byte + 1);
1195           copy_text (SDATA (string), raw_pattern,
1196                      SCHARS (string), 0, 1);
1197         }
1198       else
1199         {
1200           /* Converting multibyte to single-byte.
1201
1202              ??? Perhaps this conversion should be done in a special way
1203              by subtracting nonascii-insert-offset from each non-ASCII char,
1204              so that only the multibyte chars which really correspond to
1205              the chosen single-byte character set can possibly match.  */
1206           raw_pattern_size = SCHARS (string);
1207           raw_pattern_size_byte = SCHARS (string);
1208           raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
1209           copy_text (SDATA (string), raw_pattern,
1210                      SBYTES (string), 1, 0);
1211         }
1212
1213       /* Copy and optionally translate the pattern.  */
1214       len = raw_pattern_size;
1215       len_byte = raw_pattern_size_byte;
1216       patbuf = (unsigned char *) alloca (len_byte);
1217       pat = patbuf;
1218       base_pat = raw_pattern;
1219       if (multibyte)
1220         {
1221           /* Fill patbuf by translated characters in STRING while
1222              checking if we can use boyer-moore search.  If TRT is
1223              non-nil, we can use boyer-moore search only if TRT can be
1224              represented by the byte array of 256 elements.  For that,
1225              all non-ASCII case-equivalents of all case-senstive
1226              characters in STRING must belong to the same charset and
1227              row.  */
1228
1229           while (--len >= 0)
1230             {
1231               unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str;
1232               int c, translated, inverse;
1233               int in_charlen, charlen;
1234
1235               /* If we got here and the RE flag is set, it's because we're
1236                  dealing with a regexp known to be trivial, so the backslash
1237                  just quotes the next character.  */
1238               if (RE && *base_pat == '\\')
1239                 {
1240                   len--;
1241                   raw_pattern_size--;
1242                   len_byte--;
1243                   base_pat++;
1244                 }
1245
1246               c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen);
1247
1248               if (NILP (trt))
1249                 {
1250                   str = base_pat;
1251                   charlen = in_charlen;
1252                 }
1253               else
1254                 {
1255                   /* Translate the character.  */
1256                   TRANSLATE (translated, trt, c);
1257                   charlen = CHAR_STRING (translated, str_base);
1258                   str = str_base;
1259
1260                   /* Check if C has any other case-equivalents.  */
1261                   TRANSLATE (inverse, inverse_trt, c);
1262                   /* If so, check if we can use boyer-moore.  */
1263                   if (c != inverse && boyer_moore_ok)
1264                     {
1265                       /* Check if all equivalents belong to the same
1266                          charset & row.  Note that the check of C
1267                          itself is done by the last iteration.  Note
1268                          also that we don't have to check ASCII
1269                          characters because boyer-moore search can
1270                          always handle their translation.  */
1271                       while (1)
1272                         {
1273                           if (ASCII_BYTE_P (inverse))
1274                             {
1275                               if (charset_base > 0)
1276                                 {
1277                                   boyer_moore_ok = 0;
1278                                   break;
1279                                 }
1280                               charset_base = 0;
1281                             }
1282                           else if (SINGLE_BYTE_CHAR_P (inverse))
1283                             {
1284                               /* Boyer-moore search can't handle a
1285                                  translation of an eight-bit
1286                                  character.  */
1287                               boyer_moore_ok = 0;
1288                               break;
1289                             }
1290                           else if (charset_base < 0)
1291                             charset_base = inverse & ~CHAR_FIELD3_MASK;
1292                           else if ((inverse & ~CHAR_FIELD3_MASK)
1293                                    != charset_base)
1294                             {
1295                               boyer_moore_ok = 0;
1296                               break;
1297                             }
1298                           if (c == inverse)
1299                             break;
1300                           TRANSLATE (inverse, inverse_trt, inverse);
1301                         }
1302                     }
1303                 }
1304               if (charset_base < 0)
1305                 charset_base = 0;
1306
1307               /* Store this character into the translated pattern.  */
1308               bcopy (str, pat, charlen);
1309               pat += charlen;
1310               base_pat += in_charlen;
1311               len_byte -= in_charlen;
1312             }
1313         }
1314       else
1315         {
1316           /* Unibyte buffer.  */
1317           charset_base = 0;
1318           while (--len >= 0)
1319             {
1320               int c, translated;
1321
1322               /* If we got here and the RE flag is set, it's because we're
1323                  dealing with a regexp known to be trivial, so the backslash
1324                  just quotes the next character.  */
1325               if (RE && *base_pat == '\\')
1326                 {
1327                   len--;
1328                   raw_pattern_size--;
1329                   base_pat++;
1330                 }
1331               c = *base_pat++;
1332               TRANSLATE (translated, trt, c);
1333               *pat++ = translated;
1334             }
1335         }
1336
1337       len_byte = pat - patbuf;
1338       len = raw_pattern_size;
1339       pat = base_pat = patbuf;
1340
1341       if (boyer_moore_ok)
1342         return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
1343                             pos, pos_byte, lim, lim_byte,
1344                             charset_base);
1345       else
1346         return simple_search (n, pat, len, len_byte, trt,
1347                               pos, pos_byte, lim, lim_byte);
1348     }
1349 }
1350 \f
1351 /* Do a simple string search N times for the string PAT,
1352    whose length is LEN/LEN_BYTE,
1353    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1354    TRT is the translation table.
1355
1356    Return the character position where the match is found.
1357    Otherwise, if M matches remained to be found, return -M.
1358
1359    This kind of search works regardless of what is in PAT and
1360    regardless of what is in TRT.  It is used in cases where
1361    boyer_moore cannot work.  */
1362
1363 static int
1364 simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
1365      int n;
1366      unsigned char *pat;
1367      int len, len_byte;
1368      Lisp_Object trt;
1369      int pos, pos_byte;
1370      int lim, lim_byte;
1371 {
1372   int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1373   int forward = n > 0;
1374
1375   if (lim > pos && multibyte)
1376     while (n > 0)
1377       {
1378         while (1)
1379           {
1380             /* Try matching at position POS.  */
1381             int this_pos = pos;
1382             int this_pos_byte = pos_byte;
1383             int this_len = len;
1384             int this_len_byte = len_byte;
1385             unsigned char *p = pat;
1386             if (pos + len > lim)
1387               goto stop;
1388
1389             while (this_len > 0)
1390               {
1391                 int charlen, buf_charlen;
1392                 int pat_ch, buf_ch;
1393
1394                 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1395                 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1396                                                  ZV_BYTE - this_pos_byte,
1397                                                  buf_charlen);
1398                 TRANSLATE (buf_ch, trt, buf_ch);
1399
1400                 if (buf_ch != pat_ch)
1401                   break;
1402
1403                 this_len_byte -= charlen;
1404                 this_len--;
1405                 p += charlen;
1406
1407                 this_pos_byte += buf_charlen;
1408                 this_pos++;
1409               }
1410
1411             if (this_len == 0)
1412               {
1413                 pos += len;
1414                 pos_byte += len_byte;
1415                 break;
1416               }
1417
1418             INC_BOTH (pos, pos_byte);
1419           }
1420
1421         n--;
1422       }
1423   else if (lim > pos)
1424     while (n > 0)
1425       {
1426         while (1)
1427           {
1428             /* Try matching at position POS.  */
1429             int this_pos = pos;
1430             int this_len = len;
1431             unsigned char *p = pat;
1432
1433             if (pos + len > lim)
1434               goto stop;
1435
1436             while (this_len > 0)
1437               {
1438                 int pat_ch = *p++;
1439                 int buf_ch = FETCH_BYTE (this_pos);
1440                 TRANSLATE (buf_ch, trt, buf_ch);
1441
1442                 if (buf_ch != pat_ch)
1443                   break;
1444
1445                 this_len--;
1446                 this_pos++;
1447               }
1448
1449             if (this_len == 0)
1450               {
1451                 pos += len;
1452                 break;
1453               }
1454
1455             pos++;
1456           }
1457
1458         n--;
1459       }
1460   /* Backwards search.  */
1461   else if (lim < pos && multibyte)
1462     while (n < 0)
1463       {
1464         while (1)
1465           {
1466             /* Try matching at position POS.  */
1467             int this_pos = pos - len;
1468             int this_pos_byte = pos_byte - len_byte;
1469             int this_len = len;
1470             int this_len_byte = len_byte;
1471             unsigned char *p = pat;
1472
1473             if (pos - len < lim)
1474               goto stop;
1475
1476             while (this_len > 0)
1477               {
1478                 int charlen, buf_charlen;
1479                 int pat_ch, buf_ch;
1480
1481                 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1482                 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1483                                                  ZV_BYTE - this_pos_byte,
1484                                                  buf_charlen);
1485                 TRANSLATE (buf_ch, trt, buf_ch);
1486
1487                 if (buf_ch != pat_ch)
1488                   break;
1489
1490                 this_len_byte -= charlen;
1491                 this_len--;
1492                 p += charlen;
1493                 this_pos_byte += buf_charlen;
1494                 this_pos++;
1495               }
1496
1497             if (this_len == 0)
1498               {
1499                 pos -= len;
1500                 pos_byte -= len_byte;
1501                 break;
1502               }
1503
1504             DEC_BOTH (pos, pos_byte);
1505           }
1506
1507         n++;
1508       }
1509   else if (lim < pos)
1510     while (n < 0)
1511       {
1512         while (1)
1513           {
1514             /* Try matching at position POS.  */
1515             int this_pos = pos - len;
1516             int this_len = len;
1517             unsigned char *p = pat;
1518
1519             if (pos - len < lim)
1520               goto stop;
1521
1522             while (this_len > 0)
1523               {
1524                 int pat_ch = *p++;
1525                 int buf_ch = FETCH_BYTE (this_pos);
1526                 TRANSLATE (buf_ch, trt, buf_ch);
1527
1528                 if (buf_ch != pat_ch)
1529                   break;
1530                 this_len--;
1531                 this_pos++;
1532               }
1533
1534             if (this_len == 0)
1535               {
1536                 pos -= len;
1537                 break;
1538               }
1539
1540             pos--;
1541           }
1542
1543         n++;
1544       }
1545
1546  stop:
1547   if (n == 0)
1548     {
1549       if (forward)
1550         set_search_regs ((multibyte ? pos_byte : pos) - len_byte, len_byte);
1551       else
1552         set_search_regs (multibyte ? pos_byte : pos, len_byte);
1553
1554       return pos;
1555     }
1556   else if (n > 0)
1557     return -n;
1558   else
1559     return n;
1560 }
1561 \f
1562 /* Do Boyer-Moore search N times for the string BASE_PAT,
1563    whose length is LEN/LEN_BYTE,
1564    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1565    DIRECTION says which direction we search in.
1566    TRT and INVERSE_TRT are translation tables.
1567    Characters in PAT are already translated by TRT.
1568
1569    This kind of search works if all the characters in BASE_PAT that
1570    have nontrivial translation are the same aside from the last byte.
1571    This makes it possible to translate just the last byte of a
1572    character, and do so after just a simple test of the context.
1573    CHARSET_BASE is nonzero iff there is such a non-ASCII character.
1574
1575    If that criterion is not satisfied, do not call this function.  */
1576
1577 static int
1578 boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
1579              pos, pos_byte, lim, lim_byte, charset_base)
1580      int n;
1581      unsigned char *base_pat;
1582      int len, len_byte;
1583      Lisp_Object trt;
1584      Lisp_Object inverse_trt;
1585      int pos, pos_byte;
1586      int lim, lim_byte;
1587      int charset_base;
1588 {
1589   int direction = ((n > 0) ? 1 : -1);
1590   register int dirlen;
1591   int infinity, limit, stride_for_teases = 0;
1592   register int *BM_tab;
1593   int *BM_tab_base;
1594   register unsigned char *cursor, *p_limit;
1595   register int i, j;
1596   unsigned char *pat, *pat_end;
1597   int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1598
1599   unsigned char simple_translate[0400];
1600   /* These are set to the preceding bytes of a byte to be translated
1601      if charset_base is nonzero.  As the maximum byte length of a
1602      multibyte character is 4, we have to check at most three previous
1603      bytes.  */
1604   int translate_prev_byte1 = 0;
1605   int translate_prev_byte2 = 0;
1606   int translate_prev_byte3 = 0;
1607
1608 #ifdef C_ALLOCA
1609   int BM_tab_space[0400];
1610   BM_tab = &BM_tab_space[0];
1611 #else
1612   BM_tab = (int *) alloca (0400 * sizeof (int));
1613 #endif
1614   /* The general approach is that we are going to maintain that we know */
1615   /* the first (closest to the present position, in whatever direction */
1616   /* we're searching) character that could possibly be the last */
1617   /* (furthest from present position) character of a valid match.  We */
1618   /* advance the state of our knowledge by looking at that character */
1619   /* and seeing whether it indeed matches the last character of the */
1620   /* pattern.  If it does, we take a closer look.  If it does not, we */
1621   /* move our pointer (to putative last characters) as far as is */
1622   /* logically possible.  This amount of movement, which I call a */
1623   /* stride, will be the length of the pattern if the actual character */
1624   /* appears nowhere in the pattern, otherwise it will be the distance */
1625   /* from the last occurrence of that character to the end of the */
1626   /* pattern. */
1627   /* As a coding trick, an enormous stride is coded into the table for */
1628   /* characters that match the last character.  This allows use of only */
1629   /* a single test, a test for having gone past the end of the */
1630   /* permissible match region, to test for both possible matches (when */
1631   /* the stride goes past the end immediately) and failure to */
1632   /* match (where you get nudged past the end one stride at a time). */
1633
1634   /* Here we make a "mickey mouse" BM table.  The stride of the search */
1635   /* is determined only by the last character of the putative match. */
1636   /* If that character does not match, we will stride the proper */
1637   /* distance to propose a match that superimposes it on the last */
1638   /* instance of a character that matches it (per trt), or misses */
1639   /* it entirely if there is none. */
1640
1641   dirlen = len_byte * direction;
1642   infinity = dirlen - (lim_byte + pos_byte + len_byte + len_byte) * direction;
1643
1644   /* Record position after the end of the pattern.  */
1645   pat_end = base_pat + len_byte;
1646   /* BASE_PAT points to a character that we start scanning from.
1647      It is the first character in a forward search,
1648      the last character in a backward search.  */
1649   if (direction < 0)
1650     base_pat = pat_end - 1;
1651
1652   BM_tab_base = BM_tab;
1653   BM_tab += 0400;
1654   j = dirlen;           /* to get it in a register */
1655   /* A character that does not appear in the pattern induces a */
1656   /* stride equal to the pattern length. */
1657   while (BM_tab_base != BM_tab)
1658     {
1659       *--BM_tab = j;
1660       *--BM_tab = j;
1661       *--BM_tab = j;
1662       *--BM_tab = j;
1663     }
1664
1665   /* We use this for translation, instead of TRT itself.
1666      We fill this in to handle the characters that actually
1667      occur in the pattern.  Others don't matter anyway!  */
1668   bzero (simple_translate, sizeof simple_translate);
1669   for (i = 0; i < 0400; i++)
1670     simple_translate[i] = i;
1671
1672   if (charset_base)
1673     {
1674       /* Setup translate_prev_byte1/2/3 from CHARSET_BASE.  Only a
1675          byte following them are the target of translation.  */
1676       int sample_char = charset_base | 0x20;
1677       unsigned char str[MAX_MULTIBYTE_LENGTH];
1678       int len = CHAR_STRING (sample_char, str);
1679
1680       translate_prev_byte1 = str[len - 2];
1681       if (len > 2)
1682         {
1683           translate_prev_byte2 = str[len - 3];
1684           if (len > 3)
1685             translate_prev_byte3 = str[len - 4];
1686         }
1687     }
1688
1689   i = 0;
1690   while (i != infinity)
1691     {
1692       unsigned char *ptr = base_pat + i;
1693       i += direction;
1694       if (i == dirlen)
1695         i = infinity;
1696       if (! NILP (trt))
1697         {
1698           /* If the byte currently looking at is the last of a
1699              character to check case-equivalents, set CH to that
1700              character.  An ASCII character and a non-ASCII character
1701              matching with CHARSET_BASE are to be checked.  */
1702           int ch = -1;
1703
1704           if (ASCII_BYTE_P (*ptr) || ! multibyte)
1705             ch = *ptr;
1706           else if (charset_base
1707                    && ((pat_end - ptr) == 1 || CHAR_HEAD_P (ptr[1])))
1708             {
1709               unsigned char *charstart = ptr - 1;
1710
1711               while (! (CHAR_HEAD_P (*charstart)))
1712                 charstart--;
1713               ch = STRING_CHAR (charstart, ptr - charstart + 1);
1714               if (charset_base != (ch & ~CHAR_FIELD3_MASK))
1715                 ch = -1;
1716             }
1717
1718           if (ch >= 0400)
1719             j = ((unsigned char) ch) | 0200;
1720           else
1721             j = *ptr;
1722
1723           if (i == infinity)
1724             stride_for_teases = BM_tab[j];
1725
1726           BM_tab[j] = dirlen - i;
1727           /* A translation table is accompanied by its inverse -- see */
1728           /* comment following downcase_table for details */
1729           if (ch >= 0)
1730             {
1731               int starting_ch = ch;
1732               int starting_j = j;
1733
1734               while (1)
1735                 {
1736                   TRANSLATE (ch, inverse_trt, ch);
1737                   if (ch >= 0400)
1738                     j = ((unsigned char) ch) | 0200;
1739                   else
1740                     j = (unsigned char) ch;
1741
1742                   /* For all the characters that map into CH,
1743                      set up simple_translate to map the last byte
1744                      into STARTING_J.  */
1745                   simple_translate[j] = starting_j;
1746                   if (ch == starting_ch)
1747                     break;
1748                   BM_tab[j] = dirlen - i;
1749                 }
1750             }
1751         }
1752       else
1753         {
1754           j = *ptr;
1755
1756           if (i == infinity)
1757             stride_for_teases = BM_tab[j];
1758           BM_tab[j] = dirlen - i;
1759         }
1760       /* stride_for_teases tells how much to stride if we get a */
1761       /* match on the far character but are subsequently */
1762       /* disappointed, by recording what the stride would have been */
1763       /* for that character if the last character had been */
1764       /* different. */
1765     }
1766   infinity = dirlen - infinity;
1767   pos_byte += dirlen - ((direction > 0) ? direction : 0);
1768   /* loop invariant - POS_BYTE points at where last char (first
1769      char if reverse) of pattern would align in a possible match.  */
1770   while (n != 0)
1771     {
1772       int tail_end;
1773       unsigned char *tail_end_ptr;
1774
1775       /* It's been reported that some (broken) compiler thinks that
1776          Boolean expressions in an arithmetic context are unsigned.
1777          Using an explicit ?1:0 prevents this.  */
1778       if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1779           < 0)
1780         return (n * (0 - direction));
1781       /* First we do the part we can by pointers (maybe nothing) */
1782       QUIT;
1783       pat = base_pat;
1784       limit = pos_byte - dirlen + direction;
1785       if (direction > 0)
1786         {
1787           limit = BUFFER_CEILING_OF (limit);
1788           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1789              can take on without hitting edge of buffer or the gap.  */
1790           limit = min (limit, pos_byte + 20000);
1791           limit = min (limit, lim_byte - 1);
1792         }
1793       else
1794         {
1795           limit = BUFFER_FLOOR_OF (limit);
1796           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1797              can take on without hitting edge of buffer or the gap.  */
1798           limit = max (limit, pos_byte - 20000);
1799           limit = max (limit, lim_byte);
1800         }
1801       tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1802       tail_end_ptr = BYTE_POS_ADDR (tail_end);
1803
1804       if ((limit - pos_byte) * direction > 20)
1805         {
1806           unsigned char *p2;
1807
1808           p_limit = BYTE_POS_ADDR (limit);
1809           p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1810           /* In this loop, pos + cursor - p2 is the surrogate for pos */
1811           while (1)             /* use one cursor setting as long as i can */
1812             {
1813               if (direction > 0) /* worth duplicating */
1814                 {
1815                   /* Use signed comparison if appropriate
1816                      to make cursor+infinity sure to be > p_limit.
1817                      Assuming that the buffer lies in a range of addresses
1818                      that are all "positive" (as ints) or all "negative",
1819                      either kind of comparison will work as long
1820                      as we don't step by infinity.  So pick the kind
1821                      that works when we do step by infinity.  */
1822                   if ((EMACS_INT) (p_limit + infinity) > (EMACS_INT) p_limit)
1823                     while ((EMACS_INT) cursor <= (EMACS_INT) p_limit)
1824                       cursor += BM_tab[*cursor];
1825                   else
1826                     while ((EMACS_UINT) cursor <= (EMACS_UINT) p_limit)
1827                       cursor += BM_tab[*cursor];
1828                 }
1829               else
1830                 {
1831                   if ((EMACS_INT) (p_limit + infinity) < (EMACS_INT) p_limit)
1832                     while ((EMACS_INT) cursor >= (EMACS_INT) p_limit)
1833                       cursor += BM_tab[*cursor];
1834                   else
1835                     while ((EMACS_UINT) cursor >= (EMACS_UINT) p_limit)
1836                       cursor += BM_tab[*cursor];
1837                 }
1838 /* If you are here, cursor is beyond the end of the searched region. */
1839 /* This can happen if you match on the far character of the pattern, */
1840 /* because the "stride" of that character is infinity, a number able */
1841 /* to throw you well beyond the end of the search.  It can also */
1842 /* happen if you fail to match within the permitted region and would */
1843 /* otherwise try a character beyond that region */
1844               if ((cursor - p_limit) * direction <= len_byte)
1845                 break;  /* a small overrun is genuine */
1846               cursor -= infinity; /* large overrun = hit */
1847               i = dirlen - direction;
1848               if (! NILP (trt))
1849                 {
1850                   while ((i -= direction) + direction != 0)
1851                     {
1852                       int ch;
1853                       cursor -= direction;
1854                       /* Translate only the last byte of a character.  */
1855                       if (! multibyte
1856                           || ((cursor == tail_end_ptr
1857                                || CHAR_HEAD_P (cursor[1]))
1858                               && (CHAR_HEAD_P (cursor[0])
1859                                   /* Check if this is the last byte of
1860                                      a translable character.  */
1861                                   || (translate_prev_byte1 == cursor[-1]
1862                                       && (CHAR_HEAD_P (translate_prev_byte1)
1863                                           || (translate_prev_byte2 == cursor[-2]
1864                                               && (CHAR_HEAD_P (translate_prev_byte2)
1865                                                   || (translate_prev_byte3 == cursor[-3]))))))))
1866                         ch = simple_translate[*cursor];
1867                       else
1868                         ch = *cursor;
1869                       if (pat[i] != ch)
1870                         break;
1871                     }
1872                 }
1873               else
1874                 {
1875                   while ((i -= direction) + direction != 0)
1876                     {
1877                       cursor -= direction;
1878                       if (pat[i] != *cursor)
1879                         break;
1880                     }
1881                 }
1882               cursor += dirlen - i - direction; /* fix cursor */
1883               if (i + direction == 0)
1884                 {
1885                   int position;
1886
1887                   cursor -= direction;
1888
1889                   position = pos_byte + cursor - p2 + ((direction > 0)
1890                                                        ? 1 - len_byte : 0);
1891                   set_search_regs (position, len_byte);
1892
1893                   if ((n -= direction) != 0)
1894                     cursor += dirlen; /* to resume search */
1895                   else
1896                     return ((direction > 0)
1897                             ? search_regs.end[0] : search_regs.start[0]);
1898                 }
1899               else
1900                 cursor += stride_for_teases; /* <sigh> we lose -  */
1901             }
1902           pos_byte += cursor - p2;
1903         }
1904       else
1905         /* Now we'll pick up a clump that has to be done the hard */
1906         /* way because it covers a discontinuity */
1907         {
1908           limit = ((direction > 0)
1909                    ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
1910                    : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
1911           limit = ((direction > 0)
1912                    ? min (limit + len_byte, lim_byte - 1)
1913                    : max (limit - len_byte, lim_byte));
1914           /* LIMIT is now the last value POS_BYTE can have
1915              and still be valid for a possible match.  */
1916           while (1)
1917             {
1918               /* This loop can be coded for space rather than */
1919               /* speed because it will usually run only once. */
1920               /* (the reach is at most len + 21, and typically */
1921               /* does not exceed len) */
1922               while ((limit - pos_byte) * direction >= 0)
1923                 pos_byte += BM_tab[FETCH_BYTE (pos_byte)];
1924               /* now run the same tests to distinguish going off the */
1925               /* end, a match or a phony match. */
1926               if ((pos_byte - limit) * direction <= len_byte)
1927                 break;  /* ran off the end */
1928               /* Found what might be a match.
1929                  Set POS_BYTE back to last (first if reverse) pos.  */
1930               pos_byte -= infinity;
1931               i = dirlen - direction;
1932               while ((i -= direction) + direction != 0)
1933                 {
1934                   int ch;
1935                   unsigned char *ptr;
1936                   pos_byte -= direction;
1937                   ptr = BYTE_POS_ADDR (pos_byte);
1938                   /* Translate only the last byte of a character.  */
1939                   if (! multibyte
1940                       || ((ptr == tail_end_ptr
1941                            || CHAR_HEAD_P (ptr[1]))
1942                           && (CHAR_HEAD_P (ptr[0])
1943                               /* Check if this is the last byte of a
1944                                  translable character.  */
1945                               || (translate_prev_byte1 == ptr[-1]
1946                                   && (CHAR_HEAD_P (translate_prev_byte1)
1947                                       || (translate_prev_byte2 == ptr[-2]
1948                                           && (CHAR_HEAD_P (translate_prev_byte2)
1949                                               || translate_prev_byte3 == ptr[-3])))))))
1950                     ch = simple_translate[*ptr];
1951                   else
1952                     ch = *ptr;
1953                   if (pat[i] != ch)
1954                     break;
1955                 }
1956               /* Above loop has moved POS_BYTE part or all the way
1957                  back to the first pos (last pos if reverse).
1958                  Set it once again at the last (first if reverse) char.  */
1959               pos_byte += dirlen - i- direction;
1960               if (i + direction == 0)
1961                 {
1962                   int position;
1963                   pos_byte -= direction;
1964
1965                   position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
1966
1967                   set_search_regs (position, len_byte);
1968
1969                   if ((n -= direction) != 0)
1970                     pos_byte += dirlen; /* to resume search */
1971                   else
1972                     return ((direction > 0)
1973                             ? search_regs.end[0] : search_regs.start[0]);
1974                 }
1975               else
1976                 pos_byte += stride_for_teases;
1977             }
1978           }
1979       /* We have done one clump.  Can we continue? */
1980       if ((lim_byte - pos_byte) * direction < 0)
1981         return ((0 - n) * direction);
1982     }
1983   return BYTE_TO_CHAR (pos_byte);
1984 }
1985
1986 /* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
1987    for the overall match just found in the current buffer.
1988    Also clear out the match data for registers 1 and up.  */
1989
1990 static void
1991 set_search_regs (beg_byte, nbytes)
1992      int beg_byte, nbytes;
1993 {
1994   int i;
1995
1996   /* Make sure we have registers in which to store
1997      the match position.  */
1998   if (search_regs.num_regs == 0)
1999     {
2000       search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
2001       search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
2002       search_regs.num_regs = 2;
2003     }
2004
2005   /* Clear out the other registers.  */
2006   for (i = 1; i < search_regs.num_regs; i++)
2007     {
2008       search_regs.start[i] = -1;
2009       search_regs.end[i] = -1;
2010     }
2011
2012   search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
2013   search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
2014   XSETBUFFER (last_thing_searched, current_buffer);
2015 }
2016 \f
2017 /* Given a string of words separated by word delimiters,
2018   compute a regexp that matches those exact words
2019   separated by arbitrary punctuation.  */
2020
2021 static Lisp_Object
2022 wordify (string)
2023      Lisp_Object string;
2024 {
2025   register unsigned char *p, *o;
2026   register int i, i_byte, len, punct_count = 0, word_count = 0;
2027   Lisp_Object val;
2028   int prev_c = 0;
2029   int adjust;
2030
2031   CHECK_STRING (string);
2032   p = SDATA (string);
2033   len = SCHARS (string);
2034
2035   for (i = 0, i_byte = 0; i < len; )
2036     {
2037       int c;
2038
2039       FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
2040
2041       if (SYNTAX (c) != Sword)
2042         {
2043           punct_count++;
2044           if (i > 0 && SYNTAX (prev_c) == Sword)
2045             word_count++;
2046         }
2047
2048       prev_c = c;
2049     }
2050
2051   if (SYNTAX (prev_c) == Sword)
2052     word_count++;
2053   if (!word_count)
2054     return empty_string;
2055
2056   adjust = - punct_count + 5 * (word_count - 1) + 4;
2057   if (STRING_MULTIBYTE (string))
2058     val = make_uninit_multibyte_string (len + adjust,
2059                                         SBYTES (string)
2060                                         + adjust);
2061   else
2062     val = make_uninit_string (len + adjust);
2063
2064   o = SDATA (val);
2065   *o++ = '\\';
2066   *o++ = 'b';
2067   prev_c = 0;
2068
2069   for (i = 0, i_byte = 0; i < len; )
2070     {
2071       int c;
2072       int i_byte_orig = i_byte;
2073
2074       FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
2075
2076       if (SYNTAX (c) == Sword)
2077         {
2078           bcopy (SDATA (string) + i_byte_orig, o,
2079                  i_byte - i_byte_orig);
2080           o += i_byte - i_byte_orig;
2081         }
2082       else if (i > 0 && SYNTAX (prev_c) == Sword && --word_count)
2083         {
2084           *o++ = '\\';
2085           *o++ = 'W';
2086           *o++ = '\\';
2087           *o++ = 'W';
2088           *o++ = '*';
2089         }
2090
2091       prev_c = c;
2092     }
2093
2094   *o++ = '\\';
2095   *o++ = 'b';
2096
2097   return val;
2098 }
2099 \f
2100 DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
2101        "MSearch backward: ",
2102        doc: /* Search backward from point for STRING.
2103 Set point to the beginning of the occurrence found, and return point.
2104 An optional second argument bounds the search; it is a buffer position.
2105 The match found must not extend before that position.
2106 Optional third argument, if t, means if fail just return nil (no error).
2107  If not nil and not t, position at limit of search and return nil.
2108 Optional fourth argument is repeat count--search for successive occurrences.
2109
2110 Search case-sensitivity is determined by the value of the variable
2111 `case-fold-search', which see.
2112
2113 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2114      (string, bound, noerror, count)
2115      Lisp_Object string, bound, noerror, count;
2116 {
2117   return search_command (string, bound, noerror, count, -1, 0, 0);
2118 }
2119
2120 DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
2121        doc: /* Search forward from point for STRING.
2122 Set point to the end of the occurrence found, and return point.
2123 An optional second argument bounds the search; it is a buffer position.
2124 The match found must not extend after that position.  nil is equivalent
2125   to (point-max).
2126 Optional third argument, if t, means if fail just return nil (no error).
2127   If not nil and not t, move to limit of search and return nil.
2128 Optional fourth argument is repeat count--search for successive occurrences.
2129
2130 Search case-sensitivity is determined by the value of the variable
2131 `case-fold-search', which see.
2132
2133 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2134      (string, bound, noerror, count)
2135      Lisp_Object string, bound, noerror, count;
2136 {
2137   return search_command (string, bound, noerror, count, 1, 0, 0);
2138 }
2139
2140 DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
2141        "sWord search backward: ",
2142        doc: /* Search backward from point for STRING, ignoring differences in punctuation.
2143 Set point to the beginning of the occurrence found, and return point.
2144 An optional second argument bounds the search; it is a buffer position.
2145 The match found must not extend before that position.
2146 Optional third argument, if t, means if fail just return nil (no error).
2147   If not nil and not t, move to limit of search and return nil.
2148 Optional fourth argument is repeat count--search for successive occurrences.  */)
2149      (string, bound, noerror, count)
2150      Lisp_Object string, bound, noerror, count;
2151 {
2152   return search_command (wordify (string), bound, noerror, count, -1, 1, 0);
2153 }
2154
2155 DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
2156        "sWord search: ",
2157        doc: /* Search forward from point for STRING, ignoring differences in punctuation.
2158 Set point to the end of the occurrence found, and return point.
2159 An optional second argument bounds the search; it is a buffer position.
2160 The match found must not extend after that position.
2161 Optional third argument, if t, means if fail just return nil (no error).
2162   If not nil and not t, move to limit of search and return nil.
2163 Optional fourth argument is repeat count--search for successive occurrences.  */)
2164      (string, bound, noerror, count)
2165      Lisp_Object string, bound, noerror, count;
2166 {
2167   return search_command (wordify (string), bound, noerror, count, 1, 1, 0);
2168 }
2169
2170 DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
2171        "sRE search backward: ",
2172        doc: /* Search backward from point for match for regular expression REGEXP.
2173 Set point to the beginning of the match, and return point.
2174 The match found is the one starting last in the buffer
2175 and yet ending before the origin of the search.
2176 An optional second argument bounds the search; it is a buffer position.
2177 The match found must start at or after that position.
2178 Optional third argument, if t, means if fail just return nil (no error).
2179   If not nil and not t, move to limit of search and return nil.
2180 Optional fourth argument is repeat count--search for successive occurrences.
2181 See also the functions `match-beginning', `match-end', `match-string',
2182 and `replace-match'.  */)
2183      (regexp, bound, noerror, count)
2184      Lisp_Object regexp, bound, noerror, count;
2185 {
2186   return search_command (regexp, bound, noerror, count, -1, 1, 0);
2187 }
2188
2189 DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
2190        "sRE search: ",
2191        doc: /* Search forward from point for regular expression REGEXP.
2192 Set point to the end of the occurrence found, and return point.
2193 An optional second argument bounds the search; it is a buffer position.
2194 The match found must not extend after that position.
2195 Optional third argument, if t, means if fail just return nil (no error).
2196   If not nil and not t, move to limit of search and return nil.
2197 Optional fourth argument is repeat count--search for successive occurrences.
2198 See also the functions `match-beginning', `match-end', `match-string',
2199 and `replace-match'.  */)
2200      (regexp, bound, noerror, count)
2201      Lisp_Object regexp, bound, noerror, count;
2202 {
2203   return search_command (regexp, bound, noerror, count, 1, 1, 0);
2204 }
2205
2206 DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
2207        "sPosix search backward: ",
2208        doc: /* Search backward from point for match for regular expression REGEXP.
2209 Find the longest match in accord with Posix regular expression rules.
2210 Set point to the beginning of the match, and return point.
2211 The match found is the one starting last in the buffer
2212 and yet ending before the origin of the search.
2213 An optional second argument bounds the search; it is a buffer position.
2214 The match found must start at or after that position.
2215 Optional third argument, if t, means if fail just return nil (no error).
2216   If not nil and not t, move to limit of search and return nil.
2217 Optional fourth argument is repeat count--search for successive occurrences.
2218 See also the functions `match-beginning', `match-end', `match-string',
2219 and `replace-match'.  */)
2220      (regexp, bound, noerror, count)
2221      Lisp_Object regexp, bound, noerror, count;
2222 {
2223   return search_command (regexp, bound, noerror, count, -1, 1, 1);
2224 }
2225
2226 DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
2227        "sPosix search: ",
2228        doc: /* Search forward from point for regular expression REGEXP.
2229 Find the longest match in accord with Posix regular expression rules.
2230 Set point to the end of the occurrence found, and return point.
2231 An optional second argument bounds the search; it is a buffer position.
2232 The match found must not extend after that position.
2233 Optional third argument, if t, means if fail just return nil (no error).
2234   If not nil and not t, move to limit of search and return nil.
2235 Optional fourth argument is repeat count--search for successive occurrences.
2236 See also the functions `match-beginning', `match-end', `match-string',
2237 and `replace-match'.  */)
2238      (regexp, bound, noerror, count)
2239      Lisp_Object regexp, bound, noerror, count;
2240 {
2241   return search_command (regexp, bound, noerror, count, 1, 1, 1);
2242 }
2243 \f
2244 DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
2245        doc: /* Replace text matched by last search with NEWTEXT.
2246 Leave point at the end of the replacement text.
2247
2248 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
2249 Otherwise maybe capitalize the whole text, or maybe just word initials,
2250 based on the replaced text.
2251 If the replaced text has only capital letters
2252 and has at least one multiletter word, convert NEWTEXT to all caps.
2253 Otherwise if all words are capitalized in the replaced text,
2254 capitalize each word in NEWTEXT.
2255
2256 If third arg LITERAL is non-nil, insert NEWTEXT literally.
2257 Otherwise treat `\\' as special:
2258   `\\&' in NEWTEXT means substitute original matched text.
2259   `\\N' means substitute what matched the Nth `\\(...\\)'.
2260        If Nth parens didn't match, substitute nothing.
2261   `\\\\' means insert one `\\'.
2262 Case conversion does not apply to these substitutions.
2263
2264 FIXEDCASE and LITERAL are optional arguments.
2265
2266 The optional fourth argument STRING can be a string to modify.
2267 This is meaningful when the previous match was done against STRING,
2268 using `string-match'.  When used this way, `replace-match'
2269 creates and returns a new string made by copying STRING and replacing
2270 the part of STRING that was matched.
2271
2272 The optional fifth argument SUBEXP specifies a subexpression;
2273 it says to replace just that subexpression with NEWTEXT,
2274 rather than replacing the entire matched text.
2275 This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
2276 `\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
2277 NEWTEXT in place of subexp N.
2278 This is useful only after a regular expression search or match,
2279 since only regular expressions have distinguished subexpressions.  */)
2280      (newtext, fixedcase, literal, string, subexp)
2281      Lisp_Object newtext, fixedcase, literal, string, subexp;
2282 {
2283   enum { nochange, all_caps, cap_initial } case_action;
2284   register int pos, pos_byte;
2285   int some_multiletter_word;
2286   int some_lowercase;
2287   int some_uppercase;
2288   int some_nonuppercase_initial;
2289   register int c, prevc;
2290   int sub;
2291   int opoint, newpoint;
2292
2293   CHECK_STRING (newtext);
2294
2295   if (! NILP (string))
2296     CHECK_STRING (string);
2297
2298   case_action = nochange;       /* We tried an initialization */
2299                                 /* but some C compilers blew it */
2300
2301   if (search_regs.num_regs <= 0)
2302     error ("`replace-match' called before any match found");
2303
2304   if (NILP (subexp))
2305     sub = 0;
2306   else
2307     {
2308       CHECK_NUMBER (subexp);
2309       sub = XINT (subexp);
2310       if (sub < 0 || sub >= search_regs.num_regs)
2311         args_out_of_range (subexp, make_number (search_regs.num_regs));
2312     }
2313
2314   if (NILP (string))
2315     {
2316       if (search_regs.start[sub] < BEGV
2317           || search_regs.start[sub] > search_regs.end[sub]
2318           || search_regs.end[sub] > ZV)
2319         args_out_of_range (make_number (search_regs.start[sub]),
2320                            make_number (search_regs.end[sub]));
2321     }
2322   else
2323     {
2324       if (search_regs.start[sub] < 0
2325           || search_regs.start[sub] > search_regs.end[sub]
2326           || search_regs.end[sub] > SCHARS (string))
2327         args_out_of_range (make_number (search_regs.start[sub]),
2328                            make_number (search_regs.end[sub]));
2329     }
2330
2331   if (NILP (fixedcase))
2332     {
2333       /* Decide how to casify by examining the matched text. */
2334       int last;
2335
2336       pos = search_regs.start[sub];
2337       last = search_regs.end[sub];
2338
2339       if (NILP (string))
2340         pos_byte = CHAR_TO_BYTE (pos);
2341       else
2342         pos_byte = string_char_to_byte (string, pos);
2343
2344       prevc = '\n';
2345       case_action = all_caps;
2346
2347       /* some_multiletter_word is set nonzero if any original word
2348          is more than one letter long. */
2349       some_multiletter_word = 0;
2350       some_lowercase = 0;
2351       some_nonuppercase_initial = 0;
2352       some_uppercase = 0;
2353
2354       while (pos < last)
2355         {
2356           if (NILP (string))
2357             {
2358               c = FETCH_CHAR (pos_byte);
2359               INC_BOTH (pos, pos_byte);
2360             }
2361           else
2362             FETCH_STRING_CHAR_ADVANCE (c, string, pos, pos_byte);
2363
2364           if (LOWERCASEP (c))
2365             {
2366               /* Cannot be all caps if any original char is lower case */
2367
2368               some_lowercase = 1;
2369               if (SYNTAX (prevc) != Sword)
2370                 some_nonuppercase_initial = 1;
2371               else
2372                 some_multiletter_word = 1;
2373             }
2374           else if (UPPERCASEP (c))
2375             {
2376               some_uppercase = 1;
2377               if (SYNTAX (prevc) != Sword)
2378                 ;
2379               else
2380                 some_multiletter_word = 1;
2381             }
2382           else
2383             {
2384               /* If the initial is a caseless word constituent,
2385                  treat that like a lowercase initial.  */
2386               if (SYNTAX (prevc) != Sword)
2387                 some_nonuppercase_initial = 1;
2388             }
2389
2390           prevc = c;
2391         }
2392
2393       /* Convert to all caps if the old text is all caps
2394          and has at least one multiletter word.  */
2395       if (! some_lowercase && some_multiletter_word)
2396         case_action = all_caps;
2397       /* Capitalize each word, if the old text has all capitalized words.  */
2398       else if (!some_nonuppercase_initial && some_multiletter_word)
2399         case_action = cap_initial;
2400       else if (!some_nonuppercase_initial && some_uppercase)
2401         /* Should x -> yz, operating on X, give Yz or YZ?
2402            We'll assume the latter.  */
2403         case_action = all_caps;
2404       else
2405         case_action = nochange;
2406     }
2407
2408   /* Do replacement in a string.  */
2409   if (!NILP (string))
2410     {
2411       Lisp_Object before, after;
2412
2413       before = Fsubstring (string, make_number (0),
2414                            make_number (search_regs.start[sub]));
2415       after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
2416
2417       /* Substitute parts of the match into NEWTEXT
2418          if desired.  */
2419       if (NILP (literal))
2420         {
2421           int lastpos = 0;
2422           int lastpos_byte = 0;
2423           /* We build up the substituted string in ACCUM.  */
2424           Lisp_Object accum;
2425           Lisp_Object middle;
2426           int length = SBYTES (newtext);
2427
2428           accum = Qnil;
2429
2430           for (pos_byte = 0, pos = 0; pos_byte < length;)
2431             {
2432               int substart = -1;
2433               int subend = 0;
2434               int delbackslash = 0;
2435
2436               FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2437
2438               if (c == '\\')
2439                 {
2440                   FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2441
2442                   if (c == '&')
2443                     {
2444                       substart = search_regs.start[sub];
2445                       subend = search_regs.end[sub];
2446                     }
2447                   else if (c >= '1' && c <= '9')
2448                     {
2449                       if (search_regs.start[c - '0'] >= 0
2450                           && c <= search_regs.num_regs + '0')
2451                         {
2452                           substart = search_regs.start[c - '0'];
2453                           subend = search_regs.end[c - '0'];
2454                         }
2455                       else
2456                         {
2457                           /* If that subexp did not match,
2458                              replace \\N with nothing.  */
2459                           substart = 0;
2460                           subend = 0;
2461                         }
2462                     }
2463                   else if (c == '\\')
2464                     delbackslash = 1;
2465                   else
2466                     error ("Invalid use of `\\' in replacement text");
2467                 }
2468               if (substart >= 0)
2469                 {
2470                   if (pos - 2 != lastpos)
2471                     middle = substring_both (newtext, lastpos,
2472                                              lastpos_byte,
2473                                              pos - 2, pos_byte - 2);
2474                   else
2475                     middle = Qnil;
2476                   accum = concat3 (accum, middle,
2477                                    Fsubstring (string,
2478                                                make_number (substart),
2479                                                make_number (subend)));
2480                   lastpos = pos;
2481                   lastpos_byte = pos_byte;
2482                 }
2483               else if (delbackslash)
2484                 {
2485                   middle = substring_both (newtext, lastpos,
2486                                            lastpos_byte,
2487                                            pos - 1, pos_byte - 1);
2488
2489                   accum = concat2 (accum, middle);
2490                   lastpos = pos;
2491                   lastpos_byte = pos_byte;
2492                 }
2493             }
2494
2495           if (pos != lastpos)
2496             middle = substring_both (newtext, lastpos,
2497                                      lastpos_byte,
2498                                      pos, pos_byte);
2499           else
2500             middle = Qnil;
2501
2502           newtext = concat2 (accum, middle);
2503         }
2504
2505       /* Do case substitution in NEWTEXT if desired.  */
2506       if (case_action == all_caps)
2507         newtext = Fupcase (newtext);
2508       else if (case_action == cap_initial)
2509         newtext = Fupcase_initials (newtext);
2510
2511       return concat3 (before, newtext, after);
2512     }
2513
2514   /* Record point, then move (quietly) to the start of the match.  */
2515   if (PT >= search_regs.end[sub])
2516     opoint = PT - ZV;
2517   else if (PT > search_regs.start[sub])
2518     opoint = search_regs.end[sub] - ZV;
2519   else
2520     opoint = PT;
2521
2522   /* If we want non-literal replacement,
2523      perform substitution on the replacement string.  */
2524   if (NILP (literal))
2525     {
2526       int length = SBYTES (newtext);
2527       unsigned char *substed;
2528       int substed_alloc_size, substed_len;
2529       int buf_multibyte = !NILP (current_buffer->enable_multibyte_characters);
2530       int str_multibyte = STRING_MULTIBYTE (newtext);
2531       Lisp_Object rev_tbl;
2532       int really_changed = 0;
2533
2534       rev_tbl= (!buf_multibyte && CHAR_TABLE_P (Vnonascii_translation_table)
2535                 ? Fchar_table_extra_slot (Vnonascii_translation_table,
2536                                           make_number (0))
2537                 : Qnil);
2538
2539       substed_alloc_size = length * 2 + 100;
2540       substed = (unsigned char *) xmalloc (substed_alloc_size + 1);
2541       substed_len = 0;
2542
2543       /* Go thru NEWTEXT, producing the actual text to insert in
2544          SUBSTED while adjusting multibyteness to that of the current
2545          buffer.  */
2546
2547       for (pos_byte = 0, pos = 0; pos_byte < length;)
2548         {
2549           unsigned char str[MAX_MULTIBYTE_LENGTH];
2550           unsigned char *add_stuff = NULL;
2551           int add_len = 0;
2552           int idx = -1;
2553
2554           if (str_multibyte)
2555             {
2556               FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte);
2557               if (!buf_multibyte)
2558                 c = multibyte_char_to_unibyte (c, rev_tbl);
2559             }
2560           else
2561             {
2562               /* Note that we don't have to increment POS.  */
2563               c = SREF (newtext, pos_byte++);
2564               if (buf_multibyte)
2565                 c = unibyte_char_to_multibyte (c);
2566             }
2567
2568           /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2569              or set IDX to a match index, which means put that part
2570              of the buffer text into SUBSTED.  */
2571
2572           if (c == '\\')
2573             {
2574               really_changed = 1;
2575
2576               if (str_multibyte)
2577                 {
2578                   FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext,
2579                                                       pos, pos_byte);
2580                   if (!buf_multibyte && !SINGLE_BYTE_CHAR_P (c))
2581                     c = multibyte_char_to_unibyte (c, rev_tbl);
2582                 }
2583               else
2584                 {
2585                   c = SREF (newtext, pos_byte++);
2586                   if (buf_multibyte)
2587                     c = unibyte_char_to_multibyte (c);
2588                 }
2589
2590               if (c == '&')
2591                 idx = sub;
2592               else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
2593                 {
2594                   if (search_regs.start[c - '0'] >= 1)
2595                     idx = c - '0';
2596                 }
2597               else if (c == '\\')
2598                 add_len = 1, add_stuff = "\\";
2599               else
2600                 {
2601                   xfree (substed);
2602                   error ("Invalid use of `\\' in replacement text");
2603                 }
2604             }
2605           else
2606             {
2607               add_len = CHAR_STRING (c, str);
2608               add_stuff = str;
2609             }
2610
2611           /* If we want to copy part of a previous match,
2612              set up ADD_STUFF and ADD_LEN to point to it.  */
2613           if (idx >= 0)
2614             {
2615               int begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
2616               add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
2617               if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
2618                 move_gap (search_regs.start[idx]);
2619               add_stuff = BYTE_POS_ADDR (begbyte);
2620             }
2621
2622           /* Now the stuff we want to add to SUBSTED
2623              is invariably ADD_LEN bytes starting at ADD_STUFF.  */
2624
2625           /* Make sure SUBSTED is big enough.  */
2626           if (substed_len + add_len >= substed_alloc_size)
2627             {
2628               substed_alloc_size = substed_len + add_len + 500;
2629               substed = (unsigned char *) xrealloc (substed,
2630                                                     substed_alloc_size + 1);
2631             }
2632
2633           /* Now add to the end of SUBSTED.  */
2634           if (add_stuff)
2635             {
2636               bcopy (add_stuff, substed + substed_len, add_len);
2637               substed_len += add_len;
2638             }
2639         }
2640
2641       if (really_changed)
2642         {
2643           if (buf_multibyte)
2644             {
2645               int nchars = multibyte_chars_in_text (substed, substed_len);
2646
2647               newtext = make_multibyte_string (substed, nchars, substed_len);
2648             }
2649           else
2650             newtext = make_unibyte_string (substed, substed_len);
2651         }
2652       xfree (substed);
2653     }
2654
2655   /* Replace the old text with the new in the cleanest possible way.  */
2656   replace_range (search_regs.start[sub], search_regs.end[sub],
2657                  newtext, 1, 0, 1);
2658   newpoint = search_regs.start[sub] + SCHARS (newtext);
2659
2660   if (case_action == all_caps)
2661     Fupcase_region (make_number (search_regs.start[sub]),
2662                     make_number (newpoint));
2663   else if (case_action == cap_initial)
2664     Fupcase_initials_region (make_number (search_regs.start[sub]),
2665                              make_number (newpoint));
2666
2667   /* Adjust search data for this change.  */
2668   {
2669     int oldend = search_regs.end[sub];
2670     int oldstart = search_regs.start[sub];
2671     int change = newpoint - search_regs.end[sub];
2672     int i;
2673
2674     for (i = 0; i < search_regs.num_regs; i++)
2675       {
2676         if (search_regs.start[i] >= oldend)
2677           search_regs.start[i] += change;
2678         else if (search_regs.start[i] > oldstart)
2679           search_regs.start[i] = oldstart;
2680         if (search_regs.end[i] >= oldend)
2681           search_regs.end[i] += change;
2682         else if (search_regs.end[i] > oldstart)
2683           search_regs.end[i] = oldstart;
2684       }
2685   }
2686
2687   /* Put point back where it was in the text.  */
2688   if (opoint <= 0)
2689     TEMP_SET_PT (opoint + ZV);
2690   else
2691     TEMP_SET_PT (opoint);
2692
2693   /* Now move point "officially" to the start of the inserted replacement.  */
2694   move_if_not_intangible (newpoint);
2695
2696   return Qnil;
2697 }
2698 \f
2699 static Lisp_Object
2700 match_limit (num, beginningp)
2701      Lisp_Object num;
2702      int beginningp;
2703 {
2704   register int n;
2705
2706   CHECK_NUMBER (num);
2707   n = XINT (num);
2708   if (n < 0)
2709     args_out_of_range (num, make_number (0));
2710   if (search_regs.num_regs <= 0)
2711     error ("No match data, because no search succeeded");
2712   if (n >= search_regs.num_regs
2713       || search_regs.start[n] < 0)
2714     return Qnil;
2715   return (make_number ((beginningp) ? search_regs.start[n]
2716                                     : search_regs.end[n]));
2717 }
2718
2719 DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
2720        doc: /* Return position of start of text matched by last search.
2721 SUBEXP, a number, specifies which parenthesized expression in the last
2722   regexp.
2723 Value is nil if SUBEXPth pair didn't match, or there were less than
2724   SUBEXP pairs.
2725 Zero means the entire text matched by the whole regexp or whole string.  */)
2726      (subexp)
2727      Lisp_Object subexp;
2728 {
2729   return match_limit (subexp, 1);
2730 }
2731
2732 DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
2733        doc: /* Return position of end of text matched by last search.
2734 SUBEXP, a number, specifies which parenthesized expression in the last
2735   regexp.
2736 Value is nil if SUBEXPth pair didn't match, or there were less than
2737   SUBEXP pairs.
2738 Zero means the entire text matched by the whole regexp or whole string.  */)
2739      (subexp)
2740      Lisp_Object subexp;
2741 {
2742   return match_limit (subexp, 0);
2743 }
2744
2745 DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 3, 0,
2746        doc: /* Return a list containing all info on what the last search matched.
2747 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2748 All the elements are markers or nil (nil if the Nth pair didn't match)
2749 if the last match was on a buffer; integers or nil if a string was matched.
2750 Use `store-match-data' to reinstate the data in this list.
2751
2752 If INTEGERS (the optional first argument) is non-nil, always use
2753 integers \(rather than markers) to represent buffer positions.  In
2754 this case, and if the last match was in a buffer, the buffer will get
2755 stored as one additional element at the end of the list.
2756
2757 If REUSE is a list, reuse it as part of the value.  If REUSE is long
2758 enough to hold all the values, and if INTEGERS is non-nil, no consing
2759 is done.
2760
2761 If optional third arg RESEAT is non-nil, any previous markers on the
2762 REUSE list will be modified to point to nowhere.
2763
2764 Return value is undefined if the last search failed.  */)
2765   (integers, reuse, reseat)
2766      Lisp_Object integers, reuse, reseat;
2767 {
2768   Lisp_Object tail, prev;
2769   Lisp_Object *data;
2770   int i, len;
2771
2772   if (!NILP (reseat))
2773     for (tail = reuse; CONSP (tail); tail = XCDR (tail))
2774       if (MARKERP (XCAR (tail)))
2775         {
2776           unchain_marker (XMARKER (XCAR (tail)));
2777           XSETCAR (tail, Qnil);
2778         }
2779
2780   if (NILP (last_thing_searched))
2781     return Qnil;
2782
2783   prev = Qnil;
2784
2785   data = (Lisp_Object *) alloca ((2 * search_regs.num_regs + 1)
2786                                  * sizeof (Lisp_Object));
2787
2788   len = 0;
2789   for (i = 0; i < search_regs.num_regs; i++)
2790     {
2791       int start = search_regs.start[i];
2792       if (start >= 0)
2793         {
2794           if (EQ (last_thing_searched, Qt)
2795               || ! NILP (integers))
2796             {
2797               XSETFASTINT (data[2 * i], start);
2798               XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
2799             }
2800           else if (BUFFERP (last_thing_searched))
2801             {
2802               data[2 * i] = Fmake_marker ();
2803               Fset_marker (data[2 * i],
2804                            make_number (start),
2805                            last_thing_searched);
2806               data[2 * i + 1] = Fmake_marker ();
2807               Fset_marker (data[2 * i + 1],
2808                            make_number (search_regs.end[i]),
2809                            last_thing_searched);
2810             }
2811           else
2812             /* last_thing_searched must always be Qt, a buffer, or Qnil.  */
2813             abort ();
2814
2815           len = 2 * i + 2;
2816         }
2817       else
2818         data[2 * i] = data[2 * i + 1] = Qnil;
2819     }
2820
2821   if (BUFFERP (last_thing_searched) && !NILP (integers))
2822     {
2823       data[len] = last_thing_searched;
2824       len++;
2825     }
2826
2827   /* If REUSE is not usable, cons up the values and return them.  */
2828   if (! CONSP (reuse))
2829     return Flist (len, data);
2830
2831   /* If REUSE is a list, store as many value elements as will fit
2832      into the elements of REUSE.  */
2833   for (i = 0, tail = reuse; CONSP (tail);
2834        i++, tail = XCDR (tail))
2835     {
2836       if (i < len)
2837         XSETCAR (tail, data[i]);
2838       else
2839         XSETCAR (tail, Qnil);
2840       prev = tail;
2841     }
2842
2843   /* If we couldn't fit all value elements into REUSE,
2844      cons up the rest of them and add them to the end of REUSE.  */
2845   if (i < len)
2846     XSETCDR (prev, Flist (len - i, data + i));
2847
2848   return reuse;
2849 }
2850
2851 /* Internal usage only:
2852    If RESEAT is `evaporate', put the markers back on the free list
2853    immediately.  No other references to the markers must exist in this case,
2854    so it is used only internally on the unwind stack and save-match-data from
2855    Lisp.  */
2856
2857 DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 2, 0,
2858        doc: /* Set internal data on last search match from elements of LIST.
2859 LIST should have been created by calling `match-data' previously.
2860
2861 If optional arg RESEAT is non-nil, make markers on LIST point nowhere.  */)
2862     (list, reseat)
2863      register Lisp_Object list, reseat;
2864 {
2865   register int i;
2866   register Lisp_Object marker;
2867
2868   if (running_asynch_code)
2869     save_search_regs ();
2870
2871   CHECK_LIST (list);
2872
2873   /* Unless we find a marker with a buffer or an explicit buffer
2874      in LIST, assume that this match data came from a string.  */
2875   last_thing_searched = Qt;
2876
2877   /* Allocate registers if they don't already exist.  */
2878   {
2879     int length = XFASTINT (Flength (list)) / 2;
2880
2881     if (length > search_regs.num_regs)
2882       {
2883         if (search_regs.num_regs == 0)
2884           {
2885             search_regs.start
2886               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2887             search_regs.end
2888               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2889           }
2890         else
2891           {
2892             search_regs.start
2893               = (regoff_t *) xrealloc (search_regs.start,
2894                                        length * sizeof (regoff_t));
2895             search_regs.end
2896               = (regoff_t *) xrealloc (search_regs.end,
2897                                        length * sizeof (regoff_t));
2898           }
2899
2900         for (i = search_regs.num_regs; i < length; i++)
2901           search_regs.start[i] = -1;
2902
2903         search_regs.num_regs = length;
2904       }
2905
2906     for (i = 0; CONSP (list); i++)
2907       {
2908         marker = XCAR (list);
2909         if (BUFFERP (marker))
2910           {
2911             last_thing_searched = marker;
2912             break;
2913           }
2914         if (i >= length)
2915           break;
2916         if (NILP (marker))
2917           {
2918             search_regs.start[i] = -1;
2919             list = XCDR (list);
2920           }
2921         else
2922           {
2923             int from;
2924             Lisp_Object m;
2925
2926             m = marker;
2927             if (MARKERP (marker))
2928               {
2929                 if (XMARKER (marker)->buffer == 0)
2930                   XSETFASTINT (marker, 0);
2931                 else
2932                   XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
2933               }
2934
2935             CHECK_NUMBER_COERCE_MARKER (marker);
2936             from = XINT (marker);
2937
2938             if (!NILP (reseat) && MARKERP (m))
2939               {
2940                 if (EQ (reseat, Qevaporate))
2941                   free_marker (m);
2942                 else
2943                   unchain_marker (XMARKER (m));
2944                 XSETCAR (list, Qnil);
2945               }
2946
2947             if ((list = XCDR (list), !CONSP (list)))
2948               break;
2949
2950             m = marker = XCAR (list);
2951
2952             if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
2953               XSETFASTINT (marker, 0);
2954
2955             CHECK_NUMBER_COERCE_MARKER (marker);
2956             search_regs.start[i] = from;
2957             search_regs.end[i] = XINT (marker);
2958
2959             if (!NILP (reseat) && MARKERP (m))
2960               {
2961                 if (EQ (reseat, Qevaporate))
2962                   free_marker (m);
2963                 else
2964                   unchain_marker (XMARKER (m));
2965                 XSETCAR (list, Qnil);
2966               }
2967           }
2968         list = XCDR (list);
2969       }
2970
2971     for (; i < search_regs.num_regs; i++)
2972       search_regs.start[i] = -1;
2973   }
2974
2975   return Qnil;
2976 }
2977
2978 /* If non-zero the match data have been saved in saved_search_regs
2979    during the execution of a sentinel or filter. */
2980 static int search_regs_saved;
2981 static struct re_registers saved_search_regs;
2982 static Lisp_Object saved_last_thing_searched;
2983
2984 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
2985    if asynchronous code (filter or sentinel) is running. */
2986 static void
2987 save_search_regs ()
2988 {
2989   if (!search_regs_saved)
2990     {
2991       saved_search_regs.num_regs = search_regs.num_regs;
2992       saved_search_regs.start = search_regs.start;
2993       saved_search_regs.end = search_regs.end;
2994       saved_last_thing_searched = last_thing_searched;
2995       last_thing_searched = Qnil;
2996       search_regs.num_regs = 0;
2997       search_regs.start = 0;
2998       search_regs.end = 0;
2999
3000       search_regs_saved = 1;
3001     }
3002 }
3003
3004 /* Called upon exit from filters and sentinels. */
3005 void
3006 restore_search_regs ()
3007 {
3008   if (search_regs_saved)
3009     {
3010       if (search_regs.num_regs > 0)
3011         {
3012           xfree (search_regs.start);
3013           xfree (search_regs.end);
3014         }
3015       search_regs.num_regs = saved_search_regs.num_regs;
3016       search_regs.start = saved_search_regs.start;
3017       search_regs.end = saved_search_regs.end;
3018       last_thing_searched = saved_last_thing_searched;
3019       saved_last_thing_searched = Qnil;
3020       search_regs_saved = 0;
3021     }
3022 }
3023
3024 static Lisp_Object
3025 unwind_set_match_data (list)
3026      Lisp_Object list;
3027 {
3028   /* It is safe to free (evaporate) the markers immediately.  */
3029   return Fset_match_data (list, Qevaporate);
3030 }
3031
3032 /* Called to unwind protect the match data.  */
3033 void
3034 record_unwind_save_match_data ()
3035 {
3036   record_unwind_protect (unwind_set_match_data,
3037                          Fmatch_data (Qnil, Qnil, Qnil));
3038 }
3039
3040 /* Quote a string to inactivate reg-expr chars */
3041
3042 DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
3043        doc: /* Return a regexp string which matches exactly STRING and nothing else.  */)
3044      (string)
3045      Lisp_Object string;
3046 {
3047   register unsigned char *in, *out, *end;
3048   register unsigned char *temp;
3049   int backslashes_added = 0;
3050
3051   CHECK_STRING (string);
3052
3053   temp = (unsigned char *) alloca (SBYTES (string) * 2);
3054
3055   /* Now copy the data into the new string, inserting escapes. */
3056
3057   in = SDATA (string);
3058   end = in + SBYTES (string);
3059   out = temp;
3060
3061   for (; in != end; in++)
3062     {
3063       if (*in == '['
3064           || *in == '*' || *in == '.' || *in == '\\'
3065           || *in == '?' || *in == '+'
3066           || *in == '^' || *in == '$')
3067         *out++ = '\\', backslashes_added++;
3068       *out++ = *in;
3069     }
3070
3071   return make_specified_string (temp,
3072                                 SCHARS (string) + backslashes_added,
3073                                 out - temp,
3074                                 STRING_MULTIBYTE (string));
3075 }
3076 \f
3077 void
3078 syms_of_search ()
3079 {
3080   register int i;
3081
3082   for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
3083     {
3084       searchbufs[i].buf.allocated = 100;
3085       searchbufs[i].buf.buffer = (unsigned char *) xmalloc (100);
3086       searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
3087       searchbufs[i].regexp = Qnil;
3088       searchbufs[i].whitespace_regexp = Qnil;
3089       staticpro (&searchbufs[i].regexp);
3090       staticpro (&searchbufs[i].whitespace_regexp);
3091       searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
3092     }
3093   searchbuf_head = &searchbufs[0];
3094
3095   Qsearch_failed = intern ("search-failed");
3096   staticpro (&Qsearch_failed);
3097   Qinvalid_regexp = intern ("invalid-regexp");
3098   staticpro (&Qinvalid_regexp);
3099
3100   Fput (Qsearch_failed, Qerror_conditions,
3101         Fcons (Qsearch_failed, Fcons (Qerror, Qnil)));
3102   Fput (Qsearch_failed, Qerror_message,
3103         build_string ("Search failed"));
3104
3105   Fput (Qinvalid_regexp, Qerror_conditions,
3106         Fcons (Qinvalid_regexp, Fcons (Qerror, Qnil)));
3107   Fput (Qinvalid_regexp, Qerror_message,
3108         build_string ("Invalid regexp"));
3109
3110   last_thing_searched = Qnil;
3111   staticpro (&last_thing_searched);
3112
3113   saved_last_thing_searched = Qnil;
3114   staticpro (&saved_last_thing_searched);
3115
3116   DEFVAR_LISP ("search-spaces-regexp", &Vsearch_spaces_regexp,
3117       doc: /* Regexp to substitute for bunches of spaces in regexp search.
3118 Some commands use this for user-specified regexps.
3119 Spaces that occur inside character classes or repetition operators
3120 or other such regexp constructs are not replaced with this.
3121 A value of nil (which is the normal value) means treat spaces literally.  */);
3122   Vsearch_spaces_regexp = Qnil;
3123
3124   defsubr (&Slooking_at);
3125   defsubr (&Sposix_looking_at);
3126   defsubr (&Sstring_match);
3127   defsubr (&Sposix_string_match);
3128   defsubr (&Ssearch_forward);
3129   defsubr (&Ssearch_backward);
3130   defsubr (&Sword_search_forward);
3131   defsubr (&Sword_search_backward);
3132   defsubr (&Sre_search_forward);
3133   defsubr (&Sre_search_backward);
3134   defsubr (&Sposix_search_forward);
3135   defsubr (&Sposix_search_backward);
3136   defsubr (&Sreplace_match);
3137   defsubr (&Smatch_beginning);
3138   defsubr (&Smatch_end);
3139   defsubr (&Smatch_data);
3140   defsubr (&Sset_match_data);
3141   defsubr (&Sregexp_quote);
3142 }
3143
3144 /* arch-tag: a6059d79-0552-4f14-a2cb-d379a4e3c78f
3145    (do not change this comment) */