src/search.c

   1 /* String search routines for GNU Emacs.
   2    Copyright (C) 1985, 1986, 1987, 1993, 1994, 1997, 1998, 1999, 2001, 2002,
   3                  2003, 2004, 2005, 2006, 2007  Free Software Foundation, Inc.
   4
   5 This file is part of GNU Emacs.
   6
   7 GNU Emacs is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 3, or (at your option)
  10 any later version.
  11
  12 GNU Emacs is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU Emacs; see the file COPYING.  If not, write to
  19 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
  20 Boston, MA 02110-1301, USA.  */
  21
  22
  23 #include <config.h>
  24 #include "lisp.h"
  25 #include "syntax.h"
  26 #include "category.h"
  27 #include "buffer.h"
  28 #include "charset.h"
  29 #include "region-cache.h"
  30 #include "commands.h"
  31 #include "blockinput.h"
  32 #include "intervals.h"
  33
  34 #include <sys/types.h>
  35 #include "regex.h"
  36
  37 #define REGEXP_CACHE_SIZE 20
     /* REGEXP_CACHE_SIZE is the number of entries in the `searchbufs'
        array declared below.  */
  38
  39 /* One entry of the compiled-regexp cache.  If the regexp is non-nil,
  40    then the buffer contains the compiled form of that regexp,
        suitable for searching.  */
  41 struct regexp_cache
  42 {
       /* Next entry on the list that starts at searchbuf_head, or 0 for
          the last entry.  */
  43   struct regexp_cache *next;
       /* REGEXP is the pattern this entry was compiled from (nil if the
          entry holds nothing usable).  WHITESPACE_REGEXP is the value
          Vsearch_spaces_regexp had when the pattern was compiled.  */
  44   Lisp_Object regexp, whitespace_regexp;
  45   /* Syntax table for which the regexp applies.  We need this because
  46      of character classes.  If this is t, then the compiled pattern is valid
  47      for any syntax-table.  */
  48   Lisp_Object syntax_table;
       /* The compiled pattern.  */
  49   struct re_pattern_buffer buf;
       /* 0400 is octal for 256.  NOTE(review): presumably the fastmap
          storage used by `buf'; the code that connects the two is not in
          this part of the file -- confirm.  */
  50   char fastmap[0400];
  51   /* Nonzero means regexp was compiled to do full POSIX backtracking.  */
  52   char posix;
  53 };
  54
  55 /* The instances of that struct: the storage for every cache entry.  */
  56 struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
  57
  58 /* The head of the linked list; points to the most recently used buffer.
        compile_pattern moves the entry it returns to the front.  */
  59 struct regexp_cache *searchbuf_head;
  60
  61
  62 /* Every call to re_match, etc., must pass &search_regs as the regs
  63    argument unless you can show it is unnecessary (i.e., if re_match
  64    is certainly going to be called again before region-around-match
  65    can be called).
  66
  67    Since the registers are now dynamically allocated, we need to make
  68    sure not to refer to the Nth register before checking that it has
  69    been allocated by checking search_regs.num_regs.
  70
  71    The regex code keeps track of whether it has allocated the search
  72    buffer using bits in the re_pattern_buffer.  This means that whenever
  73    you compile a new pattern, it completely forgets whether it has
  74    allocated any registers, and will allocate new registers the next
  75    time you call a searching or matching function.  Therefore, we need
  76    to call re_set_registers after compiling a new pattern or after
  77    setting the match registers, so that the regex functions will be
  78    able to free or re-allocate it properly.  */
  79 static struct re_registers search_regs;
  80
  81 /* The buffer in which the last search was performed, or
  82    Qt if the last search was done in a string;
  83    Qnil if no searching has been done yet.
        looking_at_1 and string_match_1 update this only when
        Vinhibit_changing_match_data is nil.  */
  84 static Lisp_Object last_thing_searched;
  85
  86 /* Error condition signaled when compiling a regexp fails
        (see compile_pattern_1).  */
  87
  88 Lisp_Object Qinvalid_regexp;
  89
  90 /* Error condition used for failing searches.  */
  91 Lisp_Object Qsearch_failed;
  92
     /* If non-nil, a string that compile_pattern_1 hands to
        re_set_whitespace_regexp while a pattern is being compiled.  The
        value in effect is stored in each cache entry, and compile_pattern
        only reuses an entry whose stored value is `equal' to the
        current one.  */
  93 Lisp_Object Vsearch_spaces_regexp;
  94
  95 /* If non-nil, the match data will not be changed during call to
  96    searching or matching functions.  This variable is for internal use
  97    only.  */
  98 Lisp_Object Vinhibit_changing_match_data;
  99
     /* Forward declarations.  matcher_overflow is defined just below; the
        other functions are not defined in this part of the file.  */
 100 static void set_search_regs ();
 101 static void save_search_regs ();
 102 static int simple_search ();
 103 static int boyer_moore ();
 104 static int search_buffer ();
 105 static void matcher_overflow () NO_RETURN;
 106
 107 static void
 108 matcher_overflow ()
 109 {
       /* Signal a Lisp error.  looking_at_1 and string_match_1 call this
          when the regex matcher returns -2.  The prototype above marks
          this function NO_RETURN.  */
 110   error ("Stack overflow in regexp matcher");
 111 }
 112
 113 /* Compile a regexp and signal a Lisp error if anything goes wrong.
 114    PATTERN is the pattern to compile.
 115    CP is the place to put the result.
 116    TRANSLATE is a translation table for ignoring case, or nil for none.
 117    REGP is the structure that says where to store the "register"
 118    values that will result from matching this pattern.
 119    If it is 0, we should compile the pattern not to record any
 120    subexpression bounds.
        (REGP is accepted but not referenced in the body of this function.)
 121    POSIX is nonzero if we want full backtracking (POSIX style)
 122    for this pattern.  0 means backtrack only enough to get a valid match.
 123    MULTIBYTE is nonzero if we want to handle multibyte characters in
 124    PATTERN.  0 means all multibyte characters are recognized just as
 125    sequences of binary data.
 126
 127    The behavior also depends on Vsearch_spaces_regexp.
        On failure, Qinvalid_regexp is signaled with the message returned
        by re_compile_pattern and CP->regexp is left nil.  On success,
        CP->regexp is set to a copy of PATTERN.  */
 128
 129 static void
 130 compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte)
 131      struct regexp_cache *cp;
 132      Lisp_Object pattern;
 133      Lisp_Object translate;
 134      struct re_registers *regp;
 135      int posix;
 136      int multibyte;
 137 {
 138   unsigned char *raw_pattern;
 139   int raw_pattern_size;
 140   char *val;
 141   reg_syntax_t old;
 142
 143   /* MULTIBYTE says whether the text to be searched is multibyte.
 144      We must convert PATTERN to match that, or we will not really
 145      find things right.  */
 146
 147   if (multibyte == STRING_MULTIBYTE (pattern))
 148     {
           /* Representations already agree: use PATTERN's own bytes.  */
 149       raw_pattern = (unsigned char *) SDATA (pattern);
 150       raw_pattern_size = SBYTES (pattern);
 151     }
 152   else if (multibyte)
 153     {
           /* The text is multibyte but PATTERN is not: convert PATTERN
              into a buffer obtained from alloca.  */
 154       raw_pattern_size = count_size_as_multibyte (SDATA (pattern),
 155                                                   SCHARS (pattern));
 156       raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
 157       copy_text (SDATA (pattern), raw_pattern,
 158                  SCHARS (pattern), 0, 1);
 159     }
 160   else
 161     {
 162       /* Converting multibyte to single-byte.
 163
 164          ??? Perhaps this conversion should be done in a special way
 165          by subtracting nonascii-insert-offset from each non-ASCII char,
 166          so that only the multibyte chars which really correspond to
 167          the chosen single-byte character set can possibly match.  */
 168       raw_pattern_size = SCHARS (pattern);
 169       raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
 170       copy_text (SDATA (pattern), raw_pattern,
 171                  SBYTES (pattern), 1, 0);
 172     }
 173
       /* Mark the entry as empty while it is being rebuilt.  It becomes
          valid again only at the last statement of this function, so an
          error signaled below leaves the entry unused.  */
 174   cp->regexp = Qnil;
 175   cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
 176   cp->posix = posix;
 177   cp->buf.multibyte = multibyte;
 178   cp->whitespace_regexp = Vsearch_spaces_regexp;
 179   /* rms: I think BLOCK_INPUT is not needed here any more,
 180      because regex.c defines malloc to call xmalloc.
 181      Using BLOCK_INPUT here means the debugger won't run if an error occurs.
 182      So let's turn it off.  */
 183   /*  BLOCK_INPUT;  */
       /* Install the Emacs regexp syntax for this compilation; the
          previous syntax is saved in OLD and restored below.  */
 184   old = re_set_syntax (RE_SYNTAX_EMACS
 185                        | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
 186
 187   re_set_whitespace_regexp (NILP (Vsearch_spaces_regexp) ? NULL
 188                             : SDATA (Vsearch_spaces_regexp));
 189
 190   val = (char *) re_compile_pattern ((char *)raw_pattern,
 191                                      raw_pattern_size, &cp->buf);
 192
 193   /* If the compiled pattern hard codes some of the contents of the
 194      syntax-table, it can only be reused with *this* syntax table.  */
 195   cp->syntax_table = cp->buf.used_syntax ? current_buffer->syntax_table : Qt;
 196
 197   re_set_whitespace_regexp (NULL);
 198
 199   re_set_syntax (old);
 200   /* UNBLOCK_INPUT;  */
       /* A non-null VAL is the error message from re_compile_pattern.  */
 201   if (val)
 202     xsignal1 (Qinvalid_regexp, build_string (val));
 203
 204   cp->regexp = Fcopy_sequence (pattern);
 205 }
 206
 207 /* Give back the unused tail of every compiled pattern buffer on the
 208    cache list by reallocating each one to exactly the number of bytes
 209    it currently uses.  Garbage collection calls this.  */
 210
 211 void
 212 shrink_regexp_cache ()
 213 {
 214   struct regexp_cache *entry = searchbuf_head;
 215   while (entry != 0)
 216     {
 217       struct re_pattern_buffer *pat = &entry->buf;
 218       pat->allocated = pat->used;
 219       pat->buffer = (unsigned char *) xrealloc (pat->buffer, pat->used);
 220       entry = entry->next;
 221     }
 222 }
 223
 224 /* Invalidate every cached pattern whose compiled form depends on a
 225    syntax table; call this after a syntax table has been modified.
 226    Entries are only marked empty (regexp set to nil).  Nothing leaks,
 227    because re_compile_pattern manages the memory of each
 228    re_pattern_buffer through its `allocated' and `buffer' values.  */
 229 void
 230 clear_regexp_cache ()
 231 {
 232   struct regexp_cache *slot = searchbufs;
 233   struct regexp_cache *const end = searchbufs + REGEXP_CACHE_SIZE;
 234   /* Checking only for the table that actually changed would be tempting
 235      but insufficient: char-table inheritance means that modifying one
 236      syntax table can change others at the same time.  */
 237   for (; slot < end; slot++)
 238     if (! EQ (slot->syntax_table, Qt))
 239       slot->regexp = Qnil;
 240 }
 241
 242 /* Compile a regexp if necessary, but first check to see if there's one in
 243    the cache.
 244    PATTERN is the pattern to compile.
 245    TRANSLATE is a translation table for ignoring case, or nil for none.
 246    REGP is the structure that says where to store the "register"
 247    values that will result from matching this pattern.
 248    If it is 0, we should compile the pattern not to record any
 249    subexpression bounds.
 250    POSIX is nonzero if we want full backtracking (POSIX style)
 251    for this pattern.  0 means backtrack only enough to get a valid match.
        MULTIBYTE is nonzero if the text to be searched is multibyte; it is
        passed on to compile_pattern_1 and is one of the things a cached
        entry must agree on to be reused.
        Return a pointer to the compiled pattern, which lives inside a
        cache entry; that entry is moved to the front of the cache list.  */
 252
 253 struct re_pattern_buffer *
 254 compile_pattern (pattern, regp, translate, posix, multibyte)
 255      Lisp_Object pattern;
 256      struct re_registers *regp;
 257      Lisp_Object translate;
 258      int posix, multibyte;
 259 {
 260   struct regexp_cache *cp, **cpp;
 261
       /* CPP always points at the link that leads to CP, so that CP can
          be unlinked after the loop.  */
 262   for (cpp = &searchbuf_head; ; cpp = &cp->next)
 263     {
 264       cp = *cpp;
 265       /* Entries are initialized to nil, and may be set to nil by
 266          compile_pattern_1 if the pattern isn't valid.  Don't apply
 267          string accessors in those cases.  However, compile_pattern_1
 268          is only applied to the cache entry we pick here to reuse.  So
 269          nil should never appear before a non-nil entry.
              NOTE(review): clear_regexp_cache can also set the regexp of
              any entry to nil, so a nil entry may in fact precede non-nil
              ones; the code below then simply compiles into that entry.  */
 270       if (NILP (cp->regexp))
 271         goto compile_it;
           /* A hit needs the same pattern text and the same settings the
              entry was compiled with: translation table, POSIX flag,
              multibyteness, syntax table (unless the entry is valid for
              any table) and whitespace regexp.  */
 272       if (SCHARS (cp->regexp) == SCHARS (pattern)
 273           && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
 274           && !NILP (Fstring_equal (cp->regexp, pattern))
 275           && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
 276           && cp->posix == posix
 277           && cp->buf.multibyte == multibyte
 278           && (EQ (cp->syntax_table, Qt)
 279               || EQ (cp->syntax_table, current_buffer->syntax_table))
 280           && !NILP (Fequal (cp->whitespace_regexp, Vsearch_spaces_regexp)))
 281         break;
 282
 283       /* If we're at the end of the cache, compile into the nil cell
 284          we found, or the last (least recently used) cell with a
 285          string value.  */
 286       if (cp->next == 0)
 287         {
 288         compile_it:
 289           compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte);
 290           break;
 291         }
 292     }
 293
 294   /* When we get here, cp (aka *cpp) contains the compiled pattern,
 295      either because we found it in the cache or because we just compiled it.
 296      Move it to the front of the queue to mark it as most recently used.  */
 297   *cpp = cp->next;
 298   cp->next = searchbuf_head;
 299   searchbuf_head = cp;
 300
 301   /* Advise the searching functions about the space we have allocated
 302      for register data.  */
 303   if (regp)
 304     re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
 305
 306   return &cp->buf;
 307 }
 308
 309 \f
 310 static Lisp_Object
 311 looking_at_1 (string, posix)
 312      Lisp_Object string;
 313      int posix;
 314 {
       /* Return t if the text after point in the current buffer matches
          the regexp STRING, and nil otherwise.  POSIX is passed on to
          compile_pattern.  Unless Vinhibit_changing_match_data is non-nil,
          the registers of a successful match are stored in search_regs as
          character positions, and last_thing_searched is set to the
          current buffer whether or not the match succeeded.  */
 315   Lisp_Object val;
 316   unsigned char *p1, *p2;
 317   int s1, s2;
 318   register int i;
 319   struct re_pattern_buffer *bufp;
 320
 321   if (running_asynch_code)
 322     save_search_regs ();
 323
 324   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
 325   XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
 326     = current_buffer->case_eqv_table;
 327
 328   CHECK_STRING (string);
 329   bufp = compile_pattern (string,
 330                           (NILP (Vinhibit_changing_match_data)
 331                            ? &search_regs : NULL),
 332                           (!NILP (current_buffer->case_fold_search)
 333                            ? current_buffer->case_canon_table : Qnil),
 334                           posix,
 335                           !NILP (current_buffer->enable_multibyte_characters));
 336
 337   immediate_quit = 1;
 338   QUIT;                 /* Do a pending quit right away, to avoid paradoxical behavior */
 339
 340   /* Get pointers and sizes of the two strings
 341      that make up the visible portion of the buffer. */
 342
 343   p1 = BEGV_ADDR;
 344   s1 = GPT_BYTE - BEGV_BYTE;
 345   p2 = GAP_END_ADDR;
 346   s2 = ZV_BYTE - GPT_BYTE;
       /* GPT_BYTE is before BEGV_BYTE: the whole visible text is handed
          over as the second string and the first one is empty.  */
 347   if (s1 < 0)
 348     {
 349       p2 = p1;
 350       s2 = ZV_BYTE - BEGV_BYTE;
 351       s1 = 0;
 352     }
       /* GPT_BYTE is after ZV_BYTE: the whole visible text is the first
          string and the second one is empty.  */
 353   if (s2 < 0)
 354     {
 355       s1 = ZV_BYTE - BEGV_BYTE;
 356       s2 = 0;
 357     }
 358
 359   re_match_object = Qnil;
 360
       /* Positions given to and returned by re_match_2 are byte offsets
          relative to BEGV_BYTE.  */
 361   i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
 362                   PT_BYTE - BEGV_BYTE,
 363                   (NILP (Vinhibit_changing_match_data)
 364                    ? &search_regs : NULL),
 365                   ZV_BYTE - BEGV_BYTE);
 366   immediate_quit = 0;
 367
 368   if (i == -2)
 369     matcher_overflow ();
 370
 371   val = (0 <= i ? Qt : Qnil);
       /* Convert the recorded registers from relative byte offsets to
          character positions.  Registers with a negative start are left
          as they are.  */
 372   if (NILP (Vinhibit_changing_match_data) && i >= 0)
 373     for (i = 0; i < search_regs.num_regs; i++)
 374       if (search_regs.start[i] >= 0)
 375         {
 376           search_regs.start[i]
 377             = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
 378           search_regs.end[i]
 379             = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
 380         }
 381
 382   /* Set last_thing_searched only when match data is changed.  */
 383   if (NILP (Vinhibit_changing_match_data))
 384     XSETBUFFER (last_thing_searched, current_buffer);
 385
 386   return val;
 387 }
 388
 389 DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
 390        doc: /* Return t if text after point matches regular expression REGEXP.
 391 This function modifies the match data that `match-beginning',
 392 `match-end' and `match-data' access; save and restore the match
 393 data if you want to preserve them.  */)
 394      (regexp)
 395      Lisp_Object regexp;
 396 {
       /* The second argument is looking_at_1's POSIX flag; 0 means no
          full POSIX backtracking.  */
 397   return looking_at_1 (regexp, 0);
 398 }
 399
 400 DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
 401        doc: /* Return t if text after point matches regular expression REGEXP.
 402 Find the longest match, in accord with Posix regular expression rules.
 403 This function modifies the match data that `match-beginning',
 404 `match-end' and `match-data' access; save and restore the match
 405 data if you want to preserve them.  */)
 406      (regexp)
 407      Lisp_Object regexp;
 408 {
       /* The second argument is looking_at_1's POSIX flag; 1 requests
          full POSIX backtracking.  */
 409   return looking_at_1 (regexp, 1);
 410 }
 411 \f
 412 static Lisp_Object
 413 string_match_1 (regexp, string, start, posix)
 414      Lisp_Object regexp, string, start;
 415      int posix;
 416 {
       /* Search STRING for REGEXP, beginning at character index START
          (nil means 0; a negative START counts back from the end of
          STRING).  Return the character index of the start of the match
          as a Lisp integer, or nil if there is no match.  POSIX is passed
          on to compile_pattern.  Unless Vinhibit_changing_match_data is
          non-nil, last_thing_searched is set to t and, on success, the
          registers in search_regs are converted to character indices.  */
 417   int val;
 418   struct re_pattern_buffer *bufp;
 419   int pos, pos_byte;
 420   int i;
 421
 422   if (running_asynch_code)
 423     save_search_regs ();
 424
 425   CHECK_STRING (regexp);
 426   CHECK_STRING (string);
 427
 428   if (NILP (start))
 429     pos = 0, pos_byte = 0;
 430   else
 431     {
 432       int len = SCHARS (string);
 433
 434       CHECK_NUMBER (start);
           /* NOTE(review): the value of XINT is stored in an int; if Lisp
              integers can be wider than int on some platform, it would be
              narrowed before the range check below -- confirm.  */
 435       pos = XINT (start);
           /* A negative START within -LEN..-1 is an offset from the end;
              anything else outside 0..LEN is an error.  */
 436       if (pos < 0 && -pos <= len)
 437         pos = len + pos;
 438       else if (0 > pos || pos > len)
 439         args_out_of_range (string, start);
 440       pos_byte = string_char_to_byte (string, pos);
 441     }
 442
 443   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
 444   XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
 445     = current_buffer->case_eqv_table;
 446
 447   bufp = compile_pattern (regexp,
 448                           (NILP (Vinhibit_changing_match_data)
 449                            ? &search_regs : NULL),
 450                           (!NILP (current_buffer->case_fold_search)
 451                            ? current_buffer->case_canon_table : Qnil),
 452                           posix,
 453                           STRING_MULTIBYTE (string));
 454   immediate_quit = 1;
 455   re_match_object = string;
 456
       /* Search from POS_BYTE through the end of STRING; all positions
          exchanged with re_search are byte positions.  */
 457   val = re_search (bufp, (char *) SDATA (string),
 458                    SBYTES (string), pos_byte,
 459                    SBYTES (string) - pos_byte,
 460                    (NILP (Vinhibit_changing_match_data)
 461                     ? &search_regs : NULL));
 462   immediate_quit = 0;
 463
 464   /* Set last_thing_searched only when match data is changed.  */
 465   if (NILP (Vinhibit_changing_match_data))
 466     last_thing_searched = Qt;
 467
 468   if (val == -2)
 469     matcher_overflow ();
 470   if (val < 0) return Qnil;
 471
       /* Convert the recorded registers from byte to character indices.  */
 472   if (NILP (Vinhibit_changing_match_data))
 473     for (i = 0; i < search_regs.num_regs; i++)
 474       if (search_regs.start[i] >= 0)
 475         {
 476           search_regs.start[i]
 477             = string_byte_to_char (string, search_regs.start[i]);
 478           search_regs.end[i]
 479             = string_byte_to_char (string, search_regs.end[i]);
 480         }
 481
 482   return make_number (string_byte_to_char (string, val));
 483 }
 484
 485 DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
 486        doc: /* Return index of start of first match for REGEXP in STRING, or nil.
 487 Matching ignores case if `case-fold-search' is non-nil.
 488 If third arg START is non-nil, start search at that index in STRING.
 489 For index of first char beyond the match, do (match-end 0).
 490 `match-end' and `match-beginning' also give indices of substrings
 491 matched by parenthesis constructs in the pattern.
 492
 493 You can use the function `match-string' to extract the substrings
 494 matched by the parenthesis constructions in REGEXP. */)
 495      (regexp, string, start)
 496      Lisp_Object regexp, string, start;
 497 {
       /* The last argument is string_match_1's POSIX flag; 0 means no
          full POSIX backtracking.  */
 498   return string_match_1 (regexp, string, start, 0);
 499 }
 500
 501 DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
 502        doc: /* Return index of start of first match for REGEXP in STRING, or nil.
 503 Find the longest match, in accord with Posix regular expression rules.
 504 Case is ignored if `case-fold-search' is non-nil in the current buffer.
 505 If third arg START is non-nil, start search at that index in STRING.
 506 For index of first char beyond the match, do (match-end 0).
 507 `match-end' and `match-beginning' also give indices of substrings
 508 matched by parenthesis constructs in the pattern.  */)
 509      (regexp, string, start)
 510      Lisp_Object regexp, string, start;
 511 {
       /* The last argument is string_match_1's POSIX flag; 1 requests
          full POSIX backtracking.  */
 512   return string_match_1 (regexp, string, start, 1);
 513 }
 514
 515 /* Search all of STRING for the first match of REGEXP, without any
 516    case translation.  Return what re_search returns: the index of the
 517    match, or a negative value on failure.  No registers are passed to
 518    the matcher, so the match data is not clobbered.  */
 519 int
 520 fast_string_match (regexp, string)
 521      Lisp_Object regexp;
 522      Lisp_Object string;
 523 {
 524   struct re_pattern_buffer *compiled;
 525   int result;
 526
 527   compiled = compile_pattern (regexp, 0, Qnil, 0, STRING_MULTIBYTE (string));
 528
 529   immediate_quit = 1;
 530   re_match_object = string;
 531   result = re_search (compiled, (char *) SDATA (string), SBYTES (string),
 532                       0, SBYTES (string), 0);
 533   immediate_quit = 0;
 534
 535   return result;
 536 }
 537
 538 /* Search the C string STRING, assumed to hold single-byte characters,
 539    for the first match of REGEXP, ignoring case via Vascii_canon_table.
 540    REGEXP is converted to unibyte before it is compiled.  Return what
 541    re_search returns: the index of the match, or a negative value on
 542    failure.  The match data is not clobbered.  */
 543 extern Lisp_Object Vascii_downcase_table;
 544
 545 int
 546 fast_c_string_match_ignore_case (regexp, string)
 547      Lisp_Object regexp;
 548      const char *string;
 549 {
 550   struct re_pattern_buffer *compiled;
 551   int nbytes = strlen (string);
 552   int result;
 553
 554   regexp = string_make_unibyte (regexp);
 555   re_match_object = Qt;
 556   compiled = compile_pattern (regexp, 0, Vascii_canon_table, 0, 0);
 557
 558   immediate_quit = 1;
 559   result = re_search (compiled, string, nbytes, 0, nbytes, 0);
 560   immediate_quit = 0;
 561
 562   return result;
 563 }
 564
 565 /* Same as fast_string_match, except that case is ignored by compiling
 566    REGEXP with Vascii_canon_table as the translation table.  */
 567 int
 568 fast_string_match_ignore_case (regexp, string)
 569      Lisp_Object regexp;
 570      Lisp_Object string;
 571 {
 572   struct re_pattern_buffer *compiled;
 573   int result;
 574
 575   compiled = compile_pattern (regexp, 0, Vascii_canon_table,
 576                               0, STRING_MULTIBYTE (string));
 577   immediate_quit = 1;
 578   re_match_object = string;
 579   result = re_search (compiled, (char *) SDATA (string), SBYTES (string),
 580                       0, SBYTES (string), 0);
 581   immediate_quit = 0;
 582
 583   return result;
 584 }
 585 \f
 586 /* The newline cache: remembering which sections of text have no newlines.  */
 587
 588 /* Bring BUF's newline cache in line with its `cache_long_line_scans'
 589    setting: create the cache if the setting is non-nil and no cache
 590    exists yet, free it if the setting is nil and a cache exists.
 591    Polling like this stands in for a hook on the buffer-local variable.  */
 592 static void
 593 newline_cache_on_off (buf)
 594      struct buffer *buf;
 595 {
 596   int wanted = ! NILP (buf->cache_long_line_scans);
 597   int present = buf->newline_cache != 0;
 598
 599   /* Already in the requested state; nothing to do.  */
 600   if (wanted == present)
 601     return;
 602
 603   if (wanted)
 604     buf->newline_cache = new_region_cache ();
 605   else
 606     {
 607       /* Caching was switched off: release the cache and forget it.  */
 608       free_region_cache (buf->newline_cache);
 609       buf->newline_cache = 0;
 610     }
 611 }
 612
 613 \f
 614 /* Search for COUNT instances of the character TARGET between START and END.
 615
 616    If COUNT is positive, search forwards; END must be >= START.
 617    If COUNT is negative, search backwards for the -COUNTth instance;
 618       END must be <= START.
 619    If COUNT is zero, do anything you please; run rogue, for all I care.
 620
 621    If END is zero, use BEGV or ZV instead, as appropriate for the
 622    direction indicated by COUNT.
 623
 624    If we find COUNT instances, set *SHORTAGE to zero, and return the
 625    position past the COUNTth match.  Note that for reverse motion
 626    this is not the same as the usual convention for Emacs motion commands.
 627
 628    If we don't find COUNT instances before reaching END, set *SHORTAGE
 629    to the number of TARGETs left unfound, and return END.
 630
 631    If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
 632    except when inside redisplay.
        START, END and the return value are character positions; the
        scanning itself works on byte positions.  SHORTAGE may be a null
        pointer, in which case nothing is stored through it.
        immediate_quit is always reset to 0 before returning.  */
 633
 634 int
 635 scan_buffer (target, start, end, count, shortage, allow_quit)
 636      register int target;
 637      int start, end;
 638      int count;
 639      int *shortage;
 640      int allow_quit;
 641 {
 642   struct region_cache *newline_cache;
 643   int direction;
 644
 645   if (count > 0)
 646     {
 647       direction = 1;
 648       if (! end) end = ZV;
 649     }
 650   else
 651     {
 652       direction = -1;
 653       if (! end) end = BEGV;
 654     }
 655
       /* Make the current buffer's newline cache exist (or not) as its
          settings request, then pick it up; it may be null.  */
 656   newline_cache_on_off (current_buffer);
 657   newline_cache = current_buffer->newline_cache;
 658
 659   if (shortage != 0)
 660     *shortage = 0;
 661
 662   immediate_quit = allow_quit;
 663
 664   if (count > 0)
 665     while (start != end)
 666       {
 667         /* Our innermost scanning loop is very simple; it doesn't know
 668            about gaps, buffer ends, or the newline cache.  ceiling is
 669            the position of the last character before the next such
 670            obstacle --- the last character the dumb search loop should
 671            examine.  */
 672         int ceiling_byte = CHAR_TO_BYTE (end) - 1;
 673         int start_byte = CHAR_TO_BYTE (start);
 674         int tem;
 675
 676         /* If we're looking for a newline, consult the newline cache
 677            to see where we can avoid some scanning.  */
 678         if (target == '\n' && newline_cache)
 679           {
 680             int next_change;
                 /* immediate_quit is cleared while the region cache is
                    consulted and put back to ALLOW_QUIT afterwards.  */
 681             immediate_quit = 0;
 682             while (region_cache_forward
 683                    (current_buffer, newline_cache, start_byte, &next_change))
 684               start_byte = next_change;
 685             immediate_quit = allow_quit;
 686
 687             /* START should never be after END.  */
 688             if (start_byte > ceiling_byte)
 689               start_byte = ceiling_byte;
 690
 691             /* Now the text after start is an unknown region, and
 692                next_change is the position of the next known region. */
 693             ceiling_byte = min (next_change - 1, ceiling_byte);
 694           }
 695
 696         /* The dumb loop can only scan text stored in contiguous
 697            bytes. BUFFER_CEILING_OF returns the last character
 698            position that is contiguous, so the ceiling is the
 699            position after that.  */
 700         tem = BUFFER_CEILING_OF (start_byte);
 701         ceiling_byte = min (tem, ceiling_byte);
 702
 703         {
 704           /* The termination address of the dumb loop.  */
 705           register unsigned char *ceiling_addr
 706             = BYTE_POS_ADDR (ceiling_byte) + 1;
 707           register unsigned char *cursor
 708             = BYTE_POS_ADDR (start_byte);
               /* BASE is the address that corresponds to START_BYTE, so
                  START_BYTE + P - BASE is the byte position of address P.  */
 709           unsigned char *base = cursor;
 710
 711           while (cursor < ceiling_addr)
 712             {
 713               unsigned char *scan_start = cursor;
 714
 715               /* The dumb loop.  */
 716               while (*cursor != target && ++cursor < ceiling_addr)
 717                 ;
 718
 719               /* If we're looking for newlines, cache the fact that
 720                  the region from start to cursor is free of them. */
 721               if (target == '\n' && newline_cache)
 722                 know_region_cache (current_buffer, newline_cache,
 723                                    start_byte + scan_start - base,
 724                                    start_byte + cursor - base);
 725
 726               /* Did we find the target character?  */
 727               if (cursor < ceiling_addr)
 728                 {
 729                   if (--count == 0)
 730                     {
 731                       immediate_quit = 0;
                           /* The + 1 makes this the position just past
                              the match.  */
 732                       return BYTE_TO_CHAR (start_byte + cursor - base + 1);
 733                     }
 734                   cursor++;
 735                 }
 736             }
 737
 738           start = BYTE_TO_CHAR (start_byte + cursor - base);
 739         }
 740       }
 741   else
 742     while (start > end)
 743       {
 744         /* The last character to check before the next obstacle.  */
 745         int ceiling_byte = CHAR_TO_BYTE (end);
 746         int start_byte = CHAR_TO_BYTE (start);
 747         int tem;
 748
 749         /* Consult the newline cache, if appropriate.  */
 750         if (target == '\n' && newline_cache)
 751           {
 752             int next_change;
 753             immediate_quit = 0;
 754             while (region_cache_backward
 755                    (current_buffer, newline_cache, start_byte, &next_change))
 756               start_byte = next_change;
 757             immediate_quit = allow_quit;
 758
 759             /* Start should never be at or before end.  */
 760             if (start_byte <= ceiling_byte)
 761               start_byte = ceiling_byte + 1;
 762
 763             /* Now the text before start is an unknown region, and
 764                next_change is the position of the next known region. */
 765             ceiling_byte = max (next_change, ceiling_byte);
 766           }
 767
 768         /* Stop scanning before the gap.  */
 769         tem = BUFFER_FLOOR_OF (start_byte - 1);
 770         ceiling_byte = max (tem, ceiling_byte);
 771
 772         {
 773           /* The termination address of the dumb loop.  */
 774           register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
 775           register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
               /* Here BASE is the address that corresponds to
                  START_BYTE - 1, so START_BYTE + P - BASE is one more than
                  the byte position of address P.  */
 776           unsigned char *base = cursor;
 777
 778           while (cursor >= ceiling_addr)
 779             {
 780               unsigned char *scan_start = cursor;
 781
 782               while (*cursor != target && --cursor >= ceiling_addr)
 783                 ;
 784
 785               /* If we're looking for newlines, cache the fact that
 786                  the region from after the cursor to start is free of them.  */
 787               if (target == '\n' && newline_cache)
 788                 know_region_cache (current_buffer, newline_cache,
 789                                    start_byte + cursor - base,
 790                                    start_byte + scan_start - base);
 791
 792               /* Did we find the target character?  */
 793               if (cursor >= ceiling_addr)
 794                 {
                       /* COUNT is not positive in this branch; it reaches
                          zero once -COUNT matches have been seen.  */
 795                   if (++count >= 0)
 796                     {
 797                       immediate_quit = 0;
 798                       return BYTE_TO_CHAR (start_byte + cursor - base);
 799                     }
 800                   cursor--;
 801                 }
 802             }
 803
 804           start = BYTE_TO_CHAR (start_byte + cursor - base);
 805         }
 806       }
 807
       /* Ran out of text before finding COUNT matches.  COUNT * DIRECTION
          is the number of matches still missing, as a non-negative
          value.  */
 808   immediate_quit = 0;
 809   if (shortage != 0)
 810     *shortage = count * direction;
 811   return start;
 812 }
 813 \f
 814 /* Search for COUNT instances of a line boundary, which means either a
 815    newline or (if selective display enabled) a carriage return.
 816    Start at START.  If COUNT is negative, search backwards.
 817
 818    We report the resulting position by calling TEMP_SET_PT_BOTH.
 819
 820    If we find COUNT instances, we position after (always after,
 821    even if scanning backwards) the COUNTth match, and return 0.
 822
 823    If we don't find COUNT instances before reaching the end of the
 824    buffer (or the beginning, if scanning backwards), we return
 825    the number of line boundaries left unfound, and position at
 826    the limit we bumped up against.
 827
 828    If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
 829    except in special cases.  */
 830
 831 int
 832 scan_newline (start, start_byte, limit, limit_byte, count, allow_quit)
 833      int start, start_byte;
 834      int limit, limit_byte;
 835      register int count;
 836      int allow_quit;
 837 {
 838   int direction = ((count > 0) ? 1 : -1);
 839
 840   register unsigned char *cursor;
 841   unsigned char *base;
 842
 843   register int ceiling;
 844   register unsigned char *ceiling_addr;
 845
 846   int old_immediate_quit = immediate_quit;
 847
 848   /* The code that follows is like scan_buffer
 849      but checks for either newline or carriage return.  */
 850
 851   if (allow_quit)
 852     immediate_quit++;
 853
 854   start_byte = CHAR_TO_BYTE (start);
 855
 856   if (count > 0)
 857     {
 858       while (start_byte < limit_byte)
 859         {
 860           ceiling =  BUFFER_CEILING_OF (start_byte);
 861           ceiling = min (limit_byte - 1, ceiling);
 862           ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
 863           base = (cursor = BYTE_POS_ADDR (start_byte));
 864           while (1)
 865             {
 866               while (*cursor != '\n' && ++cursor != ceiling_addr)
 867                 ;
 868
 869               if (cursor != ceiling_addr)
 870                 {
 871                   if (--count == 0)
 872                     {
 873                       immediate_quit = old_immediate_quit;
 874                       start_byte = start_byte + cursor - base + 1;
 875                       start = BYTE_TO_CHAR (start_byte);
 876                       TEMP_SET_PT_BOTH (start, start_byte);
 877                       return 0;
 878                     }
 879                   else
 880                     if (++cursor == ceiling_addr)
 881                       break;
 882                 }
 883               else
 884                 break;
 885             }
 886           start_byte += cursor - base;
 887         }
 888     }
 889   else
 890     {
 891       while (start_byte > limit_byte)
 892         {
 893           ceiling = BUFFER_FLOOR_OF (start_byte - 1);
 894           ceiling = max (limit_byte, ceiling);
 895           ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
 896           base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
 897           while (1)
 898             {
 899               while (--cursor != ceiling_addr && *cursor != '\n')
 900                 ;
 901
 902               if (cursor != ceiling_addr)
 903                 {
 904                   if (++count == 0)
 905                     {
 906                       immediate_quit = old_immediate_quit;
 907                       /* Return the position AFTER the match we found.  */
 908                       start_byte = start_byte + cursor - base + 1;
 909                       start = BYTE_TO_CHAR (start_byte);
 910                       TEMP_SET_PT_BOTH (start, start_byte);
 911                       return 0;
 912                     }
 913                 }
 914               else
 915                 break;
 916             }
 917           /* Here we add 1 to compensate for the last decrement
 918              of CURSOR, which took it past the valid range.  */
 919           start_byte += cursor - base + 1;
 920         }
 921     }
 922
 923   TEMP_SET_PT_BOTH (limit, limit_byte);
 924   immediate_quit = old_immediate_quit;
 925
 926   return count * direction;
 927 }
 928
 929 int
 930 find_next_newline_no_quit (from, cnt)
 931      register int from, cnt;
 932 {
 933   return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0);
 934 }
 935
 936 /* Like find_next_newline, but returns position before the newline,
 937    not after, and only search up to TO.  This isn't just
 938    find_next_newline (...)-1, because you might hit TO.  */
 939
 940 int
 941 find_before_next_newline (from, to, cnt)
 942      int from, to, cnt;
 943 {
 944   int shortage;
 945   int pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);
 946
 947   if (shortage == 0)
 948     pos--;
 949
 950   return pos;
 951 }
 952 \f
 953 /* Subroutines of Lisp buffer search functions. */
 954
 955 static Lisp_Object
 956 search_command (string, bound, noerror, count, direction, RE, posix)
 957      Lisp_Object string, bound, noerror, count;
 958      int direction;
 959      int RE;
 960      int posix;
 961 {
 962   register int np;
 963   int lim, lim_byte;
 964   int n = direction;
 965
 966   if (!NILP (count))
 967     {
 968       CHECK_NUMBER (count);
 969       n *= XINT (count);
 970     }
 971
 972   CHECK_STRING (string);
 973   if (NILP (bound))
 974     {
 975       if (n > 0)
 976         lim = ZV, lim_byte = ZV_BYTE;
 977       else
 978         lim = BEGV, lim_byte = BEGV_BYTE;
 979     }
 980   else
 981     {
 982       CHECK_NUMBER_COERCE_MARKER (bound);
 983       lim = XINT (bound);
 984       if (n > 0 ? lim < PT : lim > PT)
 985         error ("Invalid search bound (wrong side of point)");
 986       if (lim > ZV)
 987         lim = ZV, lim_byte = ZV_BYTE;
 988       else if (lim < BEGV)
 989         lim = BEGV, lim_byte = BEGV_BYTE;
 990       else
 991         lim_byte = CHAR_TO_BYTE (lim);
 992     }
 993
 994   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
 995   XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
 996     = current_buffer->case_eqv_table;
 997
 998   np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
 999                       (!NILP (current_buffer->case_fold_search)
1000                        ? current_buffer->case_canon_table
1001                        : Qnil),
1002                       (!NILP (current_buffer->case_fold_search)
1003                        ? current_buffer->case_eqv_table
1004                        : Qnil),
1005                       posix);
1006   if (np <= 0)
1007     {
1008       if (NILP (noerror))
1009         xsignal1 (Qsearch_failed, string);
1010
1011       if (!EQ (noerror, Qt))
1012         {
1013           if (lim < BEGV || lim > ZV)
1014             abort ();
1015           SET_PT_BOTH (lim, lim_byte);
1016           return Qnil;
1017 #if 0 /* This would be clean, but maybe programs depend on
1018          a value of nil here.  */
1019           np = lim;
1020 #endif
1021         }
1022       else
1023         return Qnil;
1024     }
1025
1026   if (np < BEGV || np > ZV)
1027     abort ();
1028
1029   SET_PT (np);
1030
1031   return make_number (np);
1032 }
1033 \f
1034 /* Return 1 if REGEXP it matches just one constant string.  */
1035
1036 static int
1037 trivial_regexp_p (regexp)
1038      Lisp_Object regexp;
1039 {
1040   int len = SBYTES (regexp);
1041   unsigned char *s = SDATA (regexp);
1042   while (--len >= 0)
1043     {
1044       switch (*s++)
1045         {
1046         case '.': case '*': case '+': case '?': case '[': case '^': case '$':
1047           return 0;
1048         case '\\':
1049           if (--len < 0)
1050             return 0;
1051           switch (*s++)
1052             {
1053             case '|': case '(': case ')': case '`': case '\'': case 'b':
1054             case 'B': case '<': case '>': case 'w': case 'W': case 's':
1055             case 'S': case '=': case '{': case '}': case '_':
1056             case 'c': case 'C': /* for categoryspec and notcategoryspec */
1057             case '1': case '2': case '3': case '4': case '5':
1058             case '6': case '7': case '8': case '9':
1059               return 0;
1060             }
1061         }
1062     }
1063   return 1;
1064 }
1065
1066 /* Search for the n'th occurrence of STRING in the current buffer,
1067    starting at position POS and stopping at position LIM,
1068    treating STRING as a literal string if RE is false or as
1069    a regular expression if RE is true.
1070
1071    If N is positive, searching is forward and LIM must be greater than POS.
1072    If N is negative, searching is backward and LIM must be less than POS.
1073
1074    Returns -x if x occurrences remain to be found (x > 0),
1075    or else the position at the beginning of the Nth occurrence
1076    (if searching backward) or the end (if searching forward).
1077
1078    POSIX is nonzero if we want full backtracking (POSIX style)
1079    for this pattern.  0 means backtrack only enough to get a valid match.  */
1080
/* Set OUT to the translation of character D according to table TRT.
   If TRT is nil, or if the element of TRT for D is not an integer,
   OUT is set to D itself.

   OUT must be an lvalue; it may be the same variable as D.  D may be
   evaluated more than once, so it must not have side effects.  */
#define TRANSLATE(out, trt, d)                  \
do                                              \
  {                                             \
    if (! NILP (trt))                           \
      {                                         \
        Lisp_Object temp;                       \
        temp = Faref ((trt), make_number (d));  \
        if (INTEGERP (temp))                    \
          (out) = XINT (temp);                  \
        else                                    \
          (out) = (d);                          \
      }                                         \
    else                                        \
      (out) = (d);                              \
  }                                             \
while (0)
1097
1098 /* Only used in search_buffer, to record the end position of the match
1099    when searching regexps and SEARCH_REGS should not be changed
1100    (i.e. Vinhibit_changing_match_data is non-nil).  */
1101 static struct re_registers search_regs_1;
1102
1103 static int
1104 search_buffer (string, pos, pos_byte, lim, lim_byte, n,
1105                RE, trt, inverse_trt, posix)
1106      Lisp_Object string;
1107      int pos;
1108      int pos_byte;
1109      int lim;
1110      int lim_byte;
1111      int n;
1112      int RE;
1113      Lisp_Object trt;
1114      Lisp_Object inverse_trt;
1115      int posix;
1116 {
1117   int len = SCHARS (string);
1118   int len_byte = SBYTES (string);
1119   register int i;
1120
1121   if (running_asynch_code)
1122     save_search_regs ();
1123
1124   /* Searching 0 times means don't move.  */
1125   /* Null string is found at starting position.  */
1126   if (len == 0 || n == 0)
1127     {
1128       set_search_regs (pos_byte, 0);
1129       return pos;
1130     }
1131
1132   if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp)))
1133     {
1134       unsigned char *p1, *p2;
1135       int s1, s2;
1136       struct re_pattern_buffer *bufp;
1137
1138       bufp = compile_pattern (string,
1139                               (NILP (Vinhibit_changing_match_data)
1140                                ? &search_regs : &search_regs_1),
1141                               trt, posix,
1142                               !NILP (current_buffer->enable_multibyte_characters));
1143
1144       immediate_quit = 1;       /* Quit immediately if user types ^G,
1145                                    because letting this function finish
1146                                    can take too long. */
1147       QUIT;                     /* Do a pending quit right away,
1148                                    to avoid paradoxical behavior */
1149       /* Get pointers and sizes of the two strings
1150          that make up the visible portion of the buffer. */
1151
1152       p1 = BEGV_ADDR;
1153       s1 = GPT_BYTE - BEGV_BYTE;
1154       p2 = GAP_END_ADDR;
1155       s2 = ZV_BYTE - GPT_BYTE;
1156       if (s1 < 0)
1157         {
1158           p2 = p1;
1159           s2 = ZV_BYTE - BEGV_BYTE;
1160           s1 = 0;
1161         }
1162       if (s2 < 0)
1163         {
1164           s1 = ZV_BYTE - BEGV_BYTE;
1165           s2 = 0;
1166         }
1167       re_match_object = Qnil;
1168
1169       while (n < 0)
1170         {
1171           int val;
1172           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1173                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1174                              (NILP (Vinhibit_changing_match_data)
1175                               ? &search_regs : &search_regs_1),
1176                              /* Don't allow match past current point */
1177                              pos_byte - BEGV_BYTE);
1178           if (val == -2)
1179             {
1180               matcher_overflow ();
1181             }
1182           if (val >= 0)
1183             {
1184               if (NILP (Vinhibit_changing_match_data))
1185                 {
1186                   pos_byte = search_regs.start[0] + BEGV_BYTE;
1187                   for (i = 0; i < search_regs.num_regs; i++)
1188                     if (search_regs.start[i] >= 0)
1189                       {
1190                         search_regs.start[i]
1191                           = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1192                         search_regs.end[i]
1193                           = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1194                       }
1195                   XSETBUFFER (last_thing_searched, current_buffer);
1196                   /* Set pos to the new position. */
1197                   pos = search_regs.start[0];
1198                 }
1199               else
1200                 {
1201                   pos_byte = search_regs_1.start[0] + BEGV_BYTE;
1202                   /* Set pos to the new position.  */
1203                   pos = BYTE_TO_CHAR (search_regs_1.start[0] + BEGV_BYTE);
1204                 }
1205             }
1206           else
1207             {
1208               immediate_quit = 0;
1209               return (n);
1210             }
1211           n++;
1212         }
1213       while (n > 0)
1214         {
1215           int val;
1216           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1217                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1218                              (NILP (Vinhibit_changing_match_data)
1219                               ? &search_regs : &search_regs_1),
1220                              lim_byte - BEGV_BYTE);
1221           if (val == -2)
1222             {
1223               matcher_overflow ();
1224             }
1225           if (val >= 0)
1226             {
1227               if (NILP (Vinhibit_changing_match_data))
1228                 {
1229                   pos_byte = search_regs.end[0] + BEGV_BYTE;
1230                   for (i = 0; i < search_regs.num_regs; i++)
1231                     if (search_regs.start[i] >= 0)
1232                       {
1233                         search_regs.start[i]
1234                           = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1235                         search_regs.end[i]
1236                           = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1237                       }
1238                   XSETBUFFER (last_thing_searched, current_buffer);
1239                   pos = search_regs.end[0];
1240                 }
1241               else
1242                 {
1243                   pos_byte = search_regs_1.end[0] + BEGV_BYTE;
1244                   pos = BYTE_TO_CHAR (search_regs_1.end[0] + BEGV_BYTE);
1245                 }
1246             }
1247           else
1248             {
1249               immediate_quit = 0;
1250               return (0 - n);
1251             }
1252           n--;
1253         }
1254       immediate_quit = 0;
1255       return (pos);
1256     }
1257   else                          /* non-RE case */
1258     {
1259       unsigned char *raw_pattern, *pat;
1260       int raw_pattern_size;
1261       int raw_pattern_size_byte;
1262       unsigned char *patbuf;
1263       int multibyte = !NILP (current_buffer->enable_multibyte_characters);
1264       unsigned char *base_pat;
1265       /* Set to positive if we find a non-ASCII char that need
1266          translation.  Otherwise set to zero later.  */
1267       int charset_base = -1;
1268       int boyer_moore_ok = 1;
1269
1270       /* MULTIBYTE says whether the text to be searched is multibyte.
1271          We must convert PATTERN to match that, or we will not really
1272          find things right.  */
1273
1274       if (multibyte == STRING_MULTIBYTE (string))
1275         {
1276           raw_pattern = (unsigned char *) SDATA (string);
1277           raw_pattern_size = SCHARS (string);
1278           raw_pattern_size_byte = SBYTES (string);
1279         }
1280       else if (multibyte)
1281         {
1282           raw_pattern_size = SCHARS (string);
1283           raw_pattern_size_byte
1284             = count_size_as_multibyte (SDATA (string),
1285                                        raw_pattern_size);
1286           raw_pattern = (unsigned char *) alloca (raw_pattern_size_byte + 1);
1287           copy_text (SDATA (string), raw_pattern,
1288                      SCHARS (string), 0, 1);
1289         }
1290       else
1291         {
1292           /* Converting multibyte to single-byte.
1293
1294              ??? Perhaps this conversion should be done in a special way
1295              by subtracting nonascii-insert-offset from each non-ASCII char,
1296              so that only the multibyte chars which really correspond to
1297              the chosen single-byte character set can possibly match.  */
1298           raw_pattern_size = SCHARS (string);
1299           raw_pattern_size_byte = SCHARS (string);
1300           raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
1301           copy_text (SDATA (string), raw_pattern,
1302                      SBYTES (string), 1, 0);
1303         }
1304
1305       /* Copy and optionally translate the pattern.  */
1306       len = raw_pattern_size;
1307       len_byte = raw_pattern_size_byte;
1308       patbuf = (unsigned char *) alloca (len_byte);
1309       pat = patbuf;
1310       base_pat = raw_pattern;
1311       if (multibyte)
1312         {
1313           /* Fill patbuf by translated characters in STRING while
1314              checking if we can use boyer-moore search.  If TRT is
1315              non-nil, we can use boyer-moore search only if TRT can be
1316              represented by the byte array of 256 elements.  For that,
1317              all non-ASCII case-equivalents of all case-senstive
1318              characters in STRING must belong to the same charset and
1319              row.  */
1320
1321           while (--len >= 0)
1322             {
1323               unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str;
1324               int c, translated, inverse;
1325               int in_charlen, charlen;
1326
1327               /* If we got here and the RE flag is set, it's because we're
1328                  dealing with a regexp known to be trivial, so the backslash
1329                  just quotes the next character.  */
1330               if (RE && *base_pat == '\\')
1331                 {
1332                   len--;
1333                   raw_pattern_size--;
1334                   len_byte--;
1335                   base_pat++;
1336                 }
1337
1338               c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen);
1339
1340               if (NILP (trt))
1341                 {
1342                   str = base_pat;
1343                   charlen = in_charlen;
1344                 }
1345               else
1346                 {
1347                   /* Translate the character.  */
1348                   TRANSLATE (translated, trt, c);
1349                   charlen = CHAR_STRING (translated, str_base);
1350                   str = str_base;
1351
1352                   /* Check if C has any other case-equivalents.  */
1353                   TRANSLATE (inverse, inverse_trt, c);
1354                   /* If so, check if we can use boyer-moore.  */
1355                   if (c != inverse && boyer_moore_ok)
1356                     {
1357                       /* Check if all equivalents belong to the same
1358                          charset & row.  Note that the check of C
1359                          itself is done by the last iteration.  Note
1360                          also that we don't have to check ASCII
1361                          characters because boyer-moore search can
1362                          always handle their translation.  */
1363                       while (1)
1364                         {
1365                           if (ASCII_BYTE_P (inverse))
1366                             {
1367                               if (charset_base > 0)
1368                                 {
1369                                   boyer_moore_ok = 0;
1370                                   break;
1371                                 }
1372                               charset_base = 0;
1373                             }
1374                           else if (SINGLE_BYTE_CHAR_P (inverse))
1375                             {
1376                               /* Boyer-moore search can't handle a
1377                                  translation of an eight-bit
1378                                  character.  */
1379                               boyer_moore_ok = 0;
1380                               break;
1381                             }
1382                           else if (charset_base < 0)
1383                             charset_base = inverse & ~CHAR_FIELD3_MASK;
1384                           else if ((inverse & ~CHAR_FIELD3_MASK)
1385                                    != charset_base)
1386                             {
1387                               boyer_moore_ok = 0;
1388                               break;
1389                             }
1390                           if (c == inverse)
1391                             break;
1392                           TRANSLATE (inverse, inverse_trt, inverse);
1393                         }
1394                     }
1395                 }
1396               if (charset_base < 0)
1397                 charset_base = 0;
1398
1399               /* Store this character into the translated pattern.  */
1400               bcopy (str, pat, charlen);
1401               pat += charlen;
1402               base_pat += in_charlen;
1403               len_byte -= in_charlen;
1404             }
1405         }
1406       else
1407         {
1408           /* Unibyte buffer.  */
1409           charset_base = 0;
1410           while (--len >= 0)
1411             {
1412               int c, translated;
1413
1414               /* If we got here and the RE flag is set, it's because we're
1415                  dealing with a regexp known to be trivial, so the backslash
1416                  just quotes the next character.  */
1417               if (RE && *base_pat == '\\')
1418                 {
1419                   len--;
1420                   raw_pattern_size--;
1421                   base_pat++;
1422                 }
1423               c = *base_pat++;
1424               TRANSLATE (translated, trt, c);
1425               *pat++ = translated;
1426             }
1427         }
1428
1429       len_byte = pat - patbuf;
1430       len = raw_pattern_size;
1431       pat = base_pat = patbuf;
1432
1433       if (boyer_moore_ok)
1434         return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
1435                             pos, pos_byte, lim, lim_byte,
1436                             charset_base);
1437       else
1438         return simple_search (n, pat, len, len_byte, trt,
1439                               pos, pos_byte, lim, lim_byte);
1440     }
1441 }
1442 \f
1443 /* Do a simple string search N times for the string PAT,
1444    whose length is LEN/LEN_BYTE,
1445    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1446    TRT is the translation table.
1447
1448    Return the character position where the match is found.
1449    Otherwise, if M matches remained to be found, return -M.
1450
1451    This kind of search works regardless of what is in PAT and
1452    regardless of what is in TRT.  It is used in cases where
1453    boyer_moore cannot work.  */
1454
1455 static int
1456 simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
1457      int n;
1458      unsigned char *pat;
1459      int len, len_byte;
1460      Lisp_Object trt;
1461      int pos, pos_byte;
1462      int lim, lim_byte;
1463 {
1464   int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1465   int forward = n > 0;
1466
1467   if (lim > pos && multibyte)
1468     while (n > 0)
1469       {
1470         while (1)
1471           {
1472             /* Try matching at position POS.  */
1473             int this_pos = pos;
1474             int this_pos_byte = pos_byte;
1475             int this_len = len;
1476             int this_len_byte = len_byte;
1477             unsigned char *p = pat;
1478             if (pos + len > lim)
1479               goto stop;
1480
1481             while (this_len > 0)
1482               {
1483                 int charlen, buf_charlen;
1484                 int pat_ch, buf_ch;
1485
1486                 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1487                 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1488                                                  ZV_BYTE - this_pos_byte,
1489                                                  buf_charlen);
1490                 TRANSLATE (buf_ch, trt, buf_ch);
1491
1492                 if (buf_ch != pat_ch)
1493                   break;
1494
1495                 this_len_byte -= charlen;
1496                 this_len--;
1497                 p += charlen;
1498
1499                 this_pos_byte += buf_charlen;
1500                 this_pos++;
1501               }
1502
1503             if (this_len == 0)
1504               {
1505                 pos += len;
1506                 pos_byte += len_byte;
1507                 break;
1508               }
1509
1510             INC_BOTH (pos, pos_byte);
1511           }
1512
1513         n--;
1514       }
1515   else if (lim > pos)
1516     while (n > 0)
1517       {
1518         while (1)
1519           {
1520             /* Try matching at position POS.  */
1521             int this_pos = pos;
1522             int this_len = len;
1523             unsigned char *p = pat;
1524
1525             if (pos + len > lim)
1526               goto stop;
1527
1528             while (this_len > 0)
1529               {
1530                 int pat_ch = *p++;
1531                 int buf_ch = FETCH_BYTE (this_pos);
1532                 TRANSLATE (buf_ch, trt, buf_ch);
1533
1534                 if (buf_ch != pat_ch)
1535                   break;
1536
1537                 this_len--;
1538                 this_pos++;
1539               }
1540
1541             if (this_len == 0)
1542               {
1543                 pos += len;
1544                 break;
1545               }
1546
1547             pos++;
1548           }
1549
1550         n--;
1551       }
1552   /* Backwards search.  */
1553   else if (lim < pos && multibyte)
1554     while (n < 0)
1555       {
1556         while (1)
1557           {
1558             /* Try matching at position POS.  */
1559             int this_pos = pos - len;
1560             int this_pos_byte = pos_byte - len_byte;
1561             int this_len = len;
1562             int this_len_byte = len_byte;
1563             unsigned char *p = pat;
1564
1565             if (this_pos < lim || this_pos_byte < lim_byte)
1566               goto stop;
1567
1568             while (this_len > 0)
1569               {
1570                 int charlen, buf_charlen;
1571                 int pat_ch, buf_ch;
1572
1573                 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1574                 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1575                                                  ZV_BYTE - this_pos_byte,
1576                                                  buf_charlen);
1577                 TRANSLATE (buf_ch, trt, buf_ch);
1578
1579                 if (buf_ch != pat_ch)
1580                   break;
1581
1582                 this_len_byte -= charlen;
1583                 this_len--;
1584                 p += charlen;
1585                 this_pos_byte += buf_charlen;
1586                 this_pos++;
1587               }
1588
1589             if (this_len == 0)
1590               {
1591                 pos -= len;
1592                 pos_byte -= len_byte;
1593                 break;
1594               }
1595
1596             DEC_BOTH (pos, pos_byte);
1597           }
1598
1599         n++;
1600       }
1601   else if (lim < pos)
1602     while (n < 0)
1603       {
1604         while (1)
1605           {
1606             /* Try matching at position POS.  */
1607             int this_pos = pos - len;
1608             int this_len = len;
1609             unsigned char *p = pat;
1610
1611             if (pos - len < lim)
1612               goto stop;
1613
1614             while (this_len > 0)
1615               {
1616                 int pat_ch = *p++;
1617                 int buf_ch = FETCH_BYTE (this_pos);
1618                 TRANSLATE (buf_ch, trt, buf_ch);
1619
1620                 if (buf_ch != pat_ch)
1621                   break;
1622                 this_len--;
1623                 this_pos++;
1624               }
1625
1626             if (this_len == 0)
1627               {
1628                 pos -= len;
1629                 break;
1630               }
1631
1632             pos--;
1633           }
1634
1635         n++;
1636       }
1637
1638  stop:
1639   if (n == 0)
1640     {
1641       if (forward)
1642         set_search_regs ((multibyte ? pos_byte : pos) - len_byte, len_byte);
1643       else
1644         set_search_regs (multibyte ? pos_byte : pos, len_byte);
1645
1646       return pos;
1647     }
1648   else if (n > 0)
1649     return -n;
1650   else
1651     return n;
1652 }
1653 \f
1654 /* Do Boyer-Moore search N times for the string BASE_PAT,
1655    whose length is LEN/LEN_BYTE,
1656    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1657    DIRECTION says which direction we search in.
1658    TRT and INVERSE_TRT are translation tables.
1659    Characters in PAT are already translated by TRT.
1660
1661    This kind of search works if all the characters in BASE_PAT that
1662    have nontrivial translation are the same aside from the last byte.
1663    This makes it possible to translate just the last byte of a
1664    character, and do so after just a simple test of the context.
1665    CHARSET_BASE is nonzero if there is such a non-ASCII character.
1666
1667    If that criterion is not satisfied, do not call this function.  */
1668
1669 static int /* Returns the match end (start if backward) as a char position, or a negative count of matches not found.  */
1670 boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
1671              pos, pos_byte, lim, lim_byte, charset_base)
1672      int n; /* repeat count; its sign gives the search direction */
1673      unsigned char *base_pat; /* pattern bytes; LEN_BYTE of them are scanned */
1674      int len, len_byte; /* pattern length; only LEN_BYTE is read below */
1675      Lisp_Object trt; /* translation table, or nil for an untranslated search */
1676      Lisp_Object inverse_trt; /* inverse of TRT; used only when TRT is non-nil */
1677      int pos, pos_byte; /* start of the search; only POS_BYTE is read below */
1678      int lim, lim_byte; /* bound of the search; only LIM_BYTE is read below */
1679      int charset_base; /* nonzero if the pattern has a translatable non-ASCII char */
1680 {
1681   int direction = ((n > 0) ? 1 : -1); /* +1 forward, -1 backward */
1682   register int dirlen; /* LEN_BYTE * DIRECTION */
1683   int infinity, limit, stride_for_teases = 0;
1684   register int *BM_tab;
1685   int *BM_tab_base;
1686   register unsigned char *cursor, *p_limit;
1687   register int i, j;
1688   unsigned char *pat, *pat_end;
1689   int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1690
1691   unsigned char simple_translate[0400];
1692   /* These are set to the preceding bytes of a byte to be translated
1693      if charset_base is nonzero.  As the maximum byte length of a
1694      multibyte character is 4, we have to check at most three previous
1695      bytes.  */
1696   int translate_prev_byte1 = 0;
1697   int translate_prev_byte2 = 0;
1698   int translate_prev_byte3 = 0;
1699
1700   BM_tab = (int *) alloca (0400 * sizeof (int)); /* one stride per byte value */
1701
1702   /* The general approach is that we are going to maintain that we know */
1703   /* the first (closest to the present position, in whatever direction */
1704   /* we're searching) character that could possibly be the last */
1705   /* (furthest from present position) character of a valid match.  We */
1706   /* advance the state of our knowledge by looking at that character */
1707   /* and seeing whether it indeed matches the last character of the */
1708   /* pattern.  If it does, we take a closer look.  If it does not, we */
1709   /* move our pointer (to putative last characters) as far as is */
1710   /* logically possible.  This amount of movement, which I call a */
1711   /* stride, will be the length of the pattern if the actual character */
1712   /* appears nowhere in the pattern, otherwise it will be the distance */
1713   /* from the last occurrence of that character to the end of the */
1714   /* pattern. */
1715   /* As a coding trick, an enormous stride is coded into the table for */
1716   /* characters that match the last character.  This allows use of only */
1717   /* a single test, a test for having gone past the end of the */
1718   /* permissible match region, to test for both possible matches (when */
1719   /* the stride goes past the end immediately) and failure to */
1720   /* match (where you get nudged past the end one stride at a time). */
1721
1722   /* Here we make a "mickey mouse" BM table.  The stride of the search */
1723   /* is determined only by the last character of the putative match. */
1724   /* If that character does not match, we will stride the proper */
1725   /* distance to propose a match that superimposes it on the last */
1726   /* instance of a character that matches it (per trt), or misses */
1727   /* it entirely if there is none. */
1728
1729   dirlen = len_byte * direction;
1730   infinity = dirlen - (lim_byte + pos_byte + len_byte + len_byte) * direction;
1731
1732   /* Record position after the end of the pattern.  */
1733   pat_end = base_pat + len_byte;
1734   /* BASE_PAT points to a character that we start scanning from.
1735      It is the first character in a forward search,
1736      the last character in a backward search.  */
1737   if (direction < 0)
1738     base_pat = pat_end - 1;
1739
1740   BM_tab_base = BM_tab;
1741   BM_tab += 0400;
1742   j = dirlen;           /* to get it in a register */
1743   /* A character that does not appear in the pattern induces a */
1744   /* stride equal to the pattern length. */
1745   while (BM_tab_base != BM_tab)
1746     {
1747       *--BM_tab = j;
1748       *--BM_tab = j;
1749       *--BM_tab = j;
1750       *--BM_tab = j;
1751     }
1752
1753   /* We use this for translation, instead of TRT itself.
1754      We fill this in to handle the characters that actually
1755      occur in the pattern.  Others don't matter anyway!  */
1756   bzero (simple_translate, sizeof simple_translate); /* redundant: every entry is set just below */
1757   for (i = 0; i < 0400; i++)
1758     simple_translate[i] = i;
1759
1760   if (charset_base)
1761     {
1762       /* Set up translate_prev_byte1/2/3 from CHARSET_BASE.  Only a
1763          byte that follows them is a target of translation.  */
1764       int sample_char = charset_base | 0x20;
1765       unsigned char str[MAX_MULTIBYTE_LENGTH];
1766       int len = CHAR_STRING (sample_char, str); /* shadows the LEN parameter */
1767
1768       translate_prev_byte1 = str[len - 2];
1769       if (len > 2)
1770         {
1771           translate_prev_byte2 = str[len - 3];
1772           if (len > 3)
1773             translate_prev_byte3 = str[len - 4];
1774         }
1775     }
1776
1777   i = 0;
1778   while (i != infinity)
1779     {
1780       unsigned char *ptr = base_pat + i;
1781       i += direction;
1782       if (i == dirlen)
1783         i = infinity; /* final pattern byte: it gets the huge stride, and the loop ends */
1784       if (! NILP (trt))
1785         {
1786           /* If the byte we are looking at is the last byte of a
1787              character whose case-equivalents must be checked, set CH
1788              to that character.  An ASCII character and a non-ASCII
1789              character matching CHARSET_BASE are to be checked.  */
1790           int ch = -1;
1791
1792           if (ASCII_BYTE_P (*ptr) || ! multibyte)
1793             ch = *ptr;
1794           else if (charset_base
1795                    && ((pat_end - ptr) == 1 || CHAR_HEAD_P (ptr[1])))
1796             {
1797               unsigned char *charstart = ptr - 1;
1798
1799               while (! (CHAR_HEAD_P (*charstart)))
1800                 charstart--;
1801               ch = STRING_CHAR (charstart, ptr - charstart + 1);
1802               if (charset_base != (ch & ~CHAR_FIELD3_MASK))
1803                 ch = -1;
1804             }
1805
1806           if (ch >= 0400)
1807             j = ((unsigned char) ch) | 0200;
1808           else
1809             j = *ptr;
1810
1811           if (i == infinity)
1812             stride_for_teases = BM_tab[j];
1813
1814           BM_tab[j] = dirlen - i;
1815           /* A translation table is accompanied by its inverse -- see */
1816           /* comment following downcase_table for details */
1817           if (ch >= 0)
1818             {
1819               int starting_ch = ch;
1820               int starting_j = j;
1821
1822               while (1)
1823                 {
1824                   TRANSLATE (ch, inverse_trt, ch);
1825                   if (ch >= 0400)
1826                     j = ((unsigned char) ch) | 0200;
1827                   else
1828                     j = (unsigned char) ch;
1829
1830                   /* For all the characters that map into CH,
1831                      set up simple_translate to map the last byte
1832                      into STARTING_J.  */
1833                   simple_translate[j] = starting_j;
1834                   if (ch == starting_ch)
1835                     break;
1836                   BM_tab[j] = dirlen - i;
1837                 }
1838             }
1839         }
1840       else
1841         {
1842           j = *ptr;
1843
1844           if (i == infinity)
1845             stride_for_teases = BM_tab[j];
1846           BM_tab[j] = dirlen - i;
1847         }
1848       /* stride_for_teases tells how much to stride if we get a */
1849       /* match on the far character but are subsequently */
1850       /* disappointed, by recording what the stride would have been */
1851       /* for that character if the last character had been */
1852       /* different. */
1853     }
1854   infinity = dirlen - infinity; /* now the huge stride stored in BM_tab for the final pattern byte */
1855   pos_byte += dirlen - ((direction > 0) ? direction : 0);
1856   /* loop invariant - POS_BYTE points at where last char (first
1857      char if reverse) of pattern would align in a possible match.  */
1858   while (n != 0)
1859     {
1860       int tail_end;
1861       unsigned char *tail_end_ptr;
1862
1863       /* It's been reported that some (broken) compiler thinks that
1864          Boolean expressions in an arithmetic context are unsigned.
1865          Using an explicit ?1:0 prevents this.  */
1866       if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1867           < 0)
1868         return (n * (0 - direction)); /* bound reached: -|N|, N being the count still to find */
1869       /* First we do the part we can by pointers (maybe nothing) */
1870       QUIT;
1871       pat = base_pat;
1872       limit = pos_byte - dirlen + direction;
1873       if (direction > 0)
1874         {
1875           limit = BUFFER_CEILING_OF (limit);
1876           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1877              can take on without hitting edge of buffer or the gap.  */
1878           limit = min (limit, pos_byte + 20000);
1879           limit = min (limit, lim_byte - 1);
1880         }
1881       else
1882         {
1883           limit = BUFFER_FLOOR_OF (limit);
1884           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1885              can take on without hitting edge of buffer or the gap.  */
1886           limit = max (limit, pos_byte - 20000);
1887           limit = max (limit, lim_byte);
1888         }
1889       tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1890       tail_end_ptr = BYTE_POS_ADDR (tail_end);
1891
1892       if ((limit - pos_byte) * direction > 20)
1893         {
1894           unsigned char *p2;
1895
1896           p_limit = BYTE_POS_ADDR (limit);
1897           p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1898           /* In this loop, pos_byte + cursor - p2 is the surrogate for pos_byte */
1899           while (1)             /* use one cursor setting as long as i can */
1900             {
1901               if (direction > 0) /* worth duplicating */
1902                 {
1903                   /* Use signed comparison if appropriate
1904                      to make cursor+infinity sure to be > p_limit.
1905                      Assuming that the buffer lies in a range of addresses
1906                      that are all "positive" (as ints) or all "negative",
1907                      either kind of comparison will work as long
1908                      as we don't step by infinity.  So pick the kind
1909                      that works when we do step by infinity.  */
1910                   if ((EMACS_INT) (p_limit + infinity) > (EMACS_INT) p_limit)
1911                     while ((EMACS_INT) cursor <= (EMACS_INT) p_limit)
1912                       cursor += BM_tab[*cursor];
1913                   else
1914                     while ((EMACS_UINT) cursor <= (EMACS_UINT) p_limit)
1915                       cursor += BM_tab[*cursor];
1916                 }
1917               else
1918                 {
1919                   if ((EMACS_INT) (p_limit + infinity) < (EMACS_INT) p_limit)
1920                     while ((EMACS_INT) cursor >= (EMACS_INT) p_limit)
1921                       cursor += BM_tab[*cursor];
1922                   else
1923                     while ((EMACS_UINT) cursor >= (EMACS_UINT) p_limit)
1924                       cursor += BM_tab[*cursor];
1925                 }
1926 /* If you are here, cursor is beyond the end of the searched region. */
1927 /* This can happen if you match on the far character of the pattern, */
1928 /* because the "stride" of that character is infinity, a number able */
1929 /* to throw you well beyond the end of the search.  It can also */
1930 /* happen if you fail to match within the permitted region and would */
1931 /* otherwise try a character beyond that region */
1932               if ((cursor - p_limit) * direction <= len_byte)
1933                 break;  /* a small overrun is genuine */
1934               cursor -= infinity; /* large overrun = hit */
1935               i = dirlen - direction;
1936               if (! NILP (trt))
1937                 {
1938                   while ((i -= direction) + direction != 0)
1939                     {
1940                       int ch;
1941                       cursor -= direction;
1942                       /* Translate only the last byte of a character.  */
1943                       if (! multibyte
1944                           || ((cursor == tail_end_ptr
1945                                || CHAR_HEAD_P (cursor[1]))
1946                               && (CHAR_HEAD_P (cursor[0])
1947                                   /* Check if this is the last byte of
1948                                      a translatable character.  */
1949                                   || (translate_prev_byte1 == cursor[-1]
1950                                       && (CHAR_HEAD_P (translate_prev_byte1)
1951                                           || (translate_prev_byte2 == cursor[-2]
1952                                               && (CHAR_HEAD_P (translate_prev_byte2)
1953                                                   || (translate_prev_byte3 == cursor[-3]))))))))
1954                         ch = simple_translate[*cursor];
1955                       else
1956                         ch = *cursor;
1957                       if (pat[i] != ch)
1958                         break;
1959                     }
1960                 }
1961               else
1962                 {
1963                   while ((i -= direction) + direction != 0)
1964                     {
1965                       cursor -= direction;
1966                       if (pat[i] != *cursor)
1967                         break;
1968                     }
1969                 }
1970               cursor += dirlen - i - direction; /* fix cursor */
1971               if (i + direction == 0)
1972                 {
1973                   int position, start, end;
1974
1975                   cursor -= direction;
1976
1977                   position = pos_byte + cursor - p2 + ((direction > 0)
1978                                                        ? 1 - len_byte : 0);
1979                   set_search_regs (position, len_byte);
1980
1981                   if (NILP (Vinhibit_changing_match_data))
1982                     {
1983                       start = search_regs.start[0];
1984                       end = search_regs.end[0];
1985                     }
1986                   else
1987                     /* If Vinhibit_changing_match_data is non-nil,
1988                        search_regs will not be changed.  So let's
1989                        compute start and end here.  */
1990                     {
1991                       start = BYTE_TO_CHAR (position);
1992                       end = BYTE_TO_CHAR (position + len_byte);
1993                     }
1994
1995                   if ((n -= direction) != 0)
1996                     cursor += dirlen; /* to resume search */
1997                   else
1998                     return direction > 0 ? end : start;
1999                 }
2000               else
2001                 cursor += stride_for_teases; /* <sigh> we lose -  */
2002             }
2003           pos_byte += cursor - p2;
2004         }
2005       else
2006         /* Now we'll pick up a clump that has to be done the hard */
2007         /* way because it covers a discontinuity */
2008         {
2009           limit = ((direction > 0)
2010                    ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
2011                    : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
2012           limit = ((direction > 0)
2013                    ? min (limit + len_byte, lim_byte - 1)
2014                    : max (limit - len_byte, lim_byte));
2015           /* LIMIT is now the last value POS_BYTE can have
2016              and still be valid for a possible match.  */
2017           while (1)
2018             {
2019               /* This loop can be coded for space rather than */
2020               /* speed because it will usually run only once. */
2021               /* (the reach is at most len + 21, and typically */
2022               /* does not exceed len) */
2023               while ((limit - pos_byte) * direction >= 0)
2024                 pos_byte += BM_tab[FETCH_BYTE (pos_byte)];
2025               /* now run the same tests to distinguish going off the */
2026               /* end, a match or a phony match. */
2027               if ((pos_byte - limit) * direction <= len_byte)
2028                 break;  /* ran off the end */
2029               /* Found what might be a match.
2030                  Set POS_BYTE back to last (first if reverse) pos.  */
2031               pos_byte -= infinity;
2032               i = dirlen - direction;
2033               while ((i -= direction) + direction != 0)
2034                 {
2035                   int ch;
2036                   unsigned char *ptr;
2037                   pos_byte -= direction;
2038                   ptr = BYTE_POS_ADDR (pos_byte);
2039                   /* Translate only the last byte of a character.  */
2040                   if (! multibyte
2041                       || ((ptr == tail_end_ptr
2042                            || CHAR_HEAD_P (ptr[1]))
2043                           && (CHAR_HEAD_P (ptr[0])
2044                               /* Check if this is the last byte of a
2045                                  translatable character.  */
2046                               || (translate_prev_byte1 == ptr[-1]
2047                                   && (CHAR_HEAD_P (translate_prev_byte1)
2048                                       || (translate_prev_byte2 == ptr[-2]
2049                                           && (CHAR_HEAD_P (translate_prev_byte2)
2050                                               || translate_prev_byte3 == ptr[-3])))))))
2051                     ch = simple_translate[*ptr];
2052                   else
2053                     ch = *ptr;
2054                   if (pat[i] != ch)
2055                     break;
2056                 }
2057               /* Above loop has moved POS_BYTE part or all the way
2058                  back to the first pos (last pos if reverse).
2059                  Set it once again at the last (first if reverse) char.  */
2060               pos_byte += dirlen - i- direction;
2061               if (i + direction == 0)
2062                 {
2063                   int position, start, end;
2064                   pos_byte -= direction;
2065
2066                   position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
2067                   set_search_regs (position, len_byte);
2068
2069                   if (NILP (Vinhibit_changing_match_data))
2070                     {
2071                       start = search_regs.start[0];
2072                       end = search_regs.end[0];
2073                     }
2074                   else
2075                     /* If Vinhibit_changing_match_data is non-nil,
2076                        search_regs will not be changed.  So let's
2077                        compute start and end here.  */
2078                     {
2079                       start = BYTE_TO_CHAR (position);
2080                       end = BYTE_TO_CHAR (position + len_byte);
2081                     }
2082
2083                   if ((n -= direction) != 0)
2084                     pos_byte += dirlen; /* to resume search */
2085                   else
2086                     return direction > 0 ? end : start;
2087                 }
2088               else
2089                 pos_byte += stride_for_teases;
2090             }
2091           }
2092       /* We have done one clump.  Can we continue? */
2093       if ((lim_byte - pos_byte) * direction < 0)
2094         return ((0 - n) * direction); /* past the bound: -|N|, N being the count still to find */
2095     }
2096   return BYTE_TO_CHAR (pos_byte); /* reached only if N was 0 on entry; inside the loop N hits 0 only on a returning path */
2097 }
2098
2099 /* Record in register 0 of search_regs, as character positions, the overall
2100    match just found in the current buffer: NBYTES bytes starting at byte
2101    BEG_BYTE.  Mark registers 1 and up unmatched.  No-op if changes are inhibited.  */
2102
2103 static void
2104 set_search_regs (beg_byte, nbytes)
2105      int beg_byte, nbytes;
2106 {
2107   int reg = 1;
2108
2109   if (! NILP (Vinhibit_changing_match_data))
2110     return;
2111
2112   /* If no registers have been allocated yet, make room for two.  */
2113   if (search_regs.num_regs == 0)
2114     {
2115       search_regs.start = (regoff_t *) xmalloc (sizeof (regoff_t) * 2);
2116       search_regs.end = (regoff_t *) xmalloc (sizeof (regoff_t) * 2);
2117       search_regs.num_regs = 2;
2118     }
2119
2120   /* Registers other than 0 hold no submatch: flag them with -1.  */
2121   while (reg < search_regs.num_regs)
2122     {
2123       search_regs.end[reg] = -1;
2124       search_regs.start[reg] = -1;
2125       reg++;
2126     }
2127
2128   search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
2129   search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
2130   XSETBUFFER (last_thing_searched, current_buffer);
2131 }
2132 \f
2133 /* Given STRING, words separated by non-word characters (per SYNTAX),
2134    return a regexp matching those words in order: "\b" at both ends, and
2135    "\W\W*" between adjacent words.  Return empty_unibyte_string if no word.  */
2136
2137 static Lisp_Object
2138 wordify (string)
2139      Lisp_Object string;
2140 {
2141   register unsigned char *o;
2142   register int i, i_byte, len, punct_count = 0, word_count = 0;
2143   Lisp_Object val;
2144   int prev_c = 0;
2145   int adjust, punct_bytes = 0;
2146
2147   CHECK_STRING (string);
2148   len = SCHARS (string);
2149
2150   /* Count words, and the characters and bytes to leave out.  */
2151   for (i = 0, i_byte = 0; i < len; )
2152     {
2153       int c;
2154       int i_byte_orig = i_byte;
2155
2156       FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
2157
2158       if (SYNTAX (c) != Sword)
2159         {
2160           punct_count++;
2161           punct_bytes += i_byte - i_byte_orig;
2162           if (i > 0 && SYNTAX (prev_c) == Sword)
2163             word_count++;
2164         }
2165
2166       prev_c = c;
2167     }
2168
2169   if (SYNTAX (prev_c) == Sword)
2170     word_count++;
2171   if (!word_count)
2172     return empty_unibyte_string;
2173
2174   /* Added: "\b" twice (4) and "\W\W*" (5) between words, all ASCII.
2175      Dropped: every non-word character, which may be several bytes long in
2176      a multibyte string, so bytes shrink by PUNCT_BYTES, not PUNCT_COUNT.  */
2177   adjust = 5 * (word_count - 1) + 4;
2178   if (STRING_MULTIBYTE (string))
2179     val = make_uninit_multibyte_string (len - punct_count + adjust,
2180                                         SBYTES (string) - punct_bytes + adjust);
2181   else
2182     val = make_uninit_string (len - punct_count + adjust);
2183
2184   o = SDATA (val);
2185   *o++ = '\\';
2186   *o++ = 'b';
2187   prev_c = 0;
2188
2189   for (i = 0, i_byte = 0; i < len; )
2190     {
2191       int c;
2192       int i_byte_orig = i_byte;
2193
2194       FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
2195
2196       if (SYNTAX (c) == Sword)
2197         {
2198           bcopy (SDATA (string) + i_byte_orig, o, i_byte - i_byte_orig);
2199           o += i_byte - i_byte_orig;
2200         }
2201       else if (i > 0 && SYNTAX (prev_c) == Sword && --word_count)
2202         {
2203           bcopy ("\\W\\W*", o, 5);
2204           o += 5;
2205         }
2206
2207       prev_c = c;
2208     }
2209
2210   *o++ = '\\';
2211   *o++ = 'b';
2212
2213   return val;
2214 }
2215 \f
2216 DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
2217        "MSearch backward: ",
2218        doc: /* Search backward from point for STRING.
2219 Set point to the beginning of the occurrence found, and return point.
2220 An optional second argument bounds the search; it is a buffer position.
2221 The match found must not extend before that position.
2222 Optional third argument, if t, means if fail just return nil (no error).
2223  If not nil and not t, position at limit of search and return nil.
2224 Optional fourth argument is repeat count--search for successive occurrences.
2225
2226 Search case-sensitivity is determined by the value of the variable
2227 `case-fold-search', which see.
2228
2229 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2230      (string, bound, noerror, count)
2231      Lisp_Object string, bound, noerror, count;
2232 { /* NOTE(review): -1 is presumably the direction (backward) and the two 0s the regexp and POSIX flags -- confirm in search_command.  */
2233   return search_command (string, bound, noerror, count, -1, 0, 0);
2234 }
2235
2236 DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
2237        doc: /* Search forward from point for STRING.
2238 Set point to the end of the occurrence found, and return point.
2239 An optional second argument bounds the search; it is a buffer position.
2240 The match found must not extend after that position.  A value of nil is
2241   equivalent to (point-max).
2242 Optional third argument, if t, means if fail just return nil (no error).
2243   If not nil and not t, move to limit of search and return nil.
2244 Optional fourth argument is repeat count--search for successive occurrences.
2245
2246 Search case-sensitivity is determined by the value of the variable
2247 `case-fold-search', which see.
2248
2249 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2250      (string, bound, noerror, count)
2251      Lisp_Object string, bound, noerror, count;
2252 { /* NOTE(review): 1 is presumably the direction (forward) and the two 0s the regexp and POSIX flags -- confirm in search_command.  */
2253   return search_command (string, bound, noerror, count, 1, 0, 0);
2254 }
2255
2256 DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
2257        "sWord search backward: ",
2258        doc: /* Search backward from point for STRING, ignoring differences in punctuation.
2259 Set point to the beginning of the occurrence found, and return point.
2260 An optional second argument bounds the search; it is a buffer position.
2261 The match found must not extend before that position.
2262 Optional third argument, if t, means if fail just return nil (no error).
2263   If not nil and not t, move to limit of search and return nil.
2264 Optional fourth argument is repeat count--search for successive occurrences.  */)
2265      (string, bound, noerror, count)
2266      Lisp_Object string, bound, noerror, count;
2267 { /* wordify turns STRING into a regexp first.  NOTE(review): -1, 1, 0 presumably mean backward, regexp, non-POSIX -- confirm in search_command.  */
2268   return search_command (wordify (string), bound, noerror, count, -1, 1, 0);
2269 }
2270
2271 DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
2272        "sWord search: ",
2273        doc: /* Search forward from point for STRING, ignoring differences in punctuation.
2274 Set point to the end of the occurrence found, and return point.
2275 An optional second argument bounds the search; it is a buffer position.
2276 The match found must not extend after that position.
2277 Optional third argument, if t, means if fail just return nil (no error).
2278   If not nil and not t, move to limit of search and return nil.
2279 Optional fourth argument is repeat count--search for successive occurrences.  */)
2280      (string, bound, noerror, count)
2281      Lisp_Object string, bound, noerror, count;
2282 { /* wordify turns STRING into a regexp first.  NOTE(review): 1, 1, 0 presumably mean forward, regexp, non-POSIX -- confirm in search_command.  */
2283   return search_command (wordify (string), bound, noerror, count, 1, 1, 0);
2284 }
2285
2286 DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
2287        "sRE search backward: ",
2288        doc: /* Search backward from point for match for regular expression REGEXP.
2289 Set point to the beginning of the match, and return point.
2290 The match found is the one starting last in the buffer
2291 and yet ending before the origin of the search.
2292 An optional second argument bounds the search; it is a buffer position.
2293 The match found must start at or after that position.
2294 Optional third argument, if t, means if fail just return nil (no error).
2295   If not nil and not t, move to limit of search and return nil.
2296 Optional fourth argument is repeat count--search for successive occurrences.
2297 See also the functions `match-beginning', `match-end', `match-string',
2298 and `replace-match'.  */)
2299      (regexp, bound, noerror, count)
2300      Lisp_Object regexp, bound, noerror, count;
2301 { /* NOTE(review): -1, 1, 0 presumably mean backward, regexp, non-POSIX -- confirm in search_command.  */
2302   return search_command (regexp, bound, noerror, count, -1, 1, 0);
2303 }
2304
2305 DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
2306        "sRE search: ",
2307        doc: /* Search forward from point for regular expression REGEXP.
2308 Set point to the end of the occurrence found, and return point.
2309 An optional second argument bounds the search; it is a buffer position.
2310 The match found must not extend after that position.
2311 Optional third argument, if t, means if fail just return nil (no error).
2312   If not nil and not t, move to limit of search and return nil.
2313 Optional fourth argument is repeat count--search for successive occurrences.
2314 See also the functions `match-beginning', `match-end', `match-string',
2315 and `replace-match'.  */)
2316      (regexp, bound, noerror, count)
2317      Lisp_Object regexp, bound, noerror, count;
2318 { /* NOTE(review): 1, 1, 0 presumably mean forward, regexp, non-POSIX -- confirm in search_command.  */
2319   return search_command (regexp, bound, noerror, count, 1, 1, 0);
2320 }
2321
2322 DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
2323        "sPosix search backward: ",
2324        doc: /* Search backward from point for match for regular expression REGEXP.
2325 Find the longest match in accord with Posix regular expression rules.
2326 Set point to the beginning of the match, and return point.
2327 The match found is the one starting last in the buffer
2328 and yet ending before the origin of the search.
2329 An optional second argument bounds the search; it is a buffer position.
2330 The match found must start at or after that position.
2331 Optional third argument, if t, means if fail just return nil (no error).
2332   If not nil and not t, move to limit of search and return nil.
2333 Optional fourth argument is repeat count--search for successive occurrences.
2334 See also the functions `match-beginning', `match-end', `match-string',
2335 and `replace-match'.  */)
2336      (regexp, bound, noerror, count)
2337      Lisp_Object regexp, bound, noerror, count;
2338 { /* NOTE(review): -1, 1, 1 presumably mean backward, regexp, POSIX longest-match -- confirm in search_command.  */
2339   return search_command (regexp, bound, noerror, count, -1, 1, 1);
2340 }
2341
2342 DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
2343        "sPosix search: ",
2344        doc: /* Search forward from point for regular expression REGEXP.
2345 Find the longest match in accord with Posix regular expression rules.
2346 Set point to the end of the occurrence found, and return point.
2347 An optional second argument bounds the search; it is a buffer position.
2348 The match found must not extend after that position.
2349 Optional third argument, if t, means if fail just return nil (no error).
2350   If not nil and not t, move to limit of search and return nil.
2351 Optional fourth argument is repeat count--search for successive occurrences.
2352 See also the functions `match-beginning', `match-end', `match-string',
2353 and `replace-match'.  */)
2354      (regexp, bound, noerror, count)
2355      Lisp_Object regexp, bound, noerror, count;
2356 { /* NOTE(review): 1, 1, 1 presumably mean forward, regexp, POSIX longest-match -- confirm in search_command.  */
2357   return search_command (regexp, bound, noerror, count, 1, 1, 1);
2358 }
2359 \f
2360 DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
2361        doc: /* Replace text matched by last search with NEWTEXT.
2362 Leave point at the end of the replacement text.
2363
2364 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
2365 Otherwise maybe capitalize the whole text, or maybe just word initials,
2366 based on the replaced text.
2367 If the replaced text has only capital letters
2368 and has at least one multiletter word, convert NEWTEXT to all caps.
2369 Otherwise if all words are capitalized in the replaced text,
2370 capitalize each word in NEWTEXT.
2371
2372 If third arg LITERAL is non-nil, insert NEWTEXT literally.
2373 Otherwise treat `\\' as special:
2374   `\\&' in NEWTEXT means substitute original matched text.
2375   `\\N' means substitute what matched the Nth `\\(...\\)'.
2376        If Nth parens didn't match, substitute nothing.
2377   `\\\\' means insert one `\\'.
2378 Case conversion does not apply to these substitutions.
2379
2380 FIXEDCASE and LITERAL are optional arguments.
2381
2382 The optional fourth argument STRING can be a string to modify.
2383 This is meaningful when the previous match was done against STRING,
2384 using `string-match'.  When used this way, `replace-match'
2385 creates and returns a new string made by copying STRING and replacing
2386 the part of STRING that was matched.
2387
2388 The optional fifth argument SUBEXP specifies a subexpression;
2389 it says to replace just that subexpression with NEWTEXT,
2390 rather than replacing the entire matched text.
2391 This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
2392 `\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
2393 NEWTEXT in place of subexp N.
2394 This is useful only after a regular expression search or match,
2395 since only regular expressions have distinguished subexpressions.  */)
2396      (newtext, fixedcase, literal, string, subexp)
2397      Lisp_Object newtext, fixedcase, literal, string, subexp;
2398 {
2399   enum { nochange, all_caps, cap_initial } case_action;
2400   register int pos, pos_byte;
2401   int some_multiletter_word;
2402   int some_lowercase;
2403   int some_uppercase;
2404   int some_nonuppercase_initial;
2405   register int c, prevc;
2406   int sub;
2407   int opoint, newpoint;
2408
2409   CHECK_STRING (newtext);
2410
2411   if (! NILP (string))
2412     CHECK_STRING (string);
2413
2414   case_action = nochange;       /* We tried an initialization */
2415                                 /* but some C compilers blew it */
2416
2417   if (search_regs.num_regs <= 0)
2418     error ("`replace-match' called before any match found");
2419
2420   if (NILP (subexp))
2421     sub = 0;
2422   else
2423     {
2424       CHECK_NUMBER (subexp);
2425       sub = XINT (subexp);
2426       if (sub < 0 || sub >= search_regs.num_regs)
2427         args_out_of_range (subexp, make_number (search_regs.num_regs));
2428     }
2429
2430   if (NILP (string))
2431     {
2432       if (search_regs.start[sub] < BEGV
2433           || search_regs.start[sub] > search_regs.end[sub]
2434           || search_regs.end[sub] > ZV)
2435         args_out_of_range (make_number (search_regs.start[sub]),
2436                            make_number (search_regs.end[sub]));
2437     }
2438   else
2439     {
2440       if (search_regs.start[sub] < 0
2441           || search_regs.start[sub] > search_regs.end[sub]
2442           || search_regs.end[sub] > SCHARS (string))
2443         args_out_of_range (make_number (search_regs.start[sub]),
2444                            make_number (search_regs.end[sub]));
2445     }
2446
2447   if (NILP (fixedcase))
2448     {
2449       /* Decide how to casify by examining the matched text. */
2450       int last;
2451
2452       pos = search_regs.start[sub];
2453       last = search_regs.end[sub];
2454
2455       if (NILP (string))
2456         pos_byte = CHAR_TO_BYTE (pos);
2457       else
2458         pos_byte = string_char_to_byte (string, pos);
2459
2460       prevc = '\n';
2461       case_action = all_caps;
2462
2463       /* some_multiletter_word is set nonzero if any original word
2464          is more than one letter long. */
2465       some_multiletter_word = 0;
2466       some_lowercase = 0;
2467       some_nonuppercase_initial = 0;
2468       some_uppercase = 0;
2469
2470       while (pos < last)
2471         {
2472           if (NILP (string))
2473             {
2474               c = FETCH_CHAR (pos_byte);
2475               INC_BOTH (pos, pos_byte);
2476             }
2477           else
2478             FETCH_STRING_CHAR_ADVANCE (c, string, pos, pos_byte);
2479
2480           if (LOWERCASEP (c))
2481             {
2482               /* Cannot be all caps if any original char is lower case */
2483
2484               some_lowercase = 1;
2485               if (SYNTAX (prevc) != Sword)
2486                 some_nonuppercase_initial = 1;
2487               else
2488                 some_multiletter_word = 1;
2489             }
2490           else if (UPPERCASEP (c))
2491             {
2492               some_uppercase = 1;
2493               if (SYNTAX (prevc) != Sword)
2494                 ;
2495               else
2496                 some_multiletter_word = 1;
2497             }
2498           else
2499             {
2500               /* If the initial is a caseless word constituent,
2501                  treat that like a lowercase initial.  */
2502               if (SYNTAX (prevc) != Sword)
2503                 some_nonuppercase_initial = 1;
2504             }
2505
2506           prevc = c;
2507         }
2508
2509       /* Convert to all caps if the old text is all caps
2510          and has at least one multiletter word.  */
2511       if (! some_lowercase && some_multiletter_word)
2512         case_action = all_caps;
2513       /* Capitalize each word, if the old text has all capitalized words.  */
2514       else if (!some_nonuppercase_initial && some_multiletter_word)
2515         case_action = cap_initial;
2516       else if (!some_nonuppercase_initial && some_uppercase)
2517         /* Should x -> yz, operating on X, give Yz or YZ?
2518            We'll assume the latter.  */
2519         case_action = all_caps;
2520       else
2521         case_action = nochange;
2522     }
2523
2524   /* Do replacement in a string.  */
2525   if (!NILP (string))
2526     {
2527       Lisp_Object before, after;
2528
2529       before = Fsubstring (string, make_number (0),
2530                            make_number (search_regs.start[sub]));
2531       after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
2532
2533       /* Substitute parts of the match into NEWTEXT
2534          if desired.  */
2535       if (NILP (literal))
2536         {
2537           int lastpos = 0;
2538           int lastpos_byte = 0;
2539           /* We build up the substituted string in ACCUM.  */
2540           Lisp_Object accum;
2541           Lisp_Object middle;
2542           int length = SBYTES (newtext);
2543
2544           accum = Qnil;
2545
2546           for (pos_byte = 0, pos = 0; pos_byte < length;)
2547             {
2548               int substart = -1;
2549               int subend = 0;
2550               int delbackslash = 0;
2551
2552               FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2553
2554               if (c == '\\')
2555                 {
2556                   FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2557
2558                   if (c == '&')
2559                     {
2560                       substart = search_regs.start[sub];
2561                       subend = search_regs.end[sub];
2562                     }
2563                   else if (c >= '1' && c <= '9')
2564                     {
2565                       if (search_regs.start[c - '0'] >= 0
2566                           && c <= search_regs.num_regs + '0')
2567                         {
2568                           substart = search_regs.start[c - '0'];
2569                           subend = search_regs.end[c - '0'];
2570                         }
2571                       else
2572                         {
2573                           /* If that subexp did not match,
2574                              replace \\N with nothing.  */
2575                           substart = 0;
2576                           subend = 0;
2577                         }
2578                     }
2579                   else if (c == '\\')
2580                     delbackslash = 1;
2581                   else
2582                     error ("Invalid use of `\\' in replacement text");
2583                 }
2584               if (substart >= 0)
2585                 {
2586                   if (pos - 2 != lastpos)
2587                     middle = substring_both (newtext, lastpos,
2588                                              lastpos_byte,
2589                                              pos - 2, pos_byte - 2);
2590                   else
2591                     middle = Qnil;
2592                   accum = concat3 (accum, middle,
2593                                    Fsubstring (string,
2594                                                make_number (substart),
2595                                                make_number (subend)));
2596                   lastpos = pos;
2597                   lastpos_byte = pos_byte;
2598                 }
2599               else if (delbackslash)
2600                 {
2601                   middle = substring_both (newtext, lastpos,
2602                                            lastpos_byte,
2603                                            pos - 1, pos_byte - 1);
2604
2605                   accum = concat2 (accum, middle);
2606                   lastpos = pos;
2607                   lastpos_byte = pos_byte;
2608                 }
2609             }
2610
2611           if (pos != lastpos)
2612             middle = substring_both (newtext, lastpos,
2613                                      lastpos_byte,
2614                                      pos, pos_byte);
2615           else
2616             middle = Qnil;
2617
2618           newtext = concat2 (accum, middle);
2619         }
2620
2621       /* Do case substitution in NEWTEXT if desired.  */
2622       if (case_action == all_caps)
2623         newtext = Fupcase (newtext);
2624       else if (case_action == cap_initial)
2625         newtext = Fupcase_initials (newtext);
2626
2627       return concat3 (before, newtext, after);
2628     }
2629
2630   /* Record point, then move (quietly) to the start of the match.  */
2631   if (PT >= search_regs.end[sub])
2632     opoint = PT - ZV;
2633   else if (PT > search_regs.start[sub])
2634     opoint = search_regs.end[sub] - ZV;
2635   else
2636     opoint = PT;
2637
2638   /* If we want non-literal replacement,
2639      perform substitution on the replacement string.  */
2640   if (NILP (literal))
2641     {
2642       int length = SBYTES (newtext);
2643       unsigned char *substed;
2644       int substed_alloc_size, substed_len;
2645       int buf_multibyte = !NILP (current_buffer->enable_multibyte_characters);
2646       int str_multibyte = STRING_MULTIBYTE (newtext);
2647       Lisp_Object rev_tbl;
2648       int really_changed = 0;
2649
2650       rev_tbl= (!buf_multibyte && CHAR_TABLE_P (Vnonascii_translation_table)
2651                 ? Fchar_table_extra_slot (Vnonascii_translation_table,
2652                                           make_number (0))
2653                 : Qnil);
2654
2655       substed_alloc_size = length * 2 + 100;
2656       substed = (unsigned char *) xmalloc (substed_alloc_size + 1);
2657       substed_len = 0;
2658
2659       /* Go thru NEWTEXT, producing the actual text to insert in
2660          SUBSTED while adjusting multibyteness to that of the current
2661          buffer.  */
2662
2663       for (pos_byte = 0, pos = 0; pos_byte < length;)
2664         {
2665           unsigned char str[MAX_MULTIBYTE_LENGTH];
2666           unsigned char *add_stuff = NULL;
2667           int add_len = 0;
2668           int idx = -1;
2669
2670           if (str_multibyte)
2671             {
2672               FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte);
2673               if (!buf_multibyte)
2674                 c = multibyte_char_to_unibyte (c, rev_tbl);
2675             }
2676           else
2677             {
2678               /* Note that we don't have to increment POS.  */
2679               c = SREF (newtext, pos_byte++);
2680               if (buf_multibyte)
2681                 c = unibyte_char_to_multibyte (c);
2682             }
2683
2684           /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2685              or set IDX to a match index, which means put that part
2686              of the buffer text into SUBSTED.  */
2687
2688           if (c == '\\')
2689             {
2690               really_changed = 1;
2691
2692               if (str_multibyte)
2693                 {
2694                   FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext,
2695                                                       pos, pos_byte);
2696                   if (!buf_multibyte && !SINGLE_BYTE_CHAR_P (c))
2697                     c = multibyte_char_to_unibyte (c, rev_tbl);
2698                 }
2699               else
2700                 {
2701                   c = SREF (newtext, pos_byte++);
2702                   if (buf_multibyte)
2703                     c = unibyte_char_to_multibyte (c);
2704                 }
2705
2706               if (c == '&')
2707                 idx = sub;
2708               else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
2709                 {
2710                   if (search_regs.start[c - '0'] >= 1)
2711                     idx = c - '0';
2712                 }
2713               else if (c == '\\')
2714                 add_len = 1, add_stuff = "\\";
2715               else
2716                 {
2717                   xfree (substed);
2718                   error ("Invalid use of `\\' in replacement text");
2719                 }
2720             }
2721           else
2722             {
2723               add_len = CHAR_STRING (c, str);
2724               add_stuff = str;
2725             }
2726
2727           /* If we want to copy part of a previous match,
2728              set up ADD_STUFF and ADD_LEN to point to it.  */
2729           if (idx >= 0)
2730             {
2731               int begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
2732               add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
2733               if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
2734                 move_gap (search_regs.start[idx]);
2735               add_stuff = BYTE_POS_ADDR (begbyte);
2736             }
2737
2738           /* Now the stuff we want to add to SUBSTED
2739              is invariably ADD_LEN bytes starting at ADD_STUFF.  */
2740
2741           /* Make sure SUBSTED is big enough.  */
2742           if (substed_len + add_len >= substed_alloc_size)
2743             {
2744               substed_alloc_size = substed_len + add_len + 500;
2745               substed = (unsigned char *) xrealloc (substed,
2746                                                     substed_alloc_size + 1);
2747             }
2748
2749           /* Now add to the end of SUBSTED.  */
2750           if (add_stuff)
2751             {
2752               bcopy (add_stuff, substed + substed_len, add_len);
2753               substed_len += add_len;
2754             }
2755         }
2756
2757       if (really_changed)
2758         {
2759           if (buf_multibyte)
2760             {
2761               int nchars = multibyte_chars_in_text (substed, substed_len);
2762
2763               newtext = make_multibyte_string (substed, nchars, substed_len);
2764             }
2765           else
2766             newtext = make_unibyte_string (substed, substed_len);
2767         }
2768       xfree (substed);
2769     }
2770
2771   /* Replace the old text with the new in the cleanest possible way.  */
2772   replace_range (search_regs.start[sub], search_regs.end[sub],
2773                  newtext, 1, 0, 1);
2774   newpoint = search_regs.start[sub] + SCHARS (newtext);
2775
2776   if (case_action == all_caps)
2777     Fupcase_region (make_number (search_regs.start[sub]),
2778                     make_number (newpoint));
2779   else if (case_action == cap_initial)
2780     Fupcase_initials_region (make_number (search_regs.start[sub]),
2781                              make_number (newpoint));
2782
2783   /* Adjust search data for this change.  */
2784   {
2785     int oldend = search_regs.end[sub];
2786     int oldstart = search_regs.start[sub];
2787     int change = newpoint - search_regs.end[sub];
2788     int i;
2789
2790     for (i = 0; i < search_regs.num_regs; i++)
2791       {
2792         if (search_regs.start[i] >= oldend)
2793           search_regs.start[i] += change;
2794         else if (search_regs.start[i] > oldstart)
2795           search_regs.start[i] = oldstart;
2796         if (search_regs.end[i] >= oldend)
2797           search_regs.end[i] += change;
2798         else if (search_regs.end[i] > oldstart)
2799           search_regs.end[i] = oldstart;
2800       }
2801   }
2802
2803   /* Put point back where it was in the text.  */
2804   if (opoint <= 0)
2805     TEMP_SET_PT (opoint + ZV);
2806   else
2807     TEMP_SET_PT (opoint);
2808
2809   /* Now move point "officially" to the start of the inserted replacement.  */
2810   move_if_not_intangible (newpoint);
2811
2812   return Qnil;
2813 }
2814 \f
2815 static Lisp_Object
2816 match_limit (num, beginningp)
2817      Lisp_Object num;
2818      int beginningp;
2819 {
2820   register int n;
2821
2822   CHECK_NUMBER (num);
2823   n = XINT (num);
2824   if (n < 0)
2825     args_out_of_range (num, make_number (0));
2826   if (search_regs.num_regs <= 0)
2827     error ("No match data, because no search succeeded");
2828   if (n >= search_regs.num_regs
2829       || search_regs.start[n] < 0)
2830     return Qnil;
2831   return (make_number ((beginningp) ? search_regs.start[n]
2832                                     : search_regs.end[n]));
2833 }
2834
2835 DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
2836        doc: /* Return position of start of text matched by last search.
2837 SUBEXP, a number, specifies which parenthesized expression in the last
2838   regexp.
2839 Value is nil if SUBEXPth pair didn't match, or there were less than
2840   SUBEXP pairs.
2841 Zero means the entire text matched by the whole regexp or whole string.  */)
2842      (subexp)
2843      Lisp_Object subexp;
2844 {
2845   return match_limit (subexp, 1);
2846 }
2847
2848 DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
2849        doc: /* Return position of end of text matched by last search.
2850 SUBEXP, a number, specifies which parenthesized expression in the last
2851   regexp.
2852 Value is nil if SUBEXPth pair didn't match, or there were less than
2853   SUBEXP pairs.
2854 Zero means the entire text matched by the whole regexp or whole string.  */)
2855      (subexp)
2856      Lisp_Object subexp;
2857 {
2858   return match_limit (subexp, 0);
2859 }
2860
2861 DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 3, 0,
2862        doc: /* Return a list containing all info on what the last search matched.
2863 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2864 All the elements are markers or nil (nil if the Nth pair didn't match)
2865 if the last match was on a buffer; integers or nil if a string was matched.
2866 Use `store-match-data' to reinstate the data in this list.
2867
2868 If INTEGERS (the optional first argument) is non-nil, always use
2869 integers \(rather than markers) to represent buffer positions.  In
2870 this case, and if the last match was in a buffer, the buffer will get
2871 stored as one additional element at the end of the list.
2872
2873 If REUSE is a list, reuse it as part of the value.  If REUSE is long
2874 enough to hold all the values, and if INTEGERS is non-nil, no consing
2875 is done.
2876
2877 If optional third arg RESEAT is non-nil, any previous markers on the
2878 REUSE list will be modified to point to nowhere.
2879
2880 Return value is undefined if the last search failed.  */)
2881   (integers, reuse, reseat)
2882      Lisp_Object integers, reuse, reseat;
2883 {
2884   Lisp_Object tail, prev;
2885   Lisp_Object *data;
2886   int i, len;
2887
2888   if (!NILP (reseat))
2889     for (tail = reuse; CONSP (tail); tail = XCDR (tail))
2890       if (MARKERP (XCAR (tail)))
2891         {
2892           unchain_marker (XMARKER (XCAR (tail)));
2893           XSETCAR (tail, Qnil);
2894         }
2895
2896   if (NILP (last_thing_searched))
2897     return Qnil;
2898
2899   prev = Qnil;
2900
2901   data = (Lisp_Object *) alloca ((2 * search_regs.num_regs + 1)
2902                                  * sizeof (Lisp_Object));
2903
2904   len = 0;
2905   for (i = 0; i < search_regs.num_regs; i++)
2906     {
2907       int start = search_regs.start[i];
2908       if (start >= 0)
2909         {
2910           if (EQ (last_thing_searched, Qt)
2911               || ! NILP (integers))
2912             {
2913               XSETFASTINT (data[2 * i], start);
2914               XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
2915             }
2916           else if (BUFFERP (last_thing_searched))
2917             {
2918               data[2 * i] = Fmake_marker ();
2919               Fset_marker (data[2 * i],
2920                            make_number (start),
2921                            last_thing_searched);
2922               data[2 * i + 1] = Fmake_marker ();
2923               Fset_marker (data[2 * i + 1],
2924                            make_number (search_regs.end[i]),
2925                            last_thing_searched);
2926             }
2927           else
2928             /* last_thing_searched must always be Qt, a buffer, or Qnil.  */
2929             abort ();
2930
2931           len = 2 * i + 2;
2932         }
2933       else
2934         data[2 * i] = data[2 * i + 1] = Qnil;
2935     }
2936
2937   if (BUFFERP (last_thing_searched) && !NILP (integers))
2938     {
2939       data[len] = last_thing_searched;
2940       len++;
2941     }
2942
2943   /* If REUSE is not usable, cons up the values and return them.  */
2944   if (! CONSP (reuse))
2945     return Flist (len, data);
2946
2947   /* If REUSE is a list, store as many value elements as will fit
2948      into the elements of REUSE.  */
2949   for (i = 0, tail = reuse; CONSP (tail);
2950        i++, tail = XCDR (tail))
2951     {
2952       if (i < len)
2953         XSETCAR (tail, data[i]);
2954       else
2955         XSETCAR (tail, Qnil);
2956       prev = tail;
2957     }
2958
2959   /* If we couldn't fit all value elements into REUSE,
2960      cons up the rest of them and add them to the end of REUSE.  */
2961   if (i < len)
2962     XSETCDR (prev, Flist (len - i, data + i));
2963
2964   return reuse;
2965 }
2966
2967 /* Internal usage only:
2968    If RESEAT is `evaporate', put the markers back on the free list
2969    immediately.  No other references to the markers must exist in this case,
2970    so it is used only internally on the unwind stack and save-match-data from
2971    Lisp.  */
2972
2973 DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 2, 0,
2974        doc: /* Set internal data on last search match from elements of LIST.
2975 LIST should have been created by calling `match-data' previously.
2976
2977 If optional arg RESEAT is non-nil, make markers on LIST point nowhere.  */)
2978     (list, reseat)
2979      register Lisp_Object list, reseat;
2980 {
2981   register int i;
2982   register Lisp_Object marker;
2983
2984   if (running_asynch_code)
2985     save_search_regs ();
2986
2987   CHECK_LIST (list);
2988
2989   /* Unless we find a marker with a buffer or an explicit buffer
2990      in LIST, assume that this match data came from a string.  */
2991   last_thing_searched = Qt;
2992
2993   /* Allocate registers if they don't already exist.  */
2994   {
2995     int length = XFASTINT (Flength (list)) / 2;
2996
2997     if (length > search_regs.num_regs)
2998       {
2999         if (search_regs.num_regs == 0)
3000           {
3001             search_regs.start
3002               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
3003             search_regs.end
3004               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
3005           }
3006         else
3007           {
3008             search_regs.start
3009               = (regoff_t *) xrealloc (search_regs.start,
3010                                        length * sizeof (regoff_t));
3011             search_regs.end
3012               = (regoff_t *) xrealloc (search_regs.end,
3013                                        length * sizeof (regoff_t));
3014           }
3015
3016         for (i = search_regs.num_regs; i < length; i++)
3017           search_regs.start[i] = -1;
3018
3019         search_regs.num_regs = length;
3020       }
3021
3022     for (i = 0; CONSP (list); i++)
3023       {
3024         marker = XCAR (list);
3025         if (BUFFERP (marker))
3026           {
3027             last_thing_searched = marker;
3028             break;
3029           }
3030         if (i >= length)
3031           break;
3032         if (NILP (marker))
3033           {
3034             search_regs.start[i] = -1;
3035             list = XCDR (list);
3036           }
3037         else
3038           {
3039             int from;
3040             Lisp_Object m;
3041
3042             m = marker;
3043             if (MARKERP (marker))
3044               {
3045                 if (XMARKER (marker)->buffer == 0)
3046                   XSETFASTINT (marker, 0);
3047                 else
3048                   XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
3049               }
3050
3051             CHECK_NUMBER_COERCE_MARKER (marker);
3052             from = XINT (marker);
3053
3054             if (!NILP (reseat) && MARKERP (m))
3055               {
3056                 if (EQ (reseat, Qevaporate))
3057                   free_marker (m);
3058                 else
3059                   unchain_marker (XMARKER (m));
3060                 XSETCAR (list, Qnil);
3061               }
3062
3063             if ((list = XCDR (list), !CONSP (list)))
3064               break;
3065
3066             m = marker = XCAR (list);
3067
3068             if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
3069               XSETFASTINT (marker, 0);
3070
3071             CHECK_NUMBER_COERCE_MARKER (marker);
3072             search_regs.start[i] = from;
3073             search_regs.end[i] = XINT (marker);
3074
3075             if (!NILP (reseat) && MARKERP (m))
3076               {
3077                 if (EQ (reseat, Qevaporate))
3078                   free_marker (m);
3079                 else
3080                   unchain_marker (XMARKER (m));
3081                 XSETCAR (list, Qnil);
3082               }
3083           }
3084         list = XCDR (list);
3085       }
3086
3087     for (; i < search_regs.num_regs; i++)
3088       search_regs.start[i] = -1;
3089   }
3090
3091   return Qnil;
3092 }
3093
/* If nonzero, the match data have been saved in saved_search_regs
   (and saved_last_thing_searched) during the execution of a sentinel
   or filter.  */
3096 static int search_regs_saved;
3097 static struct re_registers saved_search_regs;
3098 static Lisp_Object saved_last_thing_searched;
3099
3100 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
3101    if asynchronous code (filter or sentinel) is running. */
3102 static void
3103 save_search_regs ()
3104 {
3105   if (!search_regs_saved)
3106     {
3107       saved_search_regs.num_regs = search_regs.num_regs;
3108       saved_search_regs.start = search_regs.start;
3109       saved_search_regs.end = search_regs.end;
3110       saved_last_thing_searched = last_thing_searched;
3111       last_thing_searched = Qnil;
3112       search_regs.num_regs = 0;
3113       search_regs.start = 0;
3114       search_regs.end = 0;
3115
3116       search_regs_saved = 1;
3117     }
3118 }
3119
3120 /* Called upon exit from filters and sentinels. */
3121 void
3122 restore_search_regs ()
3123 {
3124   if (search_regs_saved)
3125     {
3126       if (search_regs.num_regs > 0)
3127         {
3128           xfree (search_regs.start);
3129           xfree (search_regs.end);
3130         }
3131       search_regs.num_regs = saved_search_regs.num_regs;
3132       search_regs.start = saved_search_regs.start;
3133       search_regs.end = saved_search_regs.end;
3134       last_thing_searched = saved_last_thing_searched;
3135       saved_last_thing_searched = Qnil;
3136       search_regs_saved = 0;
3137     }
3138 }
3139
3140 static Lisp_Object
3141 unwind_set_match_data (list)
3142      Lisp_Object list;
3143 {
3144   /* It is safe to free (evaporate) the markers immediately.  */
3145   return Fset_match_data (list, Qevaporate);
3146 }
3147
3148 /* Called to unwind protect the match data.  */
3149 void
3150 record_unwind_save_match_data ()
3151 {
3152   record_unwind_protect (unwind_set_match_data,
3153                          Fmatch_data (Qnil, Qnil, Qnil));
3154 }
3155
3156 /* Quote a string to inactivate reg-expr chars */
3157
3158 DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
3159        doc: /* Return a regexp string which matches exactly STRING and nothing else.  */)
3160      (string)
3161      Lisp_Object string;
3162 {
3163   register unsigned char *in, *out, *end;
3164   register unsigned char *temp;
3165   int backslashes_added = 0;
3166
3167   CHECK_STRING (string);
3168
3169   temp = (unsigned char *) alloca (SBYTES (string) * 2);
3170
3171   /* Now copy the data into the new string, inserting escapes. */
3172
3173   in = SDATA (string);
3174   end = in + SBYTES (string);
3175   out = temp;
3176
3177   for (; in != end; in++)
3178     {
3179       if (*in == '['
3180           || *in == '*' || *in == '.' || *in == '\\'
3181           || *in == '?' || *in == '+'
3182           || *in == '^' || *in == '$')
3183         *out++ = '\\', backslashes_added++;
3184       *out++ = *in;
3185     }
3186
3187   return make_specified_string (temp,
3188                                 SCHARS (string) + backslashes_added,
3189                                 out - temp,
3190                                 STRING_MULTIBYTE (string));
3191 }
3192 \f
3193 void
3194 syms_of_search ()
3195 {
3196   register int i;
3197
3198   for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
3199     {
3200       searchbufs[i].buf.allocated = 100;
3201       searchbufs[i].buf.buffer = (unsigned char *) xmalloc (100);
3202       searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
3203       searchbufs[i].regexp = Qnil;
3204       searchbufs[i].whitespace_regexp = Qnil;
3205       searchbufs[i].syntax_table = Qnil;
3206       staticpro (&searchbufs[i].regexp);
3207       staticpro (&searchbufs[i].whitespace_regexp);
3208       staticpro (&searchbufs[i].syntax_table);
3209       searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
3210     }
3211   searchbuf_head = &searchbufs[0];
3212
3213   Qsearch_failed = intern ("search-failed");
3214   staticpro (&Qsearch_failed);
3215   Qinvalid_regexp = intern ("invalid-regexp");
3216   staticpro (&Qinvalid_regexp);
3217
3218   Fput (Qsearch_failed, Qerror_conditions,
3219         Fcons (Qsearch_failed, Fcons (Qerror, Qnil)));
3220   Fput (Qsearch_failed, Qerror_message,
3221         build_string ("Search failed"));
3222
3223   Fput (Qinvalid_regexp, Qerror_conditions,
3224         Fcons (Qinvalid_regexp, Fcons (Qerror, Qnil)));
3225   Fput (Qinvalid_regexp, Qerror_message,
3226         build_string ("Invalid regexp"));
3227
3228   last_thing_searched = Qnil;
3229   staticpro (&last_thing_searched);
3230
3231   saved_last_thing_searched = Qnil;
3232   staticpro (&saved_last_thing_searched);
3233
3234   DEFVAR_LISP ("search-spaces-regexp", &Vsearch_spaces_regexp,
3235       doc: /* Regexp to substitute for bunches of spaces in regexp search.
3236 Some commands use this for user-specified regexps.
3237 Spaces that occur inside character classes or repetition operators
3238 or other such regexp constructs are not replaced with this.
3239 A value of nil (which is the normal value) means treat spaces literally.  */);
3240   Vsearch_spaces_regexp = Qnil;
3241
3242   DEFVAR_LISP ("inhibit-changing-match-data", &Vinhibit_changing_match_data,
3243       doc: /* Internal use only.
3244 If non-nil, the match data will not be changed during call to searching or
3245 matching functions, such as `looking-at', `string-match', `re-search-forward'
3246 etc.  */);
3247   Vinhibit_changing_match_data = Qnil;
3248
3249   defsubr (&Slooking_at);
3250   defsubr (&Sposix_looking_at);
3251   defsubr (&Sstring_match);
3252   defsubr (&Sposix_string_match);
3253   defsubr (&Ssearch_forward);
3254   defsubr (&Ssearch_backward);
3255   defsubr (&Sword_search_forward);
3256   defsubr (&Sword_search_backward);
3257   defsubr (&Sre_search_forward);
3258   defsubr (&Sre_search_backward);
3259   defsubr (&Sposix_search_forward);
3260   defsubr (&Sposix_search_backward);
3261   defsubr (&Sreplace_match);
3262   defsubr (&Smatch_beginning);
3263   defsubr (&Smatch_end);
3264   defsubr (&Smatch_data);
3265   defsubr (&Sset_match_data);
3266   defsubr (&Sregexp_quote);
3267 }
3268
3269 /* arch-tag: a6059d79-0552-4f14-a2cb-d379a4e3c78f
3270    (do not change this comment) */