src/search.c

   1 /* String search routines for GNU Emacs.
   2    Copyright (C) 1985, 1986, 1987, 1993, 1994, 1997, 1998, 1999, 2001, 2002,
   3                  2003, 2004, 2005, 2006, 2007  Free Software Foundation, Inc.
   4
   5 This file is part of GNU Emacs.
   6
   7 GNU Emacs is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2, or (at your option)
  10 any later version.
  11
  12 GNU Emacs is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU Emacs; see the file COPYING.  If not, write to
  19 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
  20 Boston, MA 02110-1301, USA.  */
  21
  22
  23 #include <config.h>
  24 #include "lisp.h"
  25 #include "syntax.h"
  26 #include "category.h"
  27 #include "buffer.h"
  28 #include "charset.h"
  29 #include "region-cache.h"
  30 #include "commands.h"
  31 #include "blockinput.h"
  32 #include "intervals.h"
  33
  34 #include <sys/types.h>
  35 #include "regex.h"
  36
  37 #define REGEXP_CACHE_SIZE 20
  38
  39 /* If the regexp is non-nil, then the buffer contains the compiled form
  40    of that regexp, suitable for searching.  */
  41 struct regexp_cache
  42 {
  43   struct regexp_cache *next;
  44   Lisp_Object regexp, whitespace_regexp;
  45   /* Syntax table for which the regexp applies.  We need this because
  46      of character classes.  If this is t, then the compiled pattern is valid
  47      for any syntax-table.  */
  48   Lisp_Object syntax_table;
  49   struct re_pattern_buffer buf;
  50   char fastmap[0400];
  51   /* Nonzero means regexp was compiled to do full POSIX backtracking.  */
  52   char posix;
  53 };
  54
  55 /* The instances of that struct.  */
  56 struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
  57
  58 /* The head of the linked list; points to the most recently used buffer.  */
  59 struct regexp_cache *searchbuf_head;
  60
  61
  62 /* Every call to re_match, etc., must pass &search_regs as the regs
  63    argument unless you can show it is unnecessary (i.e., if re_match
  64    is certainly going to be called again before region-around-match
  65    can be called).
  66
  67    Since the registers are now dynamically allocated, we need to make
  68    sure not to refer to the Nth register before checking that it has
  69    been allocated by checking search_regs.num_regs.
  70
  71    The regex code keeps track of whether it has allocated the search
  72    buffer using bits in the re_pattern_buffer.  This means that whenever
  73    you compile a new pattern, it completely forgets whether it has
  74    allocated any registers, and will allocate new registers the next
  75    time you call a searching or matching function.  Therefore, we need
  76    to call re_set_registers after compiling a new pattern or after
  77    setting the match registers, so that the regex functions will be
  78    able to free or re-allocate it properly.  */
  79 static struct re_registers search_regs;
  80
  81 /* The buffer in which the last search was performed, or
  82    Qt if the last search was done in a string;
  83    Qnil if no searching has been done yet.  */
  84 static Lisp_Object last_thing_searched;
  85
  86 /* error condition signaled when regexp compile_pattern fails */
  87
  88 Lisp_Object Qinvalid_regexp;
  89
  90 /* Error condition used for failing searches */
  91 Lisp_Object Qsearch_failed;
  92
  93 Lisp_Object Vsearch_spaces_regexp;
  94
  95 /* If non-nil, the match data will not be changed during call to
  96    searching or matching functions.  This variable is for internal use
  97    only.  */
  98 Lisp_Object Vinhibit_changing_match_data;
  99
 100 static void set_search_regs ();
 101 static void save_search_regs ();
 102 static int simple_search ();
 103 static int boyer_moore ();
 104 static int search_buffer ();
 105 static void matcher_overflow () NO_RETURN;
 106
 107 static void
 108 matcher_overflow ()
 109 {
 110   error ("Stack overflow in regexp matcher");
 111 }
 112
 113 /* Compile a regexp and signal a Lisp error if anything goes wrong.
 114    PATTERN is the pattern to compile.
 115    CP is the place to put the result.
 116    TRANSLATE is a translation table for ignoring case, or nil for none.
 117    REGP is the structure that says where to store the "register"
 118    values that will result from matching this pattern.
 119    If it is 0, we should compile the pattern not to record any
 120    subexpression bounds.
 121    POSIX is nonzero if we want full backtracking (POSIX style)
 122    for this pattern.  0 means backtrack only enough to get a valid match.
 123    MULTIBYTE is nonzero if we want to handle multibyte characters in
 124    PATTERN.  0 means all multibyte characters are recognized just as
 125    sequences of binary data.
 126
 127    The behavior also depends on Vsearch_spaces_regexp.  */
 128
 129 static void
 130 compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte)
 131      struct regexp_cache *cp;
 132      Lisp_Object pattern;
 133      Lisp_Object translate;
 134      struct re_registers *regp;
 135      int posix;
 136      int multibyte;
 137 {
 138   unsigned char *raw_pattern;
 139   int raw_pattern_size;
 140   char *val;
 141   reg_syntax_t old;
 142
 143   /* MULTIBYTE says whether the text to be searched is multibyte.
 144      We must convert PATTERN to match that, or we will not really
 145      find things right.  */
 146
 147   if (multibyte == STRING_MULTIBYTE (pattern))
 148     {
 149       raw_pattern = (unsigned char *) SDATA (pattern);
 150       raw_pattern_size = SBYTES (pattern);
 151     }
 152   else if (multibyte)
 153     {
 154       raw_pattern_size = count_size_as_multibyte (SDATA (pattern),
 155                                                   SCHARS (pattern));
 156       raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
 157       copy_text (SDATA (pattern), raw_pattern,
 158                  SCHARS (pattern), 0, 1);
 159     }
 160   else
 161     {
 162       /* Converting multibyte to single-byte.
 163
 164          ??? Perhaps this conversion should be done in a special way
 165          by subtracting nonascii-insert-offset from each non-ASCII char,
 166          so that only the multibyte chars which really correspond to
 167          the chosen single-byte character set can possibly match.  */
 168       raw_pattern_size = SCHARS (pattern);
 169       raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
 170       copy_text (SDATA (pattern), raw_pattern,
 171                  SBYTES (pattern), 1, 0);
 172     }
 173
 174   cp->regexp = Qnil;
 175   cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
 176   cp->posix = posix;
 177   cp->buf.multibyte = multibyte;
 178   cp->whitespace_regexp = Vsearch_spaces_regexp;
 179   /* rms: I think BLOCK_INPUT is not needed here any more,
 180      because regex.c defines malloc to call xmalloc.
 181      Using BLOCK_INPUT here means the debugger won't run if an error occurs.
 182      So let's turn it off.  */
 183   /*  BLOCK_INPUT;  */
 184   old = re_set_syntax (RE_SYNTAX_EMACS
 185                        | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
 186
 187   re_set_whitespace_regexp (NILP (Vsearch_spaces_regexp) ? NULL
 188                             : SDATA (Vsearch_spaces_regexp));
 189
 190   val = (char *) re_compile_pattern ((char *)raw_pattern,
 191                                      raw_pattern_size, &cp->buf);
 192
 193   /* If the compiled pattern hard codes some of the contents of the
 194      syntax-table, it can only be reused with *this* syntax table.  */
 195   cp->syntax_table = cp->buf.used_syntax ? current_buffer->syntax_table : Qt;
 196
 197   re_set_whitespace_regexp (NULL);
 198
 199   re_set_syntax (old);
 200   /* UNBLOCK_INPUT;  */
 201   if (val)
 202     xsignal1 (Qinvalid_regexp, build_string (val));
 203
 204   cp->regexp = Fcopy_sequence (pattern);
 205 }
 206
 207 /* Shrink each compiled regexp buffer in the cache
 208    to the size actually used right now.
 209    This is called from garbage collection.  */
 210
 211 void
 212 shrink_regexp_cache ()
 213 {
 214   struct regexp_cache *cp;
 215
 216   for (cp = searchbuf_head; cp != 0; cp = cp->next)
 217     {
 218       cp->buf.allocated = cp->buf.used;
 219       cp->buf.buffer
 220         = (unsigned char *) xrealloc (cp->buf.buffer, cp->buf.used);
 221     }
 222 }
 223
 224 /* Clear the regexp cache w.r.t. a particular syntax table,
 225    because it was changed.
 226    There is no danger of memory leak here because re_compile_pattern
 227    automagically manages the memory in each re_pattern_buffer struct,
 228    based on its `allocated' and `buffer' values.  */
 229 void
 230 clear_regexp_cache ()
 231 {
 232   int i;
 233
 234   for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
 235     /* It's tempting to compare with the syntax-table we've actually changd,
 236        but it's not sufficient because char-table inheritance mewans that
 237        modifying one syntax-table can change others at the same time.  */
 238     if (!EQ (searchbufs[i].syntax_table, Qt))
 239       searchbufs[i].regexp = Qnil;
 240 }
 241
 242 /* Compile a regexp if necessary, but first check to see if there's one in
 243    the cache.
 244    PATTERN is the pattern to compile.
 245    TRANSLATE is a translation table for ignoring case, or nil for none.
 246    REGP is the structure that says where to store the "register"
 247    values that will result from matching this pattern.
 248    If it is 0, we should compile the pattern not to record any
 249    subexpression bounds.
 250    POSIX is nonzero if we want full backtracking (POSIX style)
 251    for this pattern.  0 means backtrack only enough to get a valid match.  */
 252
 253 struct re_pattern_buffer *
 254 compile_pattern (pattern, regp, translate, posix, multibyte)
 255      Lisp_Object pattern;
 256      struct re_registers *regp;
 257      Lisp_Object translate;
 258      int posix, multibyte;
 259 {
 260   struct regexp_cache *cp, **cpp;
 261
 262   for (cpp = &searchbuf_head; ; cpp = &cp->next)
 263     {
 264       cp = *cpp;
 265       /* Entries are initialized to nil, and may be set to nil by
 266          compile_pattern_1 if the pattern isn't valid.  Don't apply
 267          string accessors in those cases.  However, compile_pattern_1
 268          is only applied to the cache entry we pick here to reuse.  So
 269          nil should never appear before a non-nil entry.  */
 270       if (NILP (cp->regexp))
 271         goto compile_it;
 272       if (SCHARS (cp->regexp) == SCHARS (pattern)
 273           && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
 274           && !NILP (Fstring_equal (cp->regexp, pattern))
 275           && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
 276           && cp->posix == posix
 277           && cp->buf.multibyte == multibyte
 278           && (EQ (cp->syntax_table, Qt)
 279               || EQ (cp->syntax_table, current_buffer->syntax_table))
 280           && !NILP (Fequal (cp->whitespace_regexp, Vsearch_spaces_regexp)))
 281         break;
 282
 283       /* If we're at the end of the cache, compile into the nil cell
 284          we found, or the last (least recently used) cell with a
 285          string value.  */
 286       if (cp->next == 0)
 287         {
 288         compile_it:
 289           compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte);
 290           break;
 291         }
 292     }
 293
 294   /* When we get here, cp (aka *cpp) contains the compiled pattern,
 295      either because we found it in the cache or because we just compiled it.
 296      Move it to the front of the queue to mark it as most recently used.  */
 297   *cpp = cp->next;
 298   cp->next = searchbuf_head;
 299   searchbuf_head = cp;
 300
 301   /* Advise the searching functions about the space we have allocated
 302      for register data.  */
 303   if (regp)
 304     re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
 305
 306   return &cp->buf;
 307 }
 308
 309 \f
 310 static Lisp_Object
 311 looking_at_1 (string, posix)
 312      Lisp_Object string;
 313      int posix;
 314 {
 315   Lisp_Object val;
 316   unsigned char *p1, *p2;
 317   int s1, s2;
 318   register int i;
 319   struct re_pattern_buffer *bufp;
 320
 321   if (running_asynch_code)
 322     save_search_regs ();
 323
 324   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
 325   XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
 326     = current_buffer->case_eqv_table;
 327
 328   CHECK_STRING (string);
 329   bufp = compile_pattern (string,
 330                           (NILP (Vinhibit_changing_match_data)
 331                            ? &search_regs : NULL),
 332                           (!NILP (current_buffer->case_fold_search)
 333                            ? current_buffer->case_canon_table : Qnil),
 334                           posix,
 335                           !NILP (current_buffer->enable_multibyte_characters));
 336
 337   immediate_quit = 1;
 338   QUIT;                 /* Do a pending quit right away, to avoid paradoxical behavior */
 339
 340   /* Get pointers and sizes of the two strings
 341      that make up the visible portion of the buffer. */
 342
 343   p1 = BEGV_ADDR;
 344   s1 = GPT_BYTE - BEGV_BYTE;
 345   p2 = GAP_END_ADDR;
 346   s2 = ZV_BYTE - GPT_BYTE;
 347   if (s1 < 0)
 348     {
 349       p2 = p1;
 350       s2 = ZV_BYTE - BEGV_BYTE;
 351       s1 = 0;
 352     }
 353   if (s2 < 0)
 354     {
 355       s1 = ZV_BYTE - BEGV_BYTE;
 356       s2 = 0;
 357     }
 358
 359   re_match_object = Qnil;
 360
 361   i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
 362                   PT_BYTE - BEGV_BYTE,
 363                   (NILP (Vinhibit_changing_match_data)
 364                    ? &search_regs : NULL),
 365                   ZV_BYTE - BEGV_BYTE);
 366   immediate_quit = 0;
 367
 368   if (i == -2)
 369     matcher_overflow ();
 370
 371   val = (0 <= i ? Qt : Qnil);
 372   if (NILP (Vinhibit_changing_match_data) && i >= 0)
 373     for (i = 0; i < search_regs.num_regs; i++)
 374       if (search_regs.start[i] >= 0)
 375         {
 376           search_regs.start[i]
 377             = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
 378           search_regs.end[i]
 379             = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
 380         }
 381
 382   /* Set last_thing_searched only when match data is changed.  */
 383   if (NILP (Vinhibit_changing_match_data))
 384     XSETBUFFER (last_thing_searched, current_buffer);
 385
 386   return val;
 387 }
 388
 389 DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
 390        doc: /* Return t if text after point matches regular expression REGEXP.
 391 This function modifies the match data that `match-beginning',
 392 `match-end' and `match-data' access; save and restore the match
 393 data if you want to preserve them.  */)
 394      (regexp)
 395      Lisp_Object regexp;
 396 {
 397   return looking_at_1 (regexp, 0);
 398 }
 399
 400 DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
 401        doc: /* Return t if text after point matches regular expression REGEXP.
 402 Find the longest match, in accord with Posix regular expression rules.
 403 This function modifies the match data that `match-beginning',
 404 `match-end' and `match-data' access; save and restore the match
 405 data if you want to preserve them.  */)
 406      (regexp)
 407      Lisp_Object regexp;
 408 {
 409   return looking_at_1 (regexp, 1);
 410 }
 411 \f
 412 static Lisp_Object
 413 string_match_1 (regexp, string, start, posix)
 414      Lisp_Object regexp, string, start;
 415      int posix;
 416 {
 417   int val;
 418   struct re_pattern_buffer *bufp;
 419   int pos, pos_byte;
 420   int i;
 421
 422   if (running_asynch_code)
 423     save_search_regs ();
 424
 425   CHECK_STRING (regexp);
 426   CHECK_STRING (string);
 427
 428   if (NILP (start))
 429     pos = 0, pos_byte = 0;
 430   else
 431     {
 432       int len = SCHARS (string);
 433
 434       CHECK_NUMBER (start);
 435       pos = XINT (start);
 436       if (pos < 0 && -pos <= len)
 437         pos = len + pos;
 438       else if (0 > pos || pos > len)
 439         args_out_of_range (string, start);
 440       pos_byte = string_char_to_byte (string, pos);
 441     }
 442
 443   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
 444   XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
 445     = current_buffer->case_eqv_table;
 446
 447   bufp = compile_pattern (regexp,
 448                           (NILP (Vinhibit_changing_match_data)
 449                            ? &search_regs : NULL),
 450                           (!NILP (current_buffer->case_fold_search)
 451                            ? current_buffer->case_canon_table : Qnil),
 452                           posix,
 453                           STRING_MULTIBYTE (string));
 454   immediate_quit = 1;
 455   re_match_object = string;
 456
 457   val = re_search (bufp, (char *) SDATA (string),
 458                    SBYTES (string), pos_byte,
 459                    SBYTES (string) - pos_byte,
 460                    (NILP (Vinhibit_changing_match_data)
 461                     ? &search_regs : NULL));
 462   immediate_quit = 0;
 463
 464   /* Set last_thing_searched only when match data is changed.  */
 465   if (NILP (Vinhibit_changing_match_data))
 466     last_thing_searched = Qt;
 467
 468   if (val == -2)
 469     matcher_overflow ();
 470   if (val < 0) return Qnil;
 471
 472   if (NILP (Vinhibit_changing_match_data))
 473     for (i = 0; i < search_regs.num_regs; i++)
 474       if (search_regs.start[i] >= 0)
 475         {
 476           search_regs.start[i]
 477             = string_byte_to_char (string, search_regs.start[i]);
 478           search_regs.end[i]
 479             = string_byte_to_char (string, search_regs.end[i]);
 480         }
 481
 482   return make_number (string_byte_to_char (string, val));
 483 }
 484
 485 DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
 486        doc: /* Return index of start of first match for REGEXP in STRING, or nil.
 487 Matching ignores case if `case-fold-search' is non-nil.
 488 If third arg START is non-nil, start search at that index in STRING.
 489 For index of first char beyond the match, do (match-end 0).
 490 `match-end' and `match-beginning' also give indices of substrings
 491 matched by parenthesis constructs in the pattern.
 492
 493 You can use the function `match-string' to extract the substrings
 494 matched by the parenthesis constructions in REGEXP. */)
 495      (regexp, string, start)
 496      Lisp_Object regexp, string, start;
 497 {
 498   return string_match_1 (regexp, string, start, 0);
 499 }
 500
 501 DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
 502        doc: /* Return index of start of first match for REGEXP in STRING, or nil.
 503 Find the longest match, in accord with Posix regular expression rules.
 504 Case is ignored if `case-fold-search' is non-nil in the current buffer.
 505 If third arg START is non-nil, start search at that index in STRING.
 506 For index of first char beyond the match, do (match-end 0).
 507 `match-end' and `match-beginning' also give indices of substrings
 508 matched by parenthesis constructs in the pattern.  */)
 509      (regexp, string, start)
 510      Lisp_Object regexp, string, start;
 511 {
 512   return string_match_1 (regexp, string, start, 1);
 513 }
 514
 515 /* Match REGEXP against STRING, searching all of STRING,
 516    and return the index of the match, or negative on failure.
 517    This does not clobber the match data.  */
 518
 519 int
 520 fast_string_match (regexp, string)
 521      Lisp_Object regexp, string;
 522 {
 523   int val;
 524   struct re_pattern_buffer *bufp;
 525
 526   bufp = compile_pattern (regexp, 0, Qnil,
 527                           0, STRING_MULTIBYTE (string));
 528   immediate_quit = 1;
 529   re_match_object = string;
 530
 531   val = re_search (bufp, (char *) SDATA (string),
 532                    SBYTES (string), 0,
 533                    SBYTES (string), 0);
 534   immediate_quit = 0;
 535   return val;
 536 }
 537
 538 /* Match REGEXP against STRING, searching all of STRING ignoring case,
 539    and return the index of the match, or negative on failure.
 540    This does not clobber the match data.
 541    We assume that STRING contains single-byte characters.  */
 542
 543 extern Lisp_Object Vascii_downcase_table;
 544
 545 int
 546 fast_c_string_match_ignore_case (regexp, string)
 547      Lisp_Object regexp;
 548      const char *string;
 549 {
 550   int val;
 551   struct re_pattern_buffer *bufp;
 552   int len = strlen (string);
 553
 554   regexp = string_make_unibyte (regexp);
 555   re_match_object = Qt;
 556   bufp = compile_pattern (regexp, 0,
 557                           Vascii_canon_table, 0,
 558                           0);
 559   immediate_quit = 1;
 560   val = re_search (bufp, string, len, 0, len, 0);
 561   immediate_quit = 0;
 562   return val;
 563 }
 564
 565 /* Like fast_string_match but ignore case.  */
 566
 567 int
 568 fast_string_match_ignore_case (regexp, string)
 569      Lisp_Object regexp, string;
 570 {
 571   int val;
 572   struct re_pattern_buffer *bufp;
 573
 574   bufp = compile_pattern (regexp, 0, Vascii_canon_table,
 575                           0, STRING_MULTIBYTE (string));
 576   immediate_quit = 1;
 577   re_match_object = string;
 578
 579   val = re_search (bufp, (char *) SDATA (string),
 580                    SBYTES (string), 0,
 581                    SBYTES (string), 0);
 582   immediate_quit = 0;
 583   return val;
 584 }
 585 \f
 586 /* The newline cache: remembering which sections of text have no newlines.  */
 587
 588 /* If the user has requested newline caching, make sure it's on.
 589    Otherwise, make sure it's off.
 590    This is our cheezy way of associating an action with the change of
 591    state of a buffer-local variable.  */
 592 static void
 593 newline_cache_on_off (buf)
 594      struct buffer *buf;
 595 {
 596   if (NILP (buf->cache_long_line_scans))
 597     {
 598       /* It should be off.  */
 599       if (buf->newline_cache)
 600         {
 601           free_region_cache (buf->newline_cache);
 602           buf->newline_cache = 0;
 603         }
 604     }
 605   else
 606     {
 607       /* It should be on.  */
 608       if (buf->newline_cache == 0)
 609         buf->newline_cache = new_region_cache ();
 610     }
 611 }
 612
 613 \f
 614 /* Search for COUNT instances of the character TARGET between START and END.
 615
 616    If COUNT is positive, search forwards; END must be >= START.
 617    If COUNT is negative, search backwards for the -COUNTth instance;
 618       END must be <= START.
 619    If COUNT is zero, do anything you please; run rogue, for all I care.
 620
 621    If END is zero, use BEGV or ZV instead, as appropriate for the
 622    direction indicated by COUNT.
 623
 624    If we find COUNT instances, set *SHORTAGE to zero, and return the
 625    position past the COUNTth match.  Note that for reverse motion
 626    this is not the same as the usual convention for Emacs motion commands.
 627
 628    If we don't find COUNT instances before reaching END, set *SHORTAGE
 629    to the number of TARGETs left unfound, and return END.
 630
 631    If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
 632    except when inside redisplay.  */
 633
 634 int
 635 scan_buffer (target, start, end, count, shortage, allow_quit)
 636      register int target;
 637      int start, end;
 638      int count;
 639      int *shortage;
 640      int allow_quit;
 641 {
 642   struct region_cache *newline_cache;
 643   int direction;
 644
 645   if (count > 0)
 646     {
 647       direction = 1;
 648       if (! end) end = ZV;
 649     }
 650   else
 651     {
 652       direction = -1;
 653       if (! end) end = BEGV;
 654     }
 655
 656   newline_cache_on_off (current_buffer);
 657   newline_cache = current_buffer->newline_cache;
 658
 659   if (shortage != 0)
 660     *shortage = 0;
 661
 662   immediate_quit = allow_quit;
 663
 664   if (count > 0)
 665     while (start != end)
 666       {
 667         /* Our innermost scanning loop is very simple; it doesn't know
 668            about gaps, buffer ends, or the newline cache.  ceiling is
 669            the position of the last character before the next such
 670            obstacle --- the last character the dumb search loop should
 671            examine.  */
 672         int ceiling_byte = CHAR_TO_BYTE (end) - 1;
 673         int start_byte = CHAR_TO_BYTE (start);
 674         int tem;
 675
 676         /* If we're looking for a newline, consult the newline cache
 677            to see where we can avoid some scanning.  */
 678         if (target == '\n' && newline_cache)
 679           {
 680             int next_change;
 681             immediate_quit = 0;
 682             while (region_cache_forward
 683                    (current_buffer, newline_cache, start_byte, &next_change))
 684               start_byte = next_change;
 685             immediate_quit = allow_quit;
 686
 687             /* START should never be after END.  */
 688             if (start_byte > ceiling_byte)
 689               start_byte = ceiling_byte;
 690
 691             /* Now the text after start is an unknown region, and
 692                next_change is the position of the next known region. */
 693             ceiling_byte = min (next_change - 1, ceiling_byte);
 694           }
 695
 696         /* The dumb loop can only scan text stored in contiguous
 697            bytes. BUFFER_CEILING_OF returns the last character
 698            position that is contiguous, so the ceiling is the
 699            position after that.  */
 700         tem = BUFFER_CEILING_OF (start_byte);
 701         ceiling_byte = min (tem, ceiling_byte);
 702
 703         {
 704           /* The termination address of the dumb loop.  */
 705           register unsigned char *ceiling_addr
 706             = BYTE_POS_ADDR (ceiling_byte) + 1;
 707           register unsigned char *cursor
 708             = BYTE_POS_ADDR (start_byte);
 709           unsigned char *base = cursor;
 710
 711           while (cursor < ceiling_addr)
 712             {
 713               unsigned char *scan_start = cursor;
 714
 715               /* The dumb loop.  */
 716               while (*cursor != target && ++cursor < ceiling_addr)
 717                 ;
 718
 719               /* If we're looking for newlines, cache the fact that
 720                  the region from start to cursor is free of them. */
 721               if (target == '\n' && newline_cache)
 722                 know_region_cache (current_buffer, newline_cache,
 723                                    start_byte + scan_start - base,
 724                                    start_byte + cursor - base);
 725
 726               /* Did we find the target character?  */
 727               if (cursor < ceiling_addr)
 728                 {
 729                   if (--count == 0)
 730                     {
 731                       immediate_quit = 0;
 732                       return BYTE_TO_CHAR (start_byte + cursor - base + 1);
 733                     }
 734                   cursor++;
 735                 }
 736             }
 737
 738           start = BYTE_TO_CHAR (start_byte + cursor - base);
 739         }
 740       }
 741   else
 742     while (start > end)
 743       {
 744         /* The last character to check before the next obstacle.  */
 745         int ceiling_byte = CHAR_TO_BYTE (end);
 746         int start_byte = CHAR_TO_BYTE (start);
 747         int tem;
 748
 749         /* Consult the newline cache, if appropriate.  */
 750         if (target == '\n' && newline_cache)
 751           {
 752             int next_change;
 753             immediate_quit = 0;
 754             while (region_cache_backward
 755                    (current_buffer, newline_cache, start_byte, &next_change))
 756               start_byte = next_change;
 757             immediate_quit = allow_quit;
 758
 759             /* Start should never be at or before end.  */
 760             if (start_byte <= ceiling_byte)
 761               start_byte = ceiling_byte + 1;
 762
 763             /* Now the text before start is an unknown region, and
 764                next_change is the position of the next known region. */
 765             ceiling_byte = max (next_change, ceiling_byte);
 766           }
 767
 768         /* Stop scanning before the gap.  */
 769         tem = BUFFER_FLOOR_OF (start_byte - 1);
 770         ceiling_byte = max (tem, ceiling_byte);
 771
 772         {
 773           /* The termination address of the dumb loop.  */
 774           register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
 775           register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
 776           unsigned char *base = cursor;
 777
 778           while (cursor >= ceiling_addr)
 779             {
 780               unsigned char *scan_start = cursor;
 781
 782               while (*cursor != target && --cursor >= ceiling_addr)
 783                 ;
 784
 785               /* If we're looking for newlines, cache the fact that
 786                  the region from after the cursor to start is free of them.  */
 787               if (target == '\n' && newline_cache)
 788                 know_region_cache (current_buffer, newline_cache,
 789                                    start_byte + cursor - base,
 790                                    start_byte + scan_start - base);
 791
 792               /* Did we find the target character?  */
 793               if (cursor >= ceiling_addr)
 794                 {
 795                   if (++count >= 0)
 796                     {
 797                       immediate_quit = 0;
 798                       return BYTE_TO_CHAR (start_byte + cursor - base);
 799                     }
 800                   cursor--;
 801                 }
 802             }
 803
 804           start = BYTE_TO_CHAR (start_byte + cursor - base);
 805         }
 806       }
 807
 808   immediate_quit = 0;
 809   if (shortage != 0)
 810     *shortage = count * direction;
 811   return start;
 812 }
 813 \f
 814 /* Search for COUNT instances of a line boundary, which means either a
 815    newline or (if selective display enabled) a carriage return.
 816    Start at START.  If COUNT is negative, search backwards.
 817
 818    We report the resulting position by calling TEMP_SET_PT_BOTH.
 819
  820    If we find COUNT instances, we position after (always after,
 821    even if scanning backwards) the COUNTth match, and return 0.
 822
 823    If we don't find COUNT instances before reaching the end of the
 824    buffer (or the beginning, if scanning backwards), we return
 825    the number of line boundaries left unfound, and position at
 826    the limit we bumped up against.
 827
 828    If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
 829    except in special cases.  */
 830
 831 int
 832 scan_newline (start, start_byte, limit, limit_byte, count, allow_quit)
 833      int start, start_byte;
 834      int limit, limit_byte;
 835      register int count;
 836      int allow_quit;
 837 {
 838   int direction = ((count > 0) ? 1 : -1);
 839
 840   register unsigned char *cursor;
 841   unsigned char *base;
 842
 843   register int ceiling;
 844   register unsigned char *ceiling_addr;
 845
 846   int old_immediate_quit = immediate_quit;
 847
 848   /* The code that follows is like scan_buffer
 849      but checks for either newline or carriage return.  */
 850
 851   if (allow_quit)
 852     immediate_quit++;
 853
 854   start_byte = CHAR_TO_BYTE (start);
 855
 856   if (count > 0)
 857     {
 858       while (start_byte < limit_byte)
 859         {
 860           ceiling =  BUFFER_CEILING_OF (start_byte);
 861           ceiling = min (limit_byte - 1, ceiling);
 862           ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
 863           base = (cursor = BYTE_POS_ADDR (start_byte));
 864           while (1)
 865             {
 866               while (*cursor != '\n' && ++cursor != ceiling_addr)
 867                 ;
 868
 869               if (cursor != ceiling_addr)
 870                 {
 871                   if (--count == 0)
 872                     {
 873                       immediate_quit = old_immediate_quit;
 874                       start_byte = start_byte + cursor - base + 1;
 875                       start = BYTE_TO_CHAR (start_byte);
 876                       TEMP_SET_PT_BOTH (start, start_byte);
 877                       return 0;
 878                     }
 879                   else
 880                     if (++cursor == ceiling_addr)
 881                       break;
 882                 }
 883               else
 884                 break;
 885             }
 886           start_byte += cursor - base;
 887         }
 888     }
 889   else
 890     {
 891       while (start_byte > limit_byte)
 892         {
 893           ceiling = BUFFER_FLOOR_OF (start_byte - 1);
 894           ceiling = max (limit_byte, ceiling);
 895           ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
 896           base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
 897           while (1)
 898             {
 899               while (--cursor != ceiling_addr && *cursor != '\n')
 900                 ;
 901
 902               if (cursor != ceiling_addr)
 903                 {
 904                   if (++count == 0)
 905                     {
 906                       immediate_quit = old_immediate_quit;
 907                       /* Return the position AFTER the match we found.  */
 908                       start_byte = start_byte + cursor - base + 1;
 909                       start = BYTE_TO_CHAR (start_byte);
 910                       TEMP_SET_PT_BOTH (start, start_byte);
 911                       return 0;
 912                     }
 913                 }
 914               else
 915                 break;
 916             }
 917           /* Here we add 1 to compensate for the last decrement
 918              of CURSOR, which took it past the valid range.  */
 919           start_byte += cursor - base + 1;
 920         }
 921     }
 922
 923   TEMP_SET_PT_BOTH (limit, limit_byte);
 924   immediate_quit = old_immediate_quit;
 925
 926   return count * direction;
 927 }
 928
 929 int
 930 find_next_newline_no_quit (from, cnt)
 931      register int from, cnt;
 932 {
       /* Thin wrapper: return what scan_buffer yields when asked for CNT
          occurrences of '\n' starting at FROM.  A null pointer is passed
          for the shortage result, so no shortage is reported.  The 0 in
          third position and the trailing 0 are presumably scan_buffer's
          end bound and allow-quit flag -- confirm against scan_buffer.  */
 933   return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0);
 934 }
 935
 936 /* Like find_next_newline, but return the position just before the
 937    newline instead of just after it, and search no further than TO.
 938    Subtracting 1 from find_next_newline's result would not do,
 939    because the scan may stop at TO instead of after a newline.  */
 940
 941 int
 942 find_before_next_newline (from, to, cnt)
 943      int from, to, cnt;
 944 {
 945   int unfound;
 946   int stop = scan_buffer ('\n', from, to, cnt, &unfound, 1);
 947
 948   /* UNFOUND == 0 is taken to mean the scan ended just after a newline,
 949      so back up over it; otherwise return the stopping point as is.  */
 950   return unfound == 0 ? stop - 1 : stop;
 951 }
 952 \f
 953 /* Subroutines of Lisp buffer search functions. */
 954
     /* Search from point for STRING on behalf of the Lisp search commands.
        The repeat count is DIRECTION, multiplied by COUNT when COUNT is
        non-nil; its sign selects forward or backward search.  A non-nil
        BOUND limits the search: it is an error if it lies on the wrong
        side of point, and it is clipped to the range BEGV..ZV.  RE and
        POSIX are passed through to search_buffer.

        On success, move point to the position search_buffer returned and
        return that position as a Lisp integer.  On failure, signal
        Qsearch_failed if NOERROR is nil, return nil without moving point
        if NOERROR is t, and otherwise move point to the limit and return
        nil.  */

 955 static Lisp_Object
 956 search_command (string, bound, noerror, count, direction, RE, posix)
 957      Lisp_Object string, bound, noerror, count;
 958      int direction;
 959      int RE;
 960      int posix;
 961 {
 962   Lisp_Object canon, eqv;
 963   int repeat = direction;
 964   int lim, lim_byte;
 965   register int found;
 966
 967   if (!NILP (count))
 968     {
 969       CHECK_NUMBER (count);
 970       repeat *= XINT (count);
 971     }
 972
 973   CHECK_STRING (string);
 974
 975   if (!NILP (bound))
 976     {
 977       CHECK_NUMBER_COERCE_MARKER (bound);
 978       lim = XINT (bound);
 979       if (repeat > 0 ? lim < PT : lim > PT)
 980         error ("Invalid search bound (wrong side of point)");
 981
 982       /* Clip the bound to the range BEGV..ZV.  */
 983       if (lim > ZV)
 984         lim = ZV, lim_byte = ZV_BYTE;
 985       else if (lim < BEGV)
 986         lim = BEGV, lim_byte = BEGV_BYTE;
 987       else
 988         lim_byte = CHAR_TO_BYTE (lim);
 989     }
 990   else if (repeat > 0)
 991     lim = ZV, lim_byte = ZV_BYTE;
 992   else
 993     lim = BEGV, lim_byte = BEGV_BYTE;
 994
 995   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
 996   XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
 997     = current_buffer->case_eqv_table;
 998
 999   /* Translation tables are handed over only when case folding is on.  */
1000   if (NILP (current_buffer->case_fold_search))
1001     canon = eqv = Qnil;
1002   else
1003     {
1004       canon = current_buffer->case_canon_table;
1005       eqv = current_buffer->case_eqv_table;
1006     }
1007
1008   found = search_buffer (string, PT, PT_BYTE, lim, lim_byte, repeat, RE,
1009                          canon, eqv, posix);
1010   if (found <= 0)
1011     {
1012       if (NILP (noerror))
1013         xsignal1 (Qsearch_failed, string);
1014
1015       if (EQ (noerror, Qt))
1016         return Qnil;
1017
1018       if (lim < BEGV || lim > ZV)
1019         abort ();
1020       SET_PT_BOTH (lim, lim_byte);
1021       /* Returning LIM would be clean, but maybe programs depend on
1022          a value of nil here.  */
1023       return Qnil;
1024     }
1025
1026   if (found < BEGV || found > ZV)
1027     abort ();
1028
1029   SET_PT (found);
1030
1031   return make_number (found);
1032 }
1033 \f
1034 /* Return 1 if REGEXP matches just one constant string: it has no
1035    special character and no backslash construct except a quoted char.  */
1036 static int
1037 trivial_regexp_p (regexp)
1038      Lisp_Object regexp;
1039 {
1040   unsigned char *p = SDATA (regexp);
1041   unsigned char *end = p + SBYTES (regexp);
1042
1043   while (p < end)
1044     switch (*p++)
1045       {
1046       case '$': case '^': case '[': case '?': case '+': case '*': case '.':
1047         return 0;               /* Unquoted special character.  */
1048       case '\\':
1049         /* A backslash with nothing after it is not a constant.  */
1050         if (p == end)
1051           return 0;
1052         switch (*p++)
1053           {
1054           case '1': case '2': case '3': case '4': case '5':
1055           case '6': case '7': case '8': case '9':
1056           case 'c': case 'C': /* for categoryspec and notcategoryspec */
1057           case '|': case '(': case ')': case '`': case '\'': case 'b':
1058           case 'B': case '<': case '>': case 'w': case 'W': case 's':
1059           case 'S': case '=': case '{': case '}': case '_':
1060             return 0;           /* Backslash construct.  */
1061           }
1062       }
1063   return 1;
1064 }
1065
1066 /* Search for the n'th occurrence of STRING in the current buffer,
1067    starting at position POS and stopping at position LIM,
1068    treating STRING as a literal string if RE is false or as
1069    a regular expression if RE is true.
1070
1071    If N is positive, searching is forward and LIM must be greater than POS.
1072    If N is negative, searching is backward and LIM must be less than POS.
1073
1074    Returns -x if x occurrences remain to be found (x > 0),
1075    or else the position at the beginning of the Nth occurrence
1076    (if searching backward) or the end (if searching forward).
1077
1078    POSIX is nonzero if we want full backtracking (POSIX style)
1079    for this pattern.  0 means backtrack only enough to get a valid match.  */
1080
     /* Store into OUT the translation of character D through table TRT.
        If TRT is nil, or its entry for D is not an integer, OUT gets D
        itself.  TRT and D are each evaluated exactly once, before OUT is
        assigned, so OUT and D may name the same variable and the
        arguments may be arbitrary expressions.  */
1081 #define TRANSLATE(out, trt, d)                                  \
1082 do                                                              \
1083   {                                                             \
         Lisp_Object translate_table_ = (trt);                       \
         int translate_from_ = (d);                                  \
1084     if (! NILP (translate_table_))                              \
1085       {                                                         \
1086         Lisp_Object translate_to_;                              \
1087         translate_to_ = Faref (translate_table_,                \
                                    make_number (translate_from_));  \
1088         if (INTEGERP (translate_to_))                           \
1089           (out) = XINT (translate_to_);                         \
1090         else                                                    \
1091           (out) = translate_from_;                              \
1092       }                                                         \
1093     else                                                        \
1094       (out) = translate_from_;                                  \
1095   }                                                             \
1096 while (0)
1097
1098 /* Only used in search_buffer, as the register set handed to
1099    compile_pattern and re_search_2 when searching regexps and
1100    SEARCH_REGS should not be changed (i.e. Vinhibit_changing_match_data
        is non-nil).  search_buffer then reads the match position back from
        it: start[0] when searching backward, end[0] when searching
        forward.  */
1101 static struct re_registers search_regs_1;
1102
1103 static int
1104 search_buffer (string, pos, pos_byte, lim, lim_byte, n,
1105                RE, trt, inverse_trt, posix)
1106      Lisp_Object string;
1107      int pos;
1108      int pos_byte;
1109      int lim;
1110      int lim_byte;
1111      int n;
1112      int RE;
1113      Lisp_Object trt;
1114      Lisp_Object inverse_trt;
1115      int posix;
1116 {
       /* Overview: search N times for STRING from POS/POS_BYTE towards
          LIM/LIM_BYTE.  A regexp that is not trivial (or any regexp while
          Vsearch_spaces_regexp is non-nil) is compiled and run through
          re_search_2.  Anything else is treated as a literal string: it is
          converted to the buffer's multibyteness, translated through TRT,
          and handed to boyer_moore, or to simple_search when the
          translation is too irregular for boyer_moore.

          Returns POS unchanged when STRING is empty or N is 0.  In the
          regexp branch, returns the new position on success and minus the
          number of occurrences still unfound on failure.  In the literal
          branch, returns whatever boyer_moore or simple_search returns.  */
1117   int len = SCHARS (string);
1118   int len_byte = SBYTES (string);
1119   register int i;
1120
1121   if (running_asynch_code)
1122     save_search_regs ();
1123
1124   /* Searching 0 times means don't move.  */
1125   /* Null string is found at starting position.  */
1126   if (len == 0 || n == 0)
1127     {
1128       set_search_regs (pos_byte, 0);
1129       return pos;
1130     }
1131
1132   if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp)))
1133     {
1134       unsigned char *p1, *p2;
1135       int s1, s2;
1136       struct re_pattern_buffer *bufp;
1137
           /* Match data goes to search_regs, or to the private
              search_regs_1 when match data must not be changed.  */
1138       bufp = compile_pattern (string,
1139                               (NILP (Vinhibit_changing_match_data)
1140                                ? &search_regs : &search_regs_1),
1141                               trt, posix,
1142                               !NILP (current_buffer->enable_multibyte_characters));
1143
1144       immediate_quit = 1;       /* Quit immediately if user types ^G,
1145                                    because letting this function finish
1146                                    can take too long. */
1147       QUIT;                     /* Do a pending quit right away,
1148                                    to avoid paradoxical behavior */
1149       /* Get pointers and sizes of the two strings
1150          that make up the visible portion of the buffer. */
1151
1152       p1 = BEGV_ADDR;
1153       s1 = GPT_BYTE - BEGV_BYTE;
1154       p2 = GAP_END_ADDR;
1155       s2 = ZV_BYTE - GPT_BYTE;
           /* A negative size means the whole visible text lies on one side
              of GPT_BYTE; describe it as a single string then.  */
1156       if (s1 < 0)
1157         {
1158           p2 = p1;
1159           s2 = ZV_BYTE - BEGV_BYTE;
1160           s1 = 0;
1161         }
1162       if (s2 < 0)
1163         {
1164           s1 = ZV_BYTE - BEGV_BYTE;
1165           s2 = 0;
1166         }
1167       re_match_object = Qnil;
1168
           /* Backward search: each pass leaves POS/POS_BYTE at the start
              of the match just found.  */
1169       while (n < 0)
1170         {
1171           int val;
1172           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1173                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1174                              (NILP (Vinhibit_changing_match_data)
1175                               ? &search_regs : &search_regs_1),
1176                              /* Don't allow match past current point */
1177                              pos_byte - BEGV_BYTE);
1178           if (val == -2)
1179             {
1180               matcher_overflow ();
1181             }
1182           if (val >= 0)
1183             {
1184               if (NILP (Vinhibit_changing_match_data))
1185                 {
1186                   pos_byte = search_regs.start[0] + BEGV_BYTE;
                       /* The registers hold byte offsets from BEGV_BYTE;
                          convert them to character positions in place.  */
1187                   for (i = 0; i < search_regs.num_regs; i++)
1188                     if (search_regs.start[i] >= 0)
1189                       {
1190                         search_regs.start[i]
1191                           = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1192                         search_regs.end[i]
1193                           = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1194                       }
1195                   XSETBUFFER (last_thing_searched, current_buffer);
1196                   /* Set pos to the new position. */
1197                   pos = search_regs.start[0];
1198                 }
1199               else
1200                 {
1201                   pos_byte = search_regs_1.start[0] + BEGV_BYTE;
1202                   /* Set pos to the new position.  */
1203                   pos = BYTE_TO_CHAR (search_regs_1.start[0] + BEGV_BYTE);
1204                 }
1205             }
1206           else
1207             {
                   /* N is negative here, so this is minus the number of
                      occurrences still unfound.  */
1208               immediate_quit = 0;
1209               return (n);
1210             }
1211           n++;
1212         }
           /* Forward search: each pass leaves POS/POS_BYTE at the end of
              the match just found.  */
1213       while (n > 0)
1214         {
1215           int val;
1216           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1217                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1218                              (NILP (Vinhibit_changing_match_data)
1219                               ? &search_regs : &search_regs_1),
1220                              lim_byte - BEGV_BYTE);
1221           if (val == -2)
1222             {
1223               matcher_overflow ();
1224             }
1225           if (val >= 0)
1226             {
1227               if (NILP (Vinhibit_changing_match_data))
1228                 {
1229                   pos_byte = search_regs.end[0] + BEGV_BYTE;
1230                   for (i = 0; i < search_regs.num_regs; i++)
1231                     if (search_regs.start[i] >= 0)
1232                       {
1233                         search_regs.start[i]
1234                           = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1235                         search_regs.end[i]
1236                           = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1237                       }
1238                   XSETBUFFER (last_thing_searched, current_buffer);
1239                   pos = search_regs.end[0];
1240                 }
1241               else
1242                 {
1243                   pos_byte = search_regs_1.end[0] + BEGV_BYTE;
1244                   pos = BYTE_TO_CHAR (search_regs_1.end[0] + BEGV_BYTE);
1245                 }
1246             }
1247           else
1248             {
1249               immediate_quit = 0;
1250               return (0 - n);
1251             }
1252           n--;
1253         }
1254       immediate_quit = 0;
1255       return (pos);
1256     }
1257   else                          /* non-RE case */
1258     {
1259       unsigned char *raw_pattern, *pat;
1260       int raw_pattern_size;
1261       int raw_pattern_size_byte;
1262       unsigned char *patbuf;
1263       int multibyte = !NILP (current_buffer->enable_multibyte_characters);
1264       unsigned char *base_pat;
1265       /* Set to positive if we find a non-ASCII char that need
1266          translation.  Otherwise set to zero later.  */
1267       int charset_base = -1;
1268       int boyer_moore_ok = 1;
1269
1270       /* MULTIBYTE says whether the text to be searched is multibyte.
1271          We must convert PATTERN to match that, or we will not really
1272          find things right.  */
1273
1274       if (multibyte == STRING_MULTIBYTE (string))
1275         {
1276           raw_pattern = (unsigned char *) SDATA (string);
1277           raw_pattern_size = SCHARS (string);
1278           raw_pattern_size_byte = SBYTES (string);
1279         }
1280       else if (multibyte)
1281         {
               /* Converting single-byte to multibyte.  */
1282           raw_pattern_size = SCHARS (string);
1283           raw_pattern_size_byte
1284             = count_size_as_multibyte (SDATA (string),
1285                                        raw_pattern_size);
1286           raw_pattern = (unsigned char *) alloca (raw_pattern_size_byte + 1);
1287           copy_text (SDATA (string), raw_pattern,
1288                      SCHARS (string), 0, 1);
1289         }
1290       else
1291         {
1292           /* Converting multibyte to single-byte.
1293
1294              ??? Perhaps this conversion should be done in a special way
1295              by subtracting nonascii-insert-offset from each non-ASCII char,
1296              so that only the multibyte chars which really correspond to
1297              the chosen single-byte character set can possibly match.  */
1298           raw_pattern_size = SCHARS (string);
1299           raw_pattern_size_byte = SCHARS (string);
1300           raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
1301           copy_text (SDATA (string), raw_pattern,
1302                      SBYTES (string), 1, 0);
1303         }
1304
1305       /* Copy and optionally translate the pattern.  */
1306       len = raw_pattern_size;
1307       len_byte = raw_pattern_size_byte;
           /* A translated character may need more bytes than the character
              it replaces, so size PATBUF for the worst case of
              MAX_MULTIBYTE_LENGTH bytes per pattern character rather than
              for the untranslated byte length.  */
1308       patbuf = (unsigned char *) alloca (len * MAX_MULTIBYTE_LENGTH);
1309       pat = patbuf;
1310       base_pat = raw_pattern;
1311       if (multibyte)
1312         {
1313           /* Fill patbuf by translated characters in STRING while
1314              checking if we can use boyer-moore search.  If TRT is
1315              non-nil, we can use boyer-moore search only if TRT can be
1316              represented by the byte array of 256 elements.  For that,
1317              all non-ASCII case-equivalents of all case-sensitive
1318              characters in STRING must belong to the same charset and
1319              row.  */
1320
1321           while (--len >= 0)
1322             {
1323               unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str;
1324               int c, translated, inverse;
1325               int in_charlen, charlen;
1326
1327               /* If we got here and the RE flag is set, it's because we're
1328                  dealing with a regexp known to be trivial, so the backslash
1329                  just quotes the next character.  */
1330               if (RE && *base_pat == '\\')
1331                 {
1332                   len--;
1333                   raw_pattern_size--;
1334                   len_byte--;
1335                   base_pat++;
1336                 }
1337
1338               c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen);
1339
1340               if (NILP (trt))
1341                 {
1342                   str = base_pat;
1343                   charlen = in_charlen;
1344                 }
1345               else
1346                 {
1347                   /* Translate the character.  */
1348                   TRANSLATE (translated, trt, c);
1349                   charlen = CHAR_STRING (translated, str_base);
1350                   str = str_base;
1351
1352                   /* Check if C has any other case-equivalents.  */
1353                   TRANSLATE (inverse, inverse_trt, c);
1354                   /* If so, check if we can use boyer-moore.  */
1355                   if (c != inverse && boyer_moore_ok)
1356                     {
1357                       /* Check if all equivalents belong to the same
1358                          charset & row.  Note that the check of C
1359                          itself is done by the last iteration.  Note
1360                          also that we don't have to check ASCII
1361                          characters because boyer-moore search can
1362                          always handle their translation.  */
1363                       while (1)
1364                         {
1365                           if (ASCII_BYTE_P (inverse))
1366                             {
1367                               if (charset_base > 0)
1368                                 {
1369                                   boyer_moore_ok = 0;
1370                                   break;
1371                                 }
1372                               charset_base = 0;
1373                             }
1374                           else if (SINGLE_BYTE_CHAR_P (inverse))
1375                             {
1376                               /* Boyer-moore search can't handle a
1377                                  translation of an eight-bit
1378                                  character.  */
1379                               boyer_moore_ok = 0;
1380                               break;
1381                             }
1382                           else if (charset_base < 0)
1383                             charset_base = inverse & ~CHAR_FIELD3_MASK;
1384                           else if ((inverse & ~CHAR_FIELD3_MASK)
1385                                    != charset_base)
1386                             {
1387                               boyer_moore_ok = 0;
1388                               break;
1389                             }
1390                           if (c == inverse)
1391                             break;
1392                           TRANSLATE (inverse, inverse_trt, inverse);
1393                         }
1394                     }
1395                 }
1396               if (charset_base < 0)
1397                 charset_base = 0;
1398
1399               /* Store this character into the translated pattern.  */
1400               bcopy (str, pat, charlen);
1401               pat += charlen;
1402               base_pat += in_charlen;
1403               len_byte -= in_charlen;
1404             }
1405         }
1406       else
1407         {
1408           /* Unibyte buffer.  */
1409           charset_base = 0;
1410           while (--len >= 0)
1411             {
1412               int c, translated;
1413
1414               /* If we got here and the RE flag is set, it's because we're
1415                  dealing with a regexp known to be trivial, so the backslash
1416                  just quotes the next character.  */
1417               if (RE && *base_pat == '\\')
1418                 {
1419                   len--;
1420                   raw_pattern_size--;
1421                   base_pat++;
1422                 }
1423               c = *base_pat++;
1424               TRANSLATE (translated, trt, c);
1425               *pat++ = translated;
1426             }
1427         }
1428
           /* LEN_BYTE is the size of the translated pattern actually
              stored; LEN is the character count with quoting backslashes
              removed.  */
1429       len_byte = pat - patbuf;
1430       len = raw_pattern_size;
1431       pat = base_pat = patbuf;
1432
1433       if (boyer_moore_ok)
1434         return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
1435                             pos, pos_byte, lim, lim_byte,
1436                             charset_base);
1437       else
1438         return simple_search (n, pat, len, len_byte, trt,
1439                               pos, pos_byte, lim, lim_byte);
1440     }
1441 }
1442 \f
1443 /* Do a simple string search N times for the string PAT,
1444    whose length is LEN/LEN_BYTE,
1445    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1446    TRT is the translation table.
1447
1448    Return the character position where the match is found.
1449    Otherwise, if M matches remained to be found, return -M.
1450
1451    This kind of search works regardless of what is in PAT and
1452    regardless of what is in TRT.  It is used in cases where
1453    boyer_moore cannot work.
        
        Buffer characters are compared after translation through TRT, so
        the buffer text that matches may occupy a different number of
        bytes than PAT does.  Byte positions are therefore advanced by the
        number of bytes actually matched in the buffer, and that same
        length is what set_search_regs is given for the final match.  */
1454
1455 static int
1456 simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
1457      int n;
1458      unsigned char *pat;
1459      int len, len_byte;
1460      Lisp_Object trt;
1461      int pos, pos_byte;
1462      int lim, lim_byte;
1463 {
1464   int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1465   int forward = n > 0;
       /* Byte length of the buffer text covered by the latest match.  The
          unibyte loops never change it: there every character is one byte
          and the match is LEN_BYTE bytes long.  */
       int match_byte = len_byte;
1466
1467   if (lim > pos && multibyte)
1468     while (n > 0)
1469       {
1470         while (1)
1471           {
1472             /* Try matching at position POS.  */
1473             int this_pos = pos;
1474             int this_pos_byte = pos_byte;
1475             int this_len = len;
1476             int this_len_byte = len_byte;
1477             unsigned char *p = pat;
1478             if (pos + len > lim)
1479               goto stop;
1480
1481             while (this_len > 0)
1482               {
1483                 int charlen, buf_charlen;
1484                 int pat_ch, buf_ch;
1485
1486                 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1487                 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1488                                                  ZV_BYTE - this_pos_byte,
1489                                                  buf_charlen);
1490                 TRANSLATE (buf_ch, trt, buf_ch);
1491
1492                 if (buf_ch != pat_ch)
1493                   break;
1494
1495                 this_len_byte -= charlen;
1496                 this_len--;
1497                 p += charlen;
1498
1499                 this_pos_byte += buf_charlen;
1500                 this_pos++;
1501               }
1502
1503             if (this_len == 0)
1504               {
                     /* THIS_POS_BYTE has walked over the matched buffer
                        text, so the difference is its true byte length.  */
                     match_byte = this_pos_byte - pos_byte;
1505                 pos += len;
1506                 pos_byte += match_byte;
1507                 break;
1508               }
1509
1510             INC_BOTH (pos, pos_byte);
1511           }
1512
1513         n--;
1514       }
1515   else if (lim > pos)
1516     while (n > 0)
1517       {
1518         while (1)
1519           {
1520             /* Try matching at position POS.  */
1521             int this_pos = pos;
1522             int this_len = len;
1523             unsigned char *p = pat;
1524
1525             if (pos + len > lim)
1526               goto stop;
1527
1528             while (this_len > 0)
1529               {
1530                 int pat_ch = *p++;
1531                 int buf_ch = FETCH_BYTE (this_pos);
1532                 TRANSLATE (buf_ch, trt, buf_ch);
1533
1534                 if (buf_ch != pat_ch)
1535                   break;
1536
1537                 this_len--;
1538                 this_pos++;
1539               }
1540
1541             if (this_len == 0)
1542               {
1543                 pos += len;
1544                 break;
1545               }
1546
1547             pos++;
1548           }
1549
1550         n--;
1551       }
1552   /* Backwards search.  */
1553   else if (lim < pos && multibyte)
1554     while (n < 0)
1555       {
1556         while (1)
1557           {
1558             /* Try matching the LEN characters that end at POS.  */
1559             int this_pos = pos - len;
1560             int this_pos_byte;
                 int cand_byte;
1561             int this_len = len;
1562             int this_len_byte = len_byte;
1563             unsigned char *p = pat;
1564
                 /* Byte positions grow with character positions, so the
                    character test alone keeps the candidate at or after
                    LIM_BYTE.  */
1565             if (this_pos < lim)
1566               goto stop;
                 /* Find the candidate's real starting byte.  Stepping back
                    LEN_BYTE bytes from POS_BYTE could land in the middle
                    of a character, because the buffer text need not have
                    the same byte length as PAT.  */
                 this_pos_byte = cand_byte = CHAR_TO_BYTE (this_pos);
1567
1568             while (this_len > 0)
1569               {
1570                 int charlen, buf_charlen;
1571                 int pat_ch, buf_ch;
1572
1573                 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1574                 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1575                                                  ZV_BYTE - this_pos_byte,
1576                                                  buf_charlen);
1577                 TRANSLATE (buf_ch, trt, buf_ch);
1578
1579                 if (buf_ch != pat_ch)
1580                   break;
1581
1582                 this_len_byte -= charlen;
1583                 this_len--;
1584                 p += charlen;
1585                 this_pos_byte += buf_charlen;
1586                 this_pos++;
1587               }
1588
1589             if (this_len == 0)
1590               {
                     /* Move to the start of the match, remembering how
                        many buffer bytes it covers.  */
                     match_byte = pos_byte - cand_byte;
1591                 pos -= len;
1592                 pos_byte = cand_byte;
1593                 break;
1594               }
1595
1596             DEC_BOTH (pos, pos_byte);
1597           }
1598
1599         n++;
1600       }
1601   else if (lim < pos)
1602     while (n < 0)
1603       {
1604         while (1)
1605           {
1606             /* Try matching at position POS.  */
1607             int this_pos = pos - len;
1608             int this_len = len;
1609             unsigned char *p = pat;
1610
1611             if (pos - len < lim)
1612               goto stop;
1613
1614             while (this_len > 0)
1615               {
1616                 int pat_ch = *p++;
1617                 int buf_ch = FETCH_BYTE (this_pos);
1618                 TRANSLATE (buf_ch, trt, buf_ch);
1619
1620                 if (buf_ch != pat_ch)
1621                   break;
1622                 this_len--;
1623                 this_pos++;
1624               }
1625
1626             if (this_len == 0)
1627               {
1628                 pos -= len;
1629                 break;
1630               }
1631
1632             pos--;
1633           }
1634
1635         n++;
1636       }
1637
1638  stop:
1639   if (n == 0)
1640     {
           /* All N matches were found.  After a forward search POS is the
              end of the last match, after a backward search its start;
              record the match with its real byte length.  */
1641       if (forward)
1642         set_search_regs ((multibyte ? pos_byte : pos) - match_byte, match_byte);
1643       else
1644         set_search_regs (multibyte ? pos_byte : pos, match_byte);
1645
1646       return pos;
1647     }
       /* Otherwise report minus the number of matches still unfound.  */
1648   else if (n > 0)
1649     return -n;
1650   else
1651     return n;
1652 }
1653 \f
1654 /* Do Boyer-Moore search N times for the string BASE_PAT,
1655    whose length is LEN/LEN_BYTE,
1656    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1657    DIRECTION says which direction we search in.
1658    TRT and INVERSE_TRT are translation tables.
1659    Characters in PAT are already translated by TRT.
1660
1661    This kind of search works if all the characters in BASE_PAT that
1662    have nontrivial translation are the same aside from the last byte.
1663    This makes it possible to translate just the last byte of a
1664    character, and do so after just a simple test of the context.
1665    CHARSET_BASE is nonzero iff there is such a non-ASCII character.
1666
1667    If that criterion is not satisfied, do not call this function.  */
1668
1669 static int
1670 boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
1671              pos, pos_byte, lim, lim_byte, charset_base)
1672      int n;
1673      unsigned char *base_pat;
1674      int len, len_byte;
1675      Lisp_Object trt;
1676      Lisp_Object inverse_trt;
1677      int pos, pos_byte;
1678      int lim, lim_byte;
1679      int charset_base;
1680 {
       /* Summary of the interface as used in this body:
          N            -- repeat count; its sign selects the direction
                          (positive = forward, negative = backward).
          BASE_PAT     -- the pattern bytes; LEN_BYTE is their number.
          TRT, INVERSE_TRT -- translation table and its inverse; when TRT
                          is nil, bytes are compared untranslated.
          POS_BYTE, LIM_BYTE -- byte positions of the starting point and
                          of the bound of the search.
          CHARSET_BASE -- see the comment preceding this function.
          LEN, POS and LIM are not referenced in this body (a local named
          LEN shadows the parameter inside the CHARSET_BASE block below).

          Return value: once the last requested occurrence is found, the
          character position of its end (forward search) or of its start
          (backward search).  If the bound is reached first, the result
          is a negative number whose magnitude is the count of
          occurrences that were still to be found.  */
1681   int direction = ((n > 0) ? 1 : -1);
1682   register int dirlen;
1683   int infinity, limit, stride_for_teases = 0;
1684   register int *BM_tab;
1685   int *BM_tab_base;
1686   register unsigned char *cursor, *p_limit;
1687   register int i, j;
1688   unsigned char *pat, *pat_end;
1689   int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1690
1691   unsigned char simple_translate[0400];
1692   /* These are set to the preceding bytes of a byte to be translated
1693      if charset_base is nonzero.  As the maximum byte length of a
1694      multibyte character is 4, we have to check at most three previous
1695      bytes.  */
1696   int translate_prev_byte1 = 0;
1697   int translate_prev_byte2 = 0;
1698   int translate_prev_byte3 = 0;
1699
       /* BM_tab holds one stride per possible byte value (0400 = 256
          entries).  */
1700 #ifdef C_ALLOCA
1701   int BM_tab_space[0400];
1702   BM_tab = &BM_tab_space[0];
1703 #else
1704   BM_tab = (int *) alloca (0400 * sizeof (int));
1705 #endif
1706   /* The general approach is that we are going to maintain that we know */
1707   /* the first (closest to the present position, in whatever direction */
1708   /* we're searching) character that could possibly be the last */
1709   /* (furthest from present position) character of a valid match.  We */
1710   /* advance the state of our knowledge by looking at that character */
1711   /* and seeing whether it indeed matches the last character of the */
1712   /* pattern.  If it does, we take a closer look.  If it does not, we */
1713   /* move our pointer (to putative last characters) as far as is */
1714   /* logically possible.  This amount of movement, which I call a */
1715   /* stride, will be the length of the pattern if the actual character */
1716   /* appears nowhere in the pattern, otherwise it will be the distance */
1717   /* from the last occurrence of that character to the end of the */
1718   /* pattern. */
1719   /* As a coding trick, an enormous stride is coded into the table for */
1720   /* characters that match the last character.  This allows use of only */
1721   /* a single test, a test for having gone past the end of the */
1722   /* permissible match region, to test for both possible matches (when */
1723   /* the stride goes past the end immediately) and failure to */
1724   /* match (where you get nudged past the end one stride at a time). */
1725
1726   /* Here we make a "mickey mouse" BM table.  The stride of the search */
1727   /* is determined only by the last character of the putative match. */
1728   /* If that character does not match, we will stride the proper */
1729   /* distance to propose a match that superimposes it on the last */
1730   /* instance of a character that matches it (per trt), or misses */
1731   /* it entirely if there is none. */
1732
       /* DIRLEN is the pattern length in bytes, signed by the direction.
          For now INFINITY is only a sentinel value for I in the
          table-building loop below; after that loop it is converted into
          the actual enormous stride.  */
1733   dirlen = len_byte * direction;
1734   infinity = dirlen - (lim_byte + pos_byte + len_byte + len_byte) * direction;
1735
1736   /* Record position after the end of the pattern.  */
1737   pat_end = base_pat + len_byte;
1738   /* BASE_PAT points to a character that we start scanning from.
1739      It is the first character in a forward search,
1740      the last character in a backward search.  */
1741   if (direction < 0)
1742     base_pat = pat_end - 1;
1743
1744   BM_tab_base = BM_tab;
1745   BM_tab += 0400;
1746   j = dirlen;           /* to get it in a register */
1747   /* A character that does not appear in the pattern induces a */
1748   /* stride equal to the pattern length. */
       /* The fill runs backward from the end of the table, four entries per
          iteration, and leaves BM_tab pointing at the table base again.  */
1749   while (BM_tab_base != BM_tab)
1750     {
1751       *--BM_tab = j;
1752       *--BM_tab = j;
1753       *--BM_tab = j;
1754       *--BM_tab = j;
1755     }
1756
1757   /* We use this for translation, instead of TRT itself.
1758      We fill this in to handle the characters that actually
1759      occur in the pattern.  Others don't matter anyway!  */
       /* Every entry is overwritten with the identity mapping right after
          the bzero.  */
1760   bzero (simple_translate, sizeof simple_translate);
1761   for (i = 0; i < 0400; i++)
1762     simple_translate[i] = i;
1763
1764   if (charset_base)
1765     {
1766       /* Set up translate_prev_byte1/2/3 from CHARSET_BASE.  Only a
1767          byte following them is a target of translation.  */
1768       int sample_char = charset_base | 0x20;
1769       unsigned char str[MAX_MULTIBYTE_LENGTH];
1770       int len = CHAR_STRING (sample_char, str);
1771
1772       translate_prev_byte1 = str[len - 2];
1773       if (len > 2)
1774         {
1775           translate_prev_byte2 = str[len - 3];
1776           if (len > 3)
1777             translate_prev_byte3 = str[len - 4];
1778         }
1779     }
1780
       /* Walk the pattern one byte at a time, starting from BASE_PAT and
          moving in DIRECTION.  I is the signed offset of the byte after
          the current one; on the final byte it is replaced by the
          INFINITY sentinel, which also terminates the loop.  */
1781   i = 0;
1782   while (i != infinity)
1783     {
1784       unsigned char *ptr = base_pat + i;
1785       i += direction;
1786       if (i == dirlen)
1787         i = infinity;
1788       if (! NILP (trt))
1789         {
1790           /* If the byte currently looking at is the last of a
1791              character to check case-equivalents, set CH to that
1792              character.  An ASCII character and a non-ASCII character
1793              matching with CHARSET_BASE are to be checked.  */
1794           int ch = -1;
1795
1796           if (ASCII_BYTE_P (*ptr) || ! multibyte)
1797             ch = *ptr;
1798           else if (charset_base
1799                    && ((pat_end - ptr) == 1 || CHAR_HEAD_P (ptr[1])))
1800             {
1801               unsigned char *charstart = ptr - 1;
1802
1803               while (! (CHAR_HEAD_P (*charstart)))
1804                 charstart--;
1805               ch = STRING_CHAR (charstart, ptr - charstart + 1);
1806               if (charset_base != (ch & ~CHAR_FIELD3_MASK))
1807                 ch = -1;
1808             }
1809
               /* A character code of 0400 or more is indexed in the tables
                  by its low eight bits with the 0200 bit forced on;
                  otherwise the raw pattern byte is the index.  */
1810           if (ch >= 0400)
1811             j = ((unsigned char) ch) | 0200;
1812           else
1813             j = *ptr;
1814
               /* On the far byte, remember the stride this byte value had
                  before it is overwritten with the enormous one.  */
1815           if (i == infinity)
1816             stride_for_teases = BM_tab[j];
1817
1818           BM_tab[j] = dirlen - i;
1819           /* A translation table is accompanied by its inverse -- see */
1820           /* comment following downcase_table for details */
1821           if (ch >= 0)
1822             {
1823               int starting_ch = ch;
1824               int starting_j = j;
1825
                   /* Follow INVERSE_TRT from CH until it comes back to
                      STARTING_CH, giving every character met on the way
                      the same stride and the same simple_translate
                      target.  */
1826               while (1)
1827                 {
1828                   TRANSLATE (ch, inverse_trt, ch);
1829                   if (ch >= 0400)
1830                     j = ((unsigned char) ch) | 0200;
1831                   else
1832                     j = (unsigned char) ch;
1833
1834                   /* For all the characters that map into CH,
1835                      set up simple_translate to map the last byte
1836                      into STARTING_J.  */
1837                   simple_translate[j] = starting_j;
1838                   if (ch == starting_ch)
1839                     break;
1840                   BM_tab[j] = dirlen - i;
1841                 }
1842             }
1843         }
1844       else
1845         {
1846           j = *ptr;
1847
1848           if (i == infinity)
1849             stride_for_teases = BM_tab[j];
1850           BM_tab[j] = dirlen - i;
1851         }
1852       /* stride_for_teases tells how much to stride if we get a */
1853       /* match on the far character but are subsequently */
1854       /* disappointed, by recording what the stride would have been */
1855       /* for that character if the last character had been */
1856       /* different. */
1857     }
       /* Turn the sentinel into the stride that was actually stored in
          BM_tab for the far character (dirlen - i with I equal to the old
          sentinel), so that it can be subtracted again after a hit.  */
1858   infinity = dirlen - infinity;
1859   pos_byte += dirlen - ((direction > 0) ? direction : 0);
1860   /* loop invariant - POS_BYTE points at where last char (first
1861      char if reverse) of pattern would align in a possible match.  */
1862   while (n != 0)
1863     {
1864       int tail_end;
1865       unsigned char *tail_end_ptr;
1866
1867       /* It's been reported that some (broken) compiler thinks that
1868          Boolean expressions in an arithmetic context are unsigned.
1869          Using an explicit ?1:0 prevents this.  */
1870       if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1871           < 0)
1872         return (n * (0 - direction));
1873       /* First we do the part we can by pointers (maybe nothing) */
1874       QUIT;
1875       pat = base_pat;
1876       limit = pos_byte - dirlen + direction;
1877       if (direction > 0)
1878         {
1879           limit = BUFFER_CEILING_OF (limit);
1880           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1881              can take on without hitting edge of buffer or the gap.  */
1882           limit = min (limit, pos_byte + 20000);
1883           limit = min (limit, lim_byte - 1);
1884         }
1885       else
1886         {
1887           limit = BUFFER_FLOOR_OF (limit);
1888           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1889              can take on without hitting edge of buffer or the gap.  */
1890           limit = max (limit, pos_byte - 20000);
1891           limit = max (limit, lim_byte);
1892         }
           /* NOTE(review): TAIL_END is one past BUFFER_CEILING_OF (pos_byte);
              the verification loops below test the byte pointer against
              TAIL_END_PTR before inspecting the byte that follows it.
              Presumably this keeps them from reading across the end of the
              contiguous text -- confirm against buffer.h.  */
1893       tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1894       tail_end_ptr = BYTE_POS_ADDR (tail_end);
1895
           /* More than 20 bytes can be scanned before LIMIT: take the
              pointer-based path.  Otherwise fall to the position-based
              path in the else branch.  */
1896       if ((limit - pos_byte) * direction > 20)
1897         {
1898           unsigned char *p2;
1899
1900           p_limit = BYTE_POS_ADDR (limit);
1901           p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1902           /* In this loop, pos + cursor - p2 is the surrogate for pos */
1903           while (1)             /* use one cursor setting as long as i can */
1904             {
1905               if (direction > 0) /* worth duplicating */
1906                 {
1907                   /* Use signed comparison if appropriate
1908                      to make cursor+infinity sure to be > p_limit.
1909                      Assuming that the buffer lies in a range of addresses
1910                      that are all "positive" (as ints) or all "negative",
1911                      either kind of comparison will work as long
1912                      as we don't step by infinity.  So pick the kind
1913                      that works when we do step by infinity.  */
1914                   if ((EMACS_INT) (p_limit + infinity) > (EMACS_INT) p_limit)
1915                     while ((EMACS_INT) cursor <= (EMACS_INT) p_limit)
1916                       cursor += BM_tab[*cursor];
1917                   else
1918                     while ((EMACS_UINT) cursor <= (EMACS_UINT) p_limit)
1919                       cursor += BM_tab[*cursor];
1920                 }
1921               else
1922                 {
1923                   if ((EMACS_INT) (p_limit + infinity) < (EMACS_INT) p_limit)
1924                     while ((EMACS_INT) cursor >= (EMACS_INT) p_limit)
1925                       cursor += BM_tab[*cursor];
1926                   else
1927                     while ((EMACS_UINT) cursor >= (EMACS_UINT) p_limit)
1928                       cursor += BM_tab[*cursor];
1929                 }
1930 /* If you are here, cursor is beyond the end of the searched region. */
1931 /* This can happen if you match on the far character of the pattern, */
1932 /* because the "stride" of that character is infinity, a number able */
1933 /* to throw you well beyond the end of the search.  It can also */
1934 /* happen if you fail to match within the permitted region and would */
1935 /* otherwise try a character beyond that region */
1936               if ((cursor - p_limit) * direction <= len_byte)
1937                 break;  /* a small overrun is genuine */
1938               cursor -= infinity; /* large overrun = hit */
                   /* The far byte already matched; compare the remaining
                      pattern bytes walking back toward the near end.  The
                      loop ends with i + direction == 0 exactly when every
                      byte matched.  */
1939               i = dirlen - direction;
1940               if (! NILP (trt))
1941                 {
1942                   while ((i -= direction) + direction != 0)
1943                     {
1944                       int ch;
1945                       cursor -= direction;
1946                       /* Translate only the last byte of a character.  */
1947                       if (! multibyte
1948                           || ((cursor == tail_end_ptr
1949                                || CHAR_HEAD_P (cursor[1]))
1950                               && (CHAR_HEAD_P (cursor[0])
1951                                   /* Check if this is the last byte of
1952                                      a translatable character.  */
1953                                   || (translate_prev_byte1 == cursor[-1]
1954                                       && (CHAR_HEAD_P (translate_prev_byte1)
1955                                           || (translate_prev_byte2 == cursor[-2]
1956                                               && (CHAR_HEAD_P (translate_prev_byte2)
1957                                                   || (translate_prev_byte3 == cursor[-3]))))))))
1958                         ch = simple_translate[*cursor];
1959                       else
1960                         ch = *cursor;
1961                       if (pat[i] != ch)
1962                         break;
1963                     }
1964                 }
1965               else
1966                 {
1967                   while ((i -= direction) + direction != 0)
1968                     {
1969                       cursor -= direction;
1970                       if (pat[i] != *cursor)
1971                         break;
1972                     }
1973                 }
1974               cursor += dirlen - i - direction; /* fix cursor */
1975               if (i + direction == 0)
1976                 {
1977                   int position, start, end;
1978
1979                   cursor -= direction;
1980
                       /* POSITION is the byte position where the match
                          begins.  */
1981                   position = pos_byte + cursor - p2 + ((direction > 0)
1982                                                        ? 1 - len_byte : 0);
1983                   set_search_regs (position, len_byte);
1984
1985                   if (NILP (Vinhibit_changing_match_data))
1986                     {
1987                       start = search_regs.start[0];
1988                       end = search_regs.end[0];
1989                     }
1990                   else
1991                     /* If Vinhibit_changing_match_data is non-nil,
1992                        search_regs will not be changed.  So let's
1993                        compute start and end here.  */
1994                     {
1995                       start = BYTE_TO_CHAR (position);
1996                       end = BYTE_TO_CHAR (position + len_byte);
1997                     }
1998
1999                   if ((n -= direction) != 0)
2000                     cursor += dirlen; /* to resume search */
2001                   else
2002                     return direction > 0 ? end : start;
2003                 }
2004               else
2005                 cursor += stride_for_teases; /* <sigh> we lose -  */
2006             }
2007           pos_byte += cursor - p2;
2008         }
2009       else
2010         /* Now we'll pick up a clump that has to be done the hard */
2011         /* way because it covers a discontinuity */
2012         {
2013           limit = ((direction > 0)
2014                    ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
2015                    : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
2016           limit = ((direction > 0)
2017                    ? min (limit + len_byte, lim_byte - 1)
2018                    : max (limit - len_byte, lim_byte));
2019           /* LIMIT is now the last value POS_BYTE can have
2020              and still be valid for a possible match.  */
2021           while (1)
2022             {
2023               /* This loop can be coded for space rather than */
2024               /* speed because it will usually run only once. */
2025               /* (the reach is at most len + 21, and typically */
2026               /* does not exceed len) */
2027               while ((limit - pos_byte) * direction >= 0)
2028                 pos_byte += BM_tab[FETCH_BYTE (pos_byte)];
2029               /* now run the same tests to distinguish going off the */
2030               /* end, a match or a phony match. */
2031               if ((pos_byte - limit) * direction <= len_byte)
2032                 break;  /* ran off the end */
2033               /* Found what might be a match.
2034                  Set POS_BYTE back to last (first if reverse) pos.  */
2035               pos_byte -= infinity;
2036               i = dirlen - direction;
                   /* Unlike the pointer-based path, this path has a single
                      verification loop; when TRT is nil simple_translate is
                      still the identity mapping set up above.  */
2037               while ((i -= direction) + direction != 0)
2038                 {
2039                   int ch;
2040                   unsigned char *ptr;
2041                   pos_byte -= direction;
2042                   ptr = BYTE_POS_ADDR (pos_byte);
2043                   /* Translate only the last byte of a character.  */
2044                   if (! multibyte
2045                       || ((ptr == tail_end_ptr
2046                            || CHAR_HEAD_P (ptr[1]))
2047                           && (CHAR_HEAD_P (ptr[0])
2048                               /* Check if this is the last byte of a
2049                                  translatable character.  */
2050                               || (translate_prev_byte1 == ptr[-1]
2051                                   && (CHAR_HEAD_P (translate_prev_byte1)
2052                                       || (translate_prev_byte2 == ptr[-2]
2053                                           && (CHAR_HEAD_P (translate_prev_byte2)
2054                                               || translate_prev_byte3 == ptr[-3])))))))
2055                     ch = simple_translate[*ptr];
2056                   else
2057                     ch = *ptr;
2058                   if (pat[i] != ch)
2059                     break;
2060                 }
2061               /* Above loop has moved POS_BYTE part or all the way
2062                  back to the first pos (last pos if reverse).
2063                  Set it once again at the last (first if reverse) char.  */
2064               pos_byte += dirlen - i- direction;
2065               if (i + direction == 0)
2066                 {
2067                   int position, start, end;
2068                   pos_byte -= direction;
2069
2070                   position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
2071                   set_search_regs (position, len_byte);
2072
2073                   if (NILP (Vinhibit_changing_match_data))
2074                     {
2075                       start = search_regs.start[0];
2076                       end = search_regs.end[0];
2077                     }
2078                   else
2079                     /* If Vinhibit_changing_match_data is non-nil,
2080                        search_regs will not be changed.  So let's
2081                        compute start and end here.  */
2082                     {
2083                       start = BYTE_TO_CHAR (position);
2084                       end = BYTE_TO_CHAR (position + len_byte);
2085                     }
2086
2087                   if ((n -= direction) != 0)
2088                     pos_byte += dirlen; /* to resume search */
2089                   else
2090                     return direction > 0 ? end : start;
2091                 }
2092               else
2093                 pos_byte += stride_for_teases;
2094             }
2095           }
2096       /* We have done one clump.  Can we continue? */
2097       if ((lim_byte - pos_byte) * direction < 0)
2098         return ((0 - n) * direction);
2099     }
       /* Inside the loop N reaching zero returns directly, so this point is
          reached only when N was already zero on entry.  */
2100   return BYTE_TO_CHAR (pos_byte);
2101 }
2102
2103 /* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
2104    for the overall match just found in the current buffer.
2105    Also clear out the match data for registers 1 and up.  */
2106
2107 static void
2108 set_search_regs (beg_byte, nbytes)
2109      int beg_byte, nbytes;
2110 {
2111   int i;
2112
2113   if (!NILP (Vinhibit_changing_match_data))
2114     return;
2115
2116   /* Make sure we have registers in which to store
2117      the match position.  */
2118   if (search_regs.num_regs == 0)
2119     {
2120       search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
2121       search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
2122       search_regs.num_regs = 2;
2123     }
2124
2125   /* Clear out the other registers.  */
2126   for (i = 1; i < search_regs.num_regs; i++)
2127     {
2128       search_regs.start[i] = -1;
2129       search_regs.end[i] = -1;
2130     }
2131
2132   search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
2133   search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
2134   XSETBUFFER (last_thing_searched, current_buffer);
2135 }
2136 \f
2137 /* Given a string of words separated by word delimiters,
2138   compute a regexp that matches those exact words
2139   separated by arbitrary punctuation.  */
2140
2141 static Lisp_Object
2142 wordify (string)
2143      Lisp_Object string;
2144 {
2145   register unsigned char *p, *o;
2146   register int i, i_byte, len, punct_count = 0, word_count = 0;
2147   Lisp_Object val;
2148   int prev_c = 0;
2149   int adjust;
2150
2151   CHECK_STRING (string);
2152   p = SDATA (string);
2153   len = SCHARS (string);
2154
2155   for (i = 0, i_byte = 0; i < len; )
2156     {
2157       int c;
2158
2159       FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
2160
2161       if (SYNTAX (c) != Sword)
2162         {
2163           punct_count++;
2164           if (i > 0 && SYNTAX (prev_c) == Sword)
2165             word_count++;
2166         }
2167
2168       prev_c = c;
2169     }
2170
2171   if (SYNTAX (prev_c) == Sword)
2172     word_count++;
2173   if (!word_count)
2174     return empty_unibyte_string;
2175
2176   adjust = - punct_count + 5 * (word_count - 1) + 4;
2177   if (STRING_MULTIBYTE (string))
2178     val = make_uninit_multibyte_string (len + adjust,
2179                                         SBYTES (string)
2180                                         + adjust);
2181   else
2182     val = make_uninit_string (len + adjust);
2183
2184   o = SDATA (val);
2185   *o++ = '\\';
2186   *o++ = 'b';
2187   prev_c = 0;
2188
2189   for (i = 0, i_byte = 0; i < len; )
2190     {
2191       int c;
2192       int i_byte_orig = i_byte;
2193
2194       FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
2195
2196       if (SYNTAX (c) == Sword)
2197         {
2198           bcopy (SDATA (string) + i_byte_orig, o,
2199                  i_byte - i_byte_orig);
2200           o += i_byte - i_byte_orig;
2201         }
2202       else if (i > 0 && SYNTAX (prev_c) == Sword && --word_count)
2203         {
2204           *o++ = '\\';
2205           *o++ = 'W';
2206           *o++ = '\\';
2207           *o++ = 'W';
2208           *o++ = '*';
2209         }
2210
2211       prev_c = c;
2212     }
2213
2214   *o++ = '\\';
2215   *o++ = 'b';
2216
2217   return val;
2218 }
2219 \f
2220 DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
2221        "MSearch backward: ",
2222        doc: /* Search backward from point for STRING.
2223 Set point to the beginning of the occurrence found, and return point.
2224 An optional second argument bounds the search; it is a buffer position.
2225 The match found must not extend before that position.
2226 Optional third argument, if t, means if fail just return nil (no error).
2227  If not nil and not t, position at limit of search and return nil.
2228 Optional fourth argument is repeat count--search for successive occurrences.
2229
2230 Search case-sensitivity is determined by the value of the variable
2231 `case-fold-search', which see.
2232
2233 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2234      (string, bound, noerror, count)
2235      Lisp_Object string, bound, noerror, count;
2236 {
       /* Plain string search going backward: the fifth argument is -1 and
          both trailing flags are 0.
          NOTE(review): search_command is defined outside this excerpt; by
          comparison with the sibling callers its last three arguments look
          like direction, regexp flag and POSIX flag -- confirm.  */
2237   return search_command (string, bound, noerror, count, -1, 0, 0);
2238 }
2239
2240 DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
2241        doc: /* Search forward from point for STRING.
2242 Set point to the end of the occurrence found, and return point.
2243 An optional second argument bounds the search; it is a buffer position.
2244 The match found must not extend after that position.  A value of nil is
2245   equivalent to (point-max).
2246 Optional third argument, if t, means if fail just return nil (no error).
2247   If not nil and not t, move to limit of search and return nil.
2248 Optional fourth argument is repeat count--search for successive occurrences.
2249
2250 Search case-sensitivity is determined by the value of the variable
2251 `case-fold-search', which see.
2252
2253 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2254      (string, bound, noerror, count)
2255      Lisp_Object string, bound, noerror, count;
2256 {
       /* Plain string search going forward: the fifth argument is 1 and
          both trailing flags are 0.
          NOTE(review): search_command lives outside this excerpt; judging
          from the other callers here, the last three arguments appear to
          be direction, regexp flag and POSIX flag -- confirm.  */
2257   return search_command (string, bound, noerror, count, 1, 0, 0);
2258 }
2259
2260 DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
2261        "sWord search backward: ",
2262        doc: /* Search backward from point for STRING, ignoring differences in punctuation.
2263 Set point to the beginning of the occurrence found, and return point.
2264 An optional second argument bounds the search; it is a buffer position.
2265 The match found must not extend before that position.
2266 Optional third argument, if t, means if fail just return nil (no error).
2267   If not nil and not t, move to limit of search and return nil.
2268 Optional fourth argument is repeat count--search for successive occurrences.  */)
2269      (string, bound, noerror, count)
2270      Lisp_Object string, bound, noerror, count;
2271 {
       /* STRING is first converted by wordify into a regexp, which is then
          searched for backward (fifth argument -1) with the sixth argument
          set to 1, as in the re-search functions.
          NOTE(review): the meaning of the trailing arguments of
          search_command is inferred from its callers -- confirm.  */
2272   return search_command (wordify (string), bound, noerror, count, -1, 1, 0);
2273 }
2274
2275 DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
2276        "sWord search: ",
2277        doc: /* Search forward from point for STRING, ignoring differences in punctuation.
2278 Set point to the end of the occurrence found, and return point.
2279 An optional second argument bounds the search; it is a buffer position.
2280 The match found must not extend after that position.
2281 Optional third argument, if t, means if fail just return nil (no error).
2282   If not nil and not t, move to limit of search and return nil.
2283 Optional fourth argument is repeat count--search for successive occurrences.  */)
2284      (string, bound, noerror, count)
2285      Lisp_Object string, bound, noerror, count;
2286 {
       /* STRING is first converted by wordify into a regexp, which is then
          searched for forward (fifth argument 1) with the sixth argument
          set to 1, as in the re-search functions.
          NOTE(review): the meaning of the trailing arguments of
          search_command is inferred from its callers -- confirm.  */
2287   return search_command (wordify (string), bound, noerror, count, 1, 1, 0);
2288 }
2289
2290 DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
2291        "sRE search backward: ",
2292        doc: /* Search backward from point for match for regular expression REGEXP.
2293 Set point to the beginning of the match, and return point.
2294 The match found is the one starting last in the buffer
2295 and yet ending before the origin of the search.
2296 An optional second argument bounds the search; it is a buffer position.
2297 The match found must start at or after that position.
2298 Optional third argument, if t, means if fail just return nil (no error).
2299   If not nil and not t, move to limit of search and return nil.
2300 Optional fourth argument is repeat count--search for successive occurrences.
2301 See also the functions `match-beginning', `match-end', `match-string',
2302 and `replace-match'.  */)
2303      (regexp, bound, noerror, count)
2304      Lisp_Object regexp, bound, noerror, count;
2305 {
       /* Regexp search going backward: fifth argument -1, sixth argument 1,
          seventh argument 0 (it is 1 only in the posix-search functions).
          NOTE(review): argument meanings of search_command are inferred
          from its callers, as its definition is not in this excerpt.  */
2306   return search_command (regexp, bound, noerror, count, -1, 1, 0);
2307 }
2308
2309 DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
2310        "sRE search: ",
2311        doc: /* Search forward from point for regular expression REGEXP.
2312 Set point to the end of the occurrence found, and return point.
2313 An optional second argument bounds the search; it is a buffer position.
2314 The match found must not extend after that position.
2315 Optional third argument, if t, means if fail just return nil (no error).
2316   If not nil and not t, move to limit of search and return nil.
2317 Optional fourth argument is repeat count--search for successive occurrences.
2318 See also the functions `match-beginning', `match-end', `match-string',
2319 and `replace-match'.  */)
2320      (regexp, bound, noerror, count)
2321      Lisp_Object regexp, bound, noerror, count;
2322 {
       /* Regexp search going forward: fifth argument 1, sixth argument 1,
          seventh argument 0 (it is 1 only in the posix-search functions).
          NOTE(review): argument meanings of search_command are inferred
          from its callers, as its definition is not in this excerpt.  */
2323   return search_command (regexp, bound, noerror, count, 1, 1, 0);
2324 }
2325
2326 DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
2327        "sPosix search backward: ",
2328        doc: /* Search backward from point for match for regular expression REGEXP.
2329 Find the longest match in accord with Posix regular expression rules.
2330 Set point to the beginning of the match, and return point.
2331 The match found is the one starting last in the buffer
2332 and yet ending before the origin of the search.
2333 An optional second argument bounds the search; it is a buffer position.
2334 The match found must start at or after that position.
2335 Optional third argument, if t, means if fail just return nil (no error).
2336   If not nil and not t, move to limit of search and return nil.
2337 Optional fourth argument is repeat count--search for successive occurrences.
2338 See also the functions `match-beginning', `match-end', `match-string',
2339 and `replace-match'.  */)
2340      (regexp, bound, noerror, count)
2341      Lisp_Object regexp, bound, noerror, count;
2342 {
       /* Same call as in re-search-backward except that the seventh
          argument is 1, which here goes with the Posix longest-match
          behavior described in the doc string.
          NOTE(review): search_command is defined outside this excerpt --
          confirm the role of its last argument there.  */
2343   return search_command (regexp, bound, noerror, count, -1, 1, 1);
2344 }
2345
2346 DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
2347        "sPosix search: ",
2348        doc: /* Search forward from point for regular expression REGEXP.
2349 Find the longest match in accord with Posix regular expression rules.
2350 Set point to the end of the occurrence found, and return point.
2351 An optional second argument bounds the search; it is a buffer position.
2352 The match found must not extend after that position.
2353 Optional third argument, if t, means if fail just return nil (no error).
2354   If not nil and not t, move to limit of search and return nil.
2355 Optional fourth argument is repeat count--search for successive occurrences.
2356 See also the functions `match-beginning', `match-end', `match-string',
2357 and `replace-match'.  */)
2358      (regexp, bound, noerror, count)
2359      Lisp_Object regexp, bound, noerror, count;
2360 {
       /* Same call as in re-search-forward except that the seventh
          argument is 1, which here goes with the Posix longest-match
          behavior described in the doc string.
          NOTE(review): search_command is defined outside this excerpt --
          confirm the role of its last argument there.  */
2361   return search_command (regexp, bound, noerror, count, 1, 1, 1);
2362 }
2363 \f
2364 DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
2365        doc: /* Replace text matched by last search with NEWTEXT.
2366 Leave point at the end of the replacement text.
2367
2368 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
2369 Otherwise maybe capitalize the whole text, or maybe just word initials,
2370 based on the replaced text.
2371 If the replaced text has only capital letters
2372 and has at least one multiletter word, convert NEWTEXT to all caps.
2373 Otherwise if all words are capitalized in the replaced text,
2374 capitalize each word in NEWTEXT.
2375
2376 If third arg LITERAL is non-nil, insert NEWTEXT literally.
2377 Otherwise treat `\\' as special:
2378   `\\&' in NEWTEXT means substitute original matched text.
2379   `\\N' means substitute what matched the Nth `\\(...\\)'.
2380        If Nth parens didn't match, substitute nothing.
2381   `\\\\' means insert one `\\'.
2382 Case conversion does not apply to these substitutions.
2383
2384 FIXEDCASE and LITERAL are optional arguments.
2385
2386 The optional fourth argument STRING can be a string to modify.
2387 This is meaningful when the previous match was done against STRING,
2388 using `string-match'.  When used this way, `replace-match'
2389 creates and returns a new string made by copying STRING and replacing
2390 the part of STRING that was matched.
2391
2392 The optional fifth argument SUBEXP specifies a subexpression;
2393 it says to replace just that subexpression with NEWTEXT,
2394 rather than replacing the entire matched text.
2395 This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
2396 `\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
2397 NEWTEXT in place of subexp N.
2398 This is useful only after a regular expression search or match,
2399 since only regular expressions have distinguished subexpressions.  */)
2400      (newtext, fixedcase, literal, string, subexp)
2401      Lisp_Object newtext, fixedcase, literal, string, subexp;
2402 {
2403   enum { nochange, all_caps, cap_initial } case_action;
2404   register int pos, pos_byte;
2405   int some_multiletter_word;
2406   int some_lowercase;
2407   int some_uppercase;
2408   int some_nonuppercase_initial;
2409   register int c, prevc;
2410   int sub;
2411   int opoint, newpoint;
2412
2413   CHECK_STRING (newtext);
2414
2415   if (! NILP (string))
2416     CHECK_STRING (string);
2417
2418   case_action = nochange;       /* We tried an initialization */
2419                                 /* but some C compilers blew it */
2420
2421   if (search_regs.num_regs <= 0)
2422     error ("`replace-match' called before any match found");
2423
2424   if (NILP (subexp))
2425     sub = 0;
2426   else
2427     {
2428       CHECK_NUMBER (subexp);
2429       sub = XINT (subexp);
2430       if (sub < 0 || sub >= search_regs.num_regs)
2431         args_out_of_range (subexp, make_number (search_regs.num_regs));
2432     }
2433
2434   if (NILP (string))
2435     {
2436       if (search_regs.start[sub] < BEGV
2437           || search_regs.start[sub] > search_regs.end[sub]
2438           || search_regs.end[sub] > ZV)
2439         args_out_of_range (make_number (search_regs.start[sub]),
2440                            make_number (search_regs.end[sub]));
2441     }
2442   else
2443     {
2444       if (search_regs.start[sub] < 0
2445           || search_regs.start[sub] > search_regs.end[sub]
2446           || search_regs.end[sub] > SCHARS (string))
2447         args_out_of_range (make_number (search_regs.start[sub]),
2448                            make_number (search_regs.end[sub]));
2449     }
2450
2451   if (NILP (fixedcase))
2452     {
2453       /* Decide how to casify by examining the matched text. */
2454       int last;
2455
2456       pos = search_regs.start[sub];
2457       last = search_regs.end[sub];
2458
2459       if (NILP (string))
2460         pos_byte = CHAR_TO_BYTE (pos);
2461       else
2462         pos_byte = string_char_to_byte (string, pos);
2463
2464       prevc = '\n';
2465       case_action = all_caps;
2466
2467       /* some_multiletter_word is set nonzero if any original word
2468          is more than one letter long. */
2469       some_multiletter_word = 0;
2470       some_lowercase = 0;
2471       some_nonuppercase_initial = 0;
2472       some_uppercase = 0;
2473
2474       while (pos < last)
2475         {
2476           if (NILP (string))
2477             {
2478               c = FETCH_CHAR (pos_byte);
2479               INC_BOTH (pos, pos_byte);
2480             }
2481           else
2482             FETCH_STRING_CHAR_ADVANCE (c, string, pos, pos_byte);
2483
2484           if (LOWERCASEP (c))
2485             {
2486               /* Cannot be all caps if any original char is lower case */
2487
2488               some_lowercase = 1;
2489               if (SYNTAX (prevc) != Sword)
2490                 some_nonuppercase_initial = 1;
2491               else
2492                 some_multiletter_word = 1;
2493             }
2494           else if (UPPERCASEP (c))
2495             {
2496               some_uppercase = 1;
2497               if (SYNTAX (prevc) != Sword)
2498                 ;
2499               else
2500                 some_multiletter_word = 1;
2501             }
2502           else
2503             {
2504               /* If the initial is a caseless word constituent,
2505                  treat that like a lowercase initial.  */
2506               if (SYNTAX (prevc) != Sword)
2507                 some_nonuppercase_initial = 1;
2508             }
2509
2510           prevc = c;
2511         }
2512
2513       /* Convert to all caps if the old text is all caps
2514          and has at least one multiletter word.  */
2515       if (! some_lowercase && some_multiletter_word)
2516         case_action = all_caps;
2517       /* Capitalize each word, if the old text has all capitalized words.  */
2518       else if (!some_nonuppercase_initial && some_multiletter_word)
2519         case_action = cap_initial;
2520       else if (!some_nonuppercase_initial && some_uppercase)
2521         /* Should x -> yz, operating on X, give Yz or YZ?
2522            We'll assume the latter.  */
2523         case_action = all_caps;
2524       else
2525         case_action = nochange;
2526     }
2527
2528   /* Do replacement in a string.  */
2529   if (!NILP (string))
2530     {
2531       Lisp_Object before, after;
2532
2533       before = Fsubstring (string, make_number (0),
2534                            make_number (search_regs.start[sub]));
2535       after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
2536
2537       /* Substitute parts of the match into NEWTEXT
2538          if desired.  */
2539       if (NILP (literal))
2540         {
2541           int lastpos = 0;
2542           int lastpos_byte = 0;
2543           /* We build up the substituted string in ACCUM.  */
2544           Lisp_Object accum;
2545           Lisp_Object middle;
2546           int length = SBYTES (newtext);
2547
2548           accum = Qnil;
2549
2550           for (pos_byte = 0, pos = 0; pos_byte < length;)
2551             {
2552               int substart = -1;
2553               int subend = 0;
2554               int delbackslash = 0;
2555
2556               FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2557
2558               if (c == '\\')
2559                 {
2560                   FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2561
2562                   if (c == '&')
2563                     {
2564                       substart = search_regs.start[sub];
2565                       subend = search_regs.end[sub];
2566                     }
2567                   else if (c >= '1' && c <= '9')
2568                     {
2569                       if (search_regs.start[c - '0'] >= 0
2570                           && c <= search_regs.num_regs + '0')
2571                         {
2572                           substart = search_regs.start[c - '0'];
2573                           subend = search_regs.end[c - '0'];
2574                         }
2575                       else
2576                         {
2577                           /* If that subexp did not match,
2578                              replace \\N with nothing.  */
2579                           substart = 0;
2580                           subend = 0;
2581                         }
2582                     }
2583                   else if (c == '\\')
2584                     delbackslash = 1;
2585                   else
2586                     error ("Invalid use of `\\' in replacement text");
2587                 }
2588               if (substart >= 0)
2589                 {
2590                   if (pos - 2 != lastpos)
2591                     middle = substring_both (newtext, lastpos,
2592                                              lastpos_byte,
2593                                              pos - 2, pos_byte - 2);
2594                   else
2595                     middle = Qnil;
2596                   accum = concat3 (accum, middle,
2597                                    Fsubstring (string,
2598                                                make_number (substart),
2599                                                make_number (subend)));
2600                   lastpos = pos;
2601                   lastpos_byte = pos_byte;
2602                 }
2603               else if (delbackslash)
2604                 {
2605                   middle = substring_both (newtext, lastpos,
2606                                            lastpos_byte,
2607                                            pos - 1, pos_byte - 1);
2608
2609                   accum = concat2 (accum, middle);
2610                   lastpos = pos;
2611                   lastpos_byte = pos_byte;
2612                 }
2613             }
2614
2615           if (pos != lastpos)
2616             middle = substring_both (newtext, lastpos,
2617                                      lastpos_byte,
2618                                      pos, pos_byte);
2619           else
2620             middle = Qnil;
2621
2622           newtext = concat2 (accum, middle);
2623         }
2624
2625       /* Do case substitution in NEWTEXT if desired.  */
2626       if (case_action == all_caps)
2627         newtext = Fupcase (newtext);
2628       else if (case_action == cap_initial)
2629         newtext = Fupcase_initials (newtext);
2630
2631       return concat3 (before, newtext, after);
2632     }
2633
2634   /* Record point, then move (quietly) to the start of the match.  */
2635   if (PT >= search_regs.end[sub])
2636     opoint = PT - ZV;
2637   else if (PT > search_regs.start[sub])
2638     opoint = search_regs.end[sub] - ZV;
2639   else
2640     opoint = PT;
2641
2642   /* If we want non-literal replacement,
2643      perform substitution on the replacement string.  */
2644   if (NILP (literal))
2645     {
2646       int length = SBYTES (newtext);
2647       unsigned char *substed;
2648       int substed_alloc_size, substed_len;
2649       int buf_multibyte = !NILP (current_buffer->enable_multibyte_characters);
2650       int str_multibyte = STRING_MULTIBYTE (newtext);
2651       Lisp_Object rev_tbl;
2652       int really_changed = 0;
2653
2654       rev_tbl= (!buf_multibyte && CHAR_TABLE_P (Vnonascii_translation_table)
2655                 ? Fchar_table_extra_slot (Vnonascii_translation_table,
2656                                           make_number (0))
2657                 : Qnil);
2658
2659       substed_alloc_size = length * 2 + 100;
2660       substed = (unsigned char *) xmalloc (substed_alloc_size + 1);
2661       substed_len = 0;
2662
2663       /* Go thru NEWTEXT, producing the actual text to insert in
2664          SUBSTED while adjusting multibyteness to that of the current
2665          buffer.  */
2666
2667       for (pos_byte = 0, pos = 0; pos_byte < length;)
2668         {
2669           unsigned char str[MAX_MULTIBYTE_LENGTH];
2670           unsigned char *add_stuff = NULL;
2671           int add_len = 0;
2672           int idx = -1;
2673
2674           if (str_multibyte)
2675             {
2676               FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte);
2677               if (!buf_multibyte)
2678                 c = multibyte_char_to_unibyte (c, rev_tbl);
2679             }
2680           else
2681             {
2682               /* Note that we don't have to increment POS.  */
2683               c = SREF (newtext, pos_byte++);
2684               if (buf_multibyte)
2685                 c = unibyte_char_to_multibyte (c);
2686             }
2687
2688           /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2689              or set IDX to a match index, which means put that part
2690              of the buffer text into SUBSTED.  */
2691
2692           if (c == '\\')
2693             {
2694               really_changed = 1;
2695
2696               if (str_multibyte)
2697                 {
2698                   FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext,
2699                                                       pos, pos_byte);
2700                   if (!buf_multibyte && !SINGLE_BYTE_CHAR_P (c))
2701                     c = multibyte_char_to_unibyte (c, rev_tbl);
2702                 }
2703               else
2704                 {
2705                   c = SREF (newtext, pos_byte++);
2706                   if (buf_multibyte)
2707                     c = unibyte_char_to_multibyte (c);
2708                 }
2709
2710               if (c == '&')
2711                 idx = sub;
2712               else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
2713                 {
2714                   if (search_regs.start[c - '0'] >= 1)
2715                     idx = c - '0';
2716                 }
2717               else if (c == '\\')
2718                 add_len = 1, add_stuff = "\\";
2719               else
2720                 {
2721                   xfree (substed);
2722                   error ("Invalid use of `\\' in replacement text");
2723                 }
2724             }
2725           else
2726             {
2727               add_len = CHAR_STRING (c, str);
2728               add_stuff = str;
2729             }
2730
2731           /* If we want to copy part of a previous match,
2732              set up ADD_STUFF and ADD_LEN to point to it.  */
2733           if (idx >= 0)
2734             {
2735               int begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
2736               add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
2737               if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
2738                 move_gap (search_regs.start[idx]);
2739               add_stuff = BYTE_POS_ADDR (begbyte);
2740             }
2741
2742           /* Now the stuff we want to add to SUBSTED
2743              is invariably ADD_LEN bytes starting at ADD_STUFF.  */
2744
2745           /* Make sure SUBSTED is big enough.  */
2746           if (substed_len + add_len >= substed_alloc_size)
2747             {
2748               substed_alloc_size = substed_len + add_len + 500;
2749               substed = (unsigned char *) xrealloc (substed,
2750                                                     substed_alloc_size + 1);
2751             }
2752
2753           /* Now add to the end of SUBSTED.  */
2754           if (add_stuff)
2755             {
2756               bcopy (add_stuff, substed + substed_len, add_len);
2757               substed_len += add_len;
2758             }
2759         }
2760
2761       if (really_changed)
2762         {
2763           if (buf_multibyte)
2764             {
2765               int nchars = multibyte_chars_in_text (substed, substed_len);
2766
2767               newtext = make_multibyte_string (substed, nchars, substed_len);
2768             }
2769           else
2770             newtext = make_unibyte_string (substed, substed_len);
2771         }
2772       xfree (substed);
2773     }
2774
2775   /* Replace the old text with the new in the cleanest possible way.  */
2776   replace_range (search_regs.start[sub], search_regs.end[sub],
2777                  newtext, 1, 0, 1);
2778   newpoint = search_regs.start[sub] + SCHARS (newtext);
2779
2780   if (case_action == all_caps)
2781     Fupcase_region (make_number (search_regs.start[sub]),
2782                     make_number (newpoint));
2783   else if (case_action == cap_initial)
2784     Fupcase_initials_region (make_number (search_regs.start[sub]),
2785                              make_number (newpoint));
2786
2787   /* Adjust search data for this change.  */
2788   {
2789     int oldend = search_regs.end[sub];
2790     int oldstart = search_regs.start[sub];
2791     int change = newpoint - search_regs.end[sub];
2792     int i;
2793
2794     for (i = 0; i < search_regs.num_regs; i++)
2795       {
2796         if (search_regs.start[i] >= oldend)
2797           search_regs.start[i] += change;
2798         else if (search_regs.start[i] > oldstart)
2799           search_regs.start[i] = oldstart;
2800         if (search_regs.end[i] >= oldend)
2801           search_regs.end[i] += change;
2802         else if (search_regs.end[i] > oldstart)
2803           search_regs.end[i] = oldstart;
2804       }
2805   }
2806
2807   /* Put point back where it was in the text.  */
2808   if (opoint <= 0)
2809     TEMP_SET_PT (opoint + ZV);
2810   else
2811     TEMP_SET_PT (opoint);
2812
2813   /* Now move point "officially" to the start of the inserted replacement.  */
2814   move_if_not_intangible (newpoint);
2815
2816   return Qnil;
2817 }
2818 \f
2819 static Lisp_Object
2820 match_limit (num, beginningp)
2821      Lisp_Object num;
2822      int beginningp;
2823 {
2824   register int n;
2825
2826   CHECK_NUMBER (num);
2827   n = XINT (num);
2828   if (n < 0)
2829     args_out_of_range (num, make_number (0));
2830   if (search_regs.num_regs <= 0)
2831     error ("No match data, because no search succeeded");
2832   if (n >= search_regs.num_regs
2833       || search_regs.start[n] < 0)
2834     return Qnil;
2835   return (make_number ((beginningp) ? search_regs.start[n]
2836                                     : search_regs.end[n]));
2837 }
2838
2839 DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
2840        doc: /* Return position of start of text matched by last search.
2841 SUBEXP, a number, specifies which parenthesized expression in the last
2842   regexp.
2843 Value is nil if SUBEXPth pair didn't match, or there were less than
2844   SUBEXP pairs.
2845 Zero means the entire text matched by the whole regexp or whole string.  */)
2846      (subexp)
2847      Lisp_Object subexp;
2848 {
2849   return match_limit (subexp, 1);
2850 }
2851
2852 DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
2853        doc: /* Return position of end of text matched by last search.
2854 SUBEXP, a number, specifies which parenthesized expression in the last
2855   regexp.
2856 Value is nil if SUBEXPth pair didn't match, or there were less than
2857   SUBEXP pairs.
2858 Zero means the entire text matched by the whole regexp or whole string.  */)
2859      (subexp)
2860      Lisp_Object subexp;
2861 {
2862   return match_limit (subexp, 0);
2863 }
2864
2865 DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 3, 0,
2866        doc: /* Return a list containing all info on what the last search matched.
2867 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2868 All the elements are markers or nil (nil if the Nth pair didn't match)
2869 if the last match was on a buffer; integers or nil if a string was matched.
2870 Use `store-match-data' to reinstate the data in this list.
2871
2872 If INTEGERS (the optional first argument) is non-nil, always use
2873 integers \(rather than markers) to represent buffer positions.  In
2874 this case, and if the last match was in a buffer, the buffer will get
2875 stored as one additional element at the end of the list.
2876
2877 If REUSE is a list, reuse it as part of the value.  If REUSE is long
2878 enough to hold all the values, and if INTEGERS is non-nil, no consing
2879 is done.
2880
2881 If optional third arg RESEAT is non-nil, any previous markers on the
2882 REUSE list will be modified to point to nowhere.
2883
2884 Return value is undefined if the last search failed.  */)
2885   (integers, reuse, reseat)
2886      Lisp_Object integers, reuse, reseat;
2887 {
2888   Lisp_Object tail, prev;
2889   Lisp_Object *data;
2890   int i, len;
2891
2892   if (!NILP (reseat))
2893     for (tail = reuse; CONSP (tail); tail = XCDR (tail))
2894       if (MARKERP (XCAR (tail)))
2895         {
2896           unchain_marker (XMARKER (XCAR (tail)));
2897           XSETCAR (tail, Qnil);
2898         }
2899
2900   if (NILP (last_thing_searched))
2901     return Qnil;
2902
2903   prev = Qnil;
2904
2905   data = (Lisp_Object *) alloca ((2 * search_regs.num_regs + 1)
2906                                  * sizeof (Lisp_Object));
2907
2908   len = 0;
2909   for (i = 0; i < search_regs.num_regs; i++)
2910     {
2911       int start = search_regs.start[i];
2912       if (start >= 0)
2913         {
2914           if (EQ (last_thing_searched, Qt)
2915               || ! NILP (integers))
2916             {
2917               XSETFASTINT (data[2 * i], start);
2918               XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
2919             }
2920           else if (BUFFERP (last_thing_searched))
2921             {
2922               data[2 * i] = Fmake_marker ();
2923               Fset_marker (data[2 * i],
2924                            make_number (start),
2925                            last_thing_searched);
2926               data[2 * i + 1] = Fmake_marker ();
2927               Fset_marker (data[2 * i + 1],
2928                            make_number (search_regs.end[i]),
2929                            last_thing_searched);
2930             }
2931           else
2932             /* last_thing_searched must always be Qt, a buffer, or Qnil.  */
2933             abort ();
2934
2935           len = 2 * i + 2;
2936         }
2937       else
2938         data[2 * i] = data[2 * i + 1] = Qnil;
2939     }
2940
2941   if (BUFFERP (last_thing_searched) && !NILP (integers))
2942     {
2943       data[len] = last_thing_searched;
2944       len++;
2945     }
2946
2947   /* If REUSE is not usable, cons up the values and return them.  */
2948   if (! CONSP (reuse))
2949     return Flist (len, data);
2950
2951   /* If REUSE is a list, store as many value elements as will fit
2952      into the elements of REUSE.  */
2953   for (i = 0, tail = reuse; CONSP (tail);
2954        i++, tail = XCDR (tail))
2955     {
2956       if (i < len)
2957         XSETCAR (tail, data[i]);
2958       else
2959         XSETCAR (tail, Qnil);
2960       prev = tail;
2961     }
2962
2963   /* If we couldn't fit all value elements into REUSE,
2964      cons up the rest of them and add them to the end of REUSE.  */
2965   if (i < len)
2966     XSETCDR (prev, Flist (len - i, data + i));
2967
2968   return reuse;
2969 }
2970
2971 /* Internal usage only:
2972    If RESEAT is `evaporate', put the markers back on the free list
2973    immediately.  No other references to the markers must exist in this case,
2974    so it is used only internally on the unwind stack and save-match-data from
2975    Lisp.  */
2976
2977 DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 2, 0,
2978        doc: /* Set internal data on last search match from elements of LIST.
2979 LIST should have been created by calling `match-data' previously.
2980
2981 If optional arg RESEAT is non-nil, make markers on LIST point nowhere.  */)
2982     (list, reseat)
2983      register Lisp_Object list, reseat;
2984 {
2985   register int i;
2986   register Lisp_Object marker;
2987
2988   if (running_asynch_code)
2989     save_search_regs ();
2990
2991   CHECK_LIST (list);
2992
2993   /* Unless we find a marker with a buffer or an explicit buffer
2994      in LIST, assume that this match data came from a string.  */
2995   last_thing_searched = Qt;
2996
2997   /* Allocate registers if they don't already exist.  */
2998   {
2999     int length = XFASTINT (Flength (list)) / 2;
3000
3001     if (length > search_regs.num_regs)
3002       {
3003         if (search_regs.num_regs == 0)
3004           {
3005             search_regs.start
3006               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
3007             search_regs.end
3008               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
3009           }
3010         else
3011           {
3012             search_regs.start
3013               = (regoff_t *) xrealloc (search_regs.start,
3014                                        length * sizeof (regoff_t));
3015             search_regs.end
3016               = (regoff_t *) xrealloc (search_regs.end,
3017                                        length * sizeof (regoff_t));
3018           }
3019
3020         for (i = search_regs.num_regs; i < length; i++)
3021           search_regs.start[i] = -1;
3022
3023         search_regs.num_regs = length;
3024       }
3025
3026     for (i = 0; CONSP (list); i++)
3027       {
3028         marker = XCAR (list);
3029         if (BUFFERP (marker))
3030           {
3031             last_thing_searched = marker;
3032             break;
3033           }
3034         if (i >= length)
3035           break;
3036         if (NILP (marker))
3037           {
3038             search_regs.start[i] = -1;
3039             list = XCDR (list);
3040           }
3041         else
3042           {
3043             int from;
3044             Lisp_Object m;
3045
3046             m = marker;
3047             if (MARKERP (marker))
3048               {
3049                 if (XMARKER (marker)->buffer == 0)
3050                   XSETFASTINT (marker, 0);
3051                 else
3052                   XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
3053               }
3054
3055             CHECK_NUMBER_COERCE_MARKER (marker);
3056             from = XINT (marker);
3057
3058             if (!NILP (reseat) && MARKERP (m))
3059               {
3060                 if (EQ (reseat, Qevaporate))
3061                   free_marker (m);
3062                 else
3063                   unchain_marker (XMARKER (m));
3064                 XSETCAR (list, Qnil);
3065               }
3066
3067             if ((list = XCDR (list), !CONSP (list)))
3068               break;
3069
3070             m = marker = XCAR (list);
3071
3072             if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
3073               XSETFASTINT (marker, 0);
3074
3075             CHECK_NUMBER_COERCE_MARKER (marker);
3076             search_regs.start[i] = from;
3077             search_regs.end[i] = XINT (marker);
3078
3079             if (!NILP (reseat) && MARKERP (m))
3080               {
3081                 if (EQ (reseat, Qevaporate))
3082                   free_marker (m);
3083                 else
3084                   unchain_marker (XMARKER (m));
3085                 XSETCAR (list, Qnil);
3086               }
3087           }
3088         list = XCDR (list);
3089       }
3090
3091     for (; i < search_regs.num_regs; i++)
3092       search_regs.start[i] = -1;
3093   }
3094
3095   return Qnil;
3096 }
3097
3098 /* If non-zero the match data have been saved in saved_search_regs
3099    during the execution of a sentinel or filter. */
3100 static int search_regs_saved;
3101 static struct re_registers saved_search_regs;
3102 static Lisp_Object saved_last_thing_searched;
3103
3104 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
3105    if asynchronous code (filter or sentinel) is running. */
3106 static void
3107 save_search_regs ()
3108 {
3109   if (!search_regs_saved)
3110     {
3111       saved_search_regs.num_regs = search_regs.num_regs;
3112       saved_search_regs.start = search_regs.start;
3113       saved_search_regs.end = search_regs.end;
3114       saved_last_thing_searched = last_thing_searched;
3115       last_thing_searched = Qnil;
3116       search_regs.num_regs = 0;
3117       search_regs.start = 0;
3118       search_regs.end = 0;
3119
3120       search_regs_saved = 1;
3121     }
3122 }
3123
3124 /* Called upon exit from filters and sentinels. */
3125 void
3126 restore_search_regs ()
3127 {
3128   if (search_regs_saved)
3129     {
3130       if (search_regs.num_regs > 0)
3131         {
3132           xfree (search_regs.start);
3133           xfree (search_regs.end);
3134         }
3135       search_regs.num_regs = saved_search_regs.num_regs;
3136       search_regs.start = saved_search_regs.start;
3137       search_regs.end = saved_search_regs.end;
3138       last_thing_searched = saved_last_thing_searched;
3139       saved_last_thing_searched = Qnil;
3140       search_regs_saved = 0;
3141     }
3142 }
3143
3144 static Lisp_Object
3145 unwind_set_match_data (list)
3146      Lisp_Object list;
3147 {
3148   /* It is safe to free (evaporate) the markers immediately.  */
3149   return Fset_match_data (list, Qevaporate);
3150 }
3151
3152 /* Called to unwind protect the match data.  */
3153 void
3154 record_unwind_save_match_data ()
3155 {
3156   record_unwind_protect (unwind_set_match_data,
3157                          Fmatch_data (Qnil, Qnil, Qnil));
3158 }
3159
3160 /* Quote a string to inactivate reg-expr chars */
3161
3162 DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
3163        doc: /* Return a regexp string which matches exactly STRING and nothing else.  */)
3164      (string)
3165      Lisp_Object string;
3166 {
3167   register unsigned char *in, *out, *end;
3168   register unsigned char *temp;
3169   int backslashes_added = 0;
3170
3171   CHECK_STRING (string);
3172
3173   temp = (unsigned char *) alloca (SBYTES (string) * 2);
3174
3175   /* Now copy the data into the new string, inserting escapes. */
3176
3177   in = SDATA (string);
3178   end = in + SBYTES (string);
3179   out = temp;
3180
3181   for (; in != end; in++)
3182     {
3183       if (*in == '['
3184           || *in == '*' || *in == '.' || *in == '\\'
3185           || *in == '?' || *in == '+'
3186           || *in == '^' || *in == '$')
3187         *out++ = '\\', backslashes_added++;
3188       *out++ = *in;
3189     }
3190
3191   return make_specified_string (temp,
3192                                 SCHARS (string) + backslashes_added,
3193                                 out - temp,
3194                                 STRING_MULTIBYTE (string));
3195 }
3196 \f
3197 void
3198 syms_of_search ()
3199 {
3200   register int i;
3201
3202   for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
3203     {
3204       searchbufs[i].buf.allocated = 100;
3205       searchbufs[i].buf.buffer = (unsigned char *) xmalloc (100);
3206       searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
3207       searchbufs[i].regexp = Qnil;
3208       searchbufs[i].whitespace_regexp = Qnil;
3209       searchbufs[i].syntax_table = Qnil;
3210       staticpro (&searchbufs[i].regexp);
3211       staticpro (&searchbufs[i].whitespace_regexp);
3212       staticpro (&searchbufs[i].syntax_table);
3213       searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
3214     }
3215   searchbuf_head = &searchbufs[0];
3216
3217   Qsearch_failed = intern ("search-failed");
3218   staticpro (&Qsearch_failed);
3219   Qinvalid_regexp = intern ("invalid-regexp");
3220   staticpro (&Qinvalid_regexp);
3221
3222   Fput (Qsearch_failed, Qerror_conditions,
3223         Fcons (Qsearch_failed, Fcons (Qerror, Qnil)));
3224   Fput (Qsearch_failed, Qerror_message,
3225         build_string ("Search failed"));
3226
3227   Fput (Qinvalid_regexp, Qerror_conditions,
3228         Fcons (Qinvalid_regexp, Fcons (Qerror, Qnil)));
3229   Fput (Qinvalid_regexp, Qerror_message,
3230         build_string ("Invalid regexp"));
3231
3232   last_thing_searched = Qnil;
3233   staticpro (&last_thing_searched);
3234
3235   saved_last_thing_searched = Qnil;
3236   staticpro (&saved_last_thing_searched);
3237
3238   DEFVAR_LISP ("search-spaces-regexp", &Vsearch_spaces_regexp,
3239       doc: /* Regexp to substitute for bunches of spaces in regexp search.
3240 Some commands use this for user-specified regexps.
3241 Spaces that occur inside character classes or repetition operators
3242 or other such regexp constructs are not replaced with this.
3243 A value of nil (which is the normal value) means treat spaces literally.  */);
3244   Vsearch_spaces_regexp = Qnil;
3245
3246   DEFVAR_LISP ("inhibit-changing-match-data", &Vinhibit_changing_match_data,
3247       doc: /* Internal use only.
3248 If non-nil, the match data will not be changed during call to searching or
3249 matching functions, such as `looking-at', `string-match', `re-search-forward'
3250 etc.  */);
3251   Vinhibit_changing_match_data = Qnil;
3252
3253   defsubr (&Slooking_at);
3254   defsubr (&Sposix_looking_at);
3255   defsubr (&Sstring_match);
3256   defsubr (&Sposix_string_match);
3257   defsubr (&Ssearch_forward);
3258   defsubr (&Ssearch_backward);
3259   defsubr (&Sword_search_forward);
3260   defsubr (&Sword_search_backward);
3261   defsubr (&Sre_search_forward);
3262   defsubr (&Sre_search_backward);
3263   defsubr (&Sposix_search_forward);
3264   defsubr (&Sposix_search_backward);
3265   defsubr (&Sreplace_match);
3266   defsubr (&Smatch_beginning);
3267   defsubr (&Smatch_end);
3268   defsubr (&Smatch_data);
3269   defsubr (&Sset_match_data);
3270   defsubr (&Sregexp_quote);
3271 }
3272
3273 /* arch-tag: a6059d79-0552-4f14-a2cb-d379a4e3c78f
3274    (do not change this comment) */