src/search.c

   1 /* String search routines for GNU Emacs.
   2    Copyright (C) 1985, 1986, 1987, 1993, 1994, 1997, 1998, 1999, 2001, 2002,
   3                  2003, 2004, 2005, 2006, 2007, 2008
   4                  Free Software Foundation, Inc.
   5
   6 This file is part of GNU Emacs.
   7
   8 GNU Emacs is free software: you can redistribute it and/or modify
   9 it under the terms of the GNU General Public License as published by
  10 the Free Software Foundation, either version 3 of the License, or
  11 (at your option) any later version.
  12
  13 GNU Emacs is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
  20
  21
  22 #include <config.h>
  23 #include "lisp.h"
  24 #include "syntax.h"
  25 #include "category.h"
  26 #include "buffer.h"
  27 #include "character.h"
  28 #include "charset.h"
  29 #include "region-cache.h"
  30 #include "commands.h"
  31 #include "blockinput.h"
  32 #include "intervals.h"
  33
  34 #include <sys/types.h>
  35 #include "regex.h"
  36
  37 #define REGEXP_CACHE_SIZE 20
  38
  39 /* If the regexp is non-nil, then the buffer contains the compiled form
  40    of that regexp, suitable for searching.  */
  41 struct regexp_cache
  42 {
  43   struct regexp_cache *next;
  44   Lisp_Object regexp, whitespace_regexp;
  45   /* Syntax table for which the regexp applies.  We need this because
  46      of character classes.  If this is t, then the compiled pattern is valid
  47      for any syntax-table.  */
  48   Lisp_Object syntax_table;
  49   struct re_pattern_buffer buf;
  50   char fastmap[0400];
  51   /* Nonzero means regexp was compiled to do full POSIX backtracking.  */
  52   char posix;
  53 };
  54
  /* The instances of struct regexp_cache: a fixed pool of
     REGEXP_CACHE_SIZE cache slots.  */
struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];

/* The head of the linked list of cache slots; points to the most
   recently used buffer.  compile_pattern moves the slot it returns to
   the front of this list.  */
struct regexp_cache *searchbuf_head;
  60
  61
  /* Every call to re_match, etc., must pass &search_regs as the regs
     argument unless you can show it is unnecessary (i.e., if re_match
     is certainly going to be called again before region-around-match
     can be called).

     Since the registers are now dynamically allocated, we need to make
     sure not to refer to the Nth register before checking that it has
     been allocated by checking search_regs.num_regs.

     The regex code keeps track of whether it has allocated the search
     buffer using bits in the re_pattern_buffer.  This means that whenever
     you compile a new pattern, it completely forgets whether it has
     allocated any registers, and will allocate new registers the next
     time you call a searching or matching function.  Therefore, we need
     to call re_set_registers after compiling a new pattern or after
     setting the match registers, so that the regex functions will be
     able to free or re-allocate it properly.  */
static struct re_registers search_regs;

/* The buffer in which the last search was performed, or
   Qt if the last search was done in a string;
   Qnil if no searching has been done yet.  */
static Lisp_Object last_thing_searched;

/* Error condition signaled when compiling a regexp fails
   (compile_pattern_1 signals it with the compiler's message).  */

Lisp_Object Qinvalid_regexp;

/* Error condition used for failing searches.  */
Lisp_Object Qsearch_failed;

/* When this holds a string, compile_pattern_1 installs that string as
   the whitespace regexp for the duration of a pattern compilation and
   records it in the cache slot.  */
Lisp_Object Vsearch_spaces_regexp;

/* If non-nil, the match data will not be changed during call to
   searching or matching functions.  This variable is for internal use
   only.  */
Lisp_Object Vinhibit_changing_match_data;

/* Forward declarations of static functions that are used before they
   are defined.  */
static void set_search_regs ();
static void save_search_regs ();
static int simple_search ();
static int boyer_moore ();
static int search_buffer ();
static void matcher_overflow () NO_RETURN;
 106
 /* Signal the Lisp error that reports a stack overflow in the regexp
    matcher.  The matching functions in this file call it when a regex
    routine returns -2.  */
static void
matcher_overflow ()
{
  error ("Stack overflow in regexp matcher");
}
 112
 113 /* Compile a regexp and signal a Lisp error if anything goes wrong.
 114    PATTERN is the pattern to compile.
 115    CP is the place to put the result.
 116    TRANSLATE is a translation table for ignoring case, or nil for none.
 117    REGP is the structure that says where to store the "register"
 118    values that will result from matching this pattern.
 119    If it is 0, we should compile the pattern not to record any
 120    subexpression bounds.
 121    POSIX is nonzero if we want full backtracking (POSIX style)
 122    for this pattern.  0 means backtrack only enough to get a valid match.
 123
 124    The behavior also depends on Vsearch_spaces_regexp.  */
 125
 126 static void
 127 compile_pattern_1 (cp, pattern, translate, regp, posix)
 128      struct regexp_cache *cp;
 129      Lisp_Object pattern;
 130      Lisp_Object translate;
 131      struct re_registers *regp;
 132      int posix;
 133 {
 134   char *val;
 135   reg_syntax_t old;
 136
 137   cp->regexp = Qnil;
 138   cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
 139   cp->posix = posix;
 140   cp->buf.multibyte = STRING_MULTIBYTE (pattern);
 141   cp->buf.charset_unibyte = charset_unibyte;
 142   if (STRINGP (Vsearch_spaces_regexp))
 143     cp->whitespace_regexp = Vsearch_spaces_regexp;
 144   else
 145     cp->whitespace_regexp = Qnil;
 146
 147   /* rms: I think BLOCK_INPUT is not needed here any more,
 148      because regex.c defines malloc to call xmalloc.
 149      Using BLOCK_INPUT here means the debugger won't run if an error occurs.
 150      So let's turn it off.  */
 151   /*  BLOCK_INPUT;  */
 152   old = re_set_syntax (RE_SYNTAX_EMACS
 153                        | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
 154
 155   if (STRINGP (Vsearch_spaces_regexp))
 156     re_set_whitespace_regexp (SDATA (Vsearch_spaces_regexp));
 157   else
 158     re_set_whitespace_regexp (NULL);
 159
 160   val = (char *) re_compile_pattern ((char *) SDATA (pattern),
 161                                      SBYTES (pattern), &cp->buf);
 162
 163   /* If the compiled pattern hard codes some of the contents of the
 164      syntax-table, it can only be reused with *this* syntax table.  */
 165   cp->syntax_table = cp->buf.used_syntax ? current_buffer->syntax_table : Qt;
 166
 167   re_set_whitespace_regexp (NULL);
 168
 169   re_set_syntax (old);
 170   /* UNBLOCK_INPUT;  */
 171   if (val)
 172     xsignal1 (Qinvalid_regexp, build_string (val));
 173
 174   cp->regexp = Fcopy_sequence (pattern);
 175 }
 176
 177 /* Shrink each compiled regexp buffer in the cache
 178    to the size actually used right now.
 179    This is called from garbage collection.  */
 180
 181 void
 182 shrink_regexp_cache ()
 183 {
 184   struct regexp_cache *cp;
 185
 186   for (cp = searchbuf_head; cp != 0; cp = cp->next)
 187     {
 188       cp->buf.allocated = cp->buf.used;
 189       cp->buf.buffer
 190         = (unsigned char *) xrealloc (cp->buf.buffer, cp->buf.used);
 191     }
 192 }
 193
 194 /* Clear the regexp cache w.r.t. a particular syntax table,
 195    because it was changed.
 196    There is no danger of memory leak here because re_compile_pattern
 197    automagically manages the memory in each re_pattern_buffer struct,
 198    based on its `allocated' and `buffer' values.  */
 199 void
 200 clear_regexp_cache ()
 201 {
 202   int i;
 203
 204   for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
 205     /* It's tempting to compare with the syntax-table we've actually changd,
 206        but it's not sufficient because char-table inheritance mewans that
 207        modifying one syntax-table can change others at the same time.  */
 208     if (!EQ (searchbufs[i].syntax_table, Qt))
 209       searchbufs[i].regexp = Qnil;
 210 }
 211
 212 /* Compile a regexp if necessary, but first check to see if there's one in
 213    the cache.
 214    PATTERN is the pattern to compile.
 215    TRANSLATE is a translation table for ignoring case, or nil for none.
 216    REGP is the structure that says where to store the "register"
 217    values that will result from matching this pattern.
 218    If it is 0, we should compile the pattern not to record any
 219    subexpression bounds.
 220    POSIX is nonzero if we want full backtracking (POSIX style)
 221    for this pattern.  0 means backtrack only enough to get a valid match.  */
 222
 223 struct re_pattern_buffer *
 224 compile_pattern (pattern, regp, translate, posix, multibyte)
 225      Lisp_Object pattern;
 226      struct re_registers *regp;
 227      Lisp_Object translate;
 228      int posix, multibyte;
 229 {
 230   struct regexp_cache *cp, **cpp;
 231
 232   for (cpp = &searchbuf_head; ; cpp = &cp->next)
 233     {
 234       cp = *cpp;
 235       /* Entries are initialized to nil, and may be set to nil by
 236          compile_pattern_1 if the pattern isn't valid.  Don't apply
 237          string accessors in those cases.  However, compile_pattern_1
 238          is only applied to the cache entry we pick here to reuse.  So
 239          nil should never appear before a non-nil entry.  */
 240       if (NILP (cp->regexp))
 241         goto compile_it;
 242       if (SCHARS (cp->regexp) == SCHARS (pattern)
 243           && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
 244           && !NILP (Fstring_equal (cp->regexp, pattern))
 245           && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
 246           && cp->posix == posix
 247           && (EQ (cp->syntax_table, Qt)
 248               || EQ (cp->syntax_table, current_buffer->syntax_table))
 249           && !NILP (Fequal (cp->whitespace_regexp, Vsearch_spaces_regexp))
 250           && cp->buf.charset_unibyte == charset_unibyte)
 251         break;
 252
 253       /* If we're at the end of the cache, compile into the nil cell
 254          we found, or the last (least recently used) cell with a
 255          string value.  */
 256       if (cp->next == 0)
 257         {
 258         compile_it:
 259           compile_pattern_1 (cp, pattern, translate, regp, posix);
 260           break;
 261         }
 262     }
 263
 264   /* When we get here, cp (aka *cpp) contains the compiled pattern,
 265      either because we found it in the cache or because we just compiled it.
 266      Move it to the front of the queue to mark it as most recently used.  */
 267   *cpp = cp->next;
 268   cp->next = searchbuf_head;
 269   searchbuf_head = cp;
 270
 271   /* Advise the searching functions about the space we have allocated
 272      for register data.  */
 273   if (regp)
 274     re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
 275
 276   /* The compiled pattern can be used both for mulitbyte and unibyte
 277      target.  But, we have to tell which the pattern is used for. */
 278   cp->buf.target_multibyte = multibyte;
 279
 280   return &cp->buf;
 281 }
 282
 283 \f
 284 static Lisp_Object
 285 looking_at_1 (string, posix)
 286      Lisp_Object string;
 287      int posix;
 288 {
 289   Lisp_Object val;
 290   unsigned char *p1, *p2;
 291   int s1, s2;
 292   register int i;
 293   struct re_pattern_buffer *bufp;
 294
 295   if (running_asynch_code)
 296     save_search_regs ();
 297
 298   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
 299   XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
 300     = current_buffer->case_eqv_table;
 301
 302   CHECK_STRING (string);
 303   bufp = compile_pattern (string,
 304                           (NILP (Vinhibit_changing_match_data)
 305                            ? &search_regs : NULL),
 306                           (!NILP (current_buffer->case_fold_search)
 307                            ? current_buffer->case_canon_table : Qnil),
 308                           posix,
 309                           !NILP (current_buffer->enable_multibyte_characters));
 310
 311   immediate_quit = 1;
 312   QUIT;                 /* Do a pending quit right away, to avoid paradoxical behavior */
 313
 314   /* Get pointers and sizes of the two strings
 315      that make up the visible portion of the buffer. */
 316
 317   p1 = BEGV_ADDR;
 318   s1 = GPT_BYTE - BEGV_BYTE;
 319   p2 = GAP_END_ADDR;
 320   s2 = ZV_BYTE - GPT_BYTE;
 321   if (s1 < 0)
 322     {
 323       p2 = p1;
 324       s2 = ZV_BYTE - BEGV_BYTE;
 325       s1 = 0;
 326     }
 327   if (s2 < 0)
 328     {
 329       s1 = ZV_BYTE - BEGV_BYTE;
 330       s2 = 0;
 331     }
 332
 333   re_match_object = Qnil;
 334
 335   i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
 336                   PT_BYTE - BEGV_BYTE,
 337                   (NILP (Vinhibit_changing_match_data)
 338                    ? &search_regs : NULL),
 339                   ZV_BYTE - BEGV_BYTE);
 340   immediate_quit = 0;
 341
 342   if (i == -2)
 343     matcher_overflow ();
 344
 345   val = (0 <= i ? Qt : Qnil);
 346   if (NILP (Vinhibit_changing_match_data) && i >= 0)
 347     for (i = 0; i < search_regs.num_regs; i++)
 348       if (search_regs.start[i] >= 0)
 349         {
 350           search_regs.start[i]
 351             = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
 352           search_regs.end[i]
 353             = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
 354         }
 355
 356   /* Set last_thing_searched only when match data is changed.  */
 357   if (NILP (Vinhibit_changing_match_data))
 358     XSETBUFFER (last_thing_searched, current_buffer);
 359
 360   return val;
 361 }
 362
 363 DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
 364        doc: /* Return t if text after point matches regular expression REGEXP.
 365 This function modifies the match data that `match-beginning',
 366 `match-end' and `match-data' access; save and restore the match
 367 data if you want to preserve them.  */)
 368      (regexp)
 369      Lisp_Object regexp;
 370 {
 371   return looking_at_1 (regexp, 0);
 372 }
 373
 374 DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
 375        doc: /* Return t if text after point matches regular expression REGEXP.
 376 Find the longest match, in accord with Posix regular expression rules.
 377 This function modifies the match data that `match-beginning',
 378 `match-end' and `match-data' access; save and restore the match
 379 data if you want to preserve them.  */)
 380      (regexp)
 381      Lisp_Object regexp;
 382 {
 383   return looking_at_1 (regexp, 1);
 384 }
 385 \f
 386 static Lisp_Object
 387 string_match_1 (regexp, string, start, posix)
 388      Lisp_Object regexp, string, start;
 389      int posix;
 390 {
 391   int val;
 392   struct re_pattern_buffer *bufp;
 393   int pos, pos_byte;
 394   int i;
 395
 396   if (running_asynch_code)
 397     save_search_regs ();
 398
 399   CHECK_STRING (regexp);
 400   CHECK_STRING (string);
 401
 402   if (NILP (start))
 403     pos = 0, pos_byte = 0;
 404   else
 405     {
 406       int len = SCHARS (string);
 407
 408       CHECK_NUMBER (start);
 409       pos = XINT (start);
 410       if (pos < 0 && -pos <= len)
 411         pos = len + pos;
 412       else if (0 > pos || pos > len)
 413         args_out_of_range (string, start);
 414       pos_byte = string_char_to_byte (string, pos);
 415     }
 416
 417   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
 418   XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
 419     = current_buffer->case_eqv_table;
 420
 421   bufp = compile_pattern (regexp,
 422                           (NILP (Vinhibit_changing_match_data)
 423                            ? &search_regs : NULL),
 424                           (!NILP (current_buffer->case_fold_search)
 425                            ? current_buffer->case_canon_table : Qnil),
 426                           posix,
 427                           STRING_MULTIBYTE (string));
 428   immediate_quit = 1;
 429   re_match_object = string;
 430
 431   val = re_search (bufp, (char *) SDATA (string),
 432                    SBYTES (string), pos_byte,
 433                    SBYTES (string) - pos_byte,
 434                    (NILP (Vinhibit_changing_match_data)
 435                     ? &search_regs : NULL));
 436   immediate_quit = 0;
 437
 438   /* Set last_thing_searched only when match data is changed.  */
 439   if (NILP (Vinhibit_changing_match_data))
 440     last_thing_searched = Qt;
 441
 442   if (val == -2)
 443     matcher_overflow ();
 444   if (val < 0) return Qnil;
 445
 446   if (NILP (Vinhibit_changing_match_data))
 447     for (i = 0; i < search_regs.num_regs; i++)
 448       if (search_regs.start[i] >= 0)
 449         {
 450           search_regs.start[i]
 451             = string_byte_to_char (string, search_regs.start[i]);
 452           search_regs.end[i]
 453             = string_byte_to_char (string, search_regs.end[i]);
 454         }
 455
 456   return make_number (string_byte_to_char (string, val));
 457 }
 458
 459 DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
 460        doc: /* Return index of start of first match for REGEXP in STRING, or nil.
 461 Matching ignores case if `case-fold-search' is non-nil.
 462 If third arg START is non-nil, start search at that index in STRING.
 463 For index of first char beyond the match, do (match-end 0).
 464 `match-end' and `match-beginning' also give indices of substrings
 465 matched by parenthesis constructs in the pattern.
 466
 467 You can use the function `match-string' to extract the substrings
 468 matched by the parenthesis constructions in REGEXP. */)
 469      (regexp, string, start)
 470      Lisp_Object regexp, string, start;
 471 {
 472   return string_match_1 (regexp, string, start, 0);
 473 }
 474
 475 DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
 476        doc: /* Return index of start of first match for REGEXP in STRING, or nil.
 477 Find the longest match, in accord with Posix regular expression rules.
 478 Case is ignored if `case-fold-search' is non-nil in the current buffer.
 479 If third arg START is non-nil, start search at that index in STRING.
 480 For index of first char beyond the match, do (match-end 0).
 481 `match-end' and `match-beginning' also give indices of substrings
 482 matched by parenthesis constructs in the pattern.  */)
 483      (regexp, string, start)
 484      Lisp_Object regexp, string, start;
 485 {
 486   return string_match_1 (regexp, string, start, 1);
 487 }
 488
 489 /* Match REGEXP against STRING, searching all of STRING,
 490    and return the index of the match, or negative on failure.
 491    This does not clobber the match data.  */
 492
 493 int
 494 fast_string_match (regexp, string)
 495      Lisp_Object regexp, string;
 496 {
 497   int val;
 498   struct re_pattern_buffer *bufp;
 499
 500   bufp = compile_pattern (regexp, 0, Qnil,
 501                           0, STRING_MULTIBYTE (string));
 502   immediate_quit = 1;
 503   re_match_object = string;
 504
 505   val = re_search (bufp, (char *) SDATA (string),
 506                    SBYTES (string), 0,
 507                    SBYTES (string), 0);
 508   immediate_quit = 0;
 509   return val;
 510 }
 511
 512 /* Match REGEXP against STRING, searching all of STRING ignoring case,
 513    and return the index of the match, or negative on failure.
 514    This does not clobber the match data.
 515    We assume that STRING contains single-byte characters.  */
 516
 517 extern Lisp_Object Vascii_downcase_table;
 518
 519 int
 520 fast_c_string_match_ignore_case (regexp, string)
 521      Lisp_Object regexp;
 522      const char *string;
 523 {
 524   int val;
 525   struct re_pattern_buffer *bufp;
 526   int len = strlen (string);
 527
 528   regexp = string_make_unibyte (regexp);
 529   re_match_object = Qt;
 530   bufp = compile_pattern (regexp, 0,
 531                           Vascii_canon_table, 0,
 532                           0);
 533   immediate_quit = 1;
 534   val = re_search (bufp, string, len, 0, len, 0);
 535   immediate_quit = 0;
 536   return val;
 537 }
 538
 539 /* Like fast_string_match but ignore case.  */
 540
 541 int
 542 fast_string_match_ignore_case (regexp, string)
 543      Lisp_Object regexp, string;
 544 {
 545   int val;
 546   struct re_pattern_buffer *bufp;
 547
 548   bufp = compile_pattern (regexp, 0, Vascii_canon_table,
 549                           0, STRING_MULTIBYTE (string));
 550   immediate_quit = 1;
 551   re_match_object = string;
 552
 553   val = re_search (bufp, (char *) SDATA (string),
 554                    SBYTES (string), 0,
 555                    SBYTES (string), 0);
 556   immediate_quit = 0;
 557   return val;
 558 }
 559 \f
 560 /* The newline cache: remembering which sections of text have no newlines.  */
 561
 562 /* If the user has requested newline caching, make sure it's on.
 563    Otherwise, make sure it's off.
 564    This is our cheezy way of associating an action with the change of
 565    state of a buffer-local variable.  */
 566 static void
 567 newline_cache_on_off (buf)
 568      struct buffer *buf;
 569 {
 570   if (NILP (buf->cache_long_line_scans))
 571     {
 572       /* It should be off.  */
 573       if (buf->newline_cache)
 574         {
 575           free_region_cache (buf->newline_cache);
 576           buf->newline_cache = 0;
 577         }
 578     }
 579   else
 580     {
 581       /* It should be on.  */
 582       if (buf->newline_cache == 0)
 583         buf->newline_cache = new_region_cache ();
 584     }
 585 }
 586
 587 \f
 588 /* Search for COUNT instances of the character TARGET between START and END.
 589
 590    If COUNT is positive, search forwards; END must be >= START.
 591    If COUNT is negative, search backwards for the -COUNTth instance;
 592       END must be <= START.
 593    If COUNT is zero, do anything you please; run rogue, for all I care.
 594
 595    If END is zero, use BEGV or ZV instead, as appropriate for the
 596    direction indicated by COUNT.
 597
 598    If we find COUNT instances, set *SHORTAGE to zero, and return the
 599    position past the COUNTth match.  Note that for reverse motion
 600    this is not the same as the usual convention for Emacs motion commands.
 601
 602    If we don't find COUNT instances before reaching END, set *SHORTAGE
 603    to the number of TARGETs left unfound, and return END.
 604
 605    If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
 606    except when inside redisplay.  */
 607
 608 int
 609 scan_buffer (target, start, end, count, shortage, allow_quit)
 610      register int target;
 611      int start, end;
 612      int count;
 613      int *shortage;
 614      int allow_quit;
 615 {
 616   struct region_cache *newline_cache;
 617   int direction;
 618
 619   if (count > 0)
 620     {
 621       direction = 1;
 622       if (! end) end = ZV;
 623     }
 624   else
 625     {
 626       direction = -1;
 627       if (! end) end = BEGV;
 628     }
 629
 630   newline_cache_on_off (current_buffer);
 631   newline_cache = current_buffer->newline_cache;
 632
 633   if (shortage != 0)
 634     *shortage = 0;
 635
 636   immediate_quit = allow_quit;
 637
 638   if (count > 0)
 639     while (start != end)
 640       {
 641         /* Our innermost scanning loop is very simple; it doesn't know
 642            about gaps, buffer ends, or the newline cache.  ceiling is
 643            the position of the last character before the next such
 644            obstacle --- the last character the dumb search loop should
 645            examine.  */
 646         int ceiling_byte = CHAR_TO_BYTE (end) - 1;
 647         int start_byte = CHAR_TO_BYTE (start);
 648         int tem;
 649
 650         /* If we're looking for a newline, consult the newline cache
 651            to see where we can avoid some scanning.  */
 652         if (target == '\n' && newline_cache)
 653           {
 654             int next_change;
 655             immediate_quit = 0;
 656             while (region_cache_forward
 657                    (current_buffer, newline_cache, start_byte, &next_change))
 658               start_byte = next_change;
 659             immediate_quit = allow_quit;
 660
 661             /* START should never be after END.  */
 662             if (start_byte > ceiling_byte)
 663               start_byte = ceiling_byte;
 664
 665             /* Now the text after start is an unknown region, and
 666                next_change is the position of the next known region. */
 667             ceiling_byte = min (next_change - 1, ceiling_byte);
 668           }
 669
 670         /* The dumb loop can only scan text stored in contiguous
 671            bytes. BUFFER_CEILING_OF returns the last character
 672            position that is contiguous, so the ceiling is the
 673            position after that.  */
 674         tem = BUFFER_CEILING_OF (start_byte);
 675         ceiling_byte = min (tem, ceiling_byte);
 676
 677         {
 678           /* The termination address of the dumb loop.  */
 679           register unsigned char *ceiling_addr
 680             = BYTE_POS_ADDR (ceiling_byte) + 1;
 681           register unsigned char *cursor
 682             = BYTE_POS_ADDR (start_byte);
 683           unsigned char *base = cursor;
 684
 685           while (cursor < ceiling_addr)
 686             {
 687               unsigned char *scan_start = cursor;
 688
 689               /* The dumb loop.  */
 690               while (*cursor != target && ++cursor < ceiling_addr)
 691                 ;
 692
 693               /* If we're looking for newlines, cache the fact that
 694                  the region from start to cursor is free of them. */
 695               if (target == '\n' && newline_cache)
 696                 know_region_cache (current_buffer, newline_cache,
 697                                    start_byte + scan_start - base,
 698                                    start_byte + cursor - base);
 699
 700               /* Did we find the target character?  */
 701               if (cursor < ceiling_addr)
 702                 {
 703                   if (--count == 0)
 704                     {
 705                       immediate_quit = 0;
 706                       return BYTE_TO_CHAR (start_byte + cursor - base + 1);
 707                     }
 708                   cursor++;
 709                 }
 710             }
 711
 712           start = BYTE_TO_CHAR (start_byte + cursor - base);
 713         }
 714       }
 715   else
 716     while (start > end)
 717       {
 718         /* The last character to check before the next obstacle.  */
 719         int ceiling_byte = CHAR_TO_BYTE (end);
 720         int start_byte = CHAR_TO_BYTE (start);
 721         int tem;
 722
 723         /* Consult the newline cache, if appropriate.  */
 724         if (target == '\n' && newline_cache)
 725           {
 726             int next_change;
 727             immediate_quit = 0;
 728             while (region_cache_backward
 729                    (current_buffer, newline_cache, start_byte, &next_change))
 730               start_byte = next_change;
 731             immediate_quit = allow_quit;
 732
 733             /* Start should never be at or before end.  */
 734             if (start_byte <= ceiling_byte)
 735               start_byte = ceiling_byte + 1;
 736
 737             /* Now the text before start is an unknown region, and
 738                next_change is the position of the next known region. */
 739             ceiling_byte = max (next_change, ceiling_byte);
 740           }
 741
 742         /* Stop scanning before the gap.  */
 743         tem = BUFFER_FLOOR_OF (start_byte - 1);
 744         ceiling_byte = max (tem, ceiling_byte);
 745
 746         {
 747           /* The termination address of the dumb loop.  */
 748           register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
 749           register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
 750           unsigned char *base = cursor;
 751
 752           while (cursor >= ceiling_addr)
 753             {
 754               unsigned char *scan_start = cursor;
 755
 756               while (*cursor != target && --cursor >= ceiling_addr)
 757                 ;
 758
 759               /* If we're looking for newlines, cache the fact that
 760                  the region from after the cursor to start is free of them.  */
 761               if (target == '\n' && newline_cache)
 762                 know_region_cache (current_buffer, newline_cache,
 763                                    start_byte + cursor - base,
 764                                    start_byte + scan_start - base);
 765
 766               /* Did we find the target character?  */
 767               if (cursor >= ceiling_addr)
 768                 {
 769                   if (++count >= 0)
 770                     {
 771                       immediate_quit = 0;
 772                       return BYTE_TO_CHAR (start_byte + cursor - base);
 773                     }
 774                   cursor--;
 775                 }
 776             }
 777
 778           start = BYTE_TO_CHAR (start_byte + cursor - base);
 779         }
 780       }
 781
 782   immediate_quit = 0;
 783   if (shortage != 0)
 784     *shortage = count * direction;
 785   return start;
 786 }
 787 \f
 788 /* Search for COUNT instances of a line boundary, which means either a
 789    newline or (if selective display enabled) a carriage return.
 790    Start at START.  If COUNT is negative, search backwards.
 791
 792    We report the resulting position by calling TEMP_SET_PT_BOTH.
 793
 794    If we find COUNT instances. we position after (always after,
 795    even if scanning backwards) the COUNTth match, and return 0.
 796
 797    If we don't find COUNT instances before reaching the end of the
 798    buffer (or the beginning, if scanning backwards), we return
 799    the number of line boundaries left unfound, and position at
 800    the limit we bumped up against.
 801
 802    If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
 803    except in special cases.  */
 804
     /* Scan the current buffer for newline characters, starting at START
        and going towards LIMIT/LIMIT_BYTE: forward when COUNT is
        positive, backward otherwise.  The START_BYTE argument is
        overwritten with CHAR_TO_BYTE (START) before scanning begins.

        When the COUNTth newline is found, point is set (with
        TEMP_SET_PT_BOTH) just after it and 0 is returned.  Otherwise
        point is set to LIMIT/LIMIT_BYTE and the remaining count,
        multiplied by the scan direction (1 or -1), is returned.

        If ALLOW_QUIT is nonzero, immediate_quit is incremented for the
        duration of the scan; every return path restores the value it
        had on entry.  */
 805 int
 806 scan_newline (start, start_byte, limit, limit_byte, count, allow_quit)
 807      int start, start_byte;
 808      int limit, limit_byte;
 809      register int count;
 810      int allow_quit;
 811 {
       /* 1 for a forward scan (COUNT > 0), -1 otherwise.  */
 812   int direction = ((count > 0) ? 1 : -1);
 813
       /* CURSOR walks over the buffer text.  BASE is where CURSOR started
          in the current stretch, so CURSOR - BASE is the signed distance
          moved so far.  */
 814   register unsigned char *cursor;
 815   unsigned char *base;
 816
       /* CEILING is the last byte position examined in the current
          stretch; CEILING_ADDR is the address at which CURSOR must stop.  */
 817   register int ceiling;
 818   register unsigned char *ceiling_addr;
 819
 820   int old_immediate_quit = immediate_quit;
 821
 822   /* The code that follows is like scan_buffer
 823      but the only character it looks for is newline.  */
 824
 825   if (allow_quit)
 826     immediate_quit++;
 827
 828   start_byte = CHAR_TO_BYTE (start);
 829
 830   if (count > 0)
 831     {
           /* Forward scan, one stretch of text per iteration.  */
 832       while (start_byte < limit_byte)
 833         {
               /* Examine up to whatever BUFFER_CEILING_OF reports for
                  START_BYTE, but never beyond LIMIT_BYTE - 1.  */
 834           ceiling =  BUFFER_CEILING_OF (start_byte);
 835           ceiling = min (limit_byte - 1, ceiling);
               /* One past the address of the last byte to examine.  */
 836           ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
 837           base = (cursor = BYTE_POS_ADDR (start_byte));
 838           while (1)
 839             {
                   /* Advance until CURSOR is on a newline or has reached
                      CEILING_ADDR.  */
 840               while (*cursor != '\n' && ++cursor != ceiling_addr)
 841                 ;
 842
 843               if (cursor != ceiling_addr)
 844                 {
                       /* CURSOR is on a newline.  */
 845                   if (--count == 0)
 846                     {
 847                       immediate_quit = old_immediate_quit;
                           /* The + 1 puts the position just after the
                              newline.  */
 848                       start_byte = start_byte + cursor - base + 1;
 849                       start = BYTE_TO_CHAR (start_byte);
 850                       TEMP_SET_PT_BOTH (start, start_byte);
 851                       return 0;
 852                     }
 853                   else
                         /* More newlines wanted: step over this one, and
                            leave the stretch if that exhausts it.  */
 854                     if (++cursor == ceiling_addr)
 855                       break;
 856                 }
 857               else
 858                 break;
 859             }
               /* Account for the bytes scanned in this stretch.  */
 860           start_byte += cursor - base;
 861         }
 862     }
 863   else
 864     {
           /* Backward scan, one stretch of text per iteration.  */
 865       while (start_byte > limit_byte)
 866         {
               /* Examine down to whatever BUFFER_FLOOR_OF reports for the
                  byte before START_BYTE, but never below LIMIT_BYTE.  */
 867           ceiling = BUFFER_FLOOR_OF (start_byte - 1);
 868           ceiling = max (limit_byte, ceiling);
               /* One before the address of the lowest byte to examine.  */
 869           ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
               /* Start one past the byte before START_BYTE; the inner loop
                  decrements CURSOR before looking at it.  */
 870           base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
 871           while (1)
 872             {
 873               while (--cursor != ceiling_addr && *cursor != '\n')
 874                 ;
 875
 876               if (cursor != ceiling_addr)
 877                 {
                       /* CURSOR is on a newline; COUNT counts up to 0.  */
 878                   if (++count == 0)
 879                     {
 880                       immediate_quit = old_immediate_quit;
 881                       /* Return the position AFTER the match we found.  */
 882                       start_byte = start_byte + cursor - base + 1;
 883                       start = BYTE_TO_CHAR (start_byte);
 884                       TEMP_SET_PT_BOTH (start, start_byte);
 885                       return 0;
 886                     }
 887                 }
 888               else
 889                 break;
 890             }
 891           /* Here we add 1 to compensate for the last decrement
 892              of CURSOR, which took it past the valid range.  */
 893           start_byte += cursor - base + 1;
 894         }
 895     }
 896
       /* The limit was reached before COUNT newlines were found.  */
 897   TEMP_SET_PT_BOTH (limit, limit_byte);
 898   immediate_quit = old_immediate_quit;
 899
 900   return count * direction;
 901 }
 902
     /* Return the result of scan_buffer for newline ('\n'), called with
        starting position FROM, an end argument of 0, count CNT, a null
        pointer in place of the shortage output, and a final argument
        of 0.
        NOTE(review): going by this function's name, the final 0
        presumably tells scan_buffer not to allow quitting -- confirm
        against scan_buffer's definition.  */
 903 int
 904 find_next_newline_no_quit (from, cnt)
 905      register int from, cnt;
 906 {
 907   return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0);
 908 }
 909
 910 /* Like find_next_newline, but returns position before the newline,
 911    not after, and only searches up to TO.  This isn't just
 912    find_next_newline (...)-1, because you might hit TO.

        The scan itself is done by scan_buffer, looking for '\n' from
        FROM to TO with count CNT.  */
 913
 914 int
 915 find_before_next_newline (from, to, cnt)
 916      int from, to, cnt;
 917 {
 918   int shortage;
 919   int pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);
 920
       /* scan_buffer stores into SHORTAGE.  A value of 0 is taken to mean
          that the scan ended just after a newline rather than at TO, so
          step back over that newline.
          NOTE(review): inferred from the comment above this function --
          confirm against scan_buffer's definition.  */
 921   if (shortage == 0)
 922     pos--;
 923
 924   return pos;
 925 }
 926 \f
 927 /* Subroutines of Lisp buffer search functions. */
 928
     /* Search from point in the current buffer for STRING by calling
        search_buffer, then move point according to the outcome.

        STRING must be a string.  BOUND is nil or a number/marker giving
        the limit of the search.  COUNT is nil or a number; the repeat
        count handed to search_buffer is DIRECTION, multiplied by COUNT
        when COUNT is non-nil.  RE and POSIX are passed to search_buffer
        unchanged.

        On success point is set to the position search_buffer returned
        and that position is returned as a Lisp number.  A result <= 0
        from search_buffer is treated as failure, and then:
          - if NOERROR is nil, Qsearch_failed is signaled with STRING;
          - if NOERROR is t, nil is returned and point is not moved;
          - for any other NOERROR, point is moved to the limit and nil
            is returned.  */
 929 static Lisp_Object
 930 search_command (string, bound, noerror, count, direction, RE, posix)
 931      Lisp_Object string, bound, noerror, count;
 932      int direction;
 933      int RE;
 934      int posix;
 935 {
 936   register int np;
 937   int lim, lim_byte;
       /* Signed repeat count: its sign selects the search direction.  */
 938   int n = direction;
 939
 940   if (!NILP (count))
 941     {
 942       CHECK_NUMBER (count);
 943       n *= XINT (count);
 944     }
 945
 946   CHECK_STRING (string);
 947   if (NILP (bound))
 948     {
           /* No bound given: search to the end of the accessible region
              in the direction of travel.  */
 949       if (n > 0)
 950         lim = ZV, lim_byte = ZV_BYTE;
 951       else
 952         lim = BEGV, lim_byte = BEGV_BYTE;
 953     }
 954   else
 955     {
 956       CHECK_NUMBER_COERCE_MARKER (bound);
 957       lim = XINT (bound);
           /* The bound must not lie behind point relative to the
              direction of the search.  */
 958       if (n > 0 ? lim < PT : lim > PT)
 959         error ("Invalid search bound (wrong side of point)");
           /* Clip the bound to the accessible region.  */
 960       if (lim > ZV)
 961         lim = ZV, lim_byte = ZV_BYTE;
 962       else if (lim < BEGV)
 963         lim = BEGV, lim_byte = BEGV_BYTE;
 964       else
 965         lim_byte = CHAR_TO_BYTE (lim);
 966     }
 967
 968   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
 969   XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
 970     = current_buffer->case_eqv_table;
 971
       /* The case tables are passed as TRT and INVERSE_TRT only when the
          buffer's case_fold_search is non-nil; otherwise both are nil.  */
 972   np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
 973                       (!NILP (current_buffer->case_fold_search)
 974                        ? current_buffer->case_canon_table
 975                        : Qnil),
 976                       (!NILP (current_buffer->case_fold_search)
 977                        ? current_buffer->case_eqv_table
 978                        : Qnil),
 979                       posix);
 980   if (np <= 0)
 981     {
           /* The search failed; what happens next depends on NOERROR.  */
 982       if (NILP (noerror))
 983         xsignal1 (Qsearch_failed, string);
 984
 985       if (!EQ (noerror, Qt))
 986         {
               /* NOERROR is neither nil nor t: move point to the limit.  */
 987           if (lim < BEGV || lim > ZV)
 988             abort ();
 989           SET_PT_BOTH (lim, lim_byte);
 990           return Qnil;
 991 #if 0 /* This would be clean, but maybe programs depend on
 992          a value of nil here.  */
 993           np = lim;
 994 #endif
 995         }
 996       else
 997         return Qnil;
 998     }
 999
       /* Sanity check: the match position must be inside the accessible
          region.  */
1000   if (np < BEGV || np > ZV)
1001     abort ();
1002
1003   SET_PT (np);
1004
1005   return make_number (np);
1006 }
1007 \f
1008 /* Return 1 if REGEXP matches just one constant string, 0 otherwise.

        REGEXP is scanned byte by byte.  The answer is 0 as soon as one of
        these is seen: an unquoted `.', `*', `+', `?', `[', `^' or `$'; a
        backslash that is the last byte; or a backslash followed by one
        of the characters listed in the inner switch below.  A backslash
        followed by any other byte is skipped together with that byte.  */
1009
1010 static int
1011 trivial_regexp_p (regexp)
1012      Lisp_Object regexp;
1013 {
       /* LEN counts the bytes not yet examined; S points at the next one.  */
1014   int len = SBYTES (regexp);
1015   unsigned char *s = SDATA (regexp);
1016   while (--len >= 0)
1017     {
1018       switch (*s++)
1019         {
1020         case '.': case '*': case '+': case '?': case '[': case '^': case '$':
1021           return 0;
1022         case '\\':
               /* A backslash with nothing after it is not trivial.  */
1023           if (--len < 0)
1024             return 0;
               /* Look at, and consume, the character after the backslash.  */
1025           switch (*s++)
1026             {
1027             case '|': case '(': case ')': case '`': case '\'': case 'b':
1028             case 'B': case '<': case '>': case 'w': case 'W': case 's':
1029             case 'S': case '=': case '{': case '}': case '_':
1030             case 'c': case 'C': /* for categoryspec and notcategoryspec */
1031             case '1': case '2': case '3': case '4': case '5':
1032             case '6': case '7': case '8': case '9':
1033               return 0;
1034             }
1035         }
1036     }
1037   return 1;
1038 }
1039
1040 /* Search for the n'th occurrence of STRING in the current buffer,
1041    starting at position POS and stopping at position LIM,
1042    treating STRING as a literal string if RE is false or as
1043    a regular expression if RE is true.
1044
1045    If N is positive, searching is forward and LIM must be greater than POS.
1046    If N is negative, searching is backward and LIM must be less than POS.
1047
1048    Returns -x if x occurrences remain to be found (x > 0),
1049    or else the position at the beginning of the Nth occurrence
1050    (if searching backward) or the end (if searching forward).
1051
1052    POSIX is nonzero if we want full backtracking (POSIX style)
1053    for this pattern.  0 means backtrack only enough to get a valid match.  */
1054
     /* Store into OUT the translation of character D under table TRT.
        If TRT is nil, or the element Faref returns for D is not an
        integer, OUT is set to D itself.  D appears more than once in the
        expansion, so it should be free of side effects.  The body is
        wrapped in do ... while (0) so that a use followed by a semicolon
        forms a single statement.  */
1055 #define TRANSLATE(out, trt, d)                  \
1056 do                                              \
1057   {                                             \
1058     if (! NILP (trt))                           \
1059       {                                         \
1060         Lisp_Object temp;                       \
1061         temp = Faref (trt, make_number (d));    \
1062         if (INTEGERP (temp))                    \
1063           out = XINT (temp);                    \
1064         else                                    \
1065           out = d;                              \
1066       }                                         \
1067     else                                        \
1068       out = d;                                  \
1069   }                                             \
1070 while (0)
1071
1072 /* Only used in search_buffer, to record the end position of the match
1073    when searching regexps and SEARCH_REGS should not be changed
1074    (i.e. Vinhibit_changing_match_data is non-nil).  In that case
        search_buffer hands this structure, instead of SEARCH_REGS, to
        compile_pattern and re_search_2, and afterwards reads start[0]
        or end[0] from it to find the new position.  */
1075 static struct re_registers search_regs_1;
1076
     /* Search the current buffer for the Nth occurrence of STRING.

        POS/POS_BYTE and LIM/LIM_BYTE are the character and byte forms of
        the starting position and of the limit.  TRT, when non-nil, is the
        translation table: it is passed to compile_pattern and is used
        here to translate each pattern character.  INVERSE_TRT is the
        table used here to step through the other case-equivalents of a
        pattern character.

        A regexp search (compile_pattern plus re_search_2) is done only
        when RE is nonzero and it is not the case that STRING is a
        trivial regexp while Vsearch_spaces_regexp is nil.  Every other
        search builds a translated copy of the pattern and hands it to
        boyer_moore or simple_search.

        The value is POS itself when STRING is empty or N is 0.  For a
        regexp search it is the new position on success, or minus the
        number of occurrences still unfound on failure.  Otherwise it is
        whatever boyer_moore or simple_search returns.  */
1077 static int
1078 search_buffer (string, pos, pos_byte, lim, lim_byte, n,
1079                RE, trt, inverse_trt, posix)
1080      Lisp_Object string;
1081      int pos;
1082      int pos_byte;
1083      int lim;
1084      int lim_byte;
1085      int n;
1086      int RE;
1087      Lisp_Object trt;
1088      Lisp_Object inverse_trt;
1089      int posix;
1090 {
1091   int len = SCHARS (string);
1092   int len_byte = SBYTES (string);
1093   register int i;
1094
1095   if (running_asynch_code)
1096     save_search_regs ();
1097
1098   /* Searching 0 times means don't move.  */
1099   /* Null string is found at starting position.  */
1100   if (len == 0 || n == 0)
1101     {
1102       set_search_regs (pos_byte, 0);
1103       return pos;
1104     }
1105
       /* Use the regexp matcher, except when STRING is a trivial regexp
          and Vsearch_spaces_regexp is nil; that case is handled by the
          string-search code in the else branch.  */
1106   if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp)))
1107     {
1108       unsigned char *p1, *p2;
1109       int s1, s2;
1110       struct re_pattern_buffer *bufp;
1111
           /* Registers go to search_regs_1 instead of search_regs when
              Vinhibit_changing_match_data is non-nil.  */
1112       bufp = compile_pattern (string,
1113                               (NILP (Vinhibit_changing_match_data)
1114                                ? &search_regs : &search_regs_1),
1115                               trt, posix,
1116                               !NILP (current_buffer->enable_multibyte_characters));
1117
1118       immediate_quit = 1;       /* Quit immediately if user types ^G,
1119                                    because letting this function finish
1120                                    can take too long. */
1121       QUIT;                     /* Do a pending quit right away,
1122                                    to avoid paradoxical behavior */
1123       /* Get pointers and sizes of the two strings
1124          that make up the visible portion of the buffer. */
1125
1126       p1 = BEGV_ADDR;
1127       s1 = GPT_BYTE - BEGV_BYTE;
1128       p2 = GAP_END_ADDR;
1129       s2 = ZV_BYTE - GPT_BYTE;
           /* S1 < 0 means GPT_BYTE is before BEGV_BYTE: treat all of the
              visible text as the second string.  */
1130       if (s1 < 0)
1131         {
1132           p2 = p1;
1133           s2 = ZV_BYTE - BEGV_BYTE;
1134           s1 = 0;
1135         }
           /* S2 < 0 means GPT_BYTE is after ZV_BYTE: all of the visible
              text is in the first string.  */
1136       if (s2 < 0)
1137         {
1138           s1 = ZV_BYTE - BEGV_BYTE;
1139           s2 = 0;
1140         }
1141       re_match_object = Qnil;
1142
           /* Backward search: N counts up to 0, one match per iteration.
              The range argument LIM_BYTE - POS_BYTE is negative when the
              limit is before the starting position.  */
1143       while (n < 0)
1144         {
1145           int val;
1146           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1147                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1148                              (NILP (Vinhibit_changing_match_data)
1149                               ? &search_regs : &search_regs_1),
1150                              /* Don't allow match past current point */
1151                              pos_byte - BEGV_BYTE);
1152           if (val == -2)
1153             {
1154               matcher_overflow ();
1155             }
1156           if (val >= 0)
1157             {
1158               if (NILP (Vinhibit_changing_match_data))
1159                 {
                       /* Take the byte position of the match start before
                          the registers are converted; then turn every
                          used register from a byte offset relative to
                          BEGV_BYTE into a character position.  */
1160                   pos_byte = search_regs.start[0] + BEGV_BYTE;
1161                   for (i = 0; i < search_regs.num_regs; i++)
1162                     if (search_regs.start[i] >= 0)
1163                       {
1164                         search_regs.start[i]
1165                           = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1166                         search_regs.end[i]
1167                           = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1168                       }
1169                   XSETBUFFER (last_thing_searched, current_buffer);
1170                   /* Set pos to the new position. */
1171                   pos = search_regs.start[0];
1172                 }
1173               else
1174                 {
                       /* search_regs must stay untouched: only derive the
                          new position from search_regs_1.  */
1175                   pos_byte = search_regs_1.start[0] + BEGV_BYTE;
1176                   /* Set pos to the new position.  */
1177                   pos = BYTE_TO_CHAR (search_regs_1.start[0] + BEGV_BYTE);
1178                 }
1179             }
1180           else
1181             {
                   /* No match.  N is negative here; its magnitude is the
                      number of occurrences still to be found.  */
1182               immediate_quit = 0;
1183               return (n);
1184             }
1185           n++;
1186         }
           /* Forward search: N counts down to 0, one match per iteration;
              each search resumes at the end of the previous match.  */
1187       while (n > 0)
1188         {
1189           int val;
1190           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1191                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1192                              (NILP (Vinhibit_changing_match_data)
1193                               ? &search_regs : &search_regs_1),
1194                              lim_byte - BEGV_BYTE);
1195           if (val == -2)
1196             {
1197               matcher_overflow ();
1198             }
1199           if (val >= 0)
1200             {
1201               if (NILP (Vinhibit_changing_match_data))
1202                 {
                       /* As in the backward case, but the new position is
                          the end of the match.  */
1203                   pos_byte = search_regs.end[0] + BEGV_BYTE;
1204                   for (i = 0; i < search_regs.num_regs; i++)
1205                     if (search_regs.start[i] >= 0)
1206                       {
1207                         search_regs.start[i]
1208                           = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1209                         search_regs.end[i]
1210                           = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1211                       }
1212                   XSETBUFFER (last_thing_searched, current_buffer);
1213                   pos = search_regs.end[0];
1214                 }
1215               else
1216                 {
1217                   pos_byte = search_regs_1.end[0] + BEGV_BYTE;
1218                   pos = BYTE_TO_CHAR (search_regs_1.end[0] + BEGV_BYTE);
1219                 }
1220             }
1221           else
1222             {
                   /* No match: return minus the number of occurrences
                      still to be found.  */
1223               immediate_quit = 0;
1224               return (0 - n);
1225             }
1226           n--;
1227         }
1228       immediate_quit = 0;
1229       return (pos);
1230     }
1231   else                          /* non-RE case */
1232     {
1233       unsigned char *raw_pattern, *pat;
1234       int raw_pattern_size;
1235       int raw_pattern_size_byte;
1236       unsigned char *patbuf;
1237       int multibyte = !NILP (current_buffer->enable_multibyte_characters);
1238       unsigned char *base_pat;
1239       /* Set to positive if we find a non-ASCII char that needs
1240          translation.  Otherwise set to zero later.  */
1241       int char_base = -1;
1242       int boyer_moore_ok = 1;
1243
1244       /* MULTIBYTE says whether the text to be searched is multibyte.
1245          We must convert PATTERN to match that, or we will not really
1246          find things right.  */
1247
1248       if (multibyte == STRING_MULTIBYTE (string))
1249         {
               /* STRING already has the same multibyteness as the buffer:
                  use its data directly.  */
1250           raw_pattern = (unsigned char *) SDATA (string);
1251           raw_pattern_size = SCHARS (string);
1252           raw_pattern_size_byte = SBYTES (string);
1253         }
1254       else if (multibyte)
1255         {
               /* Unibyte STRING, multibyte buffer: make a converted copy
                  on the stack.  */
1256           raw_pattern_size = SCHARS (string);
1257           raw_pattern_size_byte
1258             = count_size_as_multibyte (SDATA (string),
1259                                        raw_pattern_size);
1260           raw_pattern = (unsigned char *) alloca (raw_pattern_size_byte + 1);
1261           copy_text (SDATA (string), raw_pattern,
1262                      SCHARS (string), 0, 1);
1263         }
1264       else
1265         {
1266           /* Converting multibyte to single-byte.
1267
1268              ??? Perhaps this conversion should be done in a special way
1269              by subtracting nonascii-insert-offset from each non-ASCII char,
1270              so that only the multibyte chars which really correspond to
1271              the chosen single-byte character set can possibly match.  */
1272           raw_pattern_size = SCHARS (string);
1273           raw_pattern_size_byte = SCHARS (string);
1274           raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
1275           copy_text (SDATA (string), raw_pattern,
1276                      SBYTES (string), 1, 0);
1277         }
1278
1279       /* Copy and optionally translate the pattern.  */
1280       len = raw_pattern_size;
1281       len_byte = raw_pattern_size_byte;
           /* Room for LEN characters of up to MAX_MULTIBYTE_LENGTH bytes
              each.  */
1282       patbuf = (unsigned char *) alloca (len * MAX_MULTIBYTE_LENGTH);
1283       pat = patbuf;
1284       base_pat = raw_pattern;
1285       if (multibyte)
1286         {
1287           /* Fill patbuf by translated characters in STRING while
1288              checking if we can use boyer-moore search.  If TRT is
1289              non-nil, we can use boyer-moore search only if TRT can be
1290              represented by the byte array of 256 elements.  For that,
1291              all non-ASCII case-equivalents of all case-sensitive
1292              characters in STRING must belong to the same charset and
1293              row.  */
1294
1295           while (--len >= 0)
1296             {
1297               unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str;
1298               int c, translated, inverse;
1299               int in_charlen, charlen;
1300
1301               /* If we got here and the RE flag is set, it's because we're
1302                  dealing with a regexp known to be trivial, so the backslash
1303                  just quotes the next character.  */
1304               if (RE && *base_pat == '\\')
1305                 {
                       /* Drop the backslash from every count and skip it.  */
1306                   len--;
1307                   raw_pattern_size--;
1308                   len_byte--;
1309                   base_pat++;
1310                 }
1311
1312               c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen);
1313
1314               if (NILP (trt))
1315                 {
                       /* No translation: copy the character's bytes as
                          they are.  */
1316                   str = base_pat;
1317                   charlen = in_charlen;
1318                 }
1319               else
1320                 {
1321                   /* Translate the character.  */
1322                   TRANSLATE (translated, trt, c);
1323                   charlen = CHAR_STRING (translated, str_base);
1324                   str = str_base;
1325
1326                   /* Check if C has any other case-equivalents.  */
1327                   TRANSLATE (inverse, inverse_trt, c);
1328                   /* If so, check if we can use boyer-moore.  */
1329                   if (c != inverse && boyer_moore_ok)
1330                     {
1331                       /* Check if all equivalents belong to the same
1332                          group of characters.  Note that the check of C
1333                          itself is done by the last iteration.  */
1334                       int this_char_base = -1;
1335
                           /* Follow INVERSE_TRT from one equivalent to the
                              next until it leads back to C.  A group is
                              identified by the character code with its low
                              six bits cleared; 0 stands for ASCII.  */
1336                       while (boyer_moore_ok)
1337                         {
1338                           if (ASCII_BYTE_P (inverse))
1339                             {
                                   /* An ASCII equivalent cannot be mixed
                                      with a non-ASCII group already seen
                                      for this character.  */
1340                               if (this_char_base > 0)
1341                                 boyer_moore_ok = 0;
1342                               else
1343                                 this_char_base = 0;
1344                             }
1345                           else if (CHAR_BYTE8_P (inverse))
1346                             /* Boyer-moore search can't handle a
1347                                translation of an eight-bit
1348                                character.  */
1349                             boyer_moore_ok = 0;
1350                           else if (this_char_base < 0)
1351                             {
                                   /* First equivalent seen for this
                                      character: its group must agree with
                                      the group recorded for the pattern so
                                      far, if there is one.  */
1352                               this_char_base = inverse & ~0x3F;
1353                               if (char_base < 0)
1354                                 char_base = this_char_base;
1355                               else if (this_char_base != char_base)
1356                                 boyer_moore_ok = 0;
1357                             }
1358                           else if ((inverse & ~0x3F) != this_char_base)
1359                             boyer_moore_ok = 0;
1360                           if (c == inverse)
1361                             break;
1362                           TRANSLATE (inverse, inverse_trt, inverse);
1363                         }
1364                     }
1365                 }
1366
1367               /* Store this character into the translated pattern.  */
1368               bcopy (str, pat, charlen);
1369               pat += charlen;
1370               base_pat += in_charlen;
1371               len_byte -= in_charlen;
1372             }
1373
1374           /* If char_base is still negative we didn't find any translated
1375              non-ASCII characters.  */
1376           if (char_base < 0)
1377             char_base = 0;
1378         }
1379       else
1380         {
1381           /* Unibyte buffer.  */
1382           char_base = 0;
1383           while (--len >= 0)
1384             {
1385               int c, translated;
1386
1387               /* If we got here and the RE flag is set, it's because we're
1388                  dealing with a regexp known to be trivial, so the backslash
1389                  just quotes the next character.  */
1390               if (RE && *base_pat == '\\')
1391                 {
1392                   len--;
1393                   raw_pattern_size--;
1394                   base_pat++;
1395                 }
1396               c = *base_pat++;
1397               TRANSLATE (translated, trt, c);
1398               *pat++ = translated;
1399             }
1400         }
1401
           /* LEN_BYTE becomes the size in bytes of the translated pattern
              and LEN its length in characters, not counting the quoting
              backslashes dropped above.  PAT and BASE_PAT are reset to
              the start of the translated pattern.  */
1402       len_byte = pat - patbuf;
1403       len = raw_pattern_size;
1404       pat = base_pat = patbuf;
1405
1406       if (boyer_moore_ok)
1407         return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
1408                             pos, pos_byte, lim, lim_byte,
1409                             char_base);
1410       else
1411         return simple_search (n, pat, len, len_byte, trt,
1412                               pos, pos_byte, lim, lim_byte);
1413     }
1414 }
1415 \f
1416 /* Do a simple string search N times for the string PAT,
1417    whose length is LEN/LEN_BYTE,
1418    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1419    TRT is the translation table.
1420
1421    Return the character position where the match is found.
1422    Otherwise, if M matches remained to be found, return -M.
1423
1424    This kind of search works regardless of what is in PAT and
1425    regardless of what is in TRT.  It is used in cases where
1426    boyer_moore cannot work.

        N > 0 searches forward and N < 0 backward.  TRT is applied only
        to the characters fetched from the buffer; the characters of PAT
        are compared as given.  When all N matches are found, the last
        one is recorded with set_search_regs before returning.  */
1427
1428 static int
1429 simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
1430      int n;
1431      unsigned char *pat;
1432      int len, len_byte;
1433      Lisp_Object trt;
1434      int pos, pos_byte;
1435      int lim, lim_byte;
1436 {
1437   int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
       /* Remember the direction now; N itself is counted to 0 below.  */
1438   int forward = n > 0;
1439   /* Number of buffer bytes matched.  Note that this may be different
1440      from len_byte in a multibyte buffer.  */
1441   int match_byte;
1442
       /* Forward search in a multibyte buffer.  */
1443   if (lim > pos && multibyte)
1444     while (n > 0)
1445       {
1446         while (1)
1447           {
1448             /* Try matching at position POS.  */
1449             int this_pos = pos;
1450             int this_pos_byte = pos_byte;
1451             int this_len = len;
1452             int this_len_byte = len_byte;
1453             unsigned char *p = pat;
                 /* Not enough room left before the limit for the whole
                    pattern: give up.  */
1454             if (pos + len > lim || pos_byte + len_byte > lim_byte)
1455               goto stop;
1456
                 /* Compare character by character; THIS_LEN is the number
                    of pattern characters still to be matched.  */
1457             while (this_len > 0)
1458               {
1459                 int charlen, buf_charlen;
1460                 int pat_ch, buf_ch;
1461
1462                 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1463                 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1464                                                  ZV_BYTE - this_pos_byte,
1465                                                  buf_charlen);
1466                 TRANSLATE (buf_ch, trt, buf_ch);
1467
1468                 if (buf_ch != pat_ch)
1469                   break;
1470
1471                 this_len_byte -= charlen;
1472                 this_len--;
1473                 p += charlen;
1474
1475                 this_pos_byte += buf_charlen;
1476                 this_pos++;
1477               }
1478
1479             if (this_len == 0)
1480               {
                     /* Whole pattern matched: record how many buffer bytes
                        it covered and move to the end of the match.  */
1481                 match_byte = this_pos_byte - pos_byte;
1482                 pos += len;
1483                 pos_byte += match_byte;
1484                 break;
1485               }
1486
                 /* No match here; try one character further on.  */
1487             INC_BOTH (pos, pos_byte);
1488           }
1489
1490         n--;
1491       }
       /* Forward search in a unibyte buffer: POS is used directly as the
          position handed to FETCH_BYTE.  */
1492   else if (lim > pos)
1493     while (n > 0)
1494       {
1495         while (1)
1496           {
1497             /* Try matching at position POS.  */
1498             int this_pos = pos;
1499             int this_len = len;
1500             unsigned char *p = pat;
1501
1502             if (pos + len > lim)
1503               goto stop;
1504
1505             while (this_len > 0)
1506               {
1507                 int pat_ch = *p++;
1508                 int buf_ch = FETCH_BYTE (this_pos);
1509                 TRANSLATE (buf_ch, trt, buf_ch);
1510
1511                 if (buf_ch != pat_ch)
1512                   break;
1513
1514                 this_len--;
1515                 this_pos++;
1516               }
1517
1518             if (this_len == 0)
1519               {
1520                 match_byte = len;
1521                 pos += len;
1522                 break;
1523               }
1524
1525             pos++;
1526           }
1527
1528         n--;
1529       }
1530   /* Backwards search.  */
1531   else if (lim < pos && multibyte)
1532     while (n < 0)
1533       {
1534         while (1)
1535           {
1536             /* Try matching at position POS.  */
                 /* The candidate match is the LEN characters that end at
                    POS.  */
1537             int this_pos = pos - len;
1538             int this_pos_byte;
1539             int this_len = len;
1540             int this_len_byte = len_byte;
1541             unsigned char *p = pat;
1542
                 /* The candidate would start before the limit: give up.  */
1543             if (this_pos < lim || (pos_byte - len_byte) < lim_byte)
1544               goto stop;
1545             this_pos_byte = CHAR_TO_BYTE (this_pos);
                 /* Number of buffer bytes spanned by the candidate.  */
1546             match_byte = pos_byte - this_pos_byte;
1547
1548             while (this_len > 0)
1549               {
1550                 int charlen, buf_charlen;
1551                 int pat_ch, buf_ch;
1552
1553                 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1554                 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1555                                                  ZV_BYTE - this_pos_byte,
1556                                                  buf_charlen);
1557                 TRANSLATE (buf_ch, trt, buf_ch);
1558
1559                 if (buf_ch != pat_ch)
1560                   break;
1561
1562                 this_len_byte -= charlen;
1563                 this_len--;
1564                 p += charlen;
1565                 this_pos_byte += buf_charlen;
1566                 this_pos++;
1567               }
1568
1569             if (this_len == 0)
1570               {
                     /* Matched: move to the start of the match.  */
1571                 pos -= len;
1572                 pos_byte -= match_byte;
1573                 break;
1574               }
1575
                 /* No match here; try one character further back.  */
1576             DEC_BOTH (pos, pos_byte);
1577           }
1578
1579         n++;
1580       }
       /* Backward search in a unibyte buffer.  */
1581   else if (lim < pos)
1582     while (n < 0)
1583       {
1584         while (1)
1585           {
1586             /* Try matching at position POS.  */
1587             int this_pos = pos - len;
1588             int this_len = len;
1589             unsigned char *p = pat;
1590
1591             if (this_pos < lim)
1592               goto stop;
1593
1594             while (this_len > 0)
1595               {
1596                 int pat_ch = *p++;
1597                 int buf_ch = FETCH_BYTE (this_pos);
1598                 TRANSLATE (buf_ch, trt, buf_ch);
1599
1600                 if (buf_ch != pat_ch)
1601                   break;
1602                 this_len--;
1603                 this_pos++;
1604               }
1605
1606             if (this_len == 0)
1607               {
1608                 match_byte = len;
1609                 pos -= len;
1610                 break;
1611               }
1612
1613             pos--;
1614           }
1615
1616         n++;
1617       }
1618
1619  stop:
       /* N == 0 means every requested match was found.  set_search_regs
          is given the byte position where the last match starts and its
          length in bytes: after a forward search POS/POS_BYTE is at the
          end of the match, so MATCH_BYTE is subtracted; after a backward
          search it is already at the start.  In a unibyte buffer POS
          serves as the byte position.  */
1620   if (n == 0)
1621     {
1622       if (forward)
1623         set_search_regs ((multibyte ? pos_byte : pos) - match_byte, match_byte);
1624       else
1625         set_search_regs (multibyte ? pos_byte : pos, match_byte);
1626
1627       return pos;
1628     }
       /* Otherwise return minus the number of matches still unfound.  */
1629   else if (n > 0)
1630     return -n;
1631   else
1632     return n;
1633 }
1634 \f
1635 /* Do Boyer-Moore search N times for the string BASE_PAT,
1636    whose length is LEN/LEN_BYTE,
1637    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1638    The sign of N says which direction we search in.
1639    TRT and INVERSE_TRT are translation tables.
1640    Characters in PAT are already translated by TRT.
1641
1642    This kind of search works if all the characters in BASE_PAT that
1643    have nontrivial translation are the same aside from the last byte.
1644    This makes it possible to translate just the last byte of a
1645    character, and do so after just a simple test of the context.
1646    CHAR_BASE is nonzero if there is such a non-ASCII character.
1647
1648    If that criterion is not satisfied, do not call this function.  */
1649
1650 static int
1651 boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
1652              pos, pos_byte, lim, lim_byte, char_base)
1653      int n;
1654      unsigned char *base_pat;
1655      int len, len_byte;
1656      Lisp_Object trt;
1657      Lisp_Object inverse_trt;
1658      int pos, pos_byte;
1659      int lim, lim_byte;
1660      int char_base;
1661 {
1662   int direction = ((n > 0) ? 1 : -1);
1663   register int dirlen;
1664   int infinity, limit, stride_for_teases = 0;
1665   register int *BM_tab;
1666   int *BM_tab_base;
1667   register unsigned char *cursor, *p_limit;
1668   register int i, j;
1669   unsigned char *pat, *pat_end;
1670   int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1671
1672   unsigned char simple_translate[0400];
1673   /* These are set to the preceding bytes of a byte to be translated
1674      if char_base is nonzero.  As the maximum byte length of a
1675      multibyte character is 5, we have to check at most four previous
1676      bytes.  */
1677   int translate_prev_byte1 = 0;
1678   int translate_prev_byte2 = 0;
1679   int translate_prev_byte3 = 0;
1680   int translate_prev_byte4 = 0;
1681
1682   BM_tab = (int *) alloca (0400 * sizeof (int));
1683
1684   /* The general approach is that we are going to maintain that we know */
1685   /* the first (closest to the present position, in whatever direction */
1686   /* we're searching) character that could possibly be the last */
1687   /* (furthest from present position) character of a valid match.  We */
1688   /* advance the state of our knowledge by looking at that character */
1689   /* and seeing whether it indeed matches the last character of the */
1690   /* pattern.  If it does, we take a closer look.  If it does not, we */
1691   /* move our pointer (to putative last characters) as far as is */
1692   /* logically possible.  This amount of movement, which I call a */
1693   /* stride, will be the length of the pattern if the actual character */
1694   /* appears nowhere in the pattern, otherwise it will be the distance */
1695   /* from the last occurrence of that character to the end of the */
1696   /* pattern. */
1697   /* As a coding trick, an enormous stride is coded into the table for */
1698   /* characters that match the last character.  This allows use of only */
1699   /* a single test, a test for having gone past the end of the */
1700   /* permissible match region, to test for both possible matches (when */
1701   /* the stride goes past the end immediately) and failure to */
1702   /* match (where you get nudged past the end one stride at a time). */
1703
1704   /* Here we make a "mickey mouse" BM table.  The stride of the search */
1705   /* is determined only by the last character of the putative match. */
1706   /* If that character does not match, we will stride the proper */
1707   /* distance to propose a match that superimposes it on the last */
1708   /* instance of a character that matches it (per trt), or misses */
1709   /* it entirely if there is none. */
1710
1711   dirlen = len_byte * direction;
1712   infinity = dirlen - (lim_byte + pos_byte + len_byte + len_byte) * direction;
1713
1714   /* Record position after the end of the pattern.  */
1715   pat_end = base_pat + len_byte;
1716   /* BASE_PAT points to a character that we start scanning from.
1717      It is the first character in a forward search,
1718      the last character in a backward search.  */
1719   if (direction < 0)
1720     base_pat = pat_end - 1;
1721
1722   BM_tab_base = BM_tab;
1723   BM_tab += 0400;
1724   j = dirlen;           /* to get it in a register */
1725   /* A character that does not appear in the pattern induces a */
1726   /* stride equal to the pattern length. */
1727   while (BM_tab_base != BM_tab)
1728     {
1729       *--BM_tab = j;
1730       *--BM_tab = j;
1731       *--BM_tab = j;
1732       *--BM_tab = j;
1733     }
1734
1735   /* We use this for translation, instead of TRT itself.
1736      We fill this in to handle the characters that actually
1737      occur in the pattern.  Others don't matter anyway!  */
1738   bzero (simple_translate, sizeof simple_translate);
1739   for (i = 0; i < 0400; i++)
1740     simple_translate[i] = i;
1741
1742   if (char_base)
1743     {
1744       /* Setup translate_prev_byte1/2/3/4 from CHAR_BASE.  Only a
1745          byte following them are the target of translation.  */
1746       unsigned char str[MAX_MULTIBYTE_LENGTH];
1747       int len = CHAR_STRING (char_base, str);
1748
1749       translate_prev_byte1 = str[len - 2];
1750       if (len > 2)
1751         {
1752           translate_prev_byte2 = str[len - 3];
1753           if (len > 3)
1754             {
1755               translate_prev_byte3 = str[len - 4];
1756               if (len > 4)
1757                 translate_prev_byte4 = str[len - 5];
1758             }
1759         }
1760     }
1761
1762   i = 0;
1763   while (i != infinity)
1764     {
1765       unsigned char *ptr = base_pat + i;
1766       i += direction;
1767       if (i == dirlen)
1768         i = infinity;
1769       if (! NILP (trt))
1770         {
1771           /* If the byte currently looking at is the last of a
1772              character to check case-equivalents, set CH to that
1773              character.  An ASCII character and a non-ASCII character
1774              matching with CHAR_BASE are to be checked.  */
1775           int ch = -1;
1776
1777           if (ASCII_BYTE_P (*ptr) || ! multibyte)
1778             ch = *ptr;
1779           else if (char_base
1780                    && ((pat_end - ptr) == 1 || CHAR_HEAD_P (ptr[1])))
1781             {
1782               unsigned char *charstart = ptr - 1;
1783
1784               while (! (CHAR_HEAD_P (*charstart)))
1785                 charstart--;
1786               ch = STRING_CHAR (charstart, ptr - charstart + 1);
1787               if (char_base != (ch & ~0x3F))
1788                 ch = -1;
1789             }
1790
1791           if (ch >= 0200)
1792             j = (ch & 0x3F) | 0200;
1793           else
1794             j = *ptr;
1795
1796           if (i == infinity)
1797             stride_for_teases = BM_tab[j];
1798
1799           BM_tab[j] = dirlen - i;
1800           /* A translation table is accompanied by its inverse -- see */
1801           /* comment following downcase_table for details */
1802           if (ch >= 0)
1803             {
1804               int starting_ch = ch;
1805               int starting_j = j;
1806
1807               while (1)
1808                 {
1809                   TRANSLATE (ch, inverse_trt, ch);
1810                   if (ch >= 0200)
1811                     j = (ch & 0x3F) | 0200;
1812                   else
1813                     j = ch;
1814
1815                   /* For all the characters that map into CH,
1816                      set up simple_translate to map the last byte
1817                      into STARTING_J.  */
1818                   simple_translate[j] = starting_j;
1819                   if (ch == starting_ch)
1820                     break;
1821                   BM_tab[j] = dirlen - i;
1822                 }
1823             }
1824         }
1825       else
1826         {
1827           j = *ptr;
1828
1829           if (i == infinity)
1830             stride_for_teases = BM_tab[j];
1831           BM_tab[j] = dirlen - i;
1832         }
1833       /* stride_for_teases tells how much to stride if we get a */
1834       /* match on the far character but are subsequently */
1835       /* disappointed, by recording what the stride would have been */
1836       /* for that character if the last character had been */
1837       /* different. */
1838     }
1839   infinity = dirlen - infinity;
1840   pos_byte += dirlen - ((direction > 0) ? direction : 0);
1841   /* loop invariant - POS_BYTE points at where last char (first
1842      char if reverse) of pattern would align in a possible match.  */
1843   while (n != 0)
1844     {
1845       int tail_end;
1846       unsigned char *tail_end_ptr;
1847
1848       /* It's been reported that some (broken) compiler thinks that
1849          Boolean expressions in an arithmetic context are unsigned.
1850          Using an explicit ?1:0 prevents this.  */
1851       if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1852           < 0)
1853         return (n * (0 - direction));
1854       /* First we do the part we can by pointers (maybe nothing) */
1855       QUIT;
1856       pat = base_pat;
1857       limit = pos_byte - dirlen + direction;
1858       if (direction > 0)
1859         {
1860           limit = BUFFER_CEILING_OF (limit);
1861           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1862              can take on without hitting edge of buffer or the gap.  */
1863           limit = min (limit, pos_byte + 20000);
1864           limit = min (limit, lim_byte - 1);
1865         }
1866       else
1867         {
1868           limit = BUFFER_FLOOR_OF (limit);
1869           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1870              can take on without hitting edge of buffer or the gap.  */
1871           limit = max (limit, pos_byte - 20000);
1872           limit = max (limit, lim_byte);
1873         }
1874       tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1875       tail_end_ptr = BYTE_POS_ADDR (tail_end);
1876
1877       if ((limit - pos_byte) * direction > 20)
1878         {
1879           unsigned char *p2;
1880
1881           p_limit = BYTE_POS_ADDR (limit);
1882           p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1883           /* In this loop, pos + cursor - p2 is the surrogate for pos */
1884           while (1)             /* use one cursor setting as long as i can */
1885             {
1886               if (direction > 0) /* worth duplicating */
1887                 {
1888                   /* Use signed comparison if appropriate
1889                      to make cursor+infinity sure to be > p_limit.
1890                      Assuming that the buffer lies in a range of addresses
1891                      that are all "positive" (as ints) or all "negative",
1892                      either kind of comparison will work as long
1893                      as we don't step by infinity.  So pick the kind
1894                      that works when we do step by infinity.  */
1895                   if ((EMACS_INT) (p_limit + infinity) > (EMACS_INT) p_limit)
1896                     while ((EMACS_INT) cursor <= (EMACS_INT) p_limit)
1897                       cursor += BM_tab[*cursor];
1898                   else
1899                     while ((EMACS_UINT) cursor <= (EMACS_UINT) p_limit)
1900                       cursor += BM_tab[*cursor];
1901                 }
1902               else
1903                 {
1904                   if ((EMACS_INT) (p_limit + infinity) < (EMACS_INT) p_limit)
1905                     while ((EMACS_INT) cursor >= (EMACS_INT) p_limit)
1906                       cursor += BM_tab[*cursor];
1907                   else
1908                     while ((EMACS_UINT) cursor >= (EMACS_UINT) p_limit)
1909                       cursor += BM_tab[*cursor];
1910                 }
1911 /* If you are here, cursor is beyond the end of the searched region. */
1912 /* This can happen if you match on the far character of the pattern, */
1913 /* because the "stride" of that character is infinity, a number able */
1914 /* to throw you well beyond the end of the search.  It can also */
1915 /* happen if you fail to match within the permitted region and would */
1916 /* otherwise try a character beyond that region */
1917               if ((cursor - p_limit) * direction <= len_byte)
1918                 break;  /* a small overrun is genuine */
1919               cursor -= infinity; /* large overrun = hit */
1920               i = dirlen - direction;
1921               if (! NILP (trt))
1922                 {
1923                   while ((i -= direction) + direction != 0)
1924                     {
1925                       int ch;
1926                       cursor -= direction;
1927                       /* Translate only the last byte of a character.  */
1928                       if (! multibyte
1929                           || ((cursor == tail_end_ptr
1930                                || CHAR_HEAD_P (cursor[1]))
1931                               && (CHAR_HEAD_P (cursor[0])
1932                                   /* Check if this is the last byte of
1933                                      a translable character.  */
1934                                   || (translate_prev_byte1 == cursor[-1]
1935                                       && (CHAR_HEAD_P (translate_prev_byte1)
1936                                           || (translate_prev_byte2 == cursor[-2]
1937                                               && (CHAR_HEAD_P (translate_prev_byte2)
1938                                                   || (translate_prev_byte3 == cursor[-3]))))))))
1939                         ch = simple_translate[*cursor];
1940                       else
1941                         ch = *cursor;
1942                       if (pat[i] != ch)
1943                         break;
1944                     }
1945                 }
1946               else
1947                 {
1948                   while ((i -= direction) + direction != 0)
1949                     {
1950                       cursor -= direction;
1951                       if (pat[i] != *cursor)
1952                         break;
1953                     }
1954                 }
1955               cursor += dirlen - i - direction; /* fix cursor */
1956               if (i + direction == 0)
1957                 {
1958                   int position, start, end;
1959
1960                   cursor -= direction;
1961
1962                   position = pos_byte + cursor - p2 + ((direction > 0)
1963                                                        ? 1 - len_byte : 0);
1964                   set_search_regs (position, len_byte);
1965
1966                   if (NILP (Vinhibit_changing_match_data))
1967                     {
1968                       start = search_regs.start[0];
1969                       end = search_regs.end[0];
1970                     }
1971                   else
1972                     /* If Vinhibit_changing_match_data is non-nil,
1973                        search_regs will not be changed.  So let's
1974                        compute start and end here.  */
1975                     {
1976                       start = BYTE_TO_CHAR (position);
1977                       end = BYTE_TO_CHAR (position + len_byte);
1978                     }
1979
1980                   if ((n -= direction) != 0)
1981                     cursor += dirlen; /* to resume search */
1982                   else
1983                     return direction > 0 ? end : start;
1984                 }
1985               else
1986                 cursor += stride_for_teases; /* <sigh> we lose -  */
1987             }
1988           pos_byte += cursor - p2;
1989         }
1990       else
1991         /* Now we'll pick up a clump that has to be done the hard */
1992         /* way because it covers a discontinuity */
1993         {
1994           limit = ((direction > 0)
1995                    ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
1996                    : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
1997           limit = ((direction > 0)
1998                    ? min (limit + len_byte, lim_byte - 1)
1999                    : max (limit - len_byte, lim_byte));
2000           /* LIMIT is now the last value POS_BYTE can have
2001              and still be valid for a possible match.  */
2002           while (1)
2003             {
2004               /* This loop can be coded for space rather than */
2005               /* speed because it will usually run only once. */
2006               /* (the reach is at most len + 21, and typically */
2007               /* does not exceed len) */
2008               while ((limit - pos_byte) * direction >= 0)
2009                 pos_byte += BM_tab[FETCH_BYTE (pos_byte)];
2010               /* now run the same tests to distinguish going off the */
2011               /* end, a match or a phony match. */
2012               if ((pos_byte - limit) * direction <= len_byte)
2013                 break;  /* ran off the end */
2014               /* Found what might be a match.
2015                  Set POS_BYTE back to last (first if reverse) pos.  */
2016               pos_byte -= infinity;
2017               i = dirlen - direction;
2018               while ((i -= direction) + direction != 0)
2019                 {
2020                   int ch;
2021                   unsigned char *ptr;
2022                   pos_byte -= direction;
2023                   ptr = BYTE_POS_ADDR (pos_byte);
2024                   /* Translate only the last byte of a character.  */
2025                   if (! multibyte
2026                       || ((ptr == tail_end_ptr
2027                            || CHAR_HEAD_P (ptr[1]))
2028                           && (CHAR_HEAD_P (ptr[0])
2029                               /* Check if this is the last byte of a
2030                                  translable character.  */
2031                               || (translate_prev_byte1 == ptr[-1]
2032                                   && (CHAR_HEAD_P (translate_prev_byte1)
2033                                       || (translate_prev_byte2 == ptr[-2]
2034                                           && (CHAR_HEAD_P (translate_prev_byte2)
2035                                               || translate_prev_byte3 == ptr[-3])))))))
2036                     ch = simple_translate[*ptr];
2037                   else
2038                     ch = *ptr;
2039                   if (pat[i] != ch)
2040                     break;
2041                 }
2042               /* Above loop has moved POS_BYTE part or all the way
2043                  back to the first pos (last pos if reverse).
2044                  Set it once again at the last (first if reverse) char.  */
2045               pos_byte += dirlen - i- direction;
2046               if (i + direction == 0)
2047                 {
2048                   int position, start, end;
2049                   pos_byte -= direction;
2050
2051                   position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
2052                   set_search_regs (position, len_byte);
2053
2054                   if (NILP (Vinhibit_changing_match_data))
2055                     {
2056                       start = search_regs.start[0];
2057                       end = search_regs.end[0];
2058                     }
2059                   else
2060                     /* If Vinhibit_changing_match_data is non-nil,
2061                        search_regs will not be changed.  So let's
2062                        compute start and end here.  */
2063                     {
2064                       start = BYTE_TO_CHAR (position);
2065                       end = BYTE_TO_CHAR (position + len_byte);
2066                     }
2067
2068                   if ((n -= direction) != 0)
2069                     pos_byte += dirlen; /* to resume search */
2070                   else
2071                     return direction > 0 ? end : start;
2072                 }
2073               else
2074                 pos_byte += stride_for_teases;
2075             }
2076           }
2077       /* We have done one clump.  Can we continue? */
2078       if ((lim_byte - pos_byte) * direction < 0)
2079         return ((0 - n) * direction);
2080     }
2081   return BYTE_TO_CHAR (pos_byte);
2082 }
2083
2084 /* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
2085    for the overall match just found in the current buffer.
2086    Also clear out the match data for registers 1 and up.  */
2087
2088 static void
2089 set_search_regs (beg_byte, nbytes)
2090      int beg_byte, nbytes;
2091 {
2092   int i;
2093
2094   if (!NILP (Vinhibit_changing_match_data))
2095     return;
2096
2097   /* Make sure we have registers in which to store
2098      the match position.  */
2099   if (search_regs.num_regs == 0)
2100     {
2101       search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
2102       search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
2103       search_regs.num_regs = 2;
2104     }
2105
2106   /* Clear out the other registers.  */
2107   for (i = 1; i < search_regs.num_regs; i++)
2108     {
2109       search_regs.start[i] = -1;
2110       search_regs.end[i] = -1;
2111     }
2112
2113   search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
2114   search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
2115   XSETBUFFER (last_thing_searched, current_buffer);
2116 }
2117 \f
2118 /* Given a string of words separated by word delimiters,
2119   compute a regexp that matches those exact words
2120   separated by arbitrary punctuation.  */
2121
2122 static Lisp_Object
2123 wordify (string)
2124      Lisp_Object string;
2125 {
2126   register unsigned char *p, *o;
2127   register int i, i_byte, len, punct_count = 0, word_count = 0;
2128   Lisp_Object val;
2129   int prev_c = 0;
2130   int adjust;
2131
2132   CHECK_STRING (string);
2133   p = SDATA (string);
2134   len = SCHARS (string);
2135
2136   for (i = 0, i_byte = 0; i < len; )
2137     {
2138       int c;
2139
2140       FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, i, i_byte);
2141
2142       if (SYNTAX (c) != Sword)
2143         {
2144           punct_count++;
2145           if (i > 0 && SYNTAX (prev_c) == Sword)
2146             word_count++;
2147         }
2148
2149       prev_c = c;
2150     }
2151
2152   if (SYNTAX (prev_c) == Sword)
2153     word_count++;
2154   if (!word_count)
2155     return empty_unibyte_string;
2156
2157   adjust = - punct_count + 5 * (word_count - 1) + 4;
2158   if (STRING_MULTIBYTE (string))
2159     val = make_uninit_multibyte_string (len + adjust,
2160                                         SBYTES (string)
2161                                         + adjust);
2162   else
2163     val = make_uninit_string (len + adjust);
2164
2165   o = SDATA (val);
2166   *o++ = '\\';
2167   *o++ = 'b';
2168   prev_c = 0;
2169
2170   for (i = 0, i_byte = 0; i < len; )
2171     {
2172       int c;
2173       int i_byte_orig = i_byte;
2174
2175       FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, i, i_byte);
2176
2177       if (SYNTAX (c) == Sword)
2178         {
2179           bcopy (SDATA (string) + i_byte_orig, o,
2180                  i_byte - i_byte_orig);
2181           o += i_byte - i_byte_orig;
2182         }
2183       else if (i > 0 && SYNTAX (prev_c) == Sword && --word_count)
2184         {
2185           *o++ = '\\';
2186           *o++ = 'W';
2187           *o++ = '\\';
2188           *o++ = 'W';
2189           *o++ = '*';
2190         }
2191
2192       prev_c = c;
2193     }
2194
2195   *o++ = '\\';
2196   *o++ = 'b';
2197
2198   return val;
2199 }
2200 \f
2201 DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
2202        "MSearch backward: ",
2203        doc: /* Search backward from point for STRING.
2204 Set point to the beginning of the occurrence found, and return point.
2205 An optional second argument bounds the search; it is a buffer position.
2206 The match found must not extend before that position.
2207 Optional third argument, if t, means if fail just return nil (no error).
2208  If not nil and not t, position at limit of search and return nil.
2209 Optional fourth argument is repeat count--search for successive occurrences.
2210
2211 Search case-sensitivity is determined by the value of the variable
2212 `case-fold-search', which see.
2213
2214 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2215      (string, bound, noerror, count)
2216      Lisp_Object string, bound, noerror, count;
2217 {
       /* Pass the four Lisp arguments through to search_command
          unchanged, followed by -1, 0, 0.  Judging from the sibling
          commands in this file these look like a direction (-1 for the
          backward variants, 1 for the forward ones), a regexp flag and
          a POSIX flag -- search_command is defined outside this view,
          so confirm the parameter meanings there.  */
2218   return search_command (string, bound, noerror, count, -1, 0, 0);
2219 }
2220
2221 DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
2222        doc: /* Search forward from point for STRING.
2223 Set point to the end of the occurrence found, and return point.
2224 An optional second argument bounds the search; it is a buffer position.
2225 The match found must not extend after that position.  A value of nil is
2226   equivalent to (point-max).
2227 Optional third argument, if t, means if fail just return nil (no error).
2228   If not nil and not t, move to limit of search and return nil.
2229 Optional fourth argument is repeat count--search for successive occurrences.
2230
2231 Search case-sensitivity is determined by the value of the variable
2232 `case-fold-search', which see.
2233
2234 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2235      (string, bound, noerror, count)
2236      Lisp_Object string, bound, noerror, count;
2237 {
       /* Same call as `search-backward' makes, except that the fifth
          argument is 1 instead of -1 (presumably the search direction;
          search_command is not visible here -- confirm).  The two
          trailing zeros match the other non-regexp, non-POSIX
          command.  */
2238   return search_command (string, bound, noerror, count, 1, 0, 0);
2239 }
2240
2241 DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
2242        "sWord search backward: ",
2243        doc: /* Search backward from point for STRING, ignoring differences in punctuation.
2244 Set point to the beginning of the occurrence found, and return point.
2245 An optional second argument bounds the search; it is a buffer position.
2246 The match found must not extend before that position.
2247 Optional third argument, if t, means if fail just return nil (no error).
2248   If not nil and not t, move to limit of search and return nil.
2249 Optional fourth argument is repeat count--search for successive occurrences.  */)
2250      (string, bound, noerror, count)
2251      Lisp_Object string, bound, noerror, count;
2252 {
       /* wordify rewrites STRING as a regexp (the words of STRING
          joined by \W\W* and wrapped in \b, or an empty string when
          STRING has no word characters).  That is why the sixth
          argument is 1 here, as in the regexp commands, rather than
          the 0 used by the plain string searches.  The -1 is
          presumably the backward direction and the final 0 the POSIX
          flag -- confirm against search_command, which is defined
          outside this view.  */
2253   return search_command (wordify (string), bound, noerror, count, -1, 1, 0);
2254 }
2255
2256 DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
2257        "sWord search: ",
2258        doc: /* Search forward from point for STRING, ignoring differences in punctuation.
2259 Set point to the end of the occurrence found, and return point.
2260 An optional second argument bounds the search; it is a buffer position.
2261 The match found must not extend after that position.
2262 Optional third argument, if t, means if fail just return nil (no error).
2263   If not nil and not t, move to limit of search and return nil.
2264 Optional fourth argument is repeat count--search for successive occurrences.  */)
2265      (string, bound, noerror, count)
2266      Lisp_Object string, bound, noerror, count;
2267 {
       /* STRING is first converted by wordify into a regexp matching
          its words separated by arbitrary non-word text; the result is
          searched for with the trailing arguments 1, 1, 0.  These
          differ from `word-search-backward' only in the first of the
          three (1 instead of -1, presumably the direction --
          search_command is not visible here, so confirm).  */
2268   return search_command (wordify (string), bound, noerror, count, 1, 1, 0);
2269 }
2270
2271 DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
2272        "sRE search backward: ",
2273        doc: /* Search backward from point for match for regular expression REGEXP.
2274 Set point to the beginning of the match, and return point.
2275 The match found is the one starting last in the buffer
2276 and yet ending before the origin of the search.
2277 An optional second argument bounds the search; it is a buffer position.
2278 The match found must start at or after that position.
2279 Optional third argument, if t, means if fail just return nil (no error).
2280   If not nil and not t, move to limit of search and return nil.
2281 Optional fourth argument is repeat count--search for successive occurrences.
2282 See also the functions `match-beginning', `match-end', `match-string',
2283 and `replace-match'.  */)
2284      (regexp, bound, noerror, count)
2285      Lisp_Object regexp, bound, noerror, count;
2286 {
       /* REGEXP is handed to search_command as is, followed by
          -1, 1, 0.  By comparison with the sibling commands: -1 marks
          the backward variants, the middle 1 is what distinguishes
          regexp (and word) searches from plain string searches, and
          the final 0 is 1 only in the posix-* commands.  The exact
          parameter meanings live in search_command, outside this view
          -- confirm there.  */
2287   return search_command (regexp, bound, noerror, count, -1, 1, 0);
2288 }
2289
2290 DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
2291        "sRE search: ",
2292        doc: /* Search forward from point for regular expression REGEXP.
2293 Set point to the end of the occurrence found, and return point.
2294 An optional second argument bounds the search; it is a buffer position.
2295 The match found must not extend after that position.
2296 Optional third argument, if t, means if fail just return nil (no error).
2297   If not nil and not t, move to limit of search and return nil.
2298 Optional fourth argument is repeat count--search for successive occurrences.
2299 See also the functions `match-beginning', `match-end', `match-string',
2300 and `replace-match'.  */)
2301      (regexp, bound, noerror, count)
2302      Lisp_Object regexp, bound, noerror, count;
2303 {
       /* Forward counterpart of `re-search-backward': identical call
          except that the fifth argument is 1 rather than -1.  The
          following 1 and 0 are presumably "treat as regexp" and "not
          POSIX" -- search_command is defined outside this view, so
          verify there.  */
2304   return search_command (regexp, bound, noerror, count, 1, 1, 0);
2305 }
2306
2307 DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
2308        "sPosix search backward: ",
2309        doc: /* Search backward from point for match for regular expression REGEXP.
2310 Find the longest match in accord with Posix regular expression rules.
2311 Set point to the beginning of the match, and return point.
2312 The match found is the one starting last in the buffer
2313 and yet ending before the origin of the search.
2314 An optional second argument bounds the search; it is a buffer position.
2315 The match found must start at or after that position.
2316 Optional third argument, if t, means if fail just return nil (no error).
2317   If not nil and not t, move to limit of search and return nil.
2318 Optional fourth argument is repeat count--search for successive occurrences.
2319 See also the functions `match-beginning', `match-end', `match-string',
2320 and `replace-match'.  */)
2321      (regexp, bound, noerror, count)
2322      Lisp_Object regexp, bound, noerror, count;
2323 {
       /* Differs from `re-search-backward' only in the last argument,
          which is 1 here instead of 0.  Given the doc string above,
          that argument presumably selects POSIX longest-match
          behavior; search_command is not visible in this part of the
          file, so confirm there.  */
2324   return search_command (regexp, bound, noerror, count, -1, 1, 1);
2325 }
2326
2327 DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
2328        "sPosix search: ",
2329        doc: /* Search forward from point for regular expression REGEXP.
2330 Find the longest match in accord with Posix regular expression rules.
2331 Set point to the end of the occurrence found, and return point.
2332 An optional second argument bounds the search; it is a buffer position.
2333 The match found must not extend after that position.
2334 Optional third argument, if t, means if fail just return nil (no error).
2335   If not nil and not t, move to limit of search and return nil.
2336 Optional fourth argument is repeat count--search for successive occurrences.
2337 See also the functions `match-beginning', `match-end', `match-string',
2338 and `replace-match'.  */)
2339      (regexp, bound, noerror, count)
2340      Lisp_Object regexp, bound, noerror, count;
2341 {
       /* Same call as `re-search-forward' makes, but with the last
          argument set to 1.  That is presumably the POSIX flag, as
          only the two posix-* commands pass a nonzero value there --
          confirm against search_command, which is defined outside
          this view.  */
2342   return search_command (regexp, bound, noerror, count, 1, 1, 1);
2343 }
2344 \f
2345 DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
2346        doc: /* Replace text matched by last search with NEWTEXT.
2347 Leave point at the end of the replacement text.
2348
2349 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
2350 Otherwise maybe capitalize the whole text, or maybe just word initials,
2351 based on the replaced text.
2352 If the replaced text has only capital letters
2353 and has at least one multiletter word, convert NEWTEXT to all caps.
2354 Otherwise if all words are capitalized in the replaced text,
2355 capitalize each word in NEWTEXT.
2356
2357 If third arg LITERAL is non-nil, insert NEWTEXT literally.
2358 Otherwise treat `\\' as special:
2359   `\\&' in NEWTEXT means substitute original matched text.
2360   `\\N' means substitute what matched the Nth `\\(...\\)'.
2361        If Nth parens didn't match, substitute nothing.
2362   `\\\\' means insert one `\\'.
2363 Case conversion does not apply to these substitutions.
2364
2365 FIXEDCASE and LITERAL are optional arguments.
2366
2367 The optional fourth argument STRING can be a string to modify.
2368 This is meaningful when the previous match was done against STRING,
2369 using `string-match'.  When used this way, `replace-match'
2370 creates and returns a new string made by copying STRING and replacing
2371 the part of STRING that was matched.
2372
2373 The optional fifth argument SUBEXP specifies a subexpression;
2374 it says to replace just that subexpression with NEWTEXT,
2375 rather than replacing the entire matched text.
2376 This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
2377 `\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
2378 NEWTEXT in place of subexp N.
2379 This is useful only after a regular expression search or match,
2380 since only regular expressions have distinguished subexpressions.  */)
2381      (newtext, fixedcase, literal, string, subexp)
2382      Lisp_Object newtext, fixedcase, literal, string, subexp;
2383 {
2384   enum { nochange, all_caps, cap_initial } case_action;
2385   register int pos, pos_byte;
2386   int some_multiletter_word;
2387   int some_lowercase;
2388   int some_uppercase;
2389   int some_nonuppercase_initial;
2390   register int c, prevc;
2391   int sub;
2392   int opoint, newpoint;
2393
2394   CHECK_STRING (newtext);
2395
2396   if (! NILP (string))
2397     CHECK_STRING (string);
2398
2399   case_action = nochange;       /* We tried an initialization */
2400                                 /* but some C compilers blew it */
2401
2402   if (search_regs.num_regs <= 0)
2403     error ("`replace-match' called before any match found");
2404
2405   if (NILP (subexp))
2406     sub = 0;
2407   else
2408     {
2409       CHECK_NUMBER (subexp);
2410       sub = XINT (subexp);
2411       if (sub < 0 || sub >= search_regs.num_regs)
2412         args_out_of_range (subexp, make_number (search_regs.num_regs));
2413     }
2414
2415   if (NILP (string))
2416     {
2417       if (search_regs.start[sub] < BEGV
2418           || search_regs.start[sub] > search_regs.end[sub]
2419           || search_regs.end[sub] > ZV)
2420         args_out_of_range (make_number (search_regs.start[sub]),
2421                            make_number (search_regs.end[sub]));
2422     }
2423   else
2424     {
2425       if (search_regs.start[sub] < 0
2426           || search_regs.start[sub] > search_regs.end[sub]
2427           || search_regs.end[sub] > SCHARS (string))
2428         args_out_of_range (make_number (search_regs.start[sub]),
2429                            make_number (search_regs.end[sub]));
2430     }
2431
2432   if (NILP (fixedcase))
2433     {
2434       /* Decide how to casify by examining the matched text. */
2435       int last;
2436
2437       pos = search_regs.start[sub];
2438       last = search_regs.end[sub];
2439
2440       if (NILP (string))
2441         pos_byte = CHAR_TO_BYTE (pos);
2442       else
2443         pos_byte = string_char_to_byte (string, pos);
2444
2445       prevc = '\n';
2446       case_action = all_caps;
2447
2448       /* some_multiletter_word is set nonzero if any original word
2449          is more than one letter long. */
2450       some_multiletter_word = 0;
2451       some_lowercase = 0;
2452       some_nonuppercase_initial = 0;
2453       some_uppercase = 0;
2454
2455       while (pos < last)
2456         {
2457           if (NILP (string))
2458             {
2459               c = FETCH_CHAR_AS_MULTIBYTE (pos_byte);
2460               INC_BOTH (pos, pos_byte);
2461             }
2462           else
2463             FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, pos, pos_byte);
2464
2465           if (LOWERCASEP (c))
2466             {
2467               /* Cannot be all caps if any original char is lower case */
2468
2469               some_lowercase = 1;
2470               if (SYNTAX (prevc) != Sword)
2471                 some_nonuppercase_initial = 1;
2472               else
2473                 some_multiletter_word = 1;
2474             }
2475           else if (UPPERCASEP (c))
2476             {
2477               some_uppercase = 1;
2478               if (SYNTAX (prevc) != Sword)
2479                 ;
2480               else
2481                 some_multiletter_word = 1;
2482             }
2483           else
2484             {
2485               /* If the initial is a caseless word constituent,
2486                  treat that like a lowercase initial.  */
2487               if (SYNTAX (prevc) != Sword)
2488                 some_nonuppercase_initial = 1;
2489             }
2490
2491           prevc = c;
2492         }
2493
2494       /* Convert to all caps if the old text is all caps
2495          and has at least one multiletter word.  */
2496       if (! some_lowercase && some_multiletter_word)
2497         case_action = all_caps;
2498       /* Capitalize each word, if the old text has all capitalized words.  */
2499       else if (!some_nonuppercase_initial && some_multiletter_word)
2500         case_action = cap_initial;
2501       else if (!some_nonuppercase_initial && some_uppercase)
2502         /* Should x -> yz, operating on X, give Yz or YZ?
2503            We'll assume the latter.  */
2504         case_action = all_caps;
2505       else
2506         case_action = nochange;
2507     }
2508
2509   /* Do replacement in a string.  */
2510   if (!NILP (string))
2511     {
2512       Lisp_Object before, after;
2513
2514       before = Fsubstring (string, make_number (0),
2515                            make_number (search_regs.start[sub]));
2516       after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
2517
2518       /* Substitute parts of the match into NEWTEXT
2519          if desired.  */
2520       if (NILP (literal))
2521         {
2522           int lastpos = 0;
2523           int lastpos_byte = 0;
2524           /* We build up the substituted string in ACCUM.  */
2525           Lisp_Object accum;
2526           Lisp_Object middle;
2527           int length = SBYTES (newtext);
2528
2529           accum = Qnil;
2530
2531           for (pos_byte = 0, pos = 0; pos_byte < length;)
2532             {
2533               int substart = -1;
2534               int subend = 0;
2535               int delbackslash = 0;
2536
2537               FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2538
2539               if (c == '\\')
2540                 {
2541                   FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2542
2543                   if (c == '&')
2544                     {
2545                       substart = search_regs.start[sub];
2546                       subend = search_regs.end[sub];
2547                     }
2548                   else if (c >= '1' && c <= '9')
2549                     {
2550                       if (search_regs.start[c - '0'] >= 0
2551                           && c <= search_regs.num_regs + '0')
2552                         {
2553                           substart = search_regs.start[c - '0'];
2554                           subend = search_regs.end[c - '0'];
2555                         }
2556                       else
2557                         {
2558                           /* If that subexp did not match,
2559                              replace \\N with nothing.  */
2560                           substart = 0;
2561                           subend = 0;
2562                         }
2563                     }
2564                   else if (c == '\\')
2565                     delbackslash = 1;
2566                   else
2567                     error ("Invalid use of `\\' in replacement text");
2568                 }
2569               if (substart >= 0)
2570                 {
2571                   if (pos - 2 != lastpos)
2572                     middle = substring_both (newtext, lastpos,
2573                                              lastpos_byte,
2574                                              pos - 2, pos_byte - 2);
2575                   else
2576                     middle = Qnil;
2577                   accum = concat3 (accum, middle,
2578                                    Fsubstring (string,
2579                                                make_number (substart),
2580                                                make_number (subend)));
2581                   lastpos = pos;
2582                   lastpos_byte = pos_byte;
2583                 }
2584               else if (delbackslash)
2585                 {
2586                   middle = substring_both (newtext, lastpos,
2587                                            lastpos_byte,
2588                                            pos - 1, pos_byte - 1);
2589
2590                   accum = concat2 (accum, middle);
2591                   lastpos = pos;
2592                   lastpos_byte = pos_byte;
2593                 }
2594             }
2595
2596           if (pos != lastpos)
2597             middle = substring_both (newtext, lastpos,
2598                                      lastpos_byte,
2599                                      pos, pos_byte);
2600           else
2601             middle = Qnil;
2602
2603           newtext = concat2 (accum, middle);
2604         }
2605
2606       /* Do case substitution in NEWTEXT if desired.  */
2607       if (case_action == all_caps)
2608         newtext = Fupcase (newtext);
2609       else if (case_action == cap_initial)
2610         newtext = Fupcase_initials (newtext);
2611
2612       return concat3 (before, newtext, after);
2613     }
2614
2615   /* Record point, then move (quietly) to the start of the match.  */
2616   if (PT >= search_regs.end[sub])
2617     opoint = PT - ZV;
2618   else if (PT > search_regs.start[sub])
2619     opoint = search_regs.end[sub] - ZV;
2620   else
2621     opoint = PT;
2622
2623   /* If we want non-literal replacement,
2624      perform substitution on the replacement string.  */
2625   if (NILP (literal))
2626     {
2627       int length = SBYTES (newtext);
2628       unsigned char *substed;
2629       int substed_alloc_size, substed_len;
2630       int buf_multibyte = !NILP (current_buffer->enable_multibyte_characters);
2631       int str_multibyte = STRING_MULTIBYTE (newtext);
2632       Lisp_Object rev_tbl;
2633       int really_changed = 0;
2634
2635       rev_tbl = Qnil;
2636
2637       substed_alloc_size = length * 2 + 100;
2638       substed = (unsigned char *) xmalloc (substed_alloc_size + 1);
2639       substed_len = 0;
2640
2641       /* Go thru NEWTEXT, producing the actual text to insert in
2642          SUBSTED while adjusting multibyteness to that of the current
2643          buffer.  */
2644
2645       for (pos_byte = 0, pos = 0; pos_byte < length;)
2646         {
2647           unsigned char str[MAX_MULTIBYTE_LENGTH];
2648           unsigned char *add_stuff = NULL;
2649           int add_len = 0;
2650           int idx = -1;
2651
2652           if (str_multibyte)
2653             {
2654               FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte);
2655               if (!buf_multibyte)
2656                 c = multibyte_char_to_unibyte (c, rev_tbl);
2657             }
2658           else
2659             {
2660               /* Note that we don't have to increment POS.  */
2661               c = SREF (newtext, pos_byte++);
2662               if (buf_multibyte)
2663                 c = unibyte_char_to_multibyte (c);
2664             }
2665
2666           /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2667              or set IDX to a match index, which means put that part
2668              of the buffer text into SUBSTED.  */
2669
2670           if (c == '\\')
2671             {
2672               really_changed = 1;
2673
2674               if (str_multibyte)
2675                 {
2676                   FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext,
2677                                                       pos, pos_byte);
2678                   if (!buf_multibyte && !ASCII_CHAR_P (c))
2679                     c = multibyte_char_to_unibyte (c, rev_tbl);
2680                 }
2681               else
2682                 {
2683                   c = SREF (newtext, pos_byte++);
2684                   if (buf_multibyte)
2685                     c = unibyte_char_to_multibyte (c);
2686                 }
2687
2688               if (c == '&')
2689                 idx = sub;
2690               else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
2691                 {
2692                   if (search_regs.start[c - '0'] >= 1)
2693                     idx = c - '0';
2694                 }
2695               else if (c == '\\')
2696                 add_len = 1, add_stuff = "\\";
2697               else
2698                 {
2699                   xfree (substed);
2700                   error ("Invalid use of `\\' in replacement text");
2701                 }
2702             }
2703           else
2704             {
2705               add_len = CHAR_STRING (c, str);
2706               add_stuff = str;
2707             }
2708
2709           /* If we want to copy part of a previous match,
2710              set up ADD_STUFF and ADD_LEN to point to it.  */
2711           if (idx >= 0)
2712             {
2713               int begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
2714               add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
2715               if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
2716                 move_gap (search_regs.start[idx]);
2717               add_stuff = BYTE_POS_ADDR (begbyte);
2718             }
2719
2720           /* Now the stuff we want to add to SUBSTED
2721              is invariably ADD_LEN bytes starting at ADD_STUFF.  */
2722
2723           /* Make sure SUBSTED is big enough.  */
2724           if (substed_len + add_len >= substed_alloc_size)
2725             {
2726               substed_alloc_size = substed_len + add_len + 500;
2727               substed = (unsigned char *) xrealloc (substed,
2728                                                     substed_alloc_size + 1);
2729             }
2730
2731           /* Now add to the end of SUBSTED.  */
2732           if (add_stuff)
2733             {
2734               bcopy (add_stuff, substed + substed_len, add_len);
2735               substed_len += add_len;
2736             }
2737         }
2738
2739       if (really_changed)
2740         {
2741           if (buf_multibyte)
2742             {
2743               int nchars = multibyte_chars_in_text (substed, substed_len);
2744
2745               newtext = make_multibyte_string (substed, nchars, substed_len);
2746             }
2747           else
2748             newtext = make_unibyte_string (substed, substed_len);
2749         }
2750       xfree (substed);
2751     }
2752
2753   /* Replace the old text with the new in the cleanest possible way.  */
2754   replace_range (search_regs.start[sub], search_regs.end[sub],
2755                  newtext, 1, 0, 1);
2756   newpoint = search_regs.start[sub] + SCHARS (newtext);
2757
2758   if (case_action == all_caps)
2759     Fupcase_region (make_number (search_regs.start[sub]),
2760                     make_number (newpoint));
2761   else if (case_action == cap_initial)
2762     Fupcase_initials_region (make_number (search_regs.start[sub]),
2763                              make_number (newpoint));
2764
2765   /* Adjust search data for this change.  */
2766   {
2767     int oldend = search_regs.end[sub];
2768     int oldstart = search_regs.start[sub];
2769     int change = newpoint - search_regs.end[sub];
2770     int i;
2771
2772     for (i = 0; i < search_regs.num_regs; i++)
2773       {
2774         if (search_regs.start[i] >= oldend)
2775           search_regs.start[i] += change;
2776         else if (search_regs.start[i] > oldstart)
2777           search_regs.start[i] = oldstart;
2778         if (search_regs.end[i] >= oldend)
2779           search_regs.end[i] += change;
2780         else if (search_regs.end[i] > oldstart)
2781           search_regs.end[i] = oldstart;
2782       }
2783   }
2784
2785   /* Put point back where it was in the text.  */
2786   if (opoint <= 0)
2787     TEMP_SET_PT (opoint + ZV);
2788   else
2789     TEMP_SET_PT (opoint);
2790
2791   /* Now move point "officially" to the start of the inserted replacement.  */
2792   move_if_not_intangible (newpoint);
2793
2794   return Qnil;
2795 }
2796 \f
2797 static Lisp_Object
2798 match_limit (num, beginningp)
2799      Lisp_Object num;
2800      int beginningp;
2801 {
2802   register int n;
2803
2804   CHECK_NUMBER (num);
2805   n = XINT (num);
2806   if (n < 0)
2807     args_out_of_range (num, make_number (0));
2808   if (search_regs.num_regs <= 0)
2809     error ("No match data, because no search succeeded");
2810   if (n >= search_regs.num_regs
2811       || search_regs.start[n] < 0)
2812     return Qnil;
2813   return (make_number ((beginningp) ? search_regs.start[n]
2814                                     : search_regs.end[n]));
2815 }
2816
2817 DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
2818        doc: /* Return position of start of text matched by last search.
2819 SUBEXP, a number, specifies which parenthesized expression in the last
2820   regexp.
2821 Value is nil if SUBEXPth pair didn't match, or there were less than
2822   SUBEXP pairs.
2823 Zero means the entire text matched by the whole regexp or whole string.  */)
2824      (subexp)
2825      Lisp_Object subexp;
2826 {
2827   return match_limit (subexp, 1);
2828 }
2829
2830 DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
2831        doc: /* Return position of end of text matched by last search.
2832 SUBEXP, a number, specifies which parenthesized expression in the last
2833   regexp.
2834 Value is nil if SUBEXPth pair didn't match, or there were less than
2835   SUBEXP pairs.
2836 Zero means the entire text matched by the whole regexp or whole string.  */)
2837      (subexp)
2838      Lisp_Object subexp;
2839 {
2840   return match_limit (subexp, 0);
2841 }
2842
2843 DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 3, 0,
2844        doc: /* Return a list containing all info on what the last search matched.
2845 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2846 All the elements are markers or nil (nil if the Nth pair didn't match)
2847 if the last match was on a buffer; integers or nil if a string was matched.
2848 Use `store-match-data' to reinstate the data in this list.
2849
2850 If INTEGERS (the optional first argument) is non-nil, always use
2851 integers \(rather than markers) to represent buffer positions.  In
2852 this case, and if the last match was in a buffer, the buffer will get
2853 stored as one additional element at the end of the list.
2854
2855 If REUSE is a list, reuse it as part of the value.  If REUSE is long
2856 enough to hold all the values, and if INTEGERS is non-nil, no consing
2857 is done.
2858
2859 If optional third arg RESEAT is non-nil, any previous markers on the
2860 REUSE list will be modified to point to nowhere.
2861
2862 Return value is undefined if the last search failed.  */)
2863   (integers, reuse, reseat)
2864      Lisp_Object integers, reuse, reseat;
2865 {
2866   Lisp_Object tail, prev;
2867   Lisp_Object *data;
2868   int i, len;
2869
2870   if (!NILP (reseat))
2871     for (tail = reuse; CONSP (tail); tail = XCDR (tail))
2872       if (MARKERP (XCAR (tail)))
2873         {
2874           unchain_marker (XMARKER (XCAR (tail)));
2875           XSETCAR (tail, Qnil);
2876         }
2877
2878   if (NILP (last_thing_searched))
2879     return Qnil;
2880
2881   prev = Qnil;
2882
2883   data = (Lisp_Object *) alloca ((2 * search_regs.num_regs + 1)
2884                                  * sizeof (Lisp_Object));
2885
2886   len = 0;
2887   for (i = 0; i < search_regs.num_regs; i++)
2888     {
2889       int start = search_regs.start[i];
2890       if (start >= 0)
2891         {
2892           if (EQ (last_thing_searched, Qt)
2893               || ! NILP (integers))
2894             {
2895               XSETFASTINT (data[2 * i], start);
2896               XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
2897             }
2898           else if (BUFFERP (last_thing_searched))
2899             {
2900               data[2 * i] = Fmake_marker ();
2901               Fset_marker (data[2 * i],
2902                            make_number (start),
2903                            last_thing_searched);
2904               data[2 * i + 1] = Fmake_marker ();
2905               Fset_marker (data[2 * i + 1],
2906                            make_number (search_regs.end[i]),
2907                            last_thing_searched);
2908             }
2909           else
2910             /* last_thing_searched must always be Qt, a buffer, or Qnil.  */
2911             abort ();
2912
2913           len = 2 * i + 2;
2914         }
2915       else
2916         data[2 * i] = data[2 * i + 1] = Qnil;
2917     }
2918
2919   if (BUFFERP (last_thing_searched) && !NILP (integers))
2920     {
2921       data[len] = last_thing_searched;
2922       len++;
2923     }
2924
2925   /* If REUSE is not usable, cons up the values and return them.  */
2926   if (! CONSP (reuse))
2927     return Flist (len, data);
2928
2929   /* If REUSE is a list, store as many value elements as will fit
2930      into the elements of REUSE.  */
2931   for (i = 0, tail = reuse; CONSP (tail);
2932        i++, tail = XCDR (tail))
2933     {
2934       if (i < len)
2935         XSETCAR (tail, data[i]);
2936       else
2937         XSETCAR (tail, Qnil);
2938       prev = tail;
2939     }
2940
2941   /* If we couldn't fit all value elements into REUSE,
2942      cons up the rest of them and add them to the end of REUSE.  */
2943   if (i < len)
2944     XSETCDR (prev, Flist (len - i, data + i));
2945
2946   return reuse;
2947 }
2948
2949 /* We used to have an internal use variant of `reseat' described as:
2950
2951       If RESEAT is `evaporate', put the markers back on the free list
2952       immediately.  No other references to the markers must exist in this
2953       case, so it is used only internally on the unwind stack and
2954       save-match-data from Lisp.
2955
2956    But it was ill-conceived: those supposedly-internal markers get exposed via
2957    the undo-list, so freeing them here is unsafe.  */
2958
2959 DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 2, 0,
2960        doc: /* Set internal data on last search match from elements of LIST.
2961 LIST should have been created by calling `match-data' previously.
2962
2963 If optional arg RESEAT is non-nil, make markers on LIST point nowhere.  */)
2964     (list, reseat)
2965      register Lisp_Object list, reseat;
2966 {
2967   register int i;
2968   register Lisp_Object marker;
2969
2970   if (running_asynch_code)
2971     save_search_regs ();
2972
2973   CHECK_LIST (list);
2974
2975   /* Unless we find a marker with a buffer or an explicit buffer
2976      in LIST, assume that this match data came from a string.  */
2977   last_thing_searched = Qt;
2978
2979   /* Allocate registers if they don't already exist.  */
2980   {
2981     int length = XFASTINT (Flength (list)) / 2;
2982
2983     if (length > search_regs.num_regs)
2984       {
2985         if (search_regs.num_regs == 0)
2986           {
2987             search_regs.start
2988               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2989             search_regs.end
2990               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2991           }
2992         else
2993           {
2994             search_regs.start
2995               = (regoff_t *) xrealloc (search_regs.start,
2996                                        length * sizeof (regoff_t));
2997             search_regs.end
2998               = (regoff_t *) xrealloc (search_regs.end,
2999                                        length * sizeof (regoff_t));
3000           }
3001
3002         for (i = search_regs.num_regs; i < length; i++)
3003           search_regs.start[i] = -1;
3004
3005         search_regs.num_regs = length;
3006       }
3007
3008     for (i = 0; CONSP (list); i++)
3009       {
3010         marker = XCAR (list);
3011         if (BUFFERP (marker))
3012           {
3013             last_thing_searched = marker;
3014             break;
3015           }
3016         if (i >= length)
3017           break;
3018         if (NILP (marker))
3019           {
3020             search_regs.start[i] = -1;
3021             list = XCDR (list);
3022           }
3023         else
3024           {
3025             int from;
3026             Lisp_Object m;
3027
3028             m = marker;
3029             if (MARKERP (marker))
3030               {
3031                 if (XMARKER (marker)->buffer == 0)
3032                   XSETFASTINT (marker, 0);
3033                 else
3034                   XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
3035               }
3036
3037             CHECK_NUMBER_COERCE_MARKER (marker);
3038             from = XINT (marker);
3039
3040             if (!NILP (reseat) && MARKERP (m))
3041               {
3042                 unchain_marker (XMARKER (m));
3043                 XSETCAR (list, Qnil);
3044               }
3045
3046             if ((list = XCDR (list), !CONSP (list)))
3047               break;
3048
3049             m = marker = XCAR (list);
3050
3051             if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
3052               XSETFASTINT (marker, 0);
3053
3054             CHECK_NUMBER_COERCE_MARKER (marker);
3055             search_regs.start[i] = from;
3056             search_regs.end[i] = XINT (marker);
3057
3058             if (!NILP (reseat) && MARKERP (m))
3059               {
3060                 unchain_marker (XMARKER (m));
3061                 XSETCAR (list, Qnil);
3062               }
3063           }
3064         list = XCDR (list);
3065       }
3066
3067     for (; i < search_regs.num_regs; i++)
3068       search_regs.start[i] = -1;
3069   }
3070
3071   return Qnil;
3072 }
3073
3074 /* If non-zero the match data have been saved in saved_search_regs
3075    during the execution of a sentinel or filter. */
3076 static int search_regs_saved;
3077 static struct re_registers saved_search_regs;
3078 static Lisp_Object saved_last_thing_searched;
3079
3080 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
3081    if asynchronous code (filter or sentinel) is running. */
3082 static void
3083 save_search_regs ()
3084 {
3085   if (!search_regs_saved)
3086     {
3087       saved_search_regs.num_regs = search_regs.num_regs;
3088       saved_search_regs.start = search_regs.start;
3089       saved_search_regs.end = search_regs.end;
3090       saved_last_thing_searched = last_thing_searched;
3091       last_thing_searched = Qnil;
3092       search_regs.num_regs = 0;
3093       search_regs.start = 0;
3094       search_regs.end = 0;
3095
3096       search_regs_saved = 1;
3097     }
3098 }
3099
3100 /* Called upon exit from filters and sentinels. */
3101 void
3102 restore_search_regs ()
3103 {
3104   if (search_regs_saved)
3105     {
3106       if (search_regs.num_regs > 0)
3107         {
3108           xfree (search_regs.start);
3109           xfree (search_regs.end);
3110         }
3111       search_regs.num_regs = saved_search_regs.num_regs;
3112       search_regs.start = saved_search_regs.start;
3113       search_regs.end = saved_search_regs.end;
3114       last_thing_searched = saved_last_thing_searched;
3115       saved_last_thing_searched = Qnil;
3116       search_regs_saved = 0;
3117     }
3118 }
3119
3120 static Lisp_Object
3121 unwind_set_match_data (list)
3122      Lisp_Object list;
3123 {
3124   /* It is NOT ALWAYS safe to free (evaporate) the markers immediately.  */
3125   return Fset_match_data (list, Qt);
3126 }
3127
3128 /* Called to unwind protect the match data.  */
3129 void
3130 record_unwind_save_match_data ()
3131 {
3132   record_unwind_protect (unwind_set_match_data,
3133                          Fmatch_data (Qnil, Qnil, Qnil));
3134 }
3135
3136 /* Quote a string to inactivate reg-expr chars */
3137
3138 DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
3139        doc: /* Return a regexp string which matches exactly STRING and nothing else.  */)
3140      (string)
3141      Lisp_Object string;
3142 {
3143   register unsigned char *in, *out, *end;
3144   register unsigned char *temp;
3145   int backslashes_added = 0;
3146
3147   CHECK_STRING (string);
3148
3149   temp = (unsigned char *) alloca (SBYTES (string) * 2);
3150
3151   /* Now copy the data into the new string, inserting escapes. */
3152
3153   in = SDATA (string);
3154   end = in + SBYTES (string);
3155   out = temp;
3156
3157   for (; in != end; in++)
3158     {
3159       if (*in == '['
3160           || *in == '*' || *in == '.' || *in == '\\'
3161           || *in == '?' || *in == '+'
3162           || *in == '^' || *in == '$')
3163         *out++ = '\\', backslashes_added++;
3164       *out++ = *in;
3165     }
3166
3167   return make_specified_string (temp,
3168                                 SCHARS (string) + backslashes_added,
3169                                 out - temp,
3170                                 STRING_MULTIBYTE (string));
3171 }
3172 \f
3173 void
3174 syms_of_search ()
3175 {
3176   register int i;
3177
3178   for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
3179     {
3180       searchbufs[i].buf.allocated = 100;
3181       searchbufs[i].buf.buffer = (unsigned char *) xmalloc (100);
3182       searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
3183       searchbufs[i].regexp = Qnil;
3184       searchbufs[i].whitespace_regexp = Qnil;
3185       searchbufs[i].syntax_table = Qnil;
3186       staticpro (&searchbufs[i].regexp);
3187       staticpro (&searchbufs[i].whitespace_regexp);
3188       staticpro (&searchbufs[i].syntax_table);
3189       searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
3190     }
3191   searchbuf_head = &searchbufs[0];
3192
3193   Qsearch_failed = intern ("search-failed");
3194   staticpro (&Qsearch_failed);
3195   Qinvalid_regexp = intern ("invalid-regexp");
3196   staticpro (&Qinvalid_regexp);
3197
3198   Fput (Qsearch_failed, Qerror_conditions,
3199         Fcons (Qsearch_failed, Fcons (Qerror, Qnil)));
3200   Fput (Qsearch_failed, Qerror_message,
3201         build_string ("Search failed"));
3202
3203   Fput (Qinvalid_regexp, Qerror_conditions,
3204         Fcons (Qinvalid_regexp, Fcons (Qerror, Qnil)));
3205   Fput (Qinvalid_regexp, Qerror_message,
3206         build_string ("Invalid regexp"));
3207
3208   last_thing_searched = Qnil;
3209   staticpro (&last_thing_searched);
3210
3211   saved_last_thing_searched = Qnil;
3212   staticpro (&saved_last_thing_searched);
3213
3214   DEFVAR_LISP ("search-spaces-regexp", &Vsearch_spaces_regexp,
3215       doc: /* Regexp to substitute for bunches of spaces in regexp search.
3216 Some commands use this for user-specified regexps.
3217 Spaces that occur inside character classes or repetition operators
3218 or other such regexp constructs are not replaced with this.
3219 A value of nil (which is the normal value) means treat spaces literally.  */);
3220   Vsearch_spaces_regexp = Qnil;
3221
3222   DEFVAR_LISP ("inhibit-changing-match-data", &Vinhibit_changing_match_data,
3223       doc: /* Internal use only.
3224 If non-nil, the primitive searching and matching functions
3225 such as `looking-at', `string-match', `re-search-forward', etc.,
3226 do not set the match data.  The proper way to use this variable
3227 is to bind it with `let' around a small expression.  */);
3228   Vinhibit_changing_match_data = Qnil;
3229
3230   defsubr (&Slooking_at);
3231   defsubr (&Sposix_looking_at);
3232   defsubr (&Sstring_match);
3233   defsubr (&Sposix_string_match);
3234   defsubr (&Ssearch_forward);
3235   defsubr (&Ssearch_backward);
3236   defsubr (&Sword_search_forward);
3237   defsubr (&Sword_search_backward);
3238   defsubr (&Sre_search_forward);
3239   defsubr (&Sre_search_backward);
3240   defsubr (&Sposix_search_forward);
3241   defsubr (&Sposix_search_backward);
3242   defsubr (&Sreplace_match);
3243   defsubr (&Smatch_beginning);
3244   defsubr (&Smatch_end);
3245   defsubr (&Smatch_data);
3246   defsubr (&Sset_match_data);
3247   defsubr (&Sregexp_quote);
3248 }
3249
3250 /* arch-tag: a6059d79-0552-4f14-a2cb-d379a4e3c78f
3251    (do not change this comment) */