src/search.c

   1 /* String search routines for GNU Emacs.
   2    Copyright (C) 1985, 1986, 1987, 1993, 1994, 1997, 1998, 1999, 2001, 2002,
   3                  2003, 2004, 2005, 2006, 2007, 2008
   4                  Free Software Foundation, Inc.
   5
   6 This file is part of GNU Emacs.
   7
   8 GNU Emacs is free software; you can redistribute it and/or modify
   9 it under the terms of the GNU General Public License as published by
  10 the Free Software Foundation; either version 3, or (at your option)
  11 any later version.
  12
  13 GNU Emacs is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GNU Emacs; see the file COPYING.  If not, write to
  20 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
  21 Boston, MA 02110-1301, USA.  */
  22
  23
  24 #include <config.h>
  25 #include "lisp.h"
  26 #include "syntax.h"
  27 #include "category.h"
  28 #include "buffer.h"
  29 #include "character.h"
  30 #include "charset.h"
  31 #include "region-cache.h"
  32 #include "commands.h"
  33 #include "blockinput.h"
  34 #include "intervals.h"
  35
  36 #include <sys/types.h>
  37 #include "regex.h"
  38
  39 #define REGEXP_CACHE_SIZE 20
  40
  41 /* If the regexp is non-nil, then the buffer contains the compiled form
  42    of that regexp, suitable for searching.  */
  43 struct regexp_cache
  44 {
  45   struct regexp_cache *next;
  46   Lisp_Object regexp, whitespace_regexp;
  47   /* Syntax table for which the regexp applies.  We need this because
  48      of character classes.  If this is t, then the compiled pattern is valid
  49      for any syntax-table.  */
  50   Lisp_Object syntax_table;
  51   struct re_pattern_buffer buf;
  52   char fastmap[0400];
  53   /* Nonzero means regexp was compiled to do full POSIX backtracking.  */
  54   char posix;
  55 };
  56
  57 /* The instances of that struct.  */
  58 struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
  59
  60 /* The head of the linked list; points to the most recently used buffer.  */
  61 struct regexp_cache *searchbuf_head;
  62
  63
  64 /* Every call to re_match, etc., must pass &search_regs as the regs
  65    argument unless you can show it is unnecessary (i.e., if re_match
  66    is certainly going to be called again before region-around-match
  67    can be called).
  68
  69    Since the registers are now dynamically allocated, we need to make
  70    sure not to refer to the Nth register before checking that it has
  71    been allocated by checking search_regs.num_regs.
  72
  73    The regex code keeps track of whether it has allocated the search
  74    buffer using bits in the re_pattern_buffer.  This means that whenever
  75    you compile a new pattern, it completely forgets whether it has
  76    allocated any registers, and will allocate new registers the next
  77    time you call a searching or matching function.  Therefore, we need
  78    to call re_set_registers after compiling a new pattern or after
  79    setting the match registers, so that the regex functions will be
  80    able to free or re-allocate it properly.  */
  81 static struct re_registers search_regs;
  82
  83 /* The buffer in which the last search was performed, or
  84    Qt if the last search was done in a string;
  85    Qnil if no searching has been done yet.  */
  86 static Lisp_Object last_thing_searched;
  87
  88 /* error condition signaled when regexp compile_pattern fails */
  89
  90 Lisp_Object Qinvalid_regexp;
  91
  92 /* Error condition used for failing searches */
  93 Lisp_Object Qsearch_failed;
  94
  95 Lisp_Object Vsearch_spaces_regexp;
  96
  97 /* If non-nil, the match data will not be changed during call to
  98    searching or matching functions.  This variable is for internal use
  99    only.  */
 100 Lisp_Object Vinhibit_changing_match_data;
 101
 102 static void set_search_regs ();
 103 static void save_search_regs ();
 104 static int simple_search ();
 105 static int boyer_moore ();
 106 static int search_buffer ();
 107 static void matcher_overflow () NO_RETURN;
 108
 109 static void
 110 matcher_overflow ()
 111 {
 112   error ("Stack overflow in regexp matcher");
 113 }
 114
 115 /* Compile a regexp and signal a Lisp error if anything goes wrong.
 116    PATTERN is the pattern to compile.
 117    CP is the place to put the result.
 118    TRANSLATE is a translation table for ignoring case, or nil for none.
 119    REGP is the structure that says where to store the "register"
 120    values that will result from matching this pattern.
 121    If it is 0, we should compile the pattern not to record any
 122    subexpression bounds.
 123    POSIX is nonzero if we want full backtracking (POSIX style)
 124    for this pattern.  0 means backtrack only enough to get a valid match.
 125
 126    The behavior also depends on Vsearch_spaces_regexp.  */
 127
 128 static void
 129 compile_pattern_1 (cp, pattern, translate, regp, posix)
 130      struct regexp_cache *cp;
 131      Lisp_Object pattern;
 132      Lisp_Object translate;
 133      struct re_registers *regp;
 134      int posix;
 135 {
 136   char *val;
 137   reg_syntax_t old;
 138
 139   cp->regexp = Qnil;
 140   cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
 141   cp->posix = posix;
 142   cp->buf.multibyte = STRING_MULTIBYTE (pattern);
 143   cp->buf.charset_unibyte = charset_unibyte;
 144   cp->whitespace_regexp = Vsearch_spaces_regexp;
 145   /* rms: I think BLOCK_INPUT is not needed here any more,
 146      because regex.c defines malloc to call xmalloc.
 147      Using BLOCK_INPUT here means the debugger won't run if an error occurs.
 148      So let's turn it off.  */
 149   /*  BLOCK_INPUT;  */
 150   old = re_set_syntax (RE_SYNTAX_EMACS
 151                        | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
 152   re_set_whitespace_regexp (NILP (Vsearch_spaces_regexp) ? NULL
 153                             : SDATA (Vsearch_spaces_regexp));
 154
 155   val = (char *) re_compile_pattern ((char *) SDATA (pattern),
 156                                      SBYTES (pattern), &cp->buf);
 157
 158   /* If the compiled pattern hard codes some of the contents of the
 159      syntax-table, it can only be reused with *this* syntax table.  */
 160   cp->syntax_table = cp->buf.used_syntax ? current_buffer->syntax_table : Qt;
 161
 162   re_set_whitespace_regexp (NULL);
 163
 164   re_set_syntax (old);
 165   /* UNBLOCK_INPUT;  */
 166   if (val)
 167     xsignal1 (Qinvalid_regexp, build_string (val));
 168
 169   cp->regexp = Fcopy_sequence (pattern);
 170 }
 171
 172 /* Shrink each compiled regexp buffer in the cache
 173    to the size actually used right now.
 174    This is called from garbage collection.  */
 175
 176 void
 177 shrink_regexp_cache ()
 178 {
 179   struct regexp_cache *cp;
 180
 181   for (cp = searchbuf_head; cp != 0; cp = cp->next)
 182     {
 183       cp->buf.allocated = cp->buf.used;
 184       cp->buf.buffer
 185         = (unsigned char *) xrealloc (cp->buf.buffer, cp->buf.used);
 186     }
 187 }
 188
 189 /* Clear the regexp cache w.r.t. a particular syntax table,
 190    because it was changed.
 191    There is no danger of memory leak here because re_compile_pattern
 192    automagically manages the memory in each re_pattern_buffer struct,
 193    based on its `allocated' and `buffer' values.  */
 194 void
 195 clear_regexp_cache ()
 196 {
 197   int i;
 198
 199   for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
 200     /* It's tempting to compare with the syntax-table we've actually changd,
 201        but it's not sufficient because char-table inheritance mewans that
 202        modifying one syntax-table can change others at the same time.  */
 203     if (!EQ (searchbufs[i].syntax_table, Qt))
 204       searchbufs[i].regexp = Qnil;
 205 }
 206
 207 /* Compile a regexp if necessary, but first check to see if there's one in
 208    the cache.
 209    PATTERN is the pattern to compile.
 210    TRANSLATE is a translation table for ignoring case, or nil for none.
 211    REGP is the structure that says where to store the "register"
 212    values that will result from matching this pattern.
 213    If it is 0, we should compile the pattern not to record any
 214    subexpression bounds.
 215    POSIX is nonzero if we want full backtracking (POSIX style)
 216    for this pattern.  0 means backtrack only enough to get a valid match.  */
 217
 218 struct re_pattern_buffer *
 219 compile_pattern (pattern, regp, translate, posix, multibyte)
 220      Lisp_Object pattern;
 221      struct re_registers *regp;
 222      Lisp_Object translate;
 223      int posix, multibyte;
 224 {
 225   struct regexp_cache *cp, **cpp;
 226
 227   for (cpp = &searchbuf_head; ; cpp = &cp->next)
 228     {
 229       cp = *cpp;
 230       /* Entries are initialized to nil, and may be set to nil by
 231          compile_pattern_1 if the pattern isn't valid.  Don't apply
 232          string accessors in those cases.  However, compile_pattern_1
 233          is only applied to the cache entry we pick here to reuse.  So
 234          nil should never appear before a non-nil entry.  */
 235       if (NILP (cp->regexp))
 236         goto compile_it;
 237       if (SCHARS (cp->regexp) == SCHARS (pattern)
 238           && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
 239           && !NILP (Fstring_equal (cp->regexp, pattern))
 240           && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
 241           && cp->posix == posix
 242           && (EQ (cp->syntax_table, Qt)
 243               || EQ (cp->syntax_table, current_buffer->syntax_table))
 244           && !NILP (Fequal (cp->whitespace_regexp, Vsearch_spaces_regexp))
 245           && cp->buf.charset_unibyte == charset_unibyte)
 246         break;
 247
 248       /* If we're at the end of the cache, compile into the nil cell
 249          we found, or the last (least recently used) cell with a
 250          string value.  */
 251       if (cp->next == 0)
 252         {
 253         compile_it:
 254           compile_pattern_1 (cp, pattern, translate, regp, posix);
 255           break;
 256         }
 257     }
 258
 259   /* When we get here, cp (aka *cpp) contains the compiled pattern,
 260      either because we found it in the cache or because we just compiled it.
 261      Move it to the front of the queue to mark it as most recently used.  */
 262   *cpp = cp->next;
 263   cp->next = searchbuf_head;
 264   searchbuf_head = cp;
 265
 266   /* Advise the searching functions about the space we have allocated
 267      for register data.  */
 268   if (regp)
 269     re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
 270
 271   /* The compiled pattern can be used both for mulitbyte and unibyte
 272      target.  But, we have to tell which the pattern is used for. */
 273   cp->buf.target_multibyte = multibyte;
 274
 275   return &cp->buf;
 276 }
 277
 278 \f
 279 static Lisp_Object
 280 looking_at_1 (string, posix)
 281      Lisp_Object string;
 282      int posix;
 283 {
 284   Lisp_Object val;
 285   unsigned char *p1, *p2;
 286   int s1, s2;
 287   register int i;
 288   struct re_pattern_buffer *bufp;
 289
 290   if (running_asynch_code)
 291     save_search_regs ();
 292
 293   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
 294   XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
 295     = current_buffer->case_eqv_table;
 296
 297   CHECK_STRING (string);
 298   bufp = compile_pattern (string,
 299                           (NILP (Vinhibit_changing_match_data)
 300                            ? &search_regs : NULL),
 301                           (!NILP (current_buffer->case_fold_search)
 302                            ? current_buffer->case_canon_table : Qnil),
 303                           posix,
 304                           !NILP (current_buffer->enable_multibyte_characters));
 305
 306   immediate_quit = 1;
 307   QUIT;                 /* Do a pending quit right away, to avoid paradoxical behavior */
 308
 309   /* Get pointers and sizes of the two strings
 310      that make up the visible portion of the buffer. */
 311
 312   p1 = BEGV_ADDR;
 313   s1 = GPT_BYTE - BEGV_BYTE;
 314   p2 = GAP_END_ADDR;
 315   s2 = ZV_BYTE - GPT_BYTE;
 316   if (s1 < 0)
 317     {
 318       p2 = p1;
 319       s2 = ZV_BYTE - BEGV_BYTE;
 320       s1 = 0;
 321     }
 322   if (s2 < 0)
 323     {
 324       s1 = ZV_BYTE - BEGV_BYTE;
 325       s2 = 0;
 326     }
 327
 328   re_match_object = Qnil;
 329
 330   i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
 331                   PT_BYTE - BEGV_BYTE,
 332                   (NILP (Vinhibit_changing_match_data)
 333                    ? &search_regs : NULL),
 334                   ZV_BYTE - BEGV_BYTE);
 335   immediate_quit = 0;
 336
 337   if (i == -2)
 338     matcher_overflow ();
 339
 340   val = (0 <= i ? Qt : Qnil);
 341   if (NILP (Vinhibit_changing_match_data) && i >= 0)
 342     for (i = 0; i < search_regs.num_regs; i++)
 343       if (search_regs.start[i] >= 0)
 344         {
 345           search_regs.start[i]
 346             = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
 347           search_regs.end[i]
 348             = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
 349         }
 350
 351   /* Set last_thing_searched only when match data is changed.  */
 352   if (NILP (Vinhibit_changing_match_data))
 353     XSETBUFFER (last_thing_searched, current_buffer);
 354
 355   return val;
 356 }
 357
 358 DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
 359        doc: /* Return t if text after point matches regular expression REGEXP.
 360 This function modifies the match data that `match-beginning',
 361 `match-end' and `match-data' access; save and restore the match
 362 data if you want to preserve them.  */)
 363      (regexp)
 364      Lisp_Object regexp;
 365 {
 366   return looking_at_1 (regexp, 0);
 367 }
 368
 369 DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
 370        doc: /* Return t if text after point matches regular expression REGEXP.
 371 Find the longest match, in accord with Posix regular expression rules.
 372 This function modifies the match data that `match-beginning',
 373 `match-end' and `match-data' access; save and restore the match
 374 data if you want to preserve them.  */)
 375      (regexp)
 376      Lisp_Object regexp;
 377 {
 378   return looking_at_1 (regexp, 1);
 379 }
 380 \f
 381 static Lisp_Object
 382 string_match_1 (regexp, string, start, posix)
 383      Lisp_Object regexp, string, start;
 384      int posix;
 385 {
 386   int val;
 387   struct re_pattern_buffer *bufp;
 388   int pos, pos_byte;
 389   int i;
 390
 391   if (running_asynch_code)
 392     save_search_regs ();
 393
 394   CHECK_STRING (regexp);
 395   CHECK_STRING (string);
 396
 397   if (NILP (start))
 398     pos = 0, pos_byte = 0;
 399   else
 400     {
 401       int len = SCHARS (string);
 402
 403       CHECK_NUMBER (start);
 404       pos = XINT (start);
 405       if (pos < 0 && -pos <= len)
 406         pos = len + pos;
 407       else if (0 > pos || pos > len)
 408         args_out_of_range (string, start);
 409       pos_byte = string_char_to_byte (string, pos);
 410     }
 411
 412   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
 413   XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
 414     = current_buffer->case_eqv_table;
 415
 416   bufp = compile_pattern (regexp,
 417                           (NILP (Vinhibit_changing_match_data)
 418                            ? &search_regs : NULL),
 419                           (!NILP (current_buffer->case_fold_search)
 420                            ? current_buffer->case_canon_table : Qnil),
 421                           posix,
 422                           STRING_MULTIBYTE (string));
 423   immediate_quit = 1;
 424   re_match_object = string;
 425
 426   val = re_search (bufp, (char *) SDATA (string),
 427                    SBYTES (string), pos_byte,
 428                    SBYTES (string) - pos_byte,
 429                    (NILP (Vinhibit_changing_match_data)
 430                     ? &search_regs : NULL));
 431   immediate_quit = 0;
 432
 433   /* Set last_thing_searched only when match data is changed.  */
 434   if (NILP (Vinhibit_changing_match_data))
 435     last_thing_searched = Qt;
 436
 437   if (val == -2)
 438     matcher_overflow ();
 439   if (val < 0) return Qnil;
 440
 441   if (NILP (Vinhibit_changing_match_data))
 442     for (i = 0; i < search_regs.num_regs; i++)
 443       if (search_regs.start[i] >= 0)
 444         {
 445           search_regs.start[i]
 446             = string_byte_to_char (string, search_regs.start[i]);
 447           search_regs.end[i]
 448             = string_byte_to_char (string, search_regs.end[i]);
 449         }
 450
 451   return make_number (string_byte_to_char (string, val));
 452 }
 453
 454 DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
 455        doc: /* Return index of start of first match for REGEXP in STRING, or nil.
 456 Matching ignores case if `case-fold-search' is non-nil.
 457 If third arg START is non-nil, start search at that index in STRING.
 458 For index of first char beyond the match, do (match-end 0).
 459 `match-end' and `match-beginning' also give indices of substrings
 460 matched by parenthesis constructs in the pattern.
 461
 462 You can use the function `match-string' to extract the substrings
 463 matched by the parenthesis constructions in REGEXP. */)
 464      (regexp, string, start)
 465      Lisp_Object regexp, string, start;
 466 {
 467   return string_match_1 (regexp, string, start, 0);
 468 }
 469
 470 DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
 471        doc: /* Return index of start of first match for REGEXP in STRING, or nil.
 472 Find the longest match, in accord with Posix regular expression rules.
 473 Case is ignored if `case-fold-search' is non-nil in the current buffer.
 474 If third arg START is non-nil, start search at that index in STRING.
 475 For index of first char beyond the match, do (match-end 0).
 476 `match-end' and `match-beginning' also give indices of substrings
 477 matched by parenthesis constructs in the pattern.  */)
 478      (regexp, string, start)
 479      Lisp_Object regexp, string, start;
 480 {
 481   return string_match_1 (regexp, string, start, 1);
 482 }
 483
 484 /* Match REGEXP against STRING, searching all of STRING,
 485    and return the index of the match, or negative on failure.
 486    This does not clobber the match data.  */
 487
 488 int
 489 fast_string_match (regexp, string)
 490      Lisp_Object regexp, string;
 491 {
 492   int val;
 493   struct re_pattern_buffer *bufp;
 494
 495   bufp = compile_pattern (regexp, 0, Qnil,
 496                           0, STRING_MULTIBYTE (string));
 497   immediate_quit = 1;
 498   re_match_object = string;
 499
 500   val = re_search (bufp, (char *) SDATA (string),
 501                    SBYTES (string), 0,
 502                    SBYTES (string), 0);
 503   immediate_quit = 0;
 504   return val;
 505 }
 506
 507 /* Match REGEXP against STRING, searching all of STRING ignoring case,
 508    and return the index of the match, or negative on failure.
 509    This does not clobber the match data.
 510    We assume that STRING contains single-byte characters.  */
 511
 512 extern Lisp_Object Vascii_downcase_table;
 513
 514 int
 515 fast_c_string_match_ignore_case (regexp, string)
 516      Lisp_Object regexp;
 517      const char *string;
 518 {
 519   int val;
 520   struct re_pattern_buffer *bufp;
 521   int len = strlen (string);
 522
 523   regexp = string_make_unibyte (regexp);
 524   re_match_object = Qt;
 525   bufp = compile_pattern (regexp, 0,
 526                           Vascii_canon_table, 0,
 527                           0);
 528   immediate_quit = 1;
 529   val = re_search (bufp, string, len, 0, len, 0);
 530   immediate_quit = 0;
 531   return val;
 532 }
 533
 534 /* Like fast_string_match but ignore case.  */
 535
 536 int
 537 fast_string_match_ignore_case (regexp, string)
 538      Lisp_Object regexp, string;
 539 {
 540   int val;
 541   struct re_pattern_buffer *bufp;
 542
 543   bufp = compile_pattern (regexp, 0, Vascii_canon_table,
 544                           0, STRING_MULTIBYTE (string));
 545   immediate_quit = 1;
 546   re_match_object = string;
 547
 548   val = re_search (bufp, (char *) SDATA (string),
 549                    SBYTES (string), 0,
 550                    SBYTES (string), 0);
 551   immediate_quit = 0;
 552   return val;
 553 }
 554 \f
 555 /* The newline cache: remembering which sections of text have no newlines.  */
 556
 557 /* If the user has requested newline caching, make sure it's on.
 558    Otherwise, make sure it's off.
 559    This is our cheezy way of associating an action with the change of
 560    state of a buffer-local variable.  */
 561 static void
 562 newline_cache_on_off (buf)
 563      struct buffer *buf;
 564 {
 565   if (NILP (buf->cache_long_line_scans))
 566     {
 567       /* It should be off.  */
 568       if (buf->newline_cache)
 569         {
 570           free_region_cache (buf->newline_cache);
 571           buf->newline_cache = 0;
 572         }
 573     }
 574   else
 575     {
 576       /* It should be on.  */
 577       if (buf->newline_cache == 0)
 578         buf->newline_cache = new_region_cache ();
 579     }
 580 }
 581
 582 \f
 583 /* Search for COUNT instances of the character TARGET between START and END.
 584
 585    If COUNT is positive, search forwards; END must be >= START.
 586    If COUNT is negative, search backwards for the -COUNTth instance;
 587       END must be <= START.
 588    If COUNT is zero, do anything you please; run rogue, for all I care.
 589
 590    If END is zero, use BEGV or ZV instead, as appropriate for the
 591    direction indicated by COUNT.
 592
 593    If we find COUNT instances, set *SHORTAGE to zero, and return the
 594    position past the COUNTth match.  Note that for reverse motion
 595    this is not the same as the usual convention for Emacs motion commands.
 596
 597    If we don't find COUNT instances before reaching END, set *SHORTAGE
 598    to the number of TARGETs left unfound, and return END.
 599
 600    If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
 601    except when inside redisplay.  */
 602
 603 int
 604 scan_buffer (target, start, end, count, shortage, allow_quit)
 605      register int target;
 606      int start, end;
 607      int count;
 608      int *shortage;
 609      int allow_quit;
 610 {
 611   struct region_cache *newline_cache;
 612   int direction;
 613
 614   if (count > 0)
 615     {
 616       direction = 1;
 617       if (! end) end = ZV;
 618     }
 619   else
 620     {
 621       direction = -1;
 622       if (! end) end = BEGV;
 623     }
 624
 625   newline_cache_on_off (current_buffer);
 626   newline_cache = current_buffer->newline_cache;
 627
 628   if (shortage != 0)
 629     *shortage = 0;
 630
 631   immediate_quit = allow_quit;
 632
 633   if (count > 0)
 634     while (start != end)
 635       {
 636         /* Our innermost scanning loop is very simple; it doesn't know
 637            about gaps, buffer ends, or the newline cache.  ceiling is
 638            the position of the last character before the next such
 639            obstacle --- the last character the dumb search loop should
 640            examine.  */
 641         int ceiling_byte = CHAR_TO_BYTE (end) - 1;
 642         int start_byte = CHAR_TO_BYTE (start);
 643         int tem;
 644
 645         /* If we're looking for a newline, consult the newline cache
 646            to see where we can avoid some scanning.  */
 647         if (target == '\n' && newline_cache)
 648           {
 649             int next_change;
 650             immediate_quit = 0;
 651             while (region_cache_forward
 652                    (current_buffer, newline_cache, start_byte, &next_change))
 653               start_byte = next_change;
 654             immediate_quit = allow_quit;
 655
 656             /* START should never be after END.  */
 657             if (start_byte > ceiling_byte)
 658               start_byte = ceiling_byte;
 659
 660             /* Now the text after start is an unknown region, and
 661                next_change is the position of the next known region. */
 662             ceiling_byte = min (next_change - 1, ceiling_byte);
 663           }
 664
 665         /* The dumb loop can only scan text stored in contiguous
 666            bytes. BUFFER_CEILING_OF returns the last character
 667            position that is contiguous, so the ceiling is the
 668            position after that.  */
 669         tem = BUFFER_CEILING_OF (start_byte);
 670         ceiling_byte = min (tem, ceiling_byte);
 671
 672         {
 673           /* The termination address of the dumb loop.  */
 674           register unsigned char *ceiling_addr
 675             = BYTE_POS_ADDR (ceiling_byte) + 1;
 676           register unsigned char *cursor
 677             = BYTE_POS_ADDR (start_byte);
 678           unsigned char *base = cursor;
 679
 680           while (cursor < ceiling_addr)
 681             {
 682               unsigned char *scan_start = cursor;
 683
 684               /* The dumb loop.  */
 685               while (*cursor != target && ++cursor < ceiling_addr)
 686                 ;
 687
 688               /* If we're looking for newlines, cache the fact that
 689                  the region from start to cursor is free of them. */
 690               if (target == '\n' && newline_cache)
 691                 know_region_cache (current_buffer, newline_cache,
 692                                    start_byte + scan_start - base,
 693                                    start_byte + cursor - base);
 694
 695               /* Did we find the target character?  */
 696               if (cursor < ceiling_addr)
 697                 {
 698                   if (--count == 0)
 699                     {
 700                       immediate_quit = 0;
 701                       return BYTE_TO_CHAR (start_byte + cursor - base + 1);
 702                     }
 703                   cursor++;
 704                 }
 705             }
 706
 707           start = BYTE_TO_CHAR (start_byte + cursor - base);
 708         }
 709       }
 710   else
 711     while (start > end)
 712       {
 713         /* The last character to check before the next obstacle.  */
 714         int ceiling_byte = CHAR_TO_BYTE (end);
 715         int start_byte = CHAR_TO_BYTE (start);
 716         int tem;
 717
 718         /* Consult the newline cache, if appropriate.  */
 719         if (target == '\n' && newline_cache)
 720           {
 721             int next_change;
 722             immediate_quit = 0;
 723             while (region_cache_backward
 724                    (current_buffer, newline_cache, start_byte, &next_change))
 725               start_byte = next_change;
 726             immediate_quit = allow_quit;
 727
 728             /* Start should never be at or before end.  */
 729             if (start_byte <= ceiling_byte)
 730               start_byte = ceiling_byte + 1;
 731
 732             /* Now the text before start is an unknown region, and
 733                next_change is the position of the next known region. */
 734             ceiling_byte = max (next_change, ceiling_byte);
 735           }
 736
 737         /* Stop scanning before the gap.  */
 738         tem = BUFFER_FLOOR_OF (start_byte - 1);
 739         ceiling_byte = max (tem, ceiling_byte);
 740
 741         {
 742           /* The termination address of the dumb loop.  */
 743           register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
 744           register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
 745           unsigned char *base = cursor;
 746
 747           while (cursor >= ceiling_addr)
 748             {
 749               unsigned char *scan_start = cursor;
 750
 751               while (*cursor != target && --cursor >= ceiling_addr)
 752                 ;
 753
 754               /* If we're looking for newlines, cache the fact that
 755                  the region from after the cursor to start is free of them.  */
 756               if (target == '\n' && newline_cache)
 757                 know_region_cache (current_buffer, newline_cache,
 758                                    start_byte + cursor - base,
 759                                    start_byte + scan_start - base);
 760
 761               /* Did we find the target character?  */
 762               if (cursor >= ceiling_addr)
 763                 {
 764                   if (++count >= 0)
 765                     {
 766                       immediate_quit = 0;
 767                       return BYTE_TO_CHAR (start_byte + cursor - base);
 768                     }
 769                   cursor--;
 770                 }
 771             }
 772
 773           start = BYTE_TO_CHAR (start_byte + cursor - base);
 774         }
 775       }
 776
 777   immediate_quit = 0;
 778   if (shortage != 0)
 779     *shortage = count * direction;
 780   return start;
 781 }
 782 \f
 783 /* Search for COUNT instances of a line boundary, which means either a
 784    newline or (if selective display enabled) a carriage return.
 785    Start at START.  If COUNT is negative, search backwards.
 786
 787    We report the resulting position by calling TEMP_SET_PT_BOTH.
 788
 789    If we find COUNT instances. we position after (always after,
 790    even if scanning backwards) the COUNTth match, and return 0.
 791
 792    If we don't find COUNT instances before reaching the end of the
 793    buffer (or the beginning, if scanning backwards), we return
 794    the number of line boundaries left unfound, and position at
 795    the limit we bumped up against.
 796
 797    If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
 798    except in special cases.  */
 799
 800 int
 801 scan_newline (start, start_byte, limit, limit_byte, count, allow_quit)
 802      int start, start_byte;
 803      int limit, limit_byte;
 804      register int count;
 805      int allow_quit;
 806 {
 807   int direction = ((count > 0) ? 1 : -1);
 808
 809   register unsigned char *cursor;
 810   unsigned char *base;
 811
 812   register int ceiling;
 813   register unsigned char *ceiling_addr;
 814
 815   int old_immediate_quit = immediate_quit;
 816
 817   /* The code that follows is like scan_buffer
 818      but checks for either newline or carriage return.  */
 819
 820   if (allow_quit)
 821     immediate_quit++;
 822
 823   start_byte = CHAR_TO_BYTE (start);
 824
 825   if (count > 0)
 826     {
 827       while (start_byte < limit_byte)
 828         {
 829           ceiling =  BUFFER_CEILING_OF (start_byte);
 830           ceiling = min (limit_byte - 1, ceiling);
 831           ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
 832           base = (cursor = BYTE_POS_ADDR (start_byte));
 833           while (1)
 834             {
 835               while (*cursor != '\n' && ++cursor != ceiling_addr)
 836                 ;
 837
 838               if (cursor != ceiling_addr)
 839                 {
 840                   if (--count == 0)
 841                     {
 842                       immediate_quit = old_immediate_quit;
 843                       start_byte = start_byte + cursor - base + 1;
 844                       start = BYTE_TO_CHAR (start_byte);
 845                       TEMP_SET_PT_BOTH (start, start_byte);
 846                       return 0;
 847                     }
 848                   else
 849                     if (++cursor == ceiling_addr)
 850                       break;
 851                 }
 852               else
 853                 break;
 854             }
 855           start_byte += cursor - base;
 856         }
 857     }
 858   else
 859     {
 860       while (start_byte > limit_byte)
 861         {
 862           ceiling = BUFFER_FLOOR_OF (start_byte - 1);
 863           ceiling = max (limit_byte, ceiling);
 864           ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
 865           base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
 866           while (1)
 867             {
 868               while (--cursor != ceiling_addr && *cursor != '\n')
 869                 ;
 870
 871               if (cursor != ceiling_addr)
 872                 {
 873                   if (++count == 0)
 874                     {
 875                       immediate_quit = old_immediate_quit;
 876                       /* Return the position AFTER the match we found.  */
 877                       start_byte = start_byte + cursor - base + 1;
 878                       start = BYTE_TO_CHAR (start_byte);
 879                       TEMP_SET_PT_BOTH (start, start_byte);
 880                       return 0;
 881                     }
 882                 }
 883               else
 884                 break;
 885             }
 886           /* Here we add 1 to compensate for the last decrement
 887              of CURSOR, which took it past the valid range.  */
 888           start_byte += cursor - base + 1;
 889         }
 890     }
 891
 892   TEMP_SET_PT_BOTH (limit, limit_byte);
 893   immediate_quit = old_immediate_quit;
 894
 895   return count * direction;
 896 }
 897
 898 int
 899 find_next_newline_no_quit (from, cnt)
 900      register int from, cnt;
 901 {
 902   return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0);
 903 }
 904
 905 /* Like find_next_newline, but returns position before the newline,
 906    not after, and only search up to TO.  This isn't just
 907    find_next_newline (...)-1, because you might hit TO.  */
 908
 909 int
 910 find_before_next_newline (from, to, cnt)
 911      int from, to, cnt;
 912 {
 913   int shortage;
 914   int pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);
 915
 916   if (shortage == 0)
 917     pos--;
 918
 919   return pos;
 920 }
 921 \f
 922 /* Subroutines of Lisp buffer search functions. */
 923
 924 static Lisp_Object
 925 search_command (string, bound, noerror, count, direction, RE, posix)
 926      Lisp_Object string, bound, noerror, count;
 927      int direction;
 928      int RE;
 929      int posix;
 930 {
 931   register int np;
 932   int lim, lim_byte;
 933   int n = direction;
 934
 935   if (!NILP (count))
 936     {
 937       CHECK_NUMBER (count);
 938       n *= XINT (count);
 939     }
 940
 941   CHECK_STRING (string);
 942   if (NILP (bound))
 943     {
 944       if (n > 0)
 945         lim = ZV, lim_byte = ZV_BYTE;
 946       else
 947         lim = BEGV, lim_byte = BEGV_BYTE;
 948     }
 949   else
 950     {
 951       CHECK_NUMBER_COERCE_MARKER (bound);
 952       lim = XINT (bound);
 953       if (n > 0 ? lim < PT : lim > PT)
 954         error ("Invalid search bound (wrong side of point)");
 955       if (lim > ZV)
 956         lim = ZV, lim_byte = ZV_BYTE;
 957       else if (lim < BEGV)
 958         lim = BEGV, lim_byte = BEGV_BYTE;
 959       else
 960         lim_byte = CHAR_TO_BYTE (lim);
 961     }
 962
 963   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
 964   XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
 965     = current_buffer->case_eqv_table;
 966
 967   np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
 968                       (!NILP (current_buffer->case_fold_search)
 969                        ? current_buffer->case_canon_table
 970                        : Qnil),
 971                       (!NILP (current_buffer->case_fold_search)
 972                        ? current_buffer->case_eqv_table
 973                        : Qnil),
 974                       posix);
 975   if (np <= 0)
 976     {
 977       if (NILP (noerror))
 978         xsignal1 (Qsearch_failed, string);
 979
 980       if (!EQ (noerror, Qt))
 981         {
 982           if (lim < BEGV || lim > ZV)
 983             abort ();
 984           SET_PT_BOTH (lim, lim_byte);
 985           return Qnil;
 986 #if 0 /* This would be clean, but maybe programs depend on
 987          a value of nil here.  */
 988           np = lim;
 989 #endif
 990         }
 991       else
 992         return Qnil;
 993     }
 994
 995   if (np < BEGV || np > ZV)
 996     abort ();
 997
 998   SET_PT (np);
 999
1000   return make_number (np);
1001 }
1002 \f
1003 /* Return 1 if REGEXP it matches just one constant string.  */
1004
1005 static int
1006 trivial_regexp_p (regexp)
1007      Lisp_Object regexp;
1008 {
1009   int len = SBYTES (regexp);
1010   unsigned char *s = SDATA (regexp);
1011   while (--len >= 0)
1012     {
1013       switch (*s++)
1014         {
1015         case '.': case '*': case '+': case '?': case '[': case '^': case '$':
1016           return 0;
1017         case '\\':
1018           if (--len < 0)
1019             return 0;
1020           switch (*s++)
1021             {
1022             case '|': case '(': case ')': case '`': case '\'': case 'b':
1023             case 'B': case '<': case '>': case 'w': case 'W': case 's':
1024             case 'S': case '=': case '{': case '}': case '_':
1025             case 'c': case 'C': /* for categoryspec and notcategoryspec */
1026             case '1': case '2': case '3': case '4': case '5':
1027             case '6': case '7': case '8': case '9':
1028               return 0;
1029             }
1030         }
1031     }
1032   return 1;
1033 }
1034
1035 /* Search for the n'th occurrence of STRING in the current buffer,
1036    starting at position POS and stopping at position LIM,
1037    treating STRING as a literal string if RE is false or as
1038    a regular expression if RE is true.
1039
1040    If N is positive, searching is forward and LIM must be greater than POS.
1041    If N is negative, searching is backward and LIM must be less than POS.
1042
1043    Returns -x if x occurrences remain to be found (x > 0),
1044    or else the position at the beginning of the Nth occurrence
1045    (if searching backward) or the end (if searching forward).
1046
1047    POSIX is nonzero if we want full backtracking (POSIX style)
1048    for this pattern.  0 means backtrack only enough to get a valid match.  */
1049
1050 #define TRANSLATE(out, trt, d)                  \
1051 do                                              \
1052   {                                             \
1053     if (! NILP (trt))                           \
1054       {                                         \
1055         Lisp_Object temp;                       \
1056         temp = Faref (trt, make_number (d));    \
1057         if (INTEGERP (temp))                    \
1058           out = XINT (temp);                    \
1059         else                                    \
1060           out = d;                              \
1061       }                                         \
1062     else                                        \
1063       out = d;                                  \
1064   }                                             \
1065 while (0)
1066
1067 /* Only used in search_buffer, to record the end position of the match
1068    when searching regexps and SEARCH_REGS should not be changed
1069    (i.e. Vinhibit_changing_match_data is non-nil).  */
1070 static struct re_registers search_regs_1;
1071
1072 static int
1073 search_buffer (string, pos, pos_byte, lim, lim_byte, n,
1074                RE, trt, inverse_trt, posix)
1075      Lisp_Object string;
1076      int pos;
1077      int pos_byte;
1078      int lim;
1079      int lim_byte;
1080      int n;
1081      int RE;
1082      Lisp_Object trt;
1083      Lisp_Object inverse_trt;
1084      int posix;
1085 {
1086   int len = SCHARS (string);
1087   int len_byte = SBYTES (string);
1088   register int i;
1089
1090   if (running_asynch_code)
1091     save_search_regs ();
1092
1093   /* Searching 0 times means don't move.  */
1094   /* Null string is found at starting position.  */
1095   if (len == 0 || n == 0)
1096     {
1097       set_search_regs (pos_byte, 0);
1098       return pos;
1099     }
1100
1101   if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp)))
1102     {
1103       unsigned char *p1, *p2;
1104       int s1, s2;
1105       struct re_pattern_buffer *bufp;
1106
1107       bufp = compile_pattern (string,
1108                               (NILP (Vinhibit_changing_match_data)
1109                                ? &search_regs : &search_regs_1),
1110                               trt, posix,
1111                               !NILP (current_buffer->enable_multibyte_characters));
1112
1113       immediate_quit = 1;       /* Quit immediately if user types ^G,
1114                                    because letting this function finish
1115                                    can take too long. */
1116       QUIT;                     /* Do a pending quit right away,
1117                                    to avoid paradoxical behavior */
1118       /* Get pointers and sizes of the two strings
1119          that make up the visible portion of the buffer. */
1120
1121       p1 = BEGV_ADDR;
1122       s1 = GPT_BYTE - BEGV_BYTE;
1123       p2 = GAP_END_ADDR;
1124       s2 = ZV_BYTE - GPT_BYTE;
1125       if (s1 < 0)
1126         {
1127           p2 = p1;
1128           s2 = ZV_BYTE - BEGV_BYTE;
1129           s1 = 0;
1130         }
1131       if (s2 < 0)
1132         {
1133           s1 = ZV_BYTE - BEGV_BYTE;
1134           s2 = 0;
1135         }
1136       re_match_object = Qnil;
1137
1138       while (n < 0)
1139         {
1140           int val;
1141           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1142                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1143                              (NILP (Vinhibit_changing_match_data)
1144                               ? &search_regs : &search_regs_1),
1145                              /* Don't allow match past current point */
1146                              pos_byte - BEGV_BYTE);
1147           if (val == -2)
1148             {
1149               matcher_overflow ();
1150             }
1151           if (val >= 0)
1152             {
1153               if (NILP (Vinhibit_changing_match_data))
1154                 {
1155                   pos_byte = search_regs.start[0] + BEGV_BYTE;
1156                   for (i = 0; i < search_regs.num_regs; i++)
1157                     if (search_regs.start[i] >= 0)
1158                       {
1159                         search_regs.start[i]
1160                           = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1161                         search_regs.end[i]
1162                           = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1163                       }
1164                   XSETBUFFER (last_thing_searched, current_buffer);
1165                   /* Set pos to the new position. */
1166                   pos = search_regs.start[0];
1167                 }
1168               else
1169                 {
1170                   pos_byte = search_regs_1.start[0] + BEGV_BYTE;
1171                   /* Set pos to the new position.  */
1172                   pos = BYTE_TO_CHAR (search_regs_1.start[0] + BEGV_BYTE);
1173                 }
1174             }
1175           else
1176             {
1177               immediate_quit = 0;
1178               return (n);
1179             }
1180           n++;
1181         }
1182       while (n > 0)
1183         {
1184           int val;
1185           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1186                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1187                              (NILP (Vinhibit_changing_match_data)
1188                               ? &search_regs : &search_regs_1),
1189                              lim_byte - BEGV_BYTE);
1190           if (val == -2)
1191             {
1192               matcher_overflow ();
1193             }
1194           if (val >= 0)
1195             {
1196               if (NILP (Vinhibit_changing_match_data))
1197                 {
1198                   pos_byte = search_regs.end[0] + BEGV_BYTE;
1199                   for (i = 0; i < search_regs.num_regs; i++)
1200                     if (search_regs.start[i] >= 0)
1201                       {
1202                         search_regs.start[i]
1203                           = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1204                         search_regs.end[i]
1205                           = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1206                       }
1207                   XSETBUFFER (last_thing_searched, current_buffer);
1208                   pos = search_regs.end[0];
1209                 }
1210               else
1211                 {
1212                   pos_byte = search_regs_1.end[0] + BEGV_BYTE;
1213                   pos = BYTE_TO_CHAR (search_regs_1.end[0] + BEGV_BYTE);
1214                 }
1215             }
1216           else
1217             {
1218               immediate_quit = 0;
1219               return (0 - n);
1220             }
1221           n--;
1222         }
1223       immediate_quit = 0;
1224       return (pos);
1225     }
1226   else                          /* non-RE case */
1227     {
1228       unsigned char *raw_pattern, *pat;
1229       int raw_pattern_size;
1230       int raw_pattern_size_byte;
1231       unsigned char *patbuf;
1232       int multibyte = !NILP (current_buffer->enable_multibyte_characters);
1233       unsigned char *base_pat;
1234       /* Set to positive if we find a non-ASCII char that need
1235          translation.  Otherwise set to zero later.  */
1236       int char_base = -1;
1237       int boyer_moore_ok = 1;
1238
1239       /* MULTIBYTE says whether the text to be searched is multibyte.
1240          We must convert PATTERN to match that, or we will not really
1241          find things right.  */
1242
1243       if (multibyte == STRING_MULTIBYTE (string))
1244         {
1245           raw_pattern = (unsigned char *) SDATA (string);
1246           raw_pattern_size = SCHARS (string);
1247           raw_pattern_size_byte = SBYTES (string);
1248         }
1249       else if (multibyte)
1250         {
1251           raw_pattern_size = SCHARS (string);
1252           raw_pattern_size_byte
1253             = count_size_as_multibyte (SDATA (string),
1254                                        raw_pattern_size);
1255           raw_pattern = (unsigned char *) alloca (raw_pattern_size_byte + 1);
1256           copy_text (SDATA (string), raw_pattern,
1257                      SCHARS (string), 0, 1);
1258         }
1259       else
1260         {
1261           /* Converting multibyte to single-byte.
1262
1263              ??? Perhaps this conversion should be done in a special way
1264              by subtracting nonascii-insert-offset from each non-ASCII char,
1265              so that only the multibyte chars which really correspond to
1266              the chosen single-byte character set can possibly match.  */
1267           raw_pattern_size = SCHARS (string);
1268           raw_pattern_size_byte = SCHARS (string);
1269           raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
1270           copy_text (SDATA (string), raw_pattern,
1271                      SBYTES (string), 1, 0);
1272         }
1273
1274       /* Copy and optionally translate the pattern.  */
1275       len = raw_pattern_size;
1276       len_byte = raw_pattern_size_byte;
1277       patbuf = (unsigned char *) alloca (len * MAX_MULTIBYTE_LENGTH);
1278       pat = patbuf;
1279       base_pat = raw_pattern;
1280       if (multibyte)
1281         {
1282           /* Fill patbuf by translated characters in STRING while
1283              checking if we can use boyer-moore search.  If TRT is
1284              non-nil, we can use boyer-moore search only if TRT can be
1285              represented by the byte array of 256 elements.  For that,
1286              all non-ASCII case-equivalents of all case-senstive
1287              characters in STRING must belong to the same charset and
1288              row.  */
1289
1290           while (--len >= 0)
1291             {
1292               unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str;
1293               int c, translated, inverse;
1294               int in_charlen, charlen;
1295
1296               /* If we got here and the RE flag is set, it's because we're
1297                  dealing with a regexp known to be trivial, so the backslash
1298                  just quotes the next character.  */
1299               if (RE && *base_pat == '\\')
1300                 {
1301                   len--;
1302                   raw_pattern_size--;
1303                   len_byte--;
1304                   base_pat++;
1305                 }
1306
1307               c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen);
1308
1309               if (NILP (trt))
1310                 {
1311                   str = base_pat;
1312                   charlen = in_charlen;
1313                 }
1314               else
1315                 {
1316                   /* Translate the character.  */
1317                   TRANSLATE (translated, trt, c);
1318                   charlen = CHAR_STRING (translated, str_base);
1319                   str = str_base;
1320
1321                   /* Check if C has any other case-equivalents.  */
1322                   TRANSLATE (inverse, inverse_trt, c);
1323                   /* If so, check if we can use boyer-moore.  */
1324                   if (c != inverse && boyer_moore_ok)
1325                     {
1326                       /* Check if all equivalents belong to the same
1327                          group of characters.  Note that the check of C
1328                          itself is done by the last iteration.  */
1329                       int this_char_base = -1;
1330
1331                       while (boyer_moore_ok)
1332                         {
1333                           if (ASCII_BYTE_P (inverse))
1334                             {
1335                               if (this_char_base > 0)
1336                                 boyer_moore_ok = 0;
1337                               else
1338                                 {
1339                                   this_char_base = 0;
1340                                   if (char_base < 0)
1341                                     char_base = this_char_base;
1342                                 }
1343                             }
1344                           else if (CHAR_BYTE8_P (inverse))
1345                             /* Boyer-moore search can't handle a
1346                                translation of an eight-bit
1347                                character.  */
1348                             boyer_moore_ok = 0;
1349                           else if (this_char_base < 0)
1350                             {
1351                               this_char_base = inverse & ~0x3F;
1352                               if (char_base < 0)
1353                                 char_base = this_char_base;
1354                               else if (char_base > 0
1355                                        && this_char_base != char_base)
1356                                 boyer_moore_ok = 0;
1357                             }
1358                           else if ((inverse & ~0x3F) != this_char_base)
1359                             boyer_moore_ok = 0;
1360                           if (c == inverse)
1361                             break;
1362                           TRANSLATE (inverse, inverse_trt, inverse);
1363                         }
1364                     }
1365                 }
1366               if (char_base < 0)
1367                 char_base = 0;
1368
1369               /* Store this character into the translated pattern.  */
1370               bcopy (str, pat, charlen);
1371               pat += charlen;
1372               base_pat += in_charlen;
1373               len_byte -= in_charlen;
1374             }
1375         }
1376       else
1377         {
1378           /* Unibyte buffer.  */
1379           char_base = 0;
1380           while (--len >= 0)
1381             {
1382               int c, translated;
1383
1384               /* If we got here and the RE flag is set, it's because we're
1385                  dealing with a regexp known to be trivial, so the backslash
1386                  just quotes the next character.  */
1387               if (RE && *base_pat == '\\')
1388                 {
1389                   len--;
1390                   raw_pattern_size--;
1391                   base_pat++;
1392                 }
1393               c = *base_pat++;
1394               TRANSLATE (translated, trt, c);
1395               *pat++ = translated;
1396             }
1397         }
1398
1399       len_byte = pat - patbuf;
1400       len = raw_pattern_size;
1401       pat = base_pat = patbuf;
1402
1403       if (boyer_moore_ok)
1404         return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
1405                             pos, pos_byte, lim, lim_byte,
1406                             char_base);
1407       else
1408         return simple_search (n, pat, len, len_byte, trt,
1409                               pos, pos_byte, lim, lim_byte);
1410     }
1411 }
1412 \f
1413 /* Do a simple string search N times for the string PAT,
1414    whose length is LEN/LEN_BYTE,
1415    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1416    TRT is the translation table.
1417
1418    Return the character position where the match is found.
1419    Otherwise, if M matches remained to be found, return -M.
1420
1421    This kind of search works regardless of what is in PAT and
1422    regardless of what is in TRT.  It is used in cases where
1423    boyer_moore cannot work.  */
1424
1425 static int
1426 simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
1427      int n;
1428      unsigned char *pat;
1429      int len, len_byte;
1430      Lisp_Object trt;
1431      int pos, pos_byte;
1432      int lim, lim_byte;
1433 {
1434   int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1435   int forward = n > 0;
1436   /* Number of buffer bytes matched.  Note that this may be different
1437      from len_byte in a multibyte buffer.  */
1438   int match_byte;
1439
1440   if (lim > pos && multibyte)
1441     while (n > 0)
1442       {
1443         while (1)
1444           {
1445             /* Try matching at position POS.  */
1446             int this_pos = pos;
1447             int this_pos_byte = pos_byte;
1448             int this_len = len;
1449             int this_len_byte = len_byte;
1450             unsigned char *p = pat;
1451             if (pos + len > lim || pos_byte + len_byte > lim_byte)
1452               goto stop;
1453
1454             while (this_len > 0)
1455               {
1456                 int charlen, buf_charlen;
1457                 int pat_ch, buf_ch;
1458
1459                 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1460                 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1461                                                  ZV_BYTE - this_pos_byte,
1462                                                  buf_charlen);
1463                 TRANSLATE (buf_ch, trt, buf_ch);
1464
1465                 if (buf_ch != pat_ch)
1466                   break;
1467
1468                 this_len_byte -= charlen;
1469                 this_len--;
1470                 p += charlen;
1471
1472                 this_pos_byte += buf_charlen;
1473                 this_pos++;
1474               }
1475
1476             if (this_len == 0)
1477               {
1478                 match_byte = this_pos_byte - pos_byte;
1479                 pos += len;
1480                 pos_byte += match_byte;
1481                 break;
1482               }
1483
1484             INC_BOTH (pos, pos_byte);
1485           }
1486
1487         n--;
1488       }
1489   else if (lim > pos)
1490     while (n > 0)
1491       {
1492         while (1)
1493           {
1494             /* Try matching at position POS.  */
1495             int this_pos = pos;
1496             int this_len = len;
1497             unsigned char *p = pat;
1498
1499             if (pos + len > lim)
1500               goto stop;
1501
1502             while (this_len > 0)
1503               {
1504                 int pat_ch = *p++;
1505                 int buf_ch = FETCH_BYTE (this_pos);
1506                 TRANSLATE (buf_ch, trt, buf_ch);
1507
1508                 if (buf_ch != pat_ch)
1509                   break;
1510
1511                 this_len--;
1512                 this_pos++;
1513               }
1514
1515             if (this_len == 0)
1516               {
1517                 match_byte = len;
1518                 pos += len;
1519                 break;
1520               }
1521
1522             pos++;
1523           }
1524
1525         n--;
1526       }
1527   /* Backwards search.  */
1528   else if (lim < pos && multibyte)
1529     while (n < 0)
1530       {
1531         while (1)
1532           {
1533             /* Try matching at position POS.  */
1534             int this_pos = pos - len;
1535             int this_pos_byte;
1536             int this_len = len;
1537             int this_len_byte = len_byte;
1538             unsigned char *p = pat;
1539
1540             if (this_pos < lim || (pos_byte - len_byte) < lim_byte)
1541               goto stop;
1542             this_pos_byte = CHAR_TO_BYTE (this_pos);
1543             match_byte = pos_byte - this_pos_byte;
1544
1545             while (this_len > 0)
1546               {
1547                 int charlen, buf_charlen;
1548                 int pat_ch, buf_ch;
1549
1550                 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1551                 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1552                                                  ZV_BYTE - this_pos_byte,
1553                                                  buf_charlen);
1554                 TRANSLATE (buf_ch, trt, buf_ch);
1555
1556                 if (buf_ch != pat_ch)
1557                   break;
1558
1559                 this_len_byte -= charlen;
1560                 this_len--;
1561                 p += charlen;
1562                 this_pos_byte += buf_charlen;
1563                 this_pos++;
1564               }
1565
1566             if (this_len == 0)
1567               {
1568                 pos -= len;
1569                 pos_byte -= match_byte;
1570                 break;
1571               }
1572
1573             DEC_BOTH (pos, pos_byte);
1574           }
1575
1576         n++;
1577       }
1578   else if (lim < pos)
1579     while (n < 0)
1580       {
1581         while (1)
1582           {
1583             /* Try matching at position POS.  */
1584             int this_pos = pos - len;
1585             int this_len = len;
1586             unsigned char *p = pat;
1587
1588             if (this_pos < lim)
1589               goto stop;
1590
1591             while (this_len > 0)
1592               {
1593                 int pat_ch = *p++;
1594                 int buf_ch = FETCH_BYTE (this_pos);
1595                 TRANSLATE (buf_ch, trt, buf_ch);
1596
1597                 if (buf_ch != pat_ch)
1598                   break;
1599                 this_len--;
1600                 this_pos++;
1601               }
1602
1603             if (this_len == 0)
1604               {
1605                 match_byte = len;
1606                 pos -= len;
1607                 break;
1608               }
1609
1610             pos--;
1611           }
1612
1613         n++;
1614       }
1615
1616  stop:
1617   if (n == 0)
1618     {
1619       if (forward)
1620         set_search_regs ((multibyte ? pos_byte : pos) - match_byte, match_byte);
1621       else
1622         set_search_regs (multibyte ? pos_byte : pos, match_byte);
1623
1624       return pos;
1625     }
1626   else if (n > 0)
1627     return -n;
1628   else
1629     return n;
1630 }
1631 \f
1632 /* Do Boyer-Moore search N times for the string BASE_PAT,
1633    whose length is LEN/LEN_BYTE,
1634    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1635    DIRECTION says which direction we search in.
1636    TRT and INVERSE_TRT are translation tables.
1637    Characters in PAT are already translated by TRT.
1638
1639    This kind of search works if all the characters in BASE_PAT that
1640    have nontrivial translation are the same aside from the last byte.
1641    This makes it possible to translate just the last byte of a
1642    character, and do so after just a simple test of the context.
1643    CHAR_BASE is nonzero if there is such a non-ASCII character.
1644
1645    If that criterion is not satisfied, do not call this function.  */
1646
1647 static int
1648 boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
1649              pos, pos_byte, lim, lim_byte, char_base)
1650      int n;
1651      unsigned char *base_pat;
1652      int len, len_byte;
1653      Lisp_Object trt;
1654      Lisp_Object inverse_trt;
1655      int pos, pos_byte;
1656      int lim, lim_byte;
1657      int char_base;
1658 {
1659   int direction = ((n > 0) ? 1 : -1);
1660   register int dirlen;
1661   int infinity, limit, stride_for_teases = 0;
1662   register int *BM_tab;
1663   int *BM_tab_base;
1664   register unsigned char *cursor, *p_limit;
1665   register int i, j;
1666   unsigned char *pat, *pat_end;
1667   int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1668
1669   unsigned char simple_translate[0400];
1670   /* These are set to the preceding bytes of a byte to be translated
1671      if char_base is nonzero.  As the maximum byte length of a
1672      multibyte character is 5, we have to check at most four previous
1673      bytes.  */
1674   int translate_prev_byte1 = 0;
1675   int translate_prev_byte2 = 0;
1676   int translate_prev_byte3 = 0;
1677   int translate_prev_byte4 = 0;
1678
1679   BM_tab = (int *) alloca (0400 * sizeof (int));
1680
1681   /* The general approach is that we are going to maintain that we know */
1682   /* the first (closest to the present position, in whatever direction */
1683   /* we're searching) character that could possibly be the last */
1684   /* (furthest from present position) character of a valid match.  We */
1685   /* advance the state of our knowledge by looking at that character */
1686   /* and seeing whether it indeed matches the last character of the */
1687   /* pattern.  If it does, we take a closer look.  If it does not, we */
1688   /* move our pointer (to putative last characters) as far as is */
1689   /* logically possible.  This amount of movement, which I call a */
1690   /* stride, will be the length of the pattern if the actual character */
1691   /* appears nowhere in the pattern, otherwise it will be the distance */
1692   /* from the last occurrence of that character to the end of the */
1693   /* pattern. */
1694   /* As a coding trick, an enormous stride is coded into the table for */
1695   /* characters that match the last character.  This allows use of only */
1696   /* a single test, a test for having gone past the end of the */
1697   /* permissible match region, to test for both possible matches (when */
1698   /* the stride goes past the end immediately) and failure to */
1699   /* match (where you get nudged past the end one stride at a time). */
1700
1701   /* Here we make a "mickey mouse" BM table.  The stride of the search */
1702   /* is determined only by the last character of the putative match. */
1703   /* If that character does not match, we will stride the proper */
1704   /* distance to propose a match that superimposes it on the last */
1705   /* instance of a character that matches it (per trt), or misses */
1706   /* it entirely if there is none. */
1707
1708   dirlen = len_byte * direction;
1709   infinity = dirlen - (lim_byte + pos_byte + len_byte + len_byte) * direction;
1710
1711   /* Record position after the end of the pattern.  */
1712   pat_end = base_pat + len_byte;
1713   /* BASE_PAT points to a character that we start scanning from.
1714      It is the first character in a forward search,
1715      the last character in a backward search.  */
1716   if (direction < 0)
1717     base_pat = pat_end - 1;
1718
1719   BM_tab_base = BM_tab;
1720   BM_tab += 0400;
1721   j = dirlen;           /* to get it in a register */
1722   /* A character that does not appear in the pattern induces a */
1723   /* stride equal to the pattern length. */
1724   while (BM_tab_base != BM_tab)
1725     {
1726       *--BM_tab = j;
1727       *--BM_tab = j;
1728       *--BM_tab = j;
1729       *--BM_tab = j;
1730     }
1731
1732   /* We use this for translation, instead of TRT itself.
1733      We fill this in to handle the characters that actually
1734      occur in the pattern.  Others don't matter anyway!  */
1735   bzero (simple_translate, sizeof simple_translate);
1736   for (i = 0; i < 0400; i++)
1737     simple_translate[i] = i;
1738
1739   if (char_base)
1740     {
1741       /* Setup translate_prev_byte1/2/3/4 from CHAR_BASE.  Only a
1742          byte following them are the target of translation.  */
1743       unsigned char str[MAX_MULTIBYTE_LENGTH];
1744       int len = CHAR_STRING (char_base, str);
1745
1746       translate_prev_byte1 = str[len - 2];
1747       if (len > 2)
1748         {
1749           translate_prev_byte2 = str[len - 3];
1750           if (len > 3)
1751             {
1752               translate_prev_byte3 = str[len - 4];
1753               if (len > 4)
1754                 translate_prev_byte4 = str[len - 5];
1755             }
1756         }
1757     }
1758
1759   i = 0;
1760   while (i != infinity)
1761     {
1762       unsigned char *ptr = base_pat + i;
1763       i += direction;
1764       if (i == dirlen)
1765         i = infinity;
1766       if (! NILP (trt))
1767         {
1768           /* If the byte currently looking at is the last of a
1769              character to check case-equivalents, set CH to that
1770              character.  An ASCII character and a non-ASCII character
1771              matching with CHAR_BASE are to be checked.  */
1772           int ch = -1;
1773
1774           if (ASCII_BYTE_P (*ptr) || ! multibyte)
1775             ch = *ptr;
1776           else if (char_base
1777                    && ((pat_end - ptr) == 1 || CHAR_HEAD_P (ptr[1])))
1778             {
1779               unsigned char *charstart = ptr - 1;
1780
1781               while (! (CHAR_HEAD_P (*charstart)))
1782                 charstart--;
1783               ch = STRING_CHAR (charstart, ptr - charstart + 1);
1784               if (char_base != (ch & ~0x3F))
1785                 ch = -1;
1786             }
1787
1788           if (ch >= 0200)
1789             j = (ch & 0x3F) | 0200;
1790           else
1791             j = *ptr;
1792
1793           if (i == infinity)
1794             stride_for_teases = BM_tab[j];
1795
1796           BM_tab[j] = dirlen - i;
1797           /* A translation table is accompanied by its inverse -- see */
1798           /* comment following downcase_table for details */
1799           if (ch >= 0)
1800             {
1801               int starting_ch = ch;
1802               int starting_j = j;
1803
1804               while (1)
1805                 {
1806                   TRANSLATE (ch, inverse_trt, ch);
1807                   if (ch >= 0200)
1808                     j = (ch & 0x3F) | 0200;
1809                   else
1810                     j = ch;
1811
1812                   /* For all the characters that map into CH,
1813                      set up simple_translate to map the last byte
1814                      into STARTING_J.  */
1815                   simple_translate[j] = starting_j;
1816                   if (ch == starting_ch)
1817                     break;
1818                   BM_tab[j] = dirlen - i;
1819                 }
1820             }
1821         }
1822       else
1823         {
1824           j = *ptr;
1825
1826           if (i == infinity)
1827             stride_for_teases = BM_tab[j];
1828           BM_tab[j] = dirlen - i;
1829         }
1830       /* stride_for_teases tells how much to stride if we get a */
1831       /* match on the far character but are subsequently */
1832       /* disappointed, by recording what the stride would have been */
1833       /* for that character if the last character had been */
1834       /* different. */
1835     }
1836   infinity = dirlen - infinity;
1837   pos_byte += dirlen - ((direction > 0) ? direction : 0);
1838   /* loop invariant - POS_BYTE points at where last char (first
1839      char if reverse) of pattern would align in a possible match.  */
1840   while (n != 0)
1841     {
1842       int tail_end;
1843       unsigned char *tail_end_ptr;
1844
1845       /* It's been reported that some (broken) compiler thinks that
1846          Boolean expressions in an arithmetic context are unsigned.
1847          Using an explicit ?1:0 prevents this.  */
1848       if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1849           < 0)
1850         return (n * (0 - direction));
1851       /* First we do the part we can by pointers (maybe nothing) */
1852       QUIT;
1853       pat = base_pat;
1854       limit = pos_byte - dirlen + direction;
1855       if (direction > 0)
1856         {
1857           limit = BUFFER_CEILING_OF (limit);
1858           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1859              can take on without hitting edge of buffer or the gap.  */
1860           limit = min (limit, pos_byte + 20000);
1861           limit = min (limit, lim_byte - 1);
1862         }
1863       else
1864         {
1865           limit = BUFFER_FLOOR_OF (limit);
1866           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1867              can take on without hitting edge of buffer or the gap.  */
1868           limit = max (limit, pos_byte - 20000);
1869           limit = max (limit, lim_byte);
1870         }
1871       tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1872       tail_end_ptr = BYTE_POS_ADDR (tail_end);
1873
1874       if ((limit - pos_byte) * direction > 20)
1875         {
1876           unsigned char *p2;
1877
1878           p_limit = BYTE_POS_ADDR (limit);
1879           p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1880           /* In this loop, pos + cursor - p2 is the surrogate for pos */
1881           while (1)             /* use one cursor setting as long as i can */
1882             {
1883               if (direction > 0) /* worth duplicating */
1884                 {
1885                   /* Use signed comparison if appropriate
1886                      to make cursor+infinity sure to be > p_limit.
1887                      Assuming that the buffer lies in a range of addresses
1888                      that are all "positive" (as ints) or all "negative",
1889                      either kind of comparison will work as long
1890                      as we don't step by infinity.  So pick the kind
1891                      that works when we do step by infinity.  */
1892                   if ((EMACS_INT) (p_limit + infinity) > (EMACS_INT) p_limit)
1893                     while ((EMACS_INT) cursor <= (EMACS_INT) p_limit)
1894                       cursor += BM_tab[*cursor];
1895                   else
1896                     while ((EMACS_UINT) cursor <= (EMACS_UINT) p_limit)
1897                       cursor += BM_tab[*cursor];
1898                 }
1899               else
1900                 {
1901                   if ((EMACS_INT) (p_limit + infinity) < (EMACS_INT) p_limit)
1902                     while ((EMACS_INT) cursor >= (EMACS_INT) p_limit)
1903                       cursor += BM_tab[*cursor];
1904                   else
1905                     while ((EMACS_UINT) cursor >= (EMACS_UINT) p_limit)
1906                       cursor += BM_tab[*cursor];
1907                 }
1908 /* If you are here, cursor is beyond the end of the searched region. */
1909 /* This can happen if you match on the far character of the pattern, */
1910 /* because the "stride" of that character is infinity, a number able */
1911 /* to throw you well beyond the end of the search.  It can also */
1912 /* happen if you fail to match within the permitted region and would */
1913 /* otherwise try a character beyond that region */
1914               if ((cursor - p_limit) * direction <= len_byte)
1915                 break;  /* a small overrun is genuine */
1916               cursor -= infinity; /* large overrun = hit */
1917               i = dirlen - direction;
1918               if (! NILP (trt))
1919                 {
1920                   while ((i -= direction) + direction != 0)
1921                     {
1922                       int ch;
1923                       cursor -= direction;
1924                       /* Translate only the last byte of a character.  */
1925                       if (! multibyte
1926                           || ((cursor == tail_end_ptr
1927                                || CHAR_HEAD_P (cursor[1]))
1928                               && (CHAR_HEAD_P (cursor[0])
1929                                   /* Check if this is the last byte of
1930                                      a translable character.  */
1931                                   || (translate_prev_byte1 == cursor[-1]
1932                                       && (CHAR_HEAD_P (translate_prev_byte1)
1933                                           || (translate_prev_byte2 == cursor[-2]
1934                                               && (CHAR_HEAD_P (translate_prev_byte2)
1935                                                   || (translate_prev_byte3 == cursor[-3]))))))))
1936                         ch = simple_translate[*cursor];
1937                       else
1938                         ch = *cursor;
1939                       if (pat[i] != ch)
1940                         break;
1941                     }
1942                 }
1943               else
1944                 {
1945                   while ((i -= direction) + direction != 0)
1946                     {
1947                       cursor -= direction;
1948                       if (pat[i] != *cursor)
1949                         break;
1950                     }
1951                 }
1952               cursor += dirlen - i - direction; /* fix cursor */
1953               if (i + direction == 0)
1954                 {
1955                   int position, start, end;
1956
1957                   cursor -= direction;
1958
1959                   position = pos_byte + cursor - p2 + ((direction > 0)
1960                                                        ? 1 - len_byte : 0);
1961                   set_search_regs (position, len_byte);
1962
1963                   if (NILP (Vinhibit_changing_match_data))
1964                     {
1965                       start = search_regs.start[0];
1966                       end = search_regs.end[0];
1967                     }
1968                   else
1969                     /* If Vinhibit_changing_match_data is non-nil,
1970                        search_regs will not be changed.  So let's
1971                        compute start and end here.  */
1972                     {
1973                       start = BYTE_TO_CHAR (position);
1974                       end = BYTE_TO_CHAR (position + len_byte);
1975                     }
1976
1977                   if ((n -= direction) != 0)
1978                     cursor += dirlen; /* to resume search */
1979                   else
1980                     return direction > 0 ? end : start;
1981                 }
1982               else
1983                 cursor += stride_for_teases; /* <sigh> we lose -  */
1984             }
1985           pos_byte += cursor - p2;
1986         }
1987       else
1988         /* Now we'll pick up a clump that has to be done the hard */
1989         /* way because it covers a discontinuity */
1990         {
1991           limit = ((direction > 0)
1992                    ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
1993                    : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
1994           limit = ((direction > 0)
1995                    ? min (limit + len_byte, lim_byte - 1)
1996                    : max (limit - len_byte, lim_byte));
1997           /* LIMIT is now the last value POS_BYTE can have
1998              and still be valid for a possible match.  */
1999           while (1)
2000             {
2001               /* This loop can be coded for space rather than */
2002               /* speed because it will usually run only once. */
2003               /* (the reach is at most len + 21, and typically */
2004               /* does not exceed len) */
2005               while ((limit - pos_byte) * direction >= 0)
2006                 pos_byte += BM_tab[FETCH_BYTE (pos_byte)];
2007               /* now run the same tests to distinguish going off the */
2008               /* end, a match or a phony match. */
2009               if ((pos_byte - limit) * direction <= len_byte)
2010                 break;  /* ran off the end */
2011               /* Found what might be a match.
2012                  Set POS_BYTE back to last (first if reverse) pos.  */
2013               pos_byte -= infinity;
2014               i = dirlen - direction;
2015               while ((i -= direction) + direction != 0)
2016                 {
2017                   int ch;
2018                   unsigned char *ptr;
2019                   pos_byte -= direction;
2020                   ptr = BYTE_POS_ADDR (pos_byte);
2021                   /* Translate only the last byte of a character.  */
2022                   if (! multibyte
2023                       || ((ptr == tail_end_ptr
2024                            || CHAR_HEAD_P (ptr[1]))
2025                           && (CHAR_HEAD_P (ptr[0])
2026                               /* Check if this is the last byte of a
2027                                  translable character.  */
2028                               || (translate_prev_byte1 == ptr[-1]
2029                                   && (CHAR_HEAD_P (translate_prev_byte1)
2030                                       || (translate_prev_byte2 == ptr[-2]
2031                                           && (CHAR_HEAD_P (translate_prev_byte2)
2032                                               || translate_prev_byte3 == ptr[-3])))))))
2033                     ch = simple_translate[*ptr];
2034                   else
2035                     ch = *ptr;
2036                   if (pat[i] != ch)
2037                     break;
2038                 }
2039               /* Above loop has moved POS_BYTE part or all the way
2040                  back to the first pos (last pos if reverse).
2041                  Set it once again at the last (first if reverse) char.  */
2042               pos_byte += dirlen - i- direction;
2043               if (i + direction == 0)
2044                 {
2045                   int position, start, end;
2046                   pos_byte -= direction;
2047
2048                   position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
2049                   set_search_regs (position, len_byte);
2050
2051                   if (NILP (Vinhibit_changing_match_data))
2052                     {
2053                       start = search_regs.start[0];
2054                       end = search_regs.end[0];
2055                     }
2056                   else
2057                     /* If Vinhibit_changing_match_data is non-nil,
2058                        search_regs will not be changed.  So let's
2059                        compute start and end here.  */
2060                     {
2061                       start = BYTE_TO_CHAR (position);
2062                       end = BYTE_TO_CHAR (position + len_byte);
2063                     }
2064
2065                   if ((n -= direction) != 0)
2066                     pos_byte += dirlen; /* to resume search */
2067                   else
2068                     return direction > 0 ? end : start;
2069                 }
2070               else
2071                 pos_byte += stride_for_teases;
2072             }
2073           }
2074       /* We have done one clump.  Can we continue? */
2075       if ((lim_byte - pos_byte) * direction < 0)
2076         return ((0 - n) * direction);
2077     }
2078   return BYTE_TO_CHAR (pos_byte);
2079 }
2080
2081 /* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
2082    for the overall match just found in the current buffer.
2083    Also clear out the match data for registers 1 and up.  */
2084
2085 static void
2086 set_search_regs (beg_byte, nbytes)
2087      int beg_byte, nbytes;
2088 {
2089   int i;
2090
2091   if (!NILP (Vinhibit_changing_match_data))
2092     return;
2093
2094   /* Make sure we have registers in which to store
2095      the match position.  */
2096   if (search_regs.num_regs == 0)
2097     {
2098       search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
2099       search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
2100       search_regs.num_regs = 2;
2101     }
2102
2103   /* Clear out the other registers.  */
2104   for (i = 1; i < search_regs.num_regs; i++)
2105     {
2106       search_regs.start[i] = -1;
2107       search_regs.end[i] = -1;
2108     }
2109
2110   search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
2111   search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
2112   XSETBUFFER (last_thing_searched, current_buffer);
2113 }
2114 \f
2115 /* Given a string of words separated by word delimiters,
2116   compute a regexp that matches those exact words
2117   separated by arbitrary punctuation.  */
2118
2119 static Lisp_Object
2120 wordify (string)
2121      Lisp_Object string;
2122 {
2123   register unsigned char *p, *o;
2124   register int i, i_byte, len, punct_count = 0, word_count = 0;
2125   Lisp_Object val;
2126   int prev_c = 0;
2127   int adjust;
2128
2129   CHECK_STRING (string);
2130   p = SDATA (string);
2131   len = SCHARS (string);
2132
2133   for (i = 0, i_byte = 0; i < len; )
2134     {
2135       int c;
2136
2137       FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, i, i_byte);
2138
2139       if (SYNTAX (c) != Sword)
2140         {
2141           punct_count++;
2142           if (i > 0 && SYNTAX (prev_c) == Sword)
2143             word_count++;
2144         }
2145
2146       prev_c = c;
2147     }
2148
2149   if (SYNTAX (prev_c) == Sword)
2150     word_count++;
2151   if (!word_count)
2152     return empty_unibyte_string;
2153
2154   adjust = - punct_count + 5 * (word_count - 1) + 4;
2155   if (STRING_MULTIBYTE (string))
2156     val = make_uninit_multibyte_string (len + adjust,
2157                                         SBYTES (string)
2158                                         + adjust);
2159   else
2160     val = make_uninit_string (len + adjust);
2161
2162   o = SDATA (val);
2163   *o++ = '\\';
2164   *o++ = 'b';
2165   prev_c = 0;
2166
2167   for (i = 0, i_byte = 0; i < len; )
2168     {
2169       int c;
2170       int i_byte_orig = i_byte;
2171
2172       FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, i, i_byte);
2173
2174       if (SYNTAX (c) == Sword)
2175         {
2176           bcopy (SDATA (string) + i_byte_orig, o,
2177                  i_byte - i_byte_orig);
2178           o += i_byte - i_byte_orig;
2179         }
2180       else if (i > 0 && SYNTAX (prev_c) == Sword && --word_count)
2181         {
2182           *o++ = '\\';
2183           *o++ = 'W';
2184           *o++ = '\\';
2185           *o++ = 'W';
2186           *o++ = '*';
2187         }
2188
2189       prev_c = c;
2190     }
2191
2192   *o++ = '\\';
2193   *o++ = 'b';
2194
2195   return val;
2196 }
2197 \f
2198 DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
2199        "MSearch backward: ",
2200        doc: /* Search backward from point for STRING.
2201 Set point to the beginning of the occurrence found, and return point.
2202 An optional second argument bounds the search; it is a buffer position.
2203 The match found must not extend before that position.
2204 Optional third argument, if t, means if fail just return nil (no error).
2205  If not nil and not t, position at limit of search and return nil.
2206 Optional fourth argument is repeat count--search for successive occurrences.
2207
2208 Search case-sensitivity is determined by the value of the variable
2209 `case-fold-search', which see.
2210
2211 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2212      (string, bound, noerror, count)
2213      Lisp_Object string, bound, noerror, count;
2214 {
2215   return search_command (string, bound, noerror, count, -1, 0, 0);
2216 }
2217
2218 DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
2219        doc: /* Search forward from point for STRING.
2220 Set point to the end of the occurrence found, and return point.
2221 An optional second argument bounds the search; it is a buffer position.
2222 The match found must not extend after that position.  A value of nil is
2223   equivalent to (point-max).
2224 Optional third argument, if t, means if fail just return nil (no error).
2225   If not nil and not t, move to limit of search and return nil.
2226 Optional fourth argument is repeat count--search for successive occurrences.
2227
2228 Search case-sensitivity is determined by the value of the variable
2229 `case-fold-search', which see.
2230
2231 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2232      (string, bound, noerror, count)
2233      Lisp_Object string, bound, noerror, count;
2234 {
2235   return search_command (string, bound, noerror, count, 1, 0, 0);
2236 }
2237
2238 DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
2239        "sWord search backward: ",
2240        doc: /* Search backward from point for STRING, ignoring differences in punctuation.
2241 Set point to the beginning of the occurrence found, and return point.
2242 An optional second argument bounds the search; it is a buffer position.
2243 The match found must not extend before that position.
2244 Optional third argument, if t, means if fail just return nil (no error).
2245   If not nil and not t, move to limit of search and return nil.
2246 Optional fourth argument is repeat count--search for successive occurrences.  */)
2247      (string, bound, noerror, count)
2248      Lisp_Object string, bound, noerror, count;
2249 {
2250   return search_command (wordify (string), bound, noerror, count, -1, 1, 0);
2251 }
2252
2253 DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
2254        "sWord search: ",
2255        doc: /* Search forward from point for STRING, ignoring differences in punctuation.
2256 Set point to the end of the occurrence found, and return point.
2257 An optional second argument bounds the search; it is a buffer position.
2258 The match found must not extend after that position.
2259 Optional third argument, if t, means if fail just return nil (no error).
2260   If not nil and not t, move to limit of search and return nil.
2261 Optional fourth argument is repeat count--search for successive occurrences.  */)
2262      (string, bound, noerror, count)
2263      Lisp_Object string, bound, noerror, count;
2264 {
2265   return search_command (wordify (string), bound, noerror, count, 1, 1, 0);
2266 }
2267
2268 DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
2269        "sRE search backward: ",
2270        doc: /* Search backward from point for match for regular expression REGEXP.
2271 Set point to the beginning of the match, and return point.
2272 The match found is the one starting last in the buffer
2273 and yet ending before the origin of the search.
2274 An optional second argument bounds the search; it is a buffer position.
2275 The match found must start at or after that position.
2276 Optional third argument, if t, means if fail just return nil (no error).
2277   If not nil and not t, move to limit of search and return nil.
2278 Optional fourth argument is repeat count--search for successive occurrences.
2279 See also the functions `match-beginning', `match-end', `match-string',
2280 and `replace-match'.  */)
2281      (regexp, bound, noerror, count)
2282      Lisp_Object regexp, bound, noerror, count;
2283 {
2284   return search_command (regexp, bound, noerror, count, -1, 1, 0);
2285 }
2286
2287 DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
2288        "sRE search: ",
2289        doc: /* Search forward from point for regular expression REGEXP.
2290 Set point to the end of the occurrence found, and return point.
2291 An optional second argument bounds the search; it is a buffer position.
2292 The match found must not extend after that position.
2293 Optional third argument, if t, means if fail just return nil (no error).
2294   If not nil and not t, move to limit of search and return nil.
2295 Optional fourth argument is repeat count--search for successive occurrences.
2296 See also the functions `match-beginning', `match-end', `match-string',
2297 and `replace-match'.  */)
2298      (regexp, bound, noerror, count)
2299      Lisp_Object regexp, bound, noerror, count;
2300 {
2301   return search_command (regexp, bound, noerror, count, 1, 1, 0);
2302 }
2303
2304 DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
2305        "sPosix search backward: ",
2306        doc: /* Search backward from point for match for regular expression REGEXP.
2307 Find the longest match in accord with Posix regular expression rules.
2308 Set point to the beginning of the match, and return point.
2309 The match found is the one starting last in the buffer
2310 and yet ending before the origin of the search.
2311 An optional second argument bounds the search; it is a buffer position.
2312 The match found must start at or after that position.
2313 Optional third argument, if t, means if fail just return nil (no error).
2314   If not nil and not t, move to limit of search and return nil.
2315 Optional fourth argument is repeat count--search for successive occurrences.
2316 See also the functions `match-beginning', `match-end', `match-string',
2317 and `replace-match'.  */)
2318      (regexp, bound, noerror, count)
2319      Lisp_Object regexp, bound, noerror, count;
2320 {
2321   return search_command (regexp, bound, noerror, count, -1, 1, 1);
2322 }
2323
2324 DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
2325        "sPosix search: ",
2326        doc: /* Search forward from point for regular expression REGEXP.
2327 Find the longest match in accord with Posix regular expression rules.
2328 Set point to the end of the occurrence found, and return point.
2329 An optional second argument bounds the search; it is a buffer position.
2330 The match found must not extend after that position.
2331 Optional third argument, if t, means if fail just return nil (no error).
2332   If not nil and not t, move to limit of search and return nil.
2333 Optional fourth argument is repeat count--search for successive occurrences.
2334 See also the functions `match-beginning', `match-end', `match-string',
2335 and `replace-match'.  */)
2336      (regexp, bound, noerror, count)
2337      Lisp_Object regexp, bound, noerror, count;
2338 {
2339   return search_command (regexp, bound, noerror, count, 1, 1, 1);
2340 }
2341 \f
2342 DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
2343        doc: /* Replace text matched by last search with NEWTEXT.
2344 Leave point at the end of the replacement text.
2345
2346 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
2347 Otherwise maybe capitalize the whole text, or maybe just word initials,
2348 based on the replaced text.
2349 If the replaced text has only capital letters
2350 and has at least one multiletter word, convert NEWTEXT to all caps.
2351 Otherwise if all words are capitalized in the replaced text,
2352 capitalize each word in NEWTEXT.
2353
2354 If third arg LITERAL is non-nil, insert NEWTEXT literally.
2355 Otherwise treat `\\' as special:
2356   `\\&' in NEWTEXT means substitute original matched text.
2357   `\\N' means substitute what matched the Nth `\\(...\\)'.
2358        If Nth parens didn't match, substitute nothing.
2359   `\\\\' means insert one `\\'.
2360 Case conversion does not apply to these substitutions.
2361
2362 FIXEDCASE and LITERAL are optional arguments.
2363
2364 The optional fourth argument STRING can be a string to modify.
2365 This is meaningful when the previous match was done against STRING,
2366 using `string-match'.  When used this way, `replace-match'
2367 creates and returns a new string made by copying STRING and replacing
2368 the part of STRING that was matched.
2369
2370 The optional fifth argument SUBEXP specifies a subexpression;
2371 it says to replace just that subexpression with NEWTEXT,
2372 rather than replacing the entire matched text.
2373 This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
2374 `\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
2375 NEWTEXT in place of subexp N.
2376 This is useful only after a regular expression search or match,
2377 since only regular expressions have distinguished subexpressions.  */)
2378      (newtext, fixedcase, literal, string, subexp)
2379      Lisp_Object newtext, fixedcase, literal, string, subexp;
2380 {
2381   enum { nochange, all_caps, cap_initial } case_action;
2382   register int pos, pos_byte;
2383   int some_multiletter_word;
2384   int some_lowercase;
2385   int some_uppercase;
2386   int some_nonuppercase_initial;
2387   register int c, prevc;
2388   int sub;
2389   int opoint, newpoint;
2390
2391   CHECK_STRING (newtext);
2392
2393   if (! NILP (string))
2394     CHECK_STRING (string);
2395
2396   case_action = nochange;       /* We tried an initialization */
2397                                 /* but some C compilers blew it */
2398
2399   if (search_regs.num_regs <= 0)
2400     error ("`replace-match' called before any match found");
2401
2402   if (NILP (subexp))
2403     sub = 0;
2404   else
2405     {
2406       CHECK_NUMBER (subexp);
2407       sub = XINT (subexp);
2408       if (sub < 0 || sub >= search_regs.num_regs)
2409         args_out_of_range (subexp, make_number (search_regs.num_regs));
2410     }
2411
2412   if (NILP (string))
2413     {
2414       if (search_regs.start[sub] < BEGV
2415           || search_regs.start[sub] > search_regs.end[sub]
2416           || search_regs.end[sub] > ZV)
2417         args_out_of_range (make_number (search_regs.start[sub]),
2418                            make_number (search_regs.end[sub]));
2419     }
2420   else
2421     {
2422       if (search_regs.start[sub] < 0
2423           || search_regs.start[sub] > search_regs.end[sub]
2424           || search_regs.end[sub] > SCHARS (string))
2425         args_out_of_range (make_number (search_regs.start[sub]),
2426                            make_number (search_regs.end[sub]));
2427     }
2428
2429   if (NILP (fixedcase))
2430     {
2431       /* Decide how to casify by examining the matched text. */
2432       int last;
2433
2434       pos = search_regs.start[sub];
2435       last = search_regs.end[sub];
2436
2437       if (NILP (string))
2438         pos_byte = CHAR_TO_BYTE (pos);
2439       else
2440         pos_byte = string_char_to_byte (string, pos);
2441
2442       prevc = '\n';
2443       case_action = all_caps;
2444
2445       /* some_multiletter_word is set nonzero if any original word
2446          is more than one letter long. */
2447       some_multiletter_word = 0;
2448       some_lowercase = 0;
2449       some_nonuppercase_initial = 0;
2450       some_uppercase = 0;
2451
2452       while (pos < last)
2453         {
2454           if (NILP (string))
2455             {
2456               c = FETCH_CHAR_AS_MULTIBYTE (pos_byte);
2457               INC_BOTH (pos, pos_byte);
2458             }
2459           else
2460             FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, pos, pos_byte);
2461
2462           if (LOWERCASEP (c))
2463             {
2464               /* Cannot be all caps if any original char is lower case */
2465
2466               some_lowercase = 1;
2467               if (SYNTAX (prevc) != Sword)
2468                 some_nonuppercase_initial = 1;
2469               else
2470                 some_multiletter_word = 1;
2471             }
2472           else if (UPPERCASEP (c))
2473             {
2474               some_uppercase = 1;
2475               if (SYNTAX (prevc) != Sword)
2476                 ;
2477               else
2478                 some_multiletter_word = 1;
2479             }
2480           else
2481             {
2482               /* If the initial is a caseless word constituent,
2483                  treat that like a lowercase initial.  */
2484               if (SYNTAX (prevc) != Sword)
2485                 some_nonuppercase_initial = 1;
2486             }
2487
2488           prevc = c;
2489         }
2490
2491       /* Convert to all caps if the old text is all caps
2492          and has at least one multiletter word.  */
2493       if (! some_lowercase && some_multiletter_word)
2494         case_action = all_caps;
2495       /* Capitalize each word, if the old text has all capitalized words.  */
2496       else if (!some_nonuppercase_initial && some_multiletter_word)
2497         case_action = cap_initial;
2498       else if (!some_nonuppercase_initial && some_uppercase)
2499         /* Should x -> yz, operating on X, give Yz or YZ?
2500            We'll assume the latter.  */
2501         case_action = all_caps;
2502       else
2503         case_action = nochange;
2504     }
2505
2506   /* Do replacement in a string.  */
2507   if (!NILP (string))
2508     {
2509       Lisp_Object before, after;
2510
2511       before = Fsubstring (string, make_number (0),
2512                            make_number (search_regs.start[sub]));
2513       after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
2514
2515       /* Substitute parts of the match into NEWTEXT
2516          if desired.  */
2517       if (NILP (literal))
2518         {
2519           int lastpos = 0;
2520           int lastpos_byte = 0;
2521           /* We build up the substituted string in ACCUM.  */
2522           Lisp_Object accum;
2523           Lisp_Object middle;
2524           int length = SBYTES (newtext);
2525
2526           accum = Qnil;
2527
2528           for (pos_byte = 0, pos = 0; pos_byte < length;)
2529             {
2530               int substart = -1;
2531               int subend = 0;
2532               int delbackslash = 0;
2533
2534               FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2535
2536               if (c == '\\')
2537                 {
2538                   FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2539
2540                   if (c == '&')
2541                     {
2542                       substart = search_regs.start[sub];
2543                       subend = search_regs.end[sub];
2544                     }
2545                   else if (c >= '1' && c <= '9')
2546                     {
2547                       if (search_regs.start[c - '0'] >= 0
2548                           && c <= search_regs.num_regs + '0')
2549                         {
2550                           substart = search_regs.start[c - '0'];
2551                           subend = search_regs.end[c - '0'];
2552                         }
2553                       else
2554                         {
2555                           /* If that subexp did not match,
2556                              replace \\N with nothing.  */
2557                           substart = 0;
2558                           subend = 0;
2559                         }
2560                     }
2561                   else if (c == '\\')
2562                     delbackslash = 1;
2563                   else
2564                     error ("Invalid use of `\\' in replacement text");
2565                 }
2566               if (substart >= 0)
2567                 {
2568                   if (pos - 2 != lastpos)
2569                     middle = substring_both (newtext, lastpos,
2570                                              lastpos_byte,
2571                                              pos - 2, pos_byte - 2);
2572                   else
2573                     middle = Qnil;
2574                   accum = concat3 (accum, middle,
2575                                    Fsubstring (string,
2576                                                make_number (substart),
2577                                                make_number (subend)));
2578                   lastpos = pos;
2579                   lastpos_byte = pos_byte;
2580                 }
2581               else if (delbackslash)
2582                 {
2583                   middle = substring_both (newtext, lastpos,
2584                                            lastpos_byte,
2585                                            pos - 1, pos_byte - 1);
2586
2587                   accum = concat2 (accum, middle);
2588                   lastpos = pos;
2589                   lastpos_byte = pos_byte;
2590                 }
2591             }
2592
2593           if (pos != lastpos)
2594             middle = substring_both (newtext, lastpos,
2595                                      lastpos_byte,
2596                                      pos, pos_byte);
2597           else
2598             middle = Qnil;
2599
2600           newtext = concat2 (accum, middle);
2601         }
2602
2603       /* Do case substitution in NEWTEXT if desired.  */
2604       if (case_action == all_caps)
2605         newtext = Fupcase (newtext);
2606       else if (case_action == cap_initial)
2607         newtext = Fupcase_initials (newtext);
2608
2609       return concat3 (before, newtext, after);
2610     }
2611
2612   /* Record point, then move (quietly) to the start of the match.  */
2613   if (PT >= search_regs.end[sub])
2614     opoint = PT - ZV;
2615   else if (PT > search_regs.start[sub])
2616     opoint = search_regs.end[sub] - ZV;
2617   else
2618     opoint = PT;
2619
2620   /* If we want non-literal replacement,
2621      perform substitution on the replacement string.  */
2622   if (NILP (literal))
2623     {
2624       int length = SBYTES (newtext);
2625       unsigned char *substed;
2626       int substed_alloc_size, substed_len;
2627       int buf_multibyte = !NILP (current_buffer->enable_multibyte_characters);
2628       int str_multibyte = STRING_MULTIBYTE (newtext);
2629       Lisp_Object rev_tbl;
2630       int really_changed = 0;
2631
2632       rev_tbl = Qnil;
2633
2634       substed_alloc_size = length * 2 + 100;
2635       substed = (unsigned char *) xmalloc (substed_alloc_size + 1);
2636       substed_len = 0;
2637
2638       /* Go thru NEWTEXT, producing the actual text to insert in
2639          SUBSTED while adjusting multibyteness to that of the current
2640          buffer.  */
2641
2642       for (pos_byte = 0, pos = 0; pos_byte < length;)
2643         {
2644           unsigned char str[MAX_MULTIBYTE_LENGTH];
2645           unsigned char *add_stuff = NULL;
2646           int add_len = 0;
2647           int idx = -1;
2648
2649           if (str_multibyte)
2650             {
2651               FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte);
2652               if (!buf_multibyte)
2653                 c = multibyte_char_to_unibyte (c, rev_tbl);
2654             }
2655           else
2656             {
2657               /* Note that we don't have to increment POS.  */
2658               c = SREF (newtext, pos_byte++);
2659               if (buf_multibyte)
2660                 c = unibyte_char_to_multibyte (c);
2661             }
2662
2663           /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2664              or set IDX to a match index, which means put that part
2665              of the buffer text into SUBSTED.  */
2666
2667           if (c == '\\')
2668             {
2669               really_changed = 1;
2670
2671               if (str_multibyte)
2672                 {
2673                   FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext,
2674                                                       pos, pos_byte);
2675                   if (!buf_multibyte && !ASCII_CHAR_P (c))
2676                     c = multibyte_char_to_unibyte (c, rev_tbl);
2677                 }
2678               else
2679                 {
2680                   c = SREF (newtext, pos_byte++);
2681                   if (buf_multibyte)
2682                     c = unibyte_char_to_multibyte (c);
2683                 }
2684
2685               if (c == '&')
2686                 idx = sub;
2687               else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
2688                 {
2689                   if (search_regs.start[c - '0'] >= 1)
2690                     idx = c - '0';
2691                 }
2692               else if (c == '\\')
2693                 add_len = 1, add_stuff = "\\";
2694               else
2695                 {
2696                   xfree (substed);
2697                   error ("Invalid use of `\\' in replacement text");
2698                 }
2699             }
2700           else
2701             {
2702               add_len = CHAR_STRING (c, str);
2703               add_stuff = str;
2704             }
2705
2706           /* If we want to copy part of a previous match,
2707              set up ADD_STUFF and ADD_LEN to point to it.  */
2708           if (idx >= 0)
2709             {
2710               int begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
2711               add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
2712               if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
2713                 move_gap (search_regs.start[idx]);
2714               add_stuff = BYTE_POS_ADDR (begbyte);
2715             }
2716
2717           /* Now the stuff we want to add to SUBSTED
2718              is invariably ADD_LEN bytes starting at ADD_STUFF.  */
2719
2720           /* Make sure SUBSTED is big enough.  */
2721           if (substed_len + add_len >= substed_alloc_size)
2722             {
2723               substed_alloc_size = substed_len + add_len + 500;
2724               substed = (unsigned char *) xrealloc (substed,
2725                                                     substed_alloc_size + 1);
2726             }
2727
2728           /* Now add to the end of SUBSTED.  */
2729           if (add_stuff)
2730             {
2731               bcopy (add_stuff, substed + substed_len, add_len);
2732               substed_len += add_len;
2733             }
2734         }
2735
2736       if (really_changed)
2737         {
2738           if (buf_multibyte)
2739             {
2740               int nchars = multibyte_chars_in_text (substed, substed_len);
2741
2742               newtext = make_multibyte_string (substed, nchars, substed_len);
2743             }
2744           else
2745             newtext = make_unibyte_string (substed, substed_len);
2746         }
2747       xfree (substed);
2748     }
2749
2750   /* Replace the old text with the new in the cleanest possible way.  */
2751   replace_range (search_regs.start[sub], search_regs.end[sub],
2752                  newtext, 1, 0, 1);
2753   newpoint = search_regs.start[sub] + SCHARS (newtext);
2754
2755   if (case_action == all_caps)
2756     Fupcase_region (make_number (search_regs.start[sub]),
2757                     make_number (newpoint));
2758   else if (case_action == cap_initial)
2759     Fupcase_initials_region (make_number (search_regs.start[sub]),
2760                              make_number (newpoint));
2761
2762   /* Adjust search data for this change.  */
2763   {
2764     int oldend = search_regs.end[sub];
2765     int oldstart = search_regs.start[sub];
2766     int change = newpoint - search_regs.end[sub];
2767     int i;
2768
2769     for (i = 0; i < search_regs.num_regs; i++)
2770       {
2771         if (search_regs.start[i] >= oldend)
2772           search_regs.start[i] += change;
2773         else if (search_regs.start[i] > oldstart)
2774           search_regs.start[i] = oldstart;
2775         if (search_regs.end[i] >= oldend)
2776           search_regs.end[i] += change;
2777         else if (search_regs.end[i] > oldstart)
2778           search_regs.end[i] = oldstart;
2779       }
2780   }
2781
2782   /* Put point back where it was in the text.  */
2783   if (opoint <= 0)
2784     TEMP_SET_PT (opoint + ZV);
2785   else
2786     TEMP_SET_PT (opoint);
2787
2788   /* Now move point "officially" to the start of the inserted replacement.  */
2789   move_if_not_intangible (newpoint);
2790
2791   return Qnil;
2792 }
2793 \f
2794 static Lisp_Object
2795 match_limit (num, beginningp)
2796      Lisp_Object num;
2797      int beginningp;
2798 {
2799   register int n;
2800
2801   CHECK_NUMBER (num);
2802   n = XINT (num);
2803   if (n < 0)
2804     args_out_of_range (num, make_number (0));
2805   if (search_regs.num_regs <= 0)
2806     error ("No match data, because no search succeeded");
2807   if (n >= search_regs.num_regs
2808       || search_regs.start[n] < 0)
2809     return Qnil;
2810   return (make_number ((beginningp) ? search_regs.start[n]
2811                                     : search_regs.end[n]));
2812 }
2813
2814 DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
2815        doc: /* Return position of start of text matched by last search.
2816 SUBEXP, a number, specifies which parenthesized expression in the last
2817   regexp.
2818 Value is nil if SUBEXPth pair didn't match, or there were less than
2819   SUBEXP pairs.
2820 Zero means the entire text matched by the whole regexp or whole string.  */)
2821      (subexp)
2822      Lisp_Object subexp;
2823 {
2824   return match_limit (subexp, 1);
2825 }
2826
2827 DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
2828        doc: /* Return position of end of text matched by last search.
2829 SUBEXP, a number, specifies which parenthesized expression in the last
2830   regexp.
2831 Value is nil if SUBEXPth pair didn't match, or there were less than
2832   SUBEXP pairs.
2833 Zero means the entire text matched by the whole regexp or whole string.  */)
2834      (subexp)
2835      Lisp_Object subexp;
2836 {
2837   return match_limit (subexp, 0);
2838 }
2839
2840 DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 3, 0,
2841        doc: /* Return a list containing all info on what the last search matched.
2842 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2843 All the elements are markers or nil (nil if the Nth pair didn't match)
2844 if the last match was on a buffer; integers or nil if a string was matched.
2845 Use `store-match-data' to reinstate the data in this list.
2846
2847 If INTEGERS (the optional first argument) is non-nil, always use
2848 integers \(rather than markers) to represent buffer positions.  In
2849 this case, and if the last match was in a buffer, the buffer will get
2850 stored as one additional element at the end of the list.
2851
2852 If REUSE is a list, reuse it as part of the value.  If REUSE is long
2853 enough to hold all the values, and if INTEGERS is non-nil, no consing
2854 is done.
2855
2856 If optional third arg RESEAT is non-nil, any previous markers on the
2857 REUSE list will be modified to point to nowhere.
2858
2859 Return value is undefined if the last search failed.  */)
2860   (integers, reuse, reseat)
2861      Lisp_Object integers, reuse, reseat;
2862 {
2863   Lisp_Object tail, prev;
2864   Lisp_Object *data;
2865   int i, len;
2866
2867   if (!NILP (reseat))
2868     for (tail = reuse; CONSP (tail); tail = XCDR (tail))
2869       if (MARKERP (XCAR (tail)))
2870         {
2871           unchain_marker (XMARKER (XCAR (tail)));
2872           XSETCAR (tail, Qnil);
2873         }
2874
2875   if (NILP (last_thing_searched))
2876     return Qnil;
2877
2878   prev = Qnil;
2879
2880   data = (Lisp_Object *) alloca ((2 * search_regs.num_regs + 1)
2881                                  * sizeof (Lisp_Object));
2882
2883   len = 0;
2884   for (i = 0; i < search_regs.num_regs; i++)
2885     {
2886       int start = search_regs.start[i];
2887       if (start >= 0)
2888         {
2889           if (EQ (last_thing_searched, Qt)
2890               || ! NILP (integers))
2891             {
2892               XSETFASTINT (data[2 * i], start);
2893               XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
2894             }
2895           else if (BUFFERP (last_thing_searched))
2896             {
2897               data[2 * i] = Fmake_marker ();
2898               Fset_marker (data[2 * i],
2899                            make_number (start),
2900                            last_thing_searched);
2901               data[2 * i + 1] = Fmake_marker ();
2902               Fset_marker (data[2 * i + 1],
2903                            make_number (search_regs.end[i]),
2904                            last_thing_searched);
2905             }
2906           else
2907             /* last_thing_searched must always be Qt, a buffer, or Qnil.  */
2908             abort ();
2909
2910           len = 2 * i + 2;
2911         }
2912       else
2913         data[2 * i] = data[2 * i + 1] = Qnil;
2914     }
2915
2916   if (BUFFERP (last_thing_searched) && !NILP (integers))
2917     {
2918       data[len] = last_thing_searched;
2919       len++;
2920     }
2921
2922   /* If REUSE is not usable, cons up the values and return them.  */
2923   if (! CONSP (reuse))
2924     return Flist (len, data);
2925
2926   /* If REUSE is a list, store as many value elements as will fit
2927      into the elements of REUSE.  */
2928   for (i = 0, tail = reuse; CONSP (tail);
2929        i++, tail = XCDR (tail))
2930     {
2931       if (i < len)
2932         XSETCAR (tail, data[i]);
2933       else
2934         XSETCAR (tail, Qnil);
2935       prev = tail;
2936     }
2937
2938   /* If we couldn't fit all value elements into REUSE,
2939      cons up the rest of them and add them to the end of REUSE.  */
2940   if (i < len)
2941     XSETCDR (prev, Flist (len - i, data + i));
2942
2943   return reuse;
2944 }
2945
2946 /* We used to have an internal use variant of `reseat' described as:
2947
2948       If RESEAT is `evaporate', put the markers back on the free list
2949       immediately.  No other references to the markers must exist in this
2950       case, so it is used only internally on the unwind stack and
2951       save-match-data from Lisp.
2952
2953    But it was ill-conceived: those supposedly-internal markers get exposed via
2954    the undo-list, so freeing them here is unsafe.  */
2955
2956 DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 2, 0,
2957        doc: /* Set internal data on last search match from elements of LIST.
2958 LIST should have been created by calling `match-data' previously.
2959
2960 If optional arg RESEAT is non-nil, make markers on LIST point nowhere.  */)
2961     (list, reseat)
2962      register Lisp_Object list, reseat;
2963 {
2964   register int i;
2965   register Lisp_Object marker;
2966
2967   if (running_asynch_code)
2968     save_search_regs ();
2969
2970   CHECK_LIST (list);
2971
2972   /* Unless we find a marker with a buffer or an explicit buffer
2973      in LIST, assume that this match data came from a string.  */
2974   last_thing_searched = Qt;
2975
2976   /* Allocate registers if they don't already exist.  */
2977   {
2978     int length = XFASTINT (Flength (list)) / 2;
2979
2980     if (length > search_regs.num_regs)
2981       {
2982         if (search_regs.num_regs == 0)
2983           {
2984             search_regs.start
2985               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2986             search_regs.end
2987               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2988           }
2989         else
2990           {
2991             search_regs.start
2992               = (regoff_t *) xrealloc (search_regs.start,
2993                                        length * sizeof (regoff_t));
2994             search_regs.end
2995               = (regoff_t *) xrealloc (search_regs.end,
2996                                        length * sizeof (regoff_t));
2997           }
2998
2999         for (i = search_regs.num_regs; i < length; i++)
3000           search_regs.start[i] = -1;
3001
3002         search_regs.num_regs = length;
3003       }
3004
3005     for (i = 0; CONSP (list); i++)
3006       {
3007         marker = XCAR (list);
3008         if (BUFFERP (marker))
3009           {
3010             last_thing_searched = marker;
3011             break;
3012           }
3013         if (i >= length)
3014           break;
3015         if (NILP (marker))
3016           {
3017             search_regs.start[i] = -1;
3018             list = XCDR (list);
3019           }
3020         else
3021           {
3022             int from;
3023             Lisp_Object m;
3024
3025             m = marker;
3026             if (MARKERP (marker))
3027               {
3028                 if (XMARKER (marker)->buffer == 0)
3029                   XSETFASTINT (marker, 0);
3030                 else
3031                   XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
3032               }
3033
3034             CHECK_NUMBER_COERCE_MARKER (marker);
3035             from = XINT (marker);
3036
3037             if (!NILP (reseat) && MARKERP (m))
3038               {
3039                 unchain_marker (XMARKER (m));
3040                 XSETCAR (list, Qnil);
3041               }
3042
3043             if ((list = XCDR (list), !CONSP (list)))
3044               break;
3045
3046             m = marker = XCAR (list);
3047
3048             if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
3049               XSETFASTINT (marker, 0);
3050
3051             CHECK_NUMBER_COERCE_MARKER (marker);
3052             search_regs.start[i] = from;
3053             search_regs.end[i] = XINT (marker);
3054
3055             if (!NILP (reseat) && MARKERP (m))
3056               {
3057                 unchain_marker (XMARKER (m));
3058                 XSETCAR (list, Qnil);
3059               }
3060           }
3061         list = XCDR (list);
3062       }
3063
3064     for (; i < search_regs.num_regs; i++)
3065       search_regs.start[i] = -1;
3066   }
3067
3068   return Qnil;
3069 }
3070
3071 /* If non-zero the match data have been saved in saved_search_regs
3072    during the execution of a sentinel or filter. */
3073 static int search_regs_saved;
3074 static struct re_registers saved_search_regs;
3075 static Lisp_Object saved_last_thing_searched;
3076
3077 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
3078    if asynchronous code (filter or sentinel) is running. */
3079 static void
3080 save_search_regs ()
3081 {
3082   if (!search_regs_saved)
3083     {
3084       saved_search_regs.num_regs = search_regs.num_regs;
3085       saved_search_regs.start = search_regs.start;
3086       saved_search_regs.end = search_regs.end;
3087       saved_last_thing_searched = last_thing_searched;
3088       last_thing_searched = Qnil;
3089       search_regs.num_regs = 0;
3090       search_regs.start = 0;
3091       search_regs.end = 0;
3092
3093       search_regs_saved = 1;
3094     }
3095 }
3096
3097 /* Called upon exit from filters and sentinels. */
3098 void
3099 restore_search_regs ()
3100 {
3101   if (search_regs_saved)
3102     {
3103       if (search_regs.num_regs > 0)
3104         {
3105           xfree (search_regs.start);
3106           xfree (search_regs.end);
3107         }
3108       search_regs.num_regs = saved_search_regs.num_regs;
3109       search_regs.start = saved_search_regs.start;
3110       search_regs.end = saved_search_regs.end;
3111       last_thing_searched = saved_last_thing_searched;
3112       saved_last_thing_searched = Qnil;
3113       search_regs_saved = 0;
3114     }
3115 }
3116
3117 static Lisp_Object
3118 unwind_set_match_data (list)
3119      Lisp_Object list;
3120 {
3121   /* It is NOT ALWAYS safe to free (evaporate) the markers immediately.  */
3122   return Fset_match_data (list, Qt);
3123 }
3124
3125 /* Called to unwind protect the match data.  */
3126 void
3127 record_unwind_save_match_data ()
3128 {
3129   record_unwind_protect (unwind_set_match_data,
3130                          Fmatch_data (Qnil, Qnil, Qnil));
3131 }
3132
3133 /* Quote a string to inactivate reg-expr chars */
3134
3135 DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
3136        doc: /* Return a regexp string which matches exactly STRING and nothing else.  */)
3137      (string)
3138      Lisp_Object string;
3139 {
3140   register unsigned char *in, *out, *end;
3141   register unsigned char *temp;
3142   int backslashes_added = 0;
3143
3144   CHECK_STRING (string);
3145
3146   temp = (unsigned char *) alloca (SBYTES (string) * 2);
3147
3148   /* Now copy the data into the new string, inserting escapes. */
3149
3150   in = SDATA (string);
3151   end = in + SBYTES (string);
3152   out = temp;
3153
3154   for (; in != end; in++)
3155     {
3156       if (*in == '['
3157           || *in == '*' || *in == '.' || *in == '\\'
3158           || *in == '?' || *in == '+'
3159           || *in == '^' || *in == '$')
3160         *out++ = '\\', backslashes_added++;
3161       *out++ = *in;
3162     }
3163
3164   return make_specified_string (temp,
3165                                 SCHARS (string) + backslashes_added,
3166                                 out - temp,
3167                                 STRING_MULTIBYTE (string));
3168 }
3169 \f
3170 void
3171 syms_of_search ()
3172 {
3173   register int i;
3174
3175   for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
3176     {
3177       searchbufs[i].buf.allocated = 100;
3178       searchbufs[i].buf.buffer = (unsigned char *) xmalloc (100);
3179       searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
3180       searchbufs[i].regexp = Qnil;
3181       searchbufs[i].whitespace_regexp = Qnil;
3182       searchbufs[i].syntax_table = Qnil;
3183       staticpro (&searchbufs[i].regexp);
3184       staticpro (&searchbufs[i].whitespace_regexp);
3185       staticpro (&searchbufs[i].syntax_table);
3186       searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
3187     }
3188   searchbuf_head = &searchbufs[0];
3189
3190   Qsearch_failed = intern ("search-failed");
3191   staticpro (&Qsearch_failed);
3192   Qinvalid_regexp = intern ("invalid-regexp");
3193   staticpro (&Qinvalid_regexp);
3194
3195   Fput (Qsearch_failed, Qerror_conditions,
3196         Fcons (Qsearch_failed, Fcons (Qerror, Qnil)));
3197   Fput (Qsearch_failed, Qerror_message,
3198         build_string ("Search failed"));
3199
3200   Fput (Qinvalid_regexp, Qerror_conditions,
3201         Fcons (Qinvalid_regexp, Fcons (Qerror, Qnil)));
3202   Fput (Qinvalid_regexp, Qerror_message,
3203         build_string ("Invalid regexp"));
3204
3205   last_thing_searched = Qnil;
3206   staticpro (&last_thing_searched);
3207
3208   saved_last_thing_searched = Qnil;
3209   staticpro (&saved_last_thing_searched);
3210
3211   DEFVAR_LISP ("search-spaces-regexp", &Vsearch_spaces_regexp,
3212       doc: /* Regexp to substitute for bunches of spaces in regexp search.
3213 Some commands use this for user-specified regexps.
3214 Spaces that occur inside character classes or repetition operators
3215 or other such regexp constructs are not replaced with this.
3216 A value of nil (which is the normal value) means treat spaces literally.  */);
3217   Vsearch_spaces_regexp = Qnil;
3218
3219   DEFVAR_LISP ("inhibit-changing-match-data", &Vinhibit_changing_match_data,
3220       doc: /* Internal use only.
3221 If non-nil, the primitive searching and matching functions
3222 such as `looking-at', `string-match', `re-search-forward', etc.,
3223 do not set the match data.  The proper way to use this variable
3224 is to bind it with `let' around a small expression.  */);
3225   Vinhibit_changing_match_data = Qnil;
3226
3227   defsubr (&Slooking_at);
3228   defsubr (&Sposix_looking_at);
3229   defsubr (&Sstring_match);
3230   defsubr (&Sposix_string_match);
3231   defsubr (&Ssearch_forward);
3232   defsubr (&Ssearch_backward);
3233   defsubr (&Sword_search_forward);
3234   defsubr (&Sword_search_backward);
3235   defsubr (&Sre_search_forward);
3236   defsubr (&Sre_search_backward);
3237   defsubr (&Sposix_search_forward);
3238   defsubr (&Sposix_search_backward);
3239   defsubr (&Sreplace_match);
3240   defsubr (&Smatch_beginning);
3241   defsubr (&Smatch_end);
3242   defsubr (&Smatch_data);
3243   defsubr (&Sset_match_data);
3244   defsubr (&Sregexp_quote);
3245 }
3246
3247 /* arch-tag: a6059d79-0552-4f14-a2cb-d379a4e3c78f
3248    (do not change this comment) */