src/search.c

   1 /* String search routines for GNU Emacs.
   2    Copyright (C) 1985, 1986, 1987, 1993, 1994, 1997, 1998, 1999, 2001, 2002,
   3                  2003, 2004, 2005, 2006, 2007, 2008, 2009
   4                  Free Software Foundation, Inc.
   5
   6 This file is part of GNU Emacs.
   7
   8 GNU Emacs is free software: you can redistribute it and/or modify
   9 it under the terms of the GNU General Public License as published by
  10 the Free Software Foundation, either version 3 of the License, or
  11 (at your option) any later version.
  12
  13 GNU Emacs is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
  20
  21
  22 #include <config.h>
  23 #include "lisp.h"
  24 #include "syntax.h"
  25 #include "category.h"
  26 #include "buffer.h"
  27 #include "character.h"
  28 #include "charset.h"
  29 #include "region-cache.h"
  30 #include "commands.h"
  31 #include "blockinput.h"
  32 #include "intervals.h"
  33
  34 #include <sys/types.h>
  35 #include "regex.h"
  36
  37 #define REGEXP_CACHE_SIZE 20  /* Number of entries in the searchbufs array.  */
  38
  39 /* One entry of the compiled-regexp cache.  If the regexp is non-nil, then
  40    the buffer contains the compiled form of that regexp, suitable for searching.  */
  41 struct regexp_cache
  42 {
  43   struct regexp_cache *next;    /* Next entry in the list, or 0 at the end.  */
  44   Lisp_Object regexp, whitespace_regexp;  /* Source pattern; whitespace regexp it was compiled with.  */
  45   /* Syntax table for which the regexp applies.  We need this because
  46      of character classes.  If this is t, then the compiled pattern is valid
  47      for any syntax-table.  */
  48   Lisp_Object syntax_table;
  49   struct re_pattern_buffer buf; /* The compiled pattern itself.  */
  50   char fastmap[0400];           /* 0400 octal = 256 bytes; presumably BUF's fastmap -- TODO confirm.  */
  51   /* Nonzero means regexp was compiled to do full POSIX backtracking.  */
  52   char posix;
  53 };
  54
  55 /* The instances of struct regexp_cache: fixed storage for every cache entry.  */
  56 struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
  57
  58 /* The head of the linked list of entries; points to the most recently used one.  */
  59 struct regexp_cache *searchbuf_head;
  60
  61
  62 /* Every call to re_match, etc., must pass &search_regs as the regs
  63    argument unless you can show it is unnecessary (i.e., if re_match
  64    is certainly going to be called again before region-around-match
  65    can be called).
  66
  67    Since the registers are now dynamically allocated, we need to make
  68    sure not to refer to the Nth register before checking that it has
  69    been allocated by checking search_regs.num_regs.
  70
  71    The regex code keeps track of whether it has allocated the search
  72    buffer using bits in the re_pattern_buffer.  This means that whenever
  73    you compile a new pattern, it completely forgets whether it has
  74    allocated any registers, and will allocate new registers the next
  75    time you call a searching or matching function.  Therefore, we need
  76    to call re_set_registers after compiling a new pattern or after
  77    setting the match registers, so that the regex functions will be
  78    able to free or re-allocate them properly.  */
  79 static struct re_registers search_regs;
  80
  81 /* The buffer in which the last search was performed, or Qt if the last
  82    search was done in a string; Qnil if no searching has been done yet.
  83    Left unchanged by matches made while Vinhibit_changing_match_data is non-nil.  */
  84 static Lisp_Object last_thing_searched;
  85
  86 /* Error condition signaled by compile_pattern_1 when a regexp fails to compile.  */
  87
  88 Lisp_Object Qinvalid_regexp;
  89
  90 /* Error condition used for failing searches.  */
  91 Lisp_Object Qsearch_failed;
  92 /* If this is a string, compile_pattern_1 passes it to re_set_whitespace_regexp.  */
  93 Lisp_Object Vsearch_spaces_regexp;
  94
  95 /* If non-nil, the match data will not be changed during call to
  96    searching or matching functions.  This variable is for internal use
  97    only.  */
  98 Lisp_Object Vinhibit_changing_match_data;
  99 /* Forward declarations (old style, without parameter lists).  */
 100 static void set_search_regs ();
 101 static void save_search_regs ();
 102 static int simple_search ();
 103 static int boyer_moore ();
 104 static int search_buffer ();
 105 static void matcher_overflow () NO_RETURN;
 106 /* Signal a Lisp error for a matcher stack overflow (a -2 match result).  */
 107 static void
 108 matcher_overflow ()
 109 {
 110   error ("Stack overflow in regexp matcher");
 111 }
 112
 113 /* Compile a regexp and signal a Lisp error if anything goes wrong.
 114    PATTERN is the pattern to compile.
 115    CP is the place to put the result.
 116    TRANSLATE is a translation table for ignoring case, or nil for none.
 117    REGP is the structure that says where to store the "register"
 118    values that will result from matching this pattern.
 119    If it is 0, we should compile the pattern not to record any
 120    subexpression bounds.  (The body below never actually reads REGP.)
 121    POSIX is nonzero if we want full backtracking (POSIX style)
 122    for this pattern.  0 means backtrack only enough to get a valid match.
 123
 124    The behavior also depends on Vsearch_spaces_regexp.  */
 125
 126 static void
 127 compile_pattern_1 (cp, pattern, translate, regp, posix)
 128      struct regexp_cache *cp;
 129      Lisp_Object pattern;
 130      Lisp_Object translate;
 131      struct re_registers *regp;
 132      int posix;
 133 {
 134   char *val;
 135   reg_syntax_t old;
 136   /* Mark CP as holding no pattern; it stays nil if an error is signaled below.  */
 137   cp->regexp = Qnil;
 138   cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
 139   cp->posix = posix;
 140   cp->buf.multibyte = STRING_MULTIBYTE (pattern);
 141   cp->buf.charset_unibyte = charset_unibyte;
 142   if (STRINGP (Vsearch_spaces_regexp))
 143     cp->whitespace_regexp = Vsearch_spaces_regexp;
 144   else
 145     cp->whitespace_regexp = Qnil;
 146
 147   /* rms: I think BLOCK_INPUT is not needed here any more,
 148      because regex.c defines malloc to call xmalloc.
 149      Using BLOCK_INPUT here means the debugger won't run if an error occurs.
 150      So let's turn it off.  */
 151   /*  BLOCK_INPUT;  */
 152   old = re_set_syntax (RE_SYNTAX_EMACS
 153                        | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
 154   /* Install the whitespace regexp just for this compilation; it is reset below.  */
 155   if (STRINGP (Vsearch_spaces_regexp))
 156     re_set_whitespace_regexp (SDATA (Vsearch_spaces_regexp));
 157   else
 158     re_set_whitespace_regexp (NULL);
 159   /* VAL is treated below as an error message: non-null means compilation failed.  */
 160   val = (char *) re_compile_pattern ((char *) SDATA (pattern),
 161                                      SBYTES (pattern), &cp->buf);
 162
 163   /* If the compiled pattern hard codes some of the contents of the
 164      syntax-table, it can only be reused with *this* syntax table.  */
 165   cp->syntax_table = cp->buf.used_syntax ? current_buffer->syntax_table : Qt;
 166   /* Undo the whitespace-regexp and syntax settings made above.  */
 167   re_set_whitespace_regexp (NULL);
 168
 169   re_set_syntax (old);
 170   /* UNBLOCK_INPUT;  */
 171   if (val)
 172     xsignal1 (Qinvalid_regexp, build_string (val));
 173   /* Success: keep a copy of PATTERN so compile_pattern can recognize it later.  */
 174   cp->regexp = Fcopy_sequence (pattern);
 175 }
 176
 177 /* Shrink each compiled regexp buffer in the cache to the size actually
 178    used right now, for every entry reachable from searchbuf_head.
 179    This is called from garbage collection.  */
 180
 181 void
 182 shrink_regexp_cache ()
 183 {
 184   struct regexp_cache *cp;
 185   /* Walk the list from its head; record the new size, then reallocate to it.  */
 186   for (cp = searchbuf_head; cp != 0; cp = cp->next)
 187     {
 188       cp->buf.allocated = cp->buf.used;
 189       cp->buf.buffer
 190         = (unsigned char *) xrealloc (cp->buf.buffer, cp->buf.used);
 191     }
 192 }
 193
 194 /* Invalidate every cache entry whose compiled pattern depends on a
 195    syntax table (syntax_table is not t), because a syntax table was changed.
 196    There is no danger of memory leak here because re_compile_pattern
 197    automagically manages the memory in each re_pattern_buffer struct,
 198    based on its `allocated' and `buffer' values.  */
 199 void
 200 clear_regexp_cache ()
 201 {
 202   int i;
 203
 204   for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
 205     /* It's tempting to compare with the syntax-table we've actually changed,
 206        but it's not sufficient because char-table inheritance means that
 207        modifying one syntax-table can change others at the same time.  */
 208     if (!EQ (searchbufs[i].syntax_table, Qt))
 209       searchbufs[i].regexp = Qnil;
 210 }
 211
 212 /* Compile a regexp if necessary, but first check to see if there's one in
 213    the cache.  Return the pattern buffer of the cache entry that was used.
 214    PATTERN is the pattern to compile.
 215    TRANSLATE is a translation table for ignoring case, or nil for none.
 216    REGP is the structure that says where to store the "register"
 217    values that will result from matching this pattern.
 218    If it is 0, we should compile the pattern not to record any
 219    subexpression bounds, and re_set_registers is not called.
 220    POSIX is nonzero if we want full backtracking (POSIX style)
 221    for this pattern.  0 means backtrack only enough to get a valid match.  */
 222 /* MULTIBYTE is stored in the target_multibyte field of the returned buffer.  */
 223 struct re_pattern_buffer *
 224 compile_pattern (pattern, regp, translate, posix, multibyte)
 225      Lisp_Object pattern;
 226      struct re_registers *regp;
 227      Lisp_Object translate;
 228      int posix, multibyte;
 229 {
 230   struct regexp_cache *cp, **cpp;
 231   /* Scan the list from the most recently used entry; CPP tracks the link to CP.  */
 232   for (cpp = &searchbuf_head; ; cpp = &cp->next)
 233     {
 234       cp = *cpp;
 235       /* Entries are initialized to nil, and may be set to nil by
 236          compile_pattern_1 if the pattern isn't valid, or by
 237          clear_regexp_cache.  Don't apply string accessors in those
 238          cases; the first nil entry found is simply recompiled, so any
 239          later non-nil entries are not consulted on that call.  */
 240       if (NILP (cp->regexp))
 241         goto compile_it;
 242       if (SCHARS (cp->regexp) == SCHARS (pattern)
 243           && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
 244           && !NILP (Fstring_equal (cp->regexp, pattern))
 245           && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
 246           && cp->posix == posix
 247           && (EQ (cp->syntax_table, Qt)
 248               || EQ (cp->syntax_table, current_buffer->syntax_table))
 249           && !NILP (Fequal (cp->whitespace_regexp, Vsearch_spaces_regexp))
 250           && cp->buf.charset_unibyte == charset_unibyte)
 251         break;
 252
 253       /* If we're at the end of the cache, compile into the nil cell
 254          we found, or the last (least recently used) cell with a
 255          string value.  */
 256       if (cp->next == 0)
 257         {
 258         compile_it:
 259           compile_pattern_1 (cp, pattern, translate, regp, posix);
 260           break;
 261         }
 262     }
 263
 264   /* When we get here, cp (aka *cpp) contains the compiled pattern,
 265      either because we found it in the cache or because we just compiled it.
 266      Move it to the front of the queue to mark it as most recently used.  */
 267   *cpp = cp->next;
 268   cp->next = searchbuf_head;
 269   searchbuf_head = cp;
 270
 271   /* Advise the searching functions about the space we have allocated
 272      for register data.  */
 273   if (regp)
 274     re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
 275
 276   /* The compiled pattern can be used both for multibyte and unibyte
 277      target.  But, we have to tell which the pattern is used for. */
 278   cp->buf.target_multibyte = multibyte;
 279
 280   return &cp->buf;
 281 }
 282
 283 \f
 284 static Lisp_Object
 285 looking_at_1 (string, posix)
 286      Lisp_Object string;
 287      int posix;
 288 {
 289   Lisp_Object val;
 290   unsigned char *p1, *p2;
 291   int s1, s2;
 292   register int i;
 293   struct re_pattern_buffer *bufp;
 294   /* Match the regexp STRING against the buffer text at point; return t or nil.  */
 295   if (running_asynch_code)
 296     save_search_regs ();
 297
 298   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
 299   XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
 300     = current_buffer->case_eqv_table;
 301   /* Compile STRING (or fetch it from the cache); nonzero POSIX asks for POSIX backtracking.  */
 302   CHECK_STRING (string);
 303   bufp = compile_pattern (string,
 304                           (NILP (Vinhibit_changing_match_data)
 305                            ? &search_regs : NULL),
 306                           (!NILP (current_buffer->case_fold_search)
 307                            ? current_buffer->case_canon_table : Qnil),
 308                           posix,
 309                           !NILP (current_buffer->enable_multibyte_characters));
 310
 311   immediate_quit = 1;
 312   QUIT;                 /* Do a pending quit right away, to avoid paradoxical behavior */
 313
 314   /* Get pointers and sizes of the two strings
 315      that make up the visible portion of the buffer. */
 316
 317   p1 = BEGV_ADDR;
 318   s1 = GPT_BYTE - BEGV_BYTE;
 319   p2 = GAP_END_ADDR;
 320   s2 = ZV_BYTE - GPT_BYTE;
 321   if (s1 < 0)  /* GPT_BYTE < BEGV_BYTE: use one string starting at BEGV_ADDR.  */
 322     {
 323       p2 = p1;
 324       s2 = ZV_BYTE - BEGV_BYTE;
 325       s1 = 0;
 326     }
 327   if (s2 < 0)  /* ZV_BYTE < GPT_BYTE: everything is in the first string.  */
 328     {
 329       s1 = ZV_BYTE - BEGV_BYTE;
 330       s2 = 0;
 331     }
 332
 333   re_match_object = Qnil;
 334   /* The offsets given to re_match_2 are in bytes, relative to BEGV_BYTE.  */
 335   i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
 336                   PT_BYTE - BEGV_BYTE,
 337                   (NILP (Vinhibit_changing_match_data)
 338                    ? &search_regs : NULL),
 339                   ZV_BYTE - BEGV_BYTE);
 340   immediate_quit = 0;
 341
 342   if (i == -2)
 343     matcher_overflow ();
 344   /* On a match, convert the recorded byte offsets to character positions.  */
 345   val = (0 <= i ? Qt : Qnil);
 346   if (NILP (Vinhibit_changing_match_data) && i >= 0)
 347     for (i = 0; i < search_regs.num_regs; i++)
 348       if (search_regs.start[i] >= 0)
 349         {
 350           search_regs.start[i]
 351             = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
 352           search_regs.end[i]
 353             = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
 354         }
 355
 356   /* Set last_thing_searched only when match data is changed.  */
 357   if (NILP (Vinhibit_changing_match_data))
 358     XSETBUFFER (last_thing_searched, current_buffer);
 359
 360   return val;
 361 }
 362
 363 DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
 364        doc: /* Return t if text after point matches regular expression REGEXP.
 365 This function modifies the match data that `match-beginning',
 366 `match-end' and `match-data' access; save and restore the match
 367 data if you want to preserve them.  */)
 368      (regexp)
 369      Lisp_Object regexp;
 370 {
 371   return looking_at_1 (regexp, 0);  /* 0: no POSIX-style backtracking.  */
 372 }
 373
 374 DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
 375        doc: /* Return t if text after point matches regular expression REGEXP.
 376 Find the longest match, in accord with Posix regular expression rules.
 377 This function modifies the match data that `match-beginning',
 378 `match-end' and `match-data' access; save and restore the match
 379 data if you want to preserve them.  */)
 380      (regexp)
 381      Lisp_Object regexp;
 382 {
 383   return looking_at_1 (regexp, 1);  /* 1: full POSIX-style backtracking.  */
 384 }
 385 \f
 386 static Lisp_Object
 387 string_match_1 (regexp, string, start, posix)
 388      Lisp_Object regexp, string, start;
 389      int posix;
 390 {
 391   int val;
 392   struct re_pattern_buffer *bufp;
 393   int pos, pos_byte;
 394   int i;
 395   /* Search STRING for REGEXP from index START; return the match index or nil.  */
 396   if (running_asynch_code)
 397     save_search_regs ();
 398
 399   CHECK_STRING (regexp);
 400   CHECK_STRING (string);
 401
 402   if (NILP (start))
 403     pos = 0, pos_byte = 0;
 404   else
 405     {
 406       int len = SCHARS (string);
 407       /* A negative START counts back from the end of STRING.  */
 408       CHECK_NUMBER (start);
 409       pos = XINT (start);
 410       if (pos < 0 && -pos <= len)
 411         pos = len + pos;
 412       else if (0 > pos || pos > len)
 413         args_out_of_range (string, start);
 414       pos_byte = string_char_to_byte (string, pos);
 415     }
 416
 417   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
 418   XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
 419     = current_buffer->case_eqv_table;
 420
 421   bufp = compile_pattern (regexp,
 422                           (NILP (Vinhibit_changing_match_data)
 423                            ? &search_regs : NULL),
 424                           (!NILP (current_buffer->case_fold_search)
 425                            ? current_buffer->case_canon_table : Qnil),
 426                           posix,
 427                           STRING_MULTIBYTE (string));
 428   immediate_quit = 1;
 429   re_match_object = string;
 430   /* Search from POS_BYTE over the remaining SBYTES - POS_BYTE bytes of STRING.  */
 431   val = re_search (bufp, (char *) SDATA (string),
 432                    SBYTES (string), pos_byte,
 433                    SBYTES (string) - pos_byte,
 434                    (NILP (Vinhibit_changing_match_data)
 435                     ? &search_regs : NULL));
 436   immediate_quit = 0;
 437
 438   /* Set last_thing_searched only when match data is changed.  */
 439   if (NILP (Vinhibit_changing_match_data))
 440     last_thing_searched = Qt;
 441
 442   if (val == -2)
 443     matcher_overflow ();
 444   if (val < 0) return Qnil;
 445   /* Convert the recorded byte offsets into character indices within STRING.  */
 446   if (NILP (Vinhibit_changing_match_data))
 447     for (i = 0; i < search_regs.num_regs; i++)
 448       if (search_regs.start[i] >= 0)
 449         {
 450           search_regs.start[i]
 451             = string_byte_to_char (string, search_regs.start[i]);
 452           search_regs.end[i]
 453             = string_byte_to_char (string, search_regs.end[i]);
 454         }
 455
 456   return make_number (string_byte_to_char (string, val));
 457 }
 458
 459 DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
 460        doc: /* Return index of start of first match for REGEXP in STRING, or nil.
 461 Matching ignores case if `case-fold-search' is non-nil.
 462 If third arg START is non-nil, start search at that index in STRING.
 463 For index of first char beyond the match, do (match-end 0).
 464 `match-end' and `match-beginning' also give indices of substrings
 465 matched by parenthesis constructs in the pattern.
 466
 467 You can use the function `match-string' to extract the substrings
 468 matched by the parenthesis constructions in REGEXP. */)
 469      (regexp, string, start)
 470      Lisp_Object regexp, string, start;
 471 {
 472   return string_match_1 (regexp, string, start, 0);  /* 0: no POSIX-style backtracking.  */
 473 }
 474
 475 DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
 476        doc: /* Return index of start of first match for REGEXP in STRING, or nil.
 477 Find the longest match, in accord with Posix regular expression rules.
 478 Case is ignored if `case-fold-search' is non-nil in the current buffer.
 479 If third arg START is non-nil, start search at that index in STRING.
 480 For index of first char beyond the match, do (match-end 0).
 481 `match-end' and `match-beginning' also give indices of substrings
 482 matched by parenthesis constructs in the pattern.  */)
 483      (regexp, string, start)
 484      Lisp_Object regexp, string, start;
 485 {
 486   return string_match_1 (regexp, string, start, 1);  /* 1: full POSIX-style backtracking.  */
 487 }
 488
 489 /* Match REGEXP against STRING, searching all of STRING,
 490    and return the index of the match, or negative on failure.
 491    This does not clobber the match data.  */
 492
 493 int
 494 fast_string_match (regexp, string)
 495      Lisp_Object regexp, string;
 496 {
 497   int val;
 498   struct re_pattern_buffer *bufp;
 499   /* Compile with no registers (0), no translation (Qnil) and non-POSIX matching.  */
 500   bufp = compile_pattern (regexp, 0, Qnil,
 501                           0, STRING_MULTIBYTE (string));
 502   immediate_quit = 1;
 503   re_match_object = string;
 504   /* The final 0 passes no register structure, so search_regs is not touched.  */
 505   val = re_search (bufp, (char *) SDATA (string),
 506                    SBYTES (string), 0,
 507                    SBYTES (string), 0);
 508   immediate_quit = 0;
 509   return val;
 510 }
 511
 512 /* Match REGEXP against STRING, searching all of STRING ignoring case,
 513    and return the index of the match, or negative on failure.
 514    This does not clobber the match data.
 515    We assume that STRING contains single-byte characters.  */
 516 /* NOTE(review): the function below uses Vascii_canon_table, not this table.  */
 517 extern Lisp_Object Vascii_downcase_table;
 518
 519 int
 520 fast_c_string_match_ignore_case (regexp, string)
 521      Lisp_Object regexp;
 522      const char *string;
 523 {
 524   int val;
 525   struct re_pattern_buffer *bufp;
 526   int len = strlen (string);
 527   /* REGEXP goes through string_make_unibyte; the target is declared unibyte (last 0).  */
 528   regexp = string_make_unibyte (regexp);
 529   re_match_object = Qt;
 530   bufp = compile_pattern (regexp, 0,
 531                           Vascii_canon_table, 0,
 532                           0);
 533   immediate_quit = 1;
 534   val = re_search (bufp, string, len, 0, len, 0);
 535   immediate_quit = 0;
 536   return val;
 537 }
 538
 539 /* Like fast_string_match but ignore case, by translating through Vascii_canon_table.  */
 540
 541 int
 542 fast_string_match_ignore_case (regexp, string)
 543      Lisp_Object regexp, string;
 544 {
 545   int val;
 546   struct re_pattern_buffer *bufp;
 547   /* Compile with no registers (0) and non-POSIX matching.  */
 548   bufp = compile_pattern (regexp, 0, Vascii_canon_table,
 549                           0, STRING_MULTIBYTE (string));
 550   immediate_quit = 1;
 551   re_match_object = string;
 552   /* The final 0 passes no register structure, so search_regs is not touched.  */
 553   val = re_search (bufp, (char *) SDATA (string),
 554                    SBYTES (string), 0,
 555                    SBYTES (string), 0);
 556   immediate_quit = 0;
 557   return val;
 558 }
 559 \f
 560 /* The newline cache: remembering which sections of text have no newlines.  */
 561
 562 /* If the user has requested newline caching in BUF (its
 563    cache_long_line_scans is non-nil), make sure it's on; otherwise,
 564    make sure it's off.  This is our cheesy way of associating an
 565    action with the change of state of a buffer-local variable.  */
 566 static void
 567 newline_cache_on_off (buf)
 568      struct buffer *buf;
 569 {
 570   if (NILP (buf->cache_long_line_scans))
 571     {
 572       /* It should be off: free any existing cache and clear the pointer.  */
 573       if (buf->newline_cache)
 574         {
 575           free_region_cache (buf->newline_cache);
 576           buf->newline_cache = 0;
 577         }
 578     }
 579   else
 580     {
 581       /* It should be on: create a cache only if there is none yet.  */
 582       if (buf->newline_cache == 0)
 583         buf->newline_cache = new_region_cache ();
 584     }
 585 }
 586
 587 \f
 588 /* Search for COUNT instances of the character TARGET between START and END.
 589
 590    If COUNT is positive, search forwards; END must be >= START.
 591    If COUNT is negative, search backwards for the -COUNTth instance;
 592       END must be <= START.
 593    If COUNT is zero, do anything you please; run rogue, for all I care.
 594
 595    If END is zero, use BEGV or ZV instead, as appropriate for the
 596    direction indicated by COUNT.
 597
 598    If we find COUNT instances, set *SHORTAGE to zero, and return the
 599    position past the COUNTth match.  Note that for reverse motion
 600    this is not the same as the usual convention for Emacs motion commands.
 601
 602    If we don't find COUNT instances before reaching END, set *SHORTAGE
 603    to the number of TARGETs left unfound, and return END.
 604    SHORTAGE may be a null pointer, in which case nothing is stored.
 605    If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
 606    except when inside redisplay.  */
 607
 608 int
 609 scan_buffer (target, start, end, count, shortage, allow_quit)
 610      register int target;
 611      int start, end;
 612      int count;
 613      int *shortage;
 614      int allow_quit;
 615 {
 616   struct region_cache *newline_cache;
 617   int direction;
 618   /* Pick the direction, and default a zero END to ZV (forward) or BEGV (backward).  */
 619   if (count > 0)
 620     {
 621       direction = 1;
 622       if (! end) end = ZV;
 623     }
 624   else
 625     {
 626       direction = -1;
 627       if (! end) end = BEGV;
 628     }
 629
 630   newline_cache_on_off (current_buffer);
 631   newline_cache = current_buffer->newline_cache;
 632
 633   if (shortage != 0)
 634     *shortage = 0;
 635
 636   immediate_quit = allow_quit;
 637   /* START and END are character positions; the scanning itself works in bytes.  */
 638   if (count > 0)
 639     while (start != end)
 640       {
 641         /* Our innermost scanning loop is very simple; it doesn't know
 642            about gaps, buffer ends, or the newline cache.  ceiling is
 643            the position of the last character before the next such
 644            obstacle --- the last character the dumb search loop should
 645            examine.  */
 646         int ceiling_byte = CHAR_TO_BYTE (end) - 1;
 647         int start_byte = CHAR_TO_BYTE (start);
 648         int tem;
 649
 650         /* If we're looking for a newline, consult the newline cache
 651            to see where we can avoid some scanning.  */
 652         if (target == '\n' && newline_cache)
 653           {
 654             int next_change;
 655             immediate_quit = 0;  /* Quitting is disabled around the cache lookups.  */
 656             while (region_cache_forward
 657                    (current_buffer, newline_cache, start_byte, &next_change))
 658               start_byte = next_change;
 659             immediate_quit = allow_quit;
 660
 661             /* START should never be after END.  */
 662             if (start_byte > ceiling_byte)
 663               start_byte = ceiling_byte;
 664
 665             /* Now the text after start is an unknown region, and
 666                next_change is the position of the next known region. */
 667             ceiling_byte = min (next_change - 1, ceiling_byte);
 668           }
 669
 670         /* The dumb loop can only scan text stored in contiguous
 671            bytes. BUFFER_CEILING_OF returns the last character
 672            position that is contiguous, so the ceiling is the
 673            position after that.  */
 674         tem = BUFFER_CEILING_OF (start_byte);
 675         ceiling_byte = min (tem, ceiling_byte);
 676
 677         {
 678           /* The termination address of the dumb loop.  */
 679           register unsigned char *ceiling_addr
 680             = BYTE_POS_ADDR (ceiling_byte) + 1;
 681           register unsigned char *cursor
 682             = BYTE_POS_ADDR (start_byte);
 683           unsigned char *base = cursor;
 684           /* BASE stays fixed, so START_BYTE + CURSOR - BASE is CURSOR's byte position.  */
 685           while (cursor < ceiling_addr)
 686             {
 687               unsigned char *scan_start = cursor;
 688
 689               /* The dumb loop.  */
 690               while (*cursor != target && ++cursor < ceiling_addr)
 691                 ;
 692
 693               /* If we're looking for newlines, cache the fact that
 694                  the region from start to cursor is free of them. */
 695               if (target == '\n' && newline_cache)
 696                 know_region_cache (current_buffer, newline_cache,
 697                                    start_byte + scan_start - base,
 698                                    start_byte + cursor - base);
 699
 700               /* Did we find the target character?  */
 701               if (cursor < ceiling_addr)
 702                 {
 703                   if (--count == 0)
 704                     {
 705                       immediate_quit = 0;
 706                       return BYTE_TO_CHAR (start_byte + cursor - base + 1);
 707                     }
 708                   cursor++;
 709                 }
 710             }
 711           /* This stretch is exhausted; resume the outer loop where CURSOR stopped.  */
 712           start = BYTE_TO_CHAR (start_byte + cursor - base);
 713         }
 714       }
 715   else
 716     while (start > end)
 717       {
 718         /* The last character to check before the next obstacle.  */
 719         int ceiling_byte = CHAR_TO_BYTE (end);
 720         int start_byte = CHAR_TO_BYTE (start);
 721         int tem;
 722
 723         /* Consult the newline cache, if appropriate.  */
 724         if (target == '\n' && newline_cache)
 725           {
 726             int next_change;
 727             immediate_quit = 0;  /* Quitting is disabled around the cache lookups.  */
 728             while (region_cache_backward
 729                    (current_buffer, newline_cache, start_byte, &next_change))
 730               start_byte = next_change;
 731             immediate_quit = allow_quit;
 732
 733             /* Start should never be at or before end.  */
 734             if (start_byte <= ceiling_byte)
 735               start_byte = ceiling_byte + 1;
 736
 737             /* Now the text before start is an unknown region, and
 738                next_change is the position of the next known region. */
 739             ceiling_byte = max (next_change, ceiling_byte);
 740           }
 741
 742         /* Stop scanning before the gap.  */
 743         tem = BUFFER_FLOOR_OF (start_byte - 1);
 744         ceiling_byte = max (tem, ceiling_byte);
 745
 746         {
 747           /* The termination address of the dumb loop.  */
 748           register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
 749           register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
 750           unsigned char *base = cursor;
 751           /* BASE is the byte just before START_BYTE; the scan moves down toward CEILING_ADDR.  */
 752           while (cursor >= ceiling_addr)
 753             {
 754               unsigned char *scan_start = cursor;
 755
 756               while (*cursor != target && --cursor >= ceiling_addr)
 757                 ;
 758
 759               /* If we're looking for newlines, cache the fact that
 760                  the region from after the cursor to start is free of them.  */
 761               if (target == '\n' && newline_cache)
 762                 know_region_cache (current_buffer, newline_cache,
 763                                    start_byte + cursor - base,
 764                                    start_byte + scan_start - base);
 765
 766               /* Did we find the target character?  */
 767               if (cursor >= ceiling_addr)
 768                 {
 769                   if (++count >= 0)
 770                     {
 771                       immediate_quit = 0;
 772                       return BYTE_TO_CHAR (start_byte + cursor - base);
 773                     }
 774                   cursor--;
 775                 }
 776             }
 777           /* This stretch is exhausted; resume the outer loop where CURSOR stopped.  */
 778           start = BYTE_TO_CHAR (start_byte + cursor - base);
 779         }
 780       }
 781   /* Ran out of text: report how many TARGETs are still unfound, as a non-negative count.  */
 782   immediate_quit = 0;
 783   if (shortage != 0)
 784     *shortage = count * direction;
 785   return start;
 786 }
 787 \f
 788 /* Search for COUNT instances of a line boundary, i.e. a newline.  (The
 789    loops below test for '\n' only; they have no carriage-return case.)
 790    Start at START.  If COUNT is negative, search backwards.
 791
 792    We report the resulting position by calling TEMP_SET_PT_BOTH.
 793
 794    If we find COUNT instances, we position after (always after,
 795    even if scanning backwards) the COUNTth match, and return 0.
 796
 797    If we don't find COUNT instances before reaching the end of the
 798    buffer (or the beginning, if scanning backwards), we return
 799    the number of line boundaries left unfound, and position at
 800    the limit we bumped up against.
 801
 802    If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
 803    except in special cases.  */
 804
 805 int
 806 scan_newline (start, start_byte, limit, limit_byte, count, allow_quit)
 807      int start, start_byte;
 808      int limit, limit_byte;
 809      register int count;
 810      int allow_quit;
 811 {
 812   int direction = ((count > 0) ? 1 : -1);
 813
 814   register unsigned char *cursor;
 815   unsigned char *base;
 816
 817   register int ceiling;
 818   register unsigned char *ceiling_addr;
 819   /* Saved so that every exit path can restore immediate_quit.  */
 820   int old_immediate_quit = immediate_quit;
 821
 822   /* The code that follows is like scan_buffer, but it does not use the
 823      newline cache and it tests for '\n' only.  */
 824
 825   if (allow_quit)
 826     immediate_quit++;
 827   /* The START_BYTE argument is overwritten here with START's byte position.  */
 828   start_byte = CHAR_TO_BYTE (start);
 829
 830   if (count > 0)
 831     {
 832       while (start_byte < limit_byte)
 833         {
 834           ceiling =  BUFFER_CEILING_OF (start_byte);
 835           ceiling = min (limit_byte - 1, ceiling);
 836           ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;  /* One past the last byte to examine.  */
 837           base = (cursor = BYTE_POS_ADDR (start_byte));
 838           while (1)
 839             {
 840               while (*cursor != '\n' && ++cursor != ceiling_addr)
 841                 ;
 842
 843               if (cursor != ceiling_addr)
 844                 {
 845                   if (--count == 0)
 846                     {
 847                       immediate_quit = old_immediate_quit;
 848                       start_byte = start_byte + cursor - base + 1;
 849                       start = BYTE_TO_CHAR (start_byte);
 850                       TEMP_SET_PT_BOTH (start, start_byte);
 851                       return 0;
 852                     }
 853                   else
 854                     if (++cursor == ceiling_addr)
 855                       break;
 856                 }
 857               else
 858                 break;
 859             }
 860           start_byte += cursor - base;
 861         }
 862     }
 863   else
 864     {
 865       while (start_byte > limit_byte)
 866         {
 867           ceiling = BUFFER_FLOOR_OF (start_byte - 1);
 868           ceiling = max (limit_byte, ceiling);
 869           ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;  /* One before the lowest byte to examine.  */
 870           base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
 871           while (1)
 872             {
 873               while (--cursor != ceiling_addr && *cursor != '\n')
 874                 ;
 875
 876               if (cursor != ceiling_addr)
 877                 {
 878                   if (++count == 0)
 879                     {
 880                       immediate_quit = old_immediate_quit;
 881                       /* Return the position AFTER the match we found.  */
 882                       start_byte = start_byte + cursor - base + 1;
 883                       start = BYTE_TO_CHAR (start_byte);
 884                       TEMP_SET_PT_BOTH (start, start_byte);
 885                       return 0;
 886                     }
 887                 }
 888               else
 889                 break;
 890             }
 891           /* Here we add 1 to compensate for the last decrement
 892              of CURSOR, which took it past the valid range.  */
 893           start_byte += cursor - base + 1;
 894         }
 895     }
 896   /* Not enough newlines: position at the limit and return the non-negative shortfall.  */
 897   TEMP_SET_PT_BOTH (limit, limit_byte);
 898   immediate_quit = old_immediate_quit;
 899
 900   return count * direction;
 901 }
 902
 903 int
 904 find_next_newline_no_quit (from, cnt)
 905      register int from, cnt;
 906 {
 907   return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0);
 908 }
 909
 910 /* Like find_next_newline, but returns position before the newline,
 911    not after, and only search up to TO.  This isn't just
 912    find_next_newline (...)-1, because you might hit TO.  */
 913
 914 int
 915 find_before_next_newline (from, to, cnt)
 916      int from, to, cnt;
 917 {
 918   int shortage;
 919   int pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);
 920
 921   if (shortage == 0)
 922     pos--;
 923
 924   return pos;
 925 }
 926 \f
 927 /* Subroutines of Lisp buffer search functions. */
 928
 929 static Lisp_Object
 930 search_command (string, bound, noerror, count, direction, RE, posix)
 931      Lisp_Object string, bound, noerror, count;
 932      int direction;
 933      int RE;
 934      int posix;
 935 {
 936   register int np;
 937   int lim, lim_byte;
 938   int n = direction;
 939
 940   if (!NILP (count))
 941     {
 942       CHECK_NUMBER (count);
 943       n *= XINT (count);
 944     }
 945
 946   CHECK_STRING (string);
 947   if (NILP (bound))
 948     {
 949       if (n > 0)
 950         lim = ZV, lim_byte = ZV_BYTE;
 951       else
 952         lim = BEGV, lim_byte = BEGV_BYTE;
 953     }
 954   else
 955     {
 956       CHECK_NUMBER_COERCE_MARKER (bound);
 957       lim = XINT (bound);
 958       if (n > 0 ? lim < PT : lim > PT)
 959         error ("Invalid search bound (wrong side of point)");
 960       if (lim > ZV)
 961         lim = ZV, lim_byte = ZV_BYTE;
 962       else if (lim < BEGV)
 963         lim = BEGV, lim_byte = BEGV_BYTE;
 964       else
 965         lim_byte = CHAR_TO_BYTE (lim);
 966     }
 967
 968   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
 969   XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
 970     = current_buffer->case_eqv_table;
 971
 972   np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
 973                       (!NILP (current_buffer->case_fold_search)
 974                        ? current_buffer->case_canon_table
 975                        : Qnil),
 976                       (!NILP (current_buffer->case_fold_search)
 977                        ? current_buffer->case_eqv_table
 978                        : Qnil),
 979                       posix);
 980   if (np <= 0)
 981     {
 982       if (NILP (noerror))
 983         xsignal1 (Qsearch_failed, string);
 984
 985       if (!EQ (noerror, Qt))
 986         {
 987           if (lim < BEGV || lim > ZV)
 988             abort ();
 989           SET_PT_BOTH (lim, lim_byte);
 990           return Qnil;
 991 #if 0 /* This would be clean, but maybe programs depend on
 992          a value of nil here.  */
 993           np = lim;
 994 #endif
 995         }
 996       else
 997         return Qnil;
 998     }
 999
1000   if (np < BEGV || np > ZV)
1001     abort ();
1002
1003   SET_PT (np);
1004
1005   return make_number (np);
1006 }
1007 \f
1008 /* Return 1 if REGEXP it matches just one constant string.  */
1009
1010 static int
1011 trivial_regexp_p (regexp)
1012      Lisp_Object regexp;
1013 {
1014   int len = SBYTES (regexp);
1015   unsigned char *s = SDATA (regexp);
1016   while (--len >= 0)
1017     {
1018       switch (*s++)
1019         {
1020         case '.': case '*': case '+': case '?': case '[': case '^': case '$':
1021           return 0;
1022         case '\\':
1023           if (--len < 0)
1024             return 0;
1025           switch (*s++)
1026             {
1027             case '|': case '(': case ')': case '`': case '\'': case 'b':
1028             case 'B': case '<': case '>': case 'w': case 'W': case 's':
1029             case 'S': case '=': case '{': case '}': case '_':
1030             case 'c': case 'C': /* for categoryspec and notcategoryspec */
1031             case '1': case '2': case '3': case '4': case '5':
1032             case '6': case '7': case '8': case '9':
1033               return 0;
1034             }
1035         }
1036     }
1037   return 1;
1038 }
1039
1040 /* Search for the n'th occurrence of STRING in the current buffer,
1041    starting at position POS and stopping at position LIM,
1042    treating STRING as a literal string if RE is false or as
1043    a regular expression if RE is true.
1044
1045    If N is positive, searching is forward and LIM must be greater than POS.
1046    If N is negative, searching is backward and LIM must be less than POS.
1047
1048    Returns -x if x occurrences remain to be found (x > 0),
1049    or else the position at the beginning of the Nth occurrence
1050    (if searching backward) or the end (if searching forward).
1051
1052    POSIX is nonzero if we want full backtracking (POSIX style)
1053    for this pattern.  0 means backtrack only enough to get a valid match.  */
1054
/* Set OUT to the translation of character D under table TRT.  A nil TRT,
   or a table entry that is not an integer, leaves the character
   untranslated (OUT = D).  D may be evaluated more than once.  */
#define TRANSLATE(out, trt, d)                          \
do                                                      \
  {                                                     \
    if (NILP (trt))                                     \
      out = d;                                          \
    else                                                \
      {                                                 \
        Lisp_Object temp = Faref (trt, make_number (d)); \
        out = INTEGERP (temp) ? XINT (temp) : (d);      \
      }                                                 \
  }                                                     \
while (0)
1071
/* Only used in search_buffer.  When Vinhibit_changing_match_data is
   non-nil, SEARCH_REGS should not be changed, so this is passed to
   compile_pattern and re_search_2 in its place; search_buffer then
   reads the match position from it (start[0] when searching backward,
   end[0] when searching forward).  */
static struct re_registers search_regs_1;
1076
/* Worker for search_command; the contract is in the comment before the
   TRANSLATE macro above.  In outline: an empty STRING or N == 0 succeeds
   at POS without moving.  A pattern that needs the regexp engine is
   compiled with compile_pattern and run through re_search_2 once per
   match.  Anything else (RE zero, or a trivial regexp while
   Vsearch_spaces_regexp is nil) is converted to the buffer's
   multibyteness, translated through TRT, and handed to boyer_moore or
   simple_search.  */
static int
search_buffer (string, pos, pos_byte, lim, lim_byte, n,
               RE, trt, inverse_trt, posix)
     Lisp_Object string;
     int pos;
     int pos_byte;
     int lim;
     int lim_byte;
     int n;
     int RE;
     Lisp_Object trt;
     Lisp_Object inverse_trt;
     int posix;
{
  int len = SCHARS (string);
  int len_byte = SBYTES (string);
  register int i;

  if (running_asynch_code)
    save_search_regs ();

  /* Searching 0 times means don't move.  */
  /* Null string is found at starting position.  */
  if (len == 0 || n == 0)
    {
      set_search_regs (pos_byte, 0);
      return pos;
    }

  /* Use the regexp engine unless the pattern is a trivial regexp and no
     whitespace regexp is in effect; in that case it is searched for as
     a plain string below.  */
  if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp)))
    {
      unsigned char *p1, *p2;
      int s1, s2;
      struct re_pattern_buffer *bufp;

      /* The registers argument is SEARCH_REGS unless match data must
         not be changed, in which case the private SEARCH_REGS_1 is used.  */
      bufp = compile_pattern (string,
                              (NILP (Vinhibit_changing_match_data)
                               ? &search_regs : &search_regs_1),
                              trt, posix,
                              !NILP (current_buffer->enable_multibyte_characters));

      immediate_quit = 1;       /* Quit immediately if user types ^G,
                                   because letting this function finish
                                   can take too long. */
      QUIT;                     /* Do a pending quit right away,
                                   to avoid paradoxical behavior */
      /* Get pointers and sizes of the two strings
         that make up the visible portion of the buffer. */

      p1 = BEGV_ADDR;
      s1 = GPT_BYTE - BEGV_BYTE;
      p2 = GAP_END_ADDR;
      s2 = ZV_BYTE - GPT_BYTE;
      if (s1 < 0)
        {
          /* GPT_BYTE is below BEGV_BYTE: all the visible text goes in
             the second string.  */
          p2 = p1;
          s2 = ZV_BYTE - BEGV_BYTE;
          s1 = 0;
        }
      if (s2 < 0)
        {
          /* GPT_BYTE is above ZV_BYTE: all the visible text goes in
             the first string.  */
          s1 = ZV_BYTE - BEGV_BYTE;
          s2 = 0;
        }
      re_match_object = Qnil;

      /* Backward search: one re_search_2 call per match, counting N up
         to zero.  Offsets given to and returned by re_search_2 are byte
         offsets relative to BEGV_BYTE.  */
      while (n < 0)
        {
          int val;
          val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
                             pos_byte - BEGV_BYTE, lim_byte - pos_byte,
                             (NILP (Vinhibit_changing_match_data)
                              ? &search_regs : &search_regs_1),
                             /* Don't allow match past current point */
                             pos_byte - BEGV_BYTE);
          if (val == -2)
            {
              matcher_overflow ();
            }
          if (val >= 0)
            {
              if (NILP (Vinhibit_changing_match_data))
                {
                  /* Continue from the start of this match, then convert
                     every recorded register from a byte offset to a
                     character position.  */
                  pos_byte = search_regs.start[0] + BEGV_BYTE;
                  for (i = 0; i < search_regs.num_regs; i++)
                    if (search_regs.start[i] >= 0)
                      {
                        search_regs.start[i]
                          = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
                        search_regs.end[i]
                          = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
                      }
                  XSETBUFFER (last_thing_searched, current_buffer);
                  /* Set pos to the new position. */
                  pos = search_regs.start[0];
                }
              else
                {
                  /* Match data is left alone; only the new position is
                     taken from the private registers.  */
                  pos_byte = search_regs_1.start[0] + BEGV_BYTE;
                  /* Set pos to the new position.  */
                  pos = BYTE_TO_CHAR (search_regs_1.start[0] + BEGV_BYTE);
                }
            }
          else
            {
              /* No match: N (negative) is the number of occurrences
                 still unfound, already in the -x form callers expect.  */
              immediate_quit = 0;
              return (n);
            }
          n++;
        }
      /* Forward search: same scheme, counting N down to zero and
         continuing from the end of each match.  */
      while (n > 0)
        {
          int val;
          val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
                             pos_byte - BEGV_BYTE, lim_byte - pos_byte,
                             (NILP (Vinhibit_changing_match_data)
                              ? &search_regs : &search_regs_1),
                             lim_byte - BEGV_BYTE);
          if (val == -2)
            {
              matcher_overflow ();
            }
          if (val >= 0)
            {
              if (NILP (Vinhibit_changing_match_data))
                {
                  pos_byte = search_regs.end[0] + BEGV_BYTE;
                  for (i = 0; i < search_regs.num_regs; i++)
                    if (search_regs.start[i] >= 0)
                      {
                        search_regs.start[i]
                          = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
                        search_regs.end[i]
                          = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
                      }
                  XSETBUFFER (last_thing_searched, current_buffer);
                  pos = search_regs.end[0];
                }
              else
                {
                  pos_byte = search_regs_1.end[0] + BEGV_BYTE;
                  pos = BYTE_TO_CHAR (search_regs_1.end[0] + BEGV_BYTE);
                }
            }
          else
            {
              /* No match: return minus the number of occurrences left.  */
              immediate_quit = 0;
              return (0 - n);
            }
          n--;
        }
      immediate_quit = 0;
      return (pos);
    }
  else                          /* non-RE case */
    {
      unsigned char *raw_pattern, *pat;
      int raw_pattern_size;
      int raw_pattern_size_byte;
      unsigned char *patbuf;
      int multibyte = !NILP (current_buffer->enable_multibyte_characters);
      unsigned char *base_pat;
      /* Set to positive if we find a non-ASCII char that needs
         translation.  Otherwise set to zero later.  */
      int char_base = -1;
      int boyer_moore_ok = 1;

      /* MULTIBYTE says whether the text to be searched is multibyte.
         We must convert PATTERN to match that, or we will not really
         find things right.  */

      if (multibyte == STRING_MULTIBYTE (string))
        {
          /* Same representation as the buffer: use the string data
             directly.  */
          raw_pattern = (unsigned char *) SDATA (string);
          raw_pattern_size = SCHARS (string);
          raw_pattern_size_byte = SBYTES (string);
        }
      else if (multibyte)
        {
          /* Unibyte pattern, multibyte buffer: convert the pattern into
             a stack-allocated multibyte copy.  */
          raw_pattern_size = SCHARS (string);
          raw_pattern_size_byte
            = count_size_as_multibyte (SDATA (string),
                                       raw_pattern_size);
          raw_pattern = (unsigned char *) alloca (raw_pattern_size_byte + 1);
          copy_text (SDATA (string), raw_pattern,
                     SCHARS (string), 0, 1);
        }
      else
        {
          /* Converting multibyte to single-byte.

             ??? Perhaps this conversion should be done in a special way
             by subtracting nonascii-insert-offset from each non-ASCII char,
             so that only the multibyte chars which really correspond to
             the chosen single-byte character set can possibly match.  */
          raw_pattern_size = SCHARS (string);
          raw_pattern_size_byte = SCHARS (string);
          raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
          copy_text (SDATA (string), raw_pattern,
                     SBYTES (string), 1, 0);
        }

      /* Copy and optionally translate the pattern.  */
      len = raw_pattern_size;
      len_byte = raw_pattern_size_byte;
      patbuf = (unsigned char *) alloca (len * MAX_MULTIBYTE_LENGTH);
      pat = patbuf;
      base_pat = raw_pattern;
      if (multibyte)
        {
          /* Fill patbuf by translated characters in STRING while
             checking if we can use boyer-moore search.  If TRT is
             non-nil, we can use boyer-moore search only if TRT can be
             represented by the byte array of 256 elements.  For that,
             all non-ASCII case-equivalents of all case-sensitive
             characters in STRING must belong to the same charset and
             row.  */

          while (--len >= 0)
            {
              unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str;
              int c, translated, inverse;
              int in_charlen, charlen;

              /* If we got here and the RE flag is set, it's because we're
                 dealing with a regexp known to be trivial, so the backslash
                 just quotes the next character.  */
              if (RE && *base_pat == '\\')
                {
                  /* Drop the backslash from every running count.  */
                  len--;
                  raw_pattern_size--;
                  len_byte--;
                  base_pat++;
                }

              c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen);

              if (NILP (trt))
                {
                  /* No translation: copy the character's bytes as is.  */
                  str = base_pat;
                  charlen = in_charlen;
                }
              else
                {
                  /* Translate the character.  */
                  TRANSLATE (translated, trt, c);
                  charlen = CHAR_STRING (translated, str_base);
                  str = str_base;

                  /* Check if C has any other case-equivalents.  */
                  TRANSLATE (inverse, inverse_trt, c);
                  /* If so, check if we can use boyer-moore.  */
                  if (c != inverse && boyer_moore_ok)
                    {
                      /* Check if all equivalents belong to the same
                         group of characters.  Note that the check of C
                         itself is done by the last iteration.  */
                      int this_char_base = -1;

                      /* Follow INVERSE_TRT from one equivalent to the
                         next until it comes back to C.  */
                      while (boyer_moore_ok)
                        {
                          if (ASCII_BYTE_P (inverse))
                            {
                              /* ASCII and non-ASCII equivalents cannot
                                 be mixed for one character.  */
                              if (this_char_base > 0)
                                boyer_moore_ok = 0;
                              else
                                this_char_base = 0;
                            }
                          else if (CHAR_BYTE8_P (inverse))
                            /* Boyer-moore search can't handle a
                               translation of an eight-bit
                               character.  */
                            boyer_moore_ok = 0;
                          else if (this_char_base < 0)
                            {
                              /* First non-ASCII equivalent: its code
                                 with the low 6 bits cleared must agree
                                 with CHAR_BASE for the whole pattern.  */
                              this_char_base = inverse & ~0x3F;
                              if (char_base < 0)
                                char_base = this_char_base;
                              else if (this_char_base != char_base)
                                boyer_moore_ok = 0;
                            }
                          else if ((inverse & ~0x3F) != this_char_base)
                            boyer_moore_ok = 0;
                          if (c == inverse)
                            break;
                          TRANSLATE (inverse, inverse_trt, inverse);
                        }
                    }
                }

              /* Store this character into the translated pattern.  */
              bcopy (str, pat, charlen);
              pat += charlen;
              base_pat += in_charlen;
              len_byte -= in_charlen;
            }

          /* If char_base is still negative we didn't find any translated
             non-ASCII characters.  */
          if (char_base < 0)
            char_base = 0;
        }
      else
        {
          /* Unibyte buffer.  */
          char_base = 0;
          while (--len >= 0)
            {
              int c, translated;

              /* If we got here and the RE flag is set, it's because we're
                 dealing with a regexp known to be trivial, so the backslash
                 just quotes the next character.  */
              if (RE && *base_pat == '\\')
                {
                  len--;
                  raw_pattern_size--;
                  base_pat++;
                }
              c = *base_pat++;
              TRANSLATE (translated, trt, c);
              *pat++ = translated;
            }
        }

      /* The translated pattern is now in PATBUF.  Its byte length is
         what was written; its character length is RAW_PATTERN_SIZE,
         which was reduced above for each quoting backslash dropped.  */
      len_byte = pat - patbuf;
      len = raw_pattern_size;
      pat = base_pat = patbuf;

      if (boyer_moore_ok)
        return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
                            pos, pos_byte, lim, lim_byte,
                            char_base);
      else
        return simple_search (n, pat, len, len_byte, trt,
                              pos, pos_byte, lim, lim_byte);
    }
}
1415 \f
1416 /* Do a simple string search N times for the string PAT,
1417    whose length is LEN/LEN_BYTE,
1418    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1419    TRT is the translation table.
1420
1421    Return the character position where the match is found.
1422    Otherwise, if M matches remained to be found, return -M.
1423
1424    This kind of search works regardless of what is in PAT and
1425    regardless of what is in TRT.  It is used in cases where
1426    boyer_moore cannot work.  */
1427
1428 static int
1429 simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
1430      int n;
1431      unsigned char *pat;
1432      int len, len_byte;
1433      Lisp_Object trt;
1434      int pos, pos_byte;
1435      int lim, lim_byte;
1436 {
1437   int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1438   int forward = n > 0;
1439   /* Number of buffer bytes matched.  Note that this may be different
1440      from len_byte in a multibyte buffer.  */
1441   int match_byte;
1442
1443   if (lim > pos && multibyte)
1444     while (n > 0)
1445       {
1446         while (1)
1447           {
1448             /* Try matching at position POS.  */
1449             int this_pos = pos;
1450             int this_pos_byte = pos_byte;
1451             int this_len = len;
1452             int this_len_byte = len_byte;
1453             unsigned char *p = pat;
1454             if (pos + len > lim || pos_byte + len_byte > lim_byte)
1455               goto stop;
1456
1457             while (this_len > 0)
1458               {
1459                 int charlen, buf_charlen;
1460                 int pat_ch, buf_ch;
1461
1462                 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1463                 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1464                                                  ZV_BYTE - this_pos_byte,
1465                                                  buf_charlen);
1466                 TRANSLATE (buf_ch, trt, buf_ch);
1467
1468                 if (buf_ch != pat_ch)
1469                   break;
1470
1471                 this_len_byte -= charlen;
1472                 this_len--;
1473                 p += charlen;
1474
1475                 this_pos_byte += buf_charlen;
1476                 this_pos++;
1477               }
1478
1479             if (this_len == 0)
1480               {
1481                 match_byte = this_pos_byte - pos_byte;
1482                 pos += len;
1483                 pos_byte += match_byte;
1484                 break;
1485               }
1486
1487             INC_BOTH (pos, pos_byte);
1488           }
1489
1490         n--;
1491       }
1492   else if (lim > pos)
1493     while (n > 0)
1494       {
1495         while (1)
1496           {
1497             /* Try matching at position POS.  */
1498             int this_pos = pos;
1499             int this_len = len;
1500             unsigned char *p = pat;
1501
1502             if (pos + len > lim)
1503               goto stop;
1504
1505             while (this_len > 0)
1506               {
1507                 int pat_ch = *p++;
1508                 int buf_ch = FETCH_BYTE (this_pos);
1509                 TRANSLATE (buf_ch, trt, buf_ch);
1510
1511                 if (buf_ch != pat_ch)
1512                   break;
1513
1514                 this_len--;
1515                 this_pos++;
1516               }
1517
1518             if (this_len == 0)
1519               {
1520                 match_byte = len;
1521                 pos += len;
1522                 break;
1523               }
1524
1525             pos++;
1526           }
1527
1528         n--;
1529       }
1530   /* Backwards search.  */
1531   else if (lim < pos && multibyte)
1532     while (n < 0)
1533       {
1534         while (1)
1535           {
1536             /* Try matching at position POS.  */
1537             int this_pos = pos - len;
1538             int this_pos_byte;
1539             int this_len = len;
1540             int this_len_byte = len_byte;
1541             unsigned char *p = pat;
1542
1543             if (this_pos < lim || (pos_byte - len_byte) < lim_byte)
1544               goto stop;
1545             this_pos_byte = CHAR_TO_BYTE (this_pos);
1546             match_byte = pos_byte - this_pos_byte;
1547
1548             while (this_len > 0)
1549               {
1550                 int charlen, buf_charlen;
1551                 int pat_ch, buf_ch;
1552
1553                 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1554                 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1555                                                  ZV_BYTE - this_pos_byte,
1556                                                  buf_charlen);
1557                 TRANSLATE (buf_ch, trt, buf_ch);
1558
1559                 if (buf_ch != pat_ch)
1560                   break;
1561
1562                 this_len_byte -= charlen;
1563                 this_len--;
1564                 p += charlen;
1565                 this_pos_byte += buf_charlen;
1566                 this_pos++;
1567               }
1568
1569             if (this_len == 0)
1570               {
1571                 pos -= len;
1572                 pos_byte -= match_byte;
1573                 break;
1574               }
1575
1576             DEC_BOTH (pos, pos_byte);
1577           }
1578
1579         n++;
1580       }
1581   else if (lim < pos)
1582     while (n < 0)
1583       {
1584         while (1)
1585           {
1586             /* Try matching at position POS.  */
1587             int this_pos = pos - len;
1588             int this_len = len;
1589             unsigned char *p = pat;
1590
1591             if (this_pos < lim)
1592               goto stop;
1593
1594             while (this_len > 0)
1595               {
1596                 int pat_ch = *p++;
1597                 int buf_ch = FETCH_BYTE (this_pos);
1598                 TRANSLATE (buf_ch, trt, buf_ch);
1599
1600                 if (buf_ch != pat_ch)
1601                   break;
1602                 this_len--;
1603                 this_pos++;
1604               }
1605
1606             if (this_len == 0)
1607               {
1608                 match_byte = len;
1609                 pos -= len;
1610                 break;
1611               }
1612
1613             pos--;
1614           }
1615
1616         n++;
1617       }
1618
1619  stop:
1620   if (n == 0)
1621     {
1622       if (forward)
1623         set_search_regs ((multibyte ? pos_byte : pos) - match_byte, match_byte);
1624       else
1625         set_search_regs (multibyte ? pos_byte : pos, match_byte);
1626
1627       return pos;
1628     }
1629   else if (n > 0)
1630     return -n;
1631   else
1632     return n;
1633 }
1634 \f
1635 /* Do Boyer-Moore search N times for the string BASE_PAT,
1636    whose length is LEN/LEN_BYTE,
1637    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1638    DIRECTION says which direction we search in.
1639    TRT and INVERSE_TRT are translation tables.
1640    Characters in PAT are already translated by TRT.
1641
1642    This kind of search works if all the characters in BASE_PAT that
1643    have nontrivial translation are the same aside from the last byte.
1644    This makes it possible to translate just the last byte of a
1645    character, and do so after just a simple test of the context.
1646    CHAR_BASE is nonzero if there is such a non-ASCII character.
1647
1648    If that criterion is not satisfied, do not call this function.  */
1649
1650 static int
1651 boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
1652              pos, pos_byte, lim, lim_byte, char_base)
1653      int n;
1654      unsigned char *base_pat;
1655      int len, len_byte;
1656      Lisp_Object trt;
1657      Lisp_Object inverse_trt;
1658      int pos, pos_byte;
1659      int lim, lim_byte;
1660      int char_base;
1661 {
1662   int direction = ((n > 0) ? 1 : -1);
1663   register int dirlen;
1664   int infinity, limit, stride_for_teases = 0;
1665   register int *BM_tab;
1666   int *BM_tab_base;
1667   register unsigned char *cursor, *p_limit;
1668   register int i, j;
1669   unsigned char *pat, *pat_end;
1670   int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1671
1672   unsigned char simple_translate[0400];
1673   /* These are set to the preceding bytes of a byte to be translated
1674      if char_base is nonzero.  As the maximum byte length of a
1675      multibyte character is 5, we have to check at most four previous
1676      bytes.  */
1677   int translate_prev_byte1 = 0;
1678   int translate_prev_byte2 = 0;
1679   int translate_prev_byte3 = 0;
1680   int translate_prev_byte4 = 0;
1681
1682   BM_tab = (int *) alloca (0400 * sizeof (int));
1683
1684   /* The general approach is that we are going to maintain that we know */
1685   /* the first (closest to the present position, in whatever direction */
1686   /* we're searching) character that could possibly be the last */
1687   /* (furthest from present position) character of a valid match.  We */
1688   /* advance the state of our knowledge by looking at that character */
1689   /* and seeing whether it indeed matches the last character of the */
1690   /* pattern.  If it does, we take a closer look.  If it does not, we */
1691   /* move our pointer (to putative last characters) as far as is */
1692   /* logically possible.  This amount of movement, which I call a */
1693   /* stride, will be the length of the pattern if the actual character */
1694   /* appears nowhere in the pattern, otherwise it will be the distance */
1695   /* from the last occurrence of that character to the end of the */
1696   /* pattern. */
1697   /* As a coding trick, an enormous stride is coded into the table for */
1698   /* characters that match the last character.  This allows use of only */
1699   /* a single test, a test for having gone past the end of the */
1700   /* permissible match region, to test for both possible matches (when */
1701   /* the stride goes past the end immediately) and failure to */
1702   /* match (where you get nudged past the end one stride at a time). */
1703
1704   /* Here we make a "mickey mouse" BM table.  The stride of the search */
1705   /* is determined only by the last character of the putative match. */
1706   /* If that character does not match, we will stride the proper */
1707   /* distance to propose a match that superimposes it on the last */
1708   /* instance of a character that matches it (per trt), or misses */
1709   /* it entirely if there is none. */
1710
1711   dirlen = len_byte * direction;
1712   infinity = dirlen - (lim_byte + pos_byte + len_byte + len_byte) * direction;
1713
1714   /* Record position after the end of the pattern.  */
1715   pat_end = base_pat + len_byte;
1716   /* BASE_PAT points to a character that we start scanning from.
1717      It is the first character in a forward search,
1718      the last character in a backward search.  */
1719   if (direction < 0)
1720     base_pat = pat_end - 1;
1721
1722   BM_tab_base = BM_tab;
1723   BM_tab += 0400;
1724   j = dirlen;           /* to get it in a register */
1725   /* A character that does not appear in the pattern induces a */
1726   /* stride equal to the pattern length. */
1727   while (BM_tab_base != BM_tab)
1728     {
1729       *--BM_tab = j;
1730       *--BM_tab = j;
1731       *--BM_tab = j;
1732       *--BM_tab = j;
1733     }
1734
1735   /* We use this for translation, instead of TRT itself.
1736      We fill this in to handle the characters that actually
1737      occur in the pattern.  Others don't matter anyway!  */
1738   bzero (simple_translate, sizeof simple_translate);
1739   for (i = 0; i < 0400; i++)
1740     simple_translate[i] = i;
1741
1742   if (char_base)
1743     {
1744       /* Setup translate_prev_byte1/2/3/4 from CHAR_BASE.  Only a
1745          byte following them are the target of translation.  */
1746       unsigned char str[MAX_MULTIBYTE_LENGTH];
1747       int len = CHAR_STRING (char_base, str);
1748
1749       translate_prev_byte1 = str[len - 2];
1750       if (len > 2)
1751         {
1752           translate_prev_byte2 = str[len - 3];
1753           if (len > 3)
1754             {
1755               translate_prev_byte3 = str[len - 4];
1756               if (len > 4)
1757                 translate_prev_byte4 = str[len - 5];
1758             }
1759         }
1760     }
1761
1762   i = 0;
1763   while (i != infinity)
1764     {
1765       unsigned char *ptr = base_pat + i;
1766       i += direction;
1767       if (i == dirlen)
1768         i = infinity;
1769       if (! NILP (trt))
1770         {
1771           /* If the byte currently looking at is the last of a
1772              character to check case-equivalents, set CH to that
1773              character.  An ASCII character and a non-ASCII character
1774              matching with CHAR_BASE are to be checked.  */
1775           int ch = -1;
1776
1777           if (ASCII_BYTE_P (*ptr) || ! multibyte)
1778             ch = *ptr;
1779           else if (char_base
1780                    && ((pat_end - ptr) == 1 || CHAR_HEAD_P (ptr[1])))
1781             {
1782               unsigned char *charstart = ptr - 1;
1783
1784               while (! (CHAR_HEAD_P (*charstart)))
1785                 charstart--;
1786               ch = STRING_CHAR (charstart, ptr - charstart + 1);
1787               if (char_base != (ch & ~0x3F))
1788                 ch = -1;
1789             }
1790
1791           if (ch >= 0200)
1792             j = (ch & 0x3F) | 0200;
1793           else
1794             j = *ptr;
1795
1796           if (i == infinity)
1797             stride_for_teases = BM_tab[j];
1798
1799           BM_tab[j] = dirlen - i;
1800           /* A translation table is accompanied by its inverse -- see */
1801           /* comment following downcase_table for details */
1802           if (ch >= 0)
1803             {
1804               int starting_ch = ch;
1805               int starting_j = j;
1806
1807               while (1)
1808                 {
1809                   TRANSLATE (ch, inverse_trt, ch);
1810                   if (ch >= 0200)
1811                     j = (ch & 0x3F) | 0200;
1812                   else
1813                     j = ch;
1814
1815                   /* For all the characters that map into CH,
1816                      set up simple_translate to map the last byte
1817                      into STARTING_J.  */
1818                   simple_translate[j] = starting_j;
1819                   if (ch == starting_ch)
1820                     break;
1821                   BM_tab[j] = dirlen - i;
1822                 }
1823             }
1824         }
1825       else
1826         {
1827           j = *ptr;
1828
1829           if (i == infinity)
1830             stride_for_teases = BM_tab[j];
1831           BM_tab[j] = dirlen - i;
1832         }
1833       /* stride_for_teases tells how much to stride if we get a */
1834       /* match on the far character but are subsequently */
1835       /* disappointed, by recording what the stride would have been */
1836       /* for that character if the last character had been */
1837       /* different. */
1838     }
1839   infinity = dirlen - infinity;
1840   pos_byte += dirlen - ((direction > 0) ? direction : 0);
1841   /* loop invariant - POS_BYTE points at where last char (first
1842      char if reverse) of pattern would align in a possible match.  */
1843   while (n != 0)
1844     {
1845       int tail_end;
1846       unsigned char *tail_end_ptr;
1847
1848       /* It's been reported that some (broken) compiler thinks that
1849          Boolean expressions in an arithmetic context are unsigned.
1850          Using an explicit ?1:0 prevents this.  */
1851       if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1852           < 0)
1853         return (n * (0 - direction));
1854       /* First we do the part we can by pointers (maybe nothing) */
1855       QUIT;
1856       pat = base_pat;
1857       limit = pos_byte - dirlen + direction;
1858       if (direction > 0)
1859         {
1860           limit = BUFFER_CEILING_OF (limit);
1861           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1862              can take on without hitting edge of buffer or the gap.  */
1863           limit = min (limit, pos_byte + 20000);
1864           limit = min (limit, lim_byte - 1);
1865         }
1866       else
1867         {
1868           limit = BUFFER_FLOOR_OF (limit);
1869           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1870              can take on without hitting edge of buffer or the gap.  */
1871           limit = max (limit, pos_byte - 20000);
1872           limit = max (limit, lim_byte);
1873         }
1874       tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1875       tail_end_ptr = BYTE_POS_ADDR (tail_end);
1876
1877       if ((limit - pos_byte) * direction > 20)
1878         {
1879           unsigned char *p2;
1880
1881           p_limit = BYTE_POS_ADDR (limit);
1882           p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1883           /* In this loop, pos + cursor - p2 is the surrogate for pos */
1884           while (1)             /* use one cursor setting as long as i can */
1885             {
1886               if (direction > 0) /* worth duplicating */
1887                 {
1888                   /* Use signed comparison if appropriate
1889                      to make cursor+infinity sure to be > p_limit.
1890                      Assuming that the buffer lies in a range of addresses
1891                      that are all "positive" (as ints) or all "negative",
1892                      either kind of comparison will work as long
1893                      as we don't step by infinity.  So pick the kind
1894                      that works when we do step by infinity.  */
1895                   if ((EMACS_INT) (p_limit + infinity) > (EMACS_INT) p_limit)
1896                     while ((EMACS_INT) cursor <= (EMACS_INT) p_limit)
1897                       cursor += BM_tab[*cursor];
1898                   else
1899                     while ((EMACS_UINT) cursor <= (EMACS_UINT) p_limit)
1900                       cursor += BM_tab[*cursor];
1901                 }
1902               else
1903                 {
1904                   if ((EMACS_INT) (p_limit + infinity) < (EMACS_INT) p_limit)
1905                     while ((EMACS_INT) cursor >= (EMACS_INT) p_limit)
1906                       cursor += BM_tab[*cursor];
1907                   else
1908                     while ((EMACS_UINT) cursor >= (EMACS_UINT) p_limit)
1909                       cursor += BM_tab[*cursor];
1910                 }
1911 /* If you are here, cursor is beyond the end of the searched region. */
1912 /* This can happen if you match on the far character of the pattern, */
1913 /* because the "stride" of that character is infinity, a number able */
1914 /* to throw you well beyond the end of the search.  It can also */
1915 /* happen if you fail to match within the permitted region and would */
1916 /* otherwise try a character beyond that region */
1917               if ((cursor - p_limit) * direction <= len_byte)
1918                 break;  /* a small overrun is genuine */
1919               cursor -= infinity; /* large overrun = hit */
1920               i = dirlen - direction;
1921               if (! NILP (trt))
1922                 {
1923                   while ((i -= direction) + direction != 0)
1924                     {
1925                       int ch;
1926                       cursor -= direction;
1927                       /* Translate only the last byte of a character.  */
1928                       if (! multibyte
1929                           || ((cursor == tail_end_ptr
1930                                || CHAR_HEAD_P (cursor[1]))
1931                               && (CHAR_HEAD_P (cursor[0])
1932                                   /* Check if this is the last byte of
1933                                      a translable character.  */
1934                                   || (translate_prev_byte1 == cursor[-1]
1935                                       && (CHAR_HEAD_P (translate_prev_byte1)
1936                                           || (translate_prev_byte2 == cursor[-2]
1937                                               && (CHAR_HEAD_P (translate_prev_byte2)
1938                                                   || (translate_prev_byte3 == cursor[-3]))))))))
1939                         ch = simple_translate[*cursor];
1940                       else
1941                         ch = *cursor;
1942                       if (pat[i] != ch)
1943                         break;
1944                     }
1945                 }
1946               else
1947                 {
1948                   while ((i -= direction) + direction != 0)
1949                     {
1950                       cursor -= direction;
1951                       if (pat[i] != *cursor)
1952                         break;
1953                     }
1954                 }
1955               cursor += dirlen - i - direction; /* fix cursor */
1956               if (i + direction == 0)
1957                 {
1958                   int position, start, end;
1959
1960                   cursor -= direction;
1961
1962                   position = pos_byte + cursor - p2 + ((direction > 0)
1963                                                        ? 1 - len_byte : 0);
1964                   set_search_regs (position, len_byte);
1965
1966                   if (NILP (Vinhibit_changing_match_data))
1967                     {
1968                       start = search_regs.start[0];
1969                       end = search_regs.end[0];
1970                     }
1971                   else
1972                     /* If Vinhibit_changing_match_data is non-nil,
1973                        search_regs will not be changed.  So let's
1974                        compute start and end here.  */
1975                     {
1976                       start = BYTE_TO_CHAR (position);
1977                       end = BYTE_TO_CHAR (position + len_byte);
1978                     }
1979
1980                   if ((n -= direction) != 0)
1981                     cursor += dirlen; /* to resume search */
1982                   else
1983                     return direction > 0 ? end : start;
1984                 }
1985               else
1986                 cursor += stride_for_teases; /* <sigh> we lose -  */
1987             }
1988           pos_byte += cursor - p2;
1989         }
1990       else
1991         /* Now we'll pick up a clump that has to be done the hard */
1992         /* way because it covers a discontinuity */
1993         {
1994           limit = ((direction > 0)
1995                    ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
1996                    : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
1997           limit = ((direction > 0)
1998                    ? min (limit + len_byte, lim_byte - 1)
1999                    : max (limit - len_byte, lim_byte));
2000           /* LIMIT is now the last value POS_BYTE can have
2001              and still be valid for a possible match.  */
2002           while (1)
2003             {
2004               /* This loop can be coded for space rather than */
2005               /* speed because it will usually run only once. */
2006               /* (the reach is at most len + 21, and typically */
2007               /* does not exceed len) */
2008               while ((limit - pos_byte) * direction >= 0)
2009                 pos_byte += BM_tab[FETCH_BYTE (pos_byte)];
2010               /* now run the same tests to distinguish going off the */
2011               /* end, a match or a phony match. */
2012               if ((pos_byte - limit) * direction <= len_byte)
2013                 break;  /* ran off the end */
2014               /* Found what might be a match.
2015                  Set POS_BYTE back to last (first if reverse) pos.  */
2016               pos_byte -= infinity;
2017               i = dirlen - direction;
2018               while ((i -= direction) + direction != 0)
2019                 {
2020                   int ch;
2021                   unsigned char *ptr;
2022                   pos_byte -= direction;
2023                   ptr = BYTE_POS_ADDR (pos_byte);
2024                   /* Translate only the last byte of a character.  */
2025                   if (! multibyte
2026                       || ((ptr == tail_end_ptr
2027                            || CHAR_HEAD_P (ptr[1]))
2028                           && (CHAR_HEAD_P (ptr[0])
2029                               /* Check if this is the last byte of a
2030                                  translable character.  */
2031                               || (translate_prev_byte1 == ptr[-1]
2032                                   && (CHAR_HEAD_P (translate_prev_byte1)
2033                                       || (translate_prev_byte2 == ptr[-2]
2034                                           && (CHAR_HEAD_P (translate_prev_byte2)
2035                                               || translate_prev_byte3 == ptr[-3])))))))
2036                     ch = simple_translate[*ptr];
2037                   else
2038                     ch = *ptr;
2039                   if (pat[i] != ch)
2040                     break;
2041                 }
2042               /* Above loop has moved POS_BYTE part or all the way
2043                  back to the first pos (last pos if reverse).
2044                  Set it once again at the last (first if reverse) char.  */
2045               pos_byte += dirlen - i- direction;
2046               if (i + direction == 0)
2047                 {
2048                   int position, start, end;
2049                   pos_byte -= direction;
2050
2051                   position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
2052                   set_search_regs (position, len_byte);
2053
2054                   if (NILP (Vinhibit_changing_match_data))
2055                     {
2056                       start = search_regs.start[0];
2057                       end = search_regs.end[0];
2058                     }
2059                   else
2060                     /* If Vinhibit_changing_match_data is non-nil,
2061                        search_regs will not be changed.  So let's
2062                        compute start and end here.  */
2063                     {
2064                       start = BYTE_TO_CHAR (position);
2065                       end = BYTE_TO_CHAR (position + len_byte);
2066                     }
2067
2068                   if ((n -= direction) != 0)
2069                     pos_byte += dirlen; /* to resume search */
2070                   else
2071                     return direction > 0 ? end : start;
2072                 }
2073               else
2074                 pos_byte += stride_for_teases;
2075             }
2076           }
2077       /* We have done one clump.  Can we continue? */
2078       if ((lim_byte - pos_byte) * direction < 0)
2079         return ((0 - n) * direction);
2080     }
2081   return BYTE_TO_CHAR (pos_byte);
2082 }
2083
2084 /* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
2085    for the overall match just found in the current buffer.
2086    Also clear out the match data for registers 1 and up.  */
2087
2088 static void
2089 set_search_regs (beg_byte, nbytes)
2090      int beg_byte, nbytes;
2091 {
2092   int i;
2093
2094   if (!NILP (Vinhibit_changing_match_data))
2095     return;
2096
2097   /* Make sure we have registers in which to store
2098      the match position.  */
2099   if (search_regs.num_regs == 0)
2100     {
2101       search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
2102       search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
2103       search_regs.num_regs = 2;
2104     }
2105
2106   /* Clear out the other registers.  */
2107   for (i = 1; i < search_regs.num_regs; i++)
2108     {
2109       search_regs.start[i] = -1;
2110       search_regs.end[i] = -1;
2111     }
2112
2113   search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
2114   search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
2115   XSETBUFFER (last_thing_searched, current_buffer);
2116 }
2117 \f
2118 /* Given STRING, a string of words separated by word delimiters,
2119    compute a regexp that matches those exact words separated by
2120    arbitrary punctuation.  If LAX is nonzero, the end of the string
2121    need not match a word boundary unless it ends in whitespace.  */
2122
2123 static Lisp_Object
2124 wordify (string, lax)
2125      Lisp_Object string;
2126      int lax;
2127 {
2128   register unsigned char *p, *o;
2129   register int i, i_byte, len, punct_count = 0, word_count = 0;
2130   Lisp_Object val;
2131   int prev_c = 0;
2132   int adjust, whitespace_at_end;
2133
2134   CHECK_STRING (string);
2135   p = SDATA (string);
2136   len = SCHARS (string);
2137
2138   for (i = 0, i_byte = 0; i < len; )
2139     {
2140       int c;
2141
2142       FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, i, i_byte);
2143
2144       if (SYNTAX (c) != Sword)
2145         {
2146           punct_count++;
2147           if (i > 0 && SYNTAX (prev_c) == Sword)
2148             word_count++;
2149         }
2150
2151       prev_c = c;
2152     }
2153
2154   if (SYNTAX (prev_c) == Sword)
2155     {
2156       word_count++;
2157       whitespace_at_end = 0;
2158     }
2159   else
2160     whitespace_at_end = 1;
2161
2162   if (!word_count)
2163     return empty_unibyte_string;
2164
2165   adjust = - punct_count + 5 * (word_count - 1)
2166     + ((lax && !whitespace_at_end) ? 2 : 4);
2167   if (STRING_MULTIBYTE (string))
2168     val = make_uninit_multibyte_string (len + adjust,
2169                                         SBYTES (string)
2170                                         + adjust);
2171   else
2172     val = make_uninit_string (len + adjust);
2173
2174   o = SDATA (val);
2175   *o++ = '\\';
2176   *o++ = 'b';
2177   prev_c = 0;
2178
2179   for (i = 0, i_byte = 0; i < len; )
2180     {
2181       int c;
2182       int i_byte_orig = i_byte;
2183
2184       FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, i, i_byte);
2185
2186       if (SYNTAX (c) == Sword)
2187         {
2188           bcopy (SDATA (string) + i_byte_orig, o,
2189                  i_byte - i_byte_orig);
2190           o += i_byte - i_byte_orig;
2191         }
2192       else if (i > 0 && SYNTAX (prev_c) == Sword && --word_count)
2193         {
2194           *o++ = '\\';
2195           *o++ = 'W';
2196           *o++ = '\\';
2197           *o++ = 'W';
2198           *o++ = '*';
2199         }
2200
2201       prev_c = c;
2202     }
2203
2204   if (!lax || whitespace_at_end)
2205     {
2206       *o++ = '\\';
2207       *o++ = 'b';
2208     }
2209
2210   return val;
2211 }
2212 \f
2213 DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
2214        "MSearch backward: ",
2215        doc: /* Search backward from point for STRING.
2216 Set point to the beginning of the occurrence found, and return point.
2217 An optional second argument bounds the search; it is a buffer position.
2218 The match found must not extend before that position.
2219 Optional third argument, if t, means if fail just return nil (no error).
2220  If not nil and not t, position at limit of search and return nil.
2221 Optional fourth argument is repeat count--search for successive occurrences.
2222
2223 Search case-sensitivity is determined by the value of the variable
2224 `case-fold-search', which see.
2225
2226 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2227      (string, bound, noerror, count)
2228      Lisp_Object string, bound, noerror, count;
2229 {
2230   return search_command (string, bound, noerror, count, -1, 0, 0);
2231 }
2232
2233 DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
2234        doc: /* Search forward from point for STRING.
2235 Set point to the end of the occurrence found, and return point.
2236 An optional second argument bounds the search; it is a buffer position.
2237 The match found must not extend after that position.  A value of nil is
2238   equivalent to (point-max).
2239 Optional third argument, if t, means if fail just return nil (no error).
2240   If not nil and not t, move to limit of search and return nil.
2241 Optional fourth argument is repeat count--search for successive occurrences.
2242
2243 Search case-sensitivity is determined by the value of the variable
2244 `case-fold-search', which see.
2245
2246 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2247      (string, bound, noerror, count)
2248      Lisp_Object string, bound, noerror, count;
2249 {
2250   return search_command (string, bound, noerror, count, 1, 0, 0);
2251 }
2252
2253 DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
2254        "sWord search backward: ",
2255        doc: /* Search backward from point for STRING, ignoring differences in punctuation.
2256 Set point to the beginning of the occurrence found, and return point.
2257 An optional second argument bounds the search; it is a buffer position.
2258 The match found must not extend before that position.
2259 Optional third argument, if t, means if fail just return nil (no error).
2260   If not nil and not t, move to limit of search and return nil.
2261 Optional fourth argument is repeat count--search for successive occurrences.  */)
2262      (string, bound, noerror, count)
2263      Lisp_Object string, bound, noerror, count;
2264 {
2265   return search_command (wordify (string, 0), bound, noerror, count, -1, 1, 0);
2266 }
2267
2268 DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
2269        "sWord search: ",
2270        doc: /* Search forward from point for STRING, ignoring differences in punctuation.
2271 Set point to the end of the occurrence found, and return point.
2272 An optional second argument bounds the search; it is a buffer position.
2273 The match found must not extend after that position.
2274 Optional third argument, if t, means if fail just return nil (no error).
2275   If not nil and not t, move to limit of search and return nil.
2276 Optional fourth argument is repeat count--search for successive occurrences.  */)
2277      (string, bound, noerror, count)
2278      Lisp_Object string, bound, noerror, count;
2279 {
2280   return search_command (wordify (string, 0), bound, noerror, count, 1, 1, 0);
2281 }
2282
2283 DEFUN ("word-search-backward-lax", Fword_search_backward_lax, Sword_search_backward_lax, 1, 4,
2284        "sWord search backward: ",
2285        doc: /* Search backward from point for STRING, ignoring differences in punctuation.
2286 Set point to the beginning of the occurrence found, and return point.
2287
2288 Unlike `word-search-backward', the end of STRING need not match a word
2289 boundary unless it ends in whitespace.
2290
2291 An optional second argument bounds the search; it is a buffer position.
2292 The match found must not extend before that position.
2293 Optional third argument, if t, means if fail just return nil (no error).
2294   If not nil and not t, move to limit of search and return nil.
2295 Optional fourth argument is repeat count--search for successive occurrences.  */)
2296      (string, bound, noerror, count)
2297      Lisp_Object string, bound, noerror, count;
2298 {
2299   return search_command (wordify (string, 1), bound, noerror, count, -1, 1, 0);
2300 }
2301
2302 DEFUN ("word-search-forward-lax", Fword_search_forward_lax, Sword_search_forward_lax, 1, 4,
2303        "sWord search: ",
2304        doc: /* Search forward from point for STRING, ignoring differences in punctuation.
2305 Set point to the end of the occurrence found, and return point.
2306
2307 Unlike `word-search-forward', the end of STRING need not match a word
2308 boundary unless it ends in whitespace.
2309
2310 An optional second argument bounds the search; it is a buffer position.
2311 The match found must not extend after that position.
2312 Optional third argument, if t, means if fail just return nil (no error).
2313   If not nil and not t, move to limit of search and return nil.
2314 Optional fourth argument is repeat count--search for successive occurrences.  */)
2315      (string, bound, noerror, count)
2316      Lisp_Object string, bound, noerror, count;
2317 {
2318   return search_command (wordify (string, 1), bound, noerror, count, 1, 1, 0);
2319 }
2320
2321 DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
2322        "sRE search backward: ",
2323        doc: /* Search backward from point for match for regular expression REGEXP.
2324 Set point to the beginning of the match, and return point.
2325 The match found is the one starting last in the buffer
2326 and yet ending before the origin of the search.
2327 An optional second argument bounds the search; it is a buffer position.
2328 The match found must start at or after that position.
2329 Optional third argument, if t, means if fail just return nil (no error).
2330   If not nil and not t, move to limit of search and return nil.
2331 Optional fourth argument is repeat count--search for successive occurrences.
2332 See also the functions `match-beginning', `match-end', `match-string',
2333 and `replace-match'.  */)
2334      (regexp, bound, noerror, count)
2335      Lisp_Object regexp, bound, noerror, count;
2336 {
2337   return search_command (regexp, bound, noerror, count, -1, 1, 0);
2338 }
2339
2340 DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
2341        "sRE search: ",
2342        doc: /* Search forward from point for regular expression REGEXP.
2343 Set point to the end of the occurrence found, and return point.
2344 An optional second argument bounds the search; it is a buffer position.
2345 The match found must not extend after that position.
2346 Optional third argument, if t, means if fail just return nil (no error).
2347   If not nil and not t, move to limit of search and return nil.
2348 Optional fourth argument is repeat count--search for successive occurrences.
2349 See also the functions `match-beginning', `match-end', `match-string',
2350 and `replace-match'.  */)
2351      (regexp, bound, noerror, count)
2352      Lisp_Object regexp, bound, noerror, count;
2353 {
2354   return search_command (regexp, bound, noerror, count, 1, 1, 0);
2355 }
2356
2357 DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
2358        "sPosix search backward: ",
2359        doc: /* Search backward from point for match for regular expression REGEXP.
2360 Find the longest match in accord with Posix regular expression rules.
2361 Set point to the beginning of the match, and return point.
2362 The match found is the one starting last in the buffer
2363 and yet ending before the origin of the search.
2364 An optional second argument bounds the search; it is a buffer position.
2365 The match found must start at or after that position.
2366 Optional third argument, if t, means if fail just return nil (no error).
2367   If not nil and not t, move to limit of search and return nil.
2368 Optional fourth argument is repeat count--search for successive occurrences.
2369 See also the functions `match-beginning', `match-end', `match-string',
2370 and `replace-match'.  */)
2371      (regexp, bound, noerror, count)
2372      Lisp_Object regexp, bound, noerror, count;
2373 {
2374   return search_command (regexp, bound, noerror, count, -1, 1, 1);
2375 }
2376
2377 DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
2378        "sPosix search: ",
2379        doc: /* Search forward from point for regular expression REGEXP.
2380 Find the longest match in accord with Posix regular expression rules.
2381 Set point to the end of the occurrence found, and return point.
2382 An optional second argument bounds the search; it is a buffer position.
2383 The match found must not extend after that position.
2384 Optional third argument, if t, means if fail just return nil (no error).
2385   If not nil and not t, move to limit of search and return nil.
2386 Optional fourth argument is repeat count--search for successive occurrences.
2387 See also the functions `match-beginning', `match-end', `match-string',
2388 and `replace-match'.  */)
2389      (regexp, bound, noerror, count)
2390      Lisp_Object regexp, bound, noerror, count;
2391 {
2392   return search_command (regexp, bound, noerror, count, 1, 1, 1);
2393 }
2394 \f
2395 DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
2396        doc: /* Replace text matched by last search with NEWTEXT.
2397 Leave point at the end of the replacement text.
2398
2399 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
2400 Otherwise maybe capitalize the whole text, or maybe just word initials,
2401 based on the replaced text.
2402 If the replaced text has only capital letters
2403 and has at least one multiletter word, convert NEWTEXT to all caps.
2404 Otherwise if all words are capitalized in the replaced text,
2405 capitalize each word in NEWTEXT.
2406
2407 If third arg LITERAL is non-nil, insert NEWTEXT literally.
2408 Otherwise treat `\\' as special:
2409   `\\&' in NEWTEXT means substitute original matched text.
2410   `\\N' means substitute what matched the Nth `\\(...\\)'.
2411        If Nth parens didn't match, substitute nothing.
2412   `\\\\' means insert one `\\'.
2413 Case conversion does not apply to these substitutions.
2414
2415 FIXEDCASE and LITERAL are optional arguments.
2416
2417 The optional fourth argument STRING can be a string to modify.
2418 This is meaningful when the previous match was done against STRING,
2419 using `string-match'.  When used this way, `replace-match'
2420 creates and returns a new string made by copying STRING and replacing
2421 the part of STRING that was matched.
2422
2423 The optional fifth argument SUBEXP specifies a subexpression;
2424 it says to replace just that subexpression with NEWTEXT,
2425 rather than replacing the entire matched text.
2426 This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
2427 `\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
2428 NEWTEXT in place of subexp N.
2429 This is useful only after a regular expression search or match,
2430 since only regular expressions have distinguished subexpressions.  */)
2431      (newtext, fixedcase, literal, string, subexp)
2432      Lisp_Object newtext, fixedcase, literal, string, subexp;
2433 {
2434   enum { nochange, all_caps, cap_initial } case_action;
2435   register int pos, pos_byte;
2436   int some_multiletter_word;
2437   int some_lowercase;
2438   int some_uppercase;
2439   int some_nonuppercase_initial;
2440   register int c, prevc;
2441   int sub;
2442   int opoint, newpoint;
2443
2444   CHECK_STRING (newtext);
2445
2446   if (! NILP (string))
2447     CHECK_STRING (string);
2448
2449   case_action = nochange;       /* We tried an initialization */
2450                                 /* but some C compilers blew it */
2451
2452   if (search_regs.num_regs <= 0)
2453     error ("`replace-match' called before any match found");
2454
2455   if (NILP (subexp))
2456     sub = 0;
2457   else
2458     {
2459       CHECK_NUMBER (subexp);
2460       sub = XINT (subexp);
2461       if (sub < 0 || sub >= search_regs.num_regs)
2462         args_out_of_range (subexp, make_number (search_regs.num_regs));
2463     }
2464
2465   if (NILP (string))
2466     {
2467       if (search_regs.start[sub] < BEGV
2468           || search_regs.start[sub] > search_regs.end[sub]
2469           || search_regs.end[sub] > ZV)
2470         args_out_of_range (make_number (search_regs.start[sub]),
2471                            make_number (search_regs.end[sub]));
2472     }
2473   else
2474     {
2475       if (search_regs.start[sub] < 0
2476           || search_regs.start[sub] > search_regs.end[sub]
2477           || search_regs.end[sub] > SCHARS (string))
2478         args_out_of_range (make_number (search_regs.start[sub]),
2479                            make_number (search_regs.end[sub]));
2480     }
2481
2482   if (NILP (fixedcase))
2483     {
2484       /* Decide how to casify by examining the matched text. */
2485       int last;
2486
2487       pos = search_regs.start[sub];
2488       last = search_regs.end[sub];
2489
2490       if (NILP (string))
2491         pos_byte = CHAR_TO_BYTE (pos);
2492       else
2493         pos_byte = string_char_to_byte (string, pos);
2494
2495       prevc = '\n';
2496       case_action = all_caps;
2497
2498       /* some_multiletter_word is set nonzero if any original word
2499          is more than one letter long. */
2500       some_multiletter_word = 0;
2501       some_lowercase = 0;
2502       some_nonuppercase_initial = 0;
2503       some_uppercase = 0;
2504
2505       while (pos < last)
2506         {
2507           if (NILP (string))
2508             {
2509               c = FETCH_CHAR_AS_MULTIBYTE (pos_byte);
2510               INC_BOTH (pos, pos_byte);
2511             }
2512           else
2513             FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, pos, pos_byte);
2514
2515           if (LOWERCASEP (c))
2516             {
2517               /* Cannot be all caps if any original char is lower case */
2518
2519               some_lowercase = 1;
2520               if (SYNTAX (prevc) != Sword)
2521                 some_nonuppercase_initial = 1;
2522               else
2523                 some_multiletter_word = 1;
2524             }
2525           else if (UPPERCASEP (c))
2526             {
2527               some_uppercase = 1;
2528               if (SYNTAX (prevc) != Sword)
2529                 ;
2530               else
2531                 some_multiletter_word = 1;
2532             }
2533           else
2534             {
2535               /* If the initial is a caseless word constituent,
2536                  treat that like a lowercase initial.  */
2537               if (SYNTAX (prevc) != Sword)
2538                 some_nonuppercase_initial = 1;
2539             }
2540
2541           prevc = c;
2542         }
2543
2544       /* Convert to all caps if the old text is all caps
2545          and has at least one multiletter word.  */
2546       if (! some_lowercase && some_multiletter_word)
2547         case_action = all_caps;
2548       /* Capitalize each word, if the old text has all capitalized words.  */
2549       else if (!some_nonuppercase_initial && some_multiletter_word)
2550         case_action = cap_initial;
2551       else if (!some_nonuppercase_initial && some_uppercase)
2552         /* Should x -> yz, operating on X, give Yz or YZ?
2553            We'll assume the latter.  */
2554         case_action = all_caps;
2555       else
2556         case_action = nochange;
2557     }
2558
2559   /* Do replacement in a string.  */
2560   if (!NILP (string))
2561     {
2562       Lisp_Object before, after;
2563
2564       before = Fsubstring (string, make_number (0),
2565                            make_number (search_regs.start[sub]));
2566       after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
2567
2568       /* Substitute parts of the match into NEWTEXT
2569          if desired.  */
2570       if (NILP (literal))
2571         {
2572           int lastpos = 0;
2573           int lastpos_byte = 0;
2574           /* We build up the substituted string in ACCUM.  */
2575           Lisp_Object accum;
2576           Lisp_Object middle;
2577           int length = SBYTES (newtext);
2578
2579           accum = Qnil;
2580
2581           for (pos_byte = 0, pos = 0; pos_byte < length;)
2582             {
2583               int substart = -1;
2584               int subend = 0;
2585               int delbackslash = 0;
2586
2587               FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2588
2589               if (c == '\\')
2590                 {
2591                   FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2592
2593                   if (c == '&')
2594                     {
2595                       substart = search_regs.start[sub];
2596                       subend = search_regs.end[sub];
2597                     }
2598                   else if (c >= '1' && c <= '9')
2599                     {
2600                       if (search_regs.start[c - '0'] >= 0
2601                           && c <= search_regs.num_regs + '0')
2602                         {
2603                           substart = search_regs.start[c - '0'];
2604                           subend = search_regs.end[c - '0'];
2605                         }
2606                       else
2607                         {
2608                           /* If that subexp did not match,
2609                              replace \\N with nothing.  */
2610                           substart = 0;
2611                           subend = 0;
2612                         }
2613                     }
2614                   else if (c == '\\')
2615                     delbackslash = 1;
2616                   else
2617                     error ("Invalid use of `\\' in replacement text");
2618                 }
2619               if (substart >= 0)
2620                 {
2621                   if (pos - 2 != lastpos)
2622                     middle = substring_both (newtext, lastpos,
2623                                              lastpos_byte,
2624                                              pos - 2, pos_byte - 2);
2625                   else
2626                     middle = Qnil;
2627                   accum = concat3 (accum, middle,
2628                                    Fsubstring (string,
2629                                                make_number (substart),
2630                                                make_number (subend)));
2631                   lastpos = pos;
2632                   lastpos_byte = pos_byte;
2633                 }
2634               else if (delbackslash)
2635                 {
2636                   middle = substring_both (newtext, lastpos,
2637                                            lastpos_byte,
2638                                            pos - 1, pos_byte - 1);
2639
2640                   accum = concat2 (accum, middle);
2641                   lastpos = pos;
2642                   lastpos_byte = pos_byte;
2643                 }
2644             }
2645
2646           if (pos != lastpos)
2647             middle = substring_both (newtext, lastpos,
2648                                      lastpos_byte,
2649                                      pos, pos_byte);
2650           else
2651             middle = Qnil;
2652
2653           newtext = concat2 (accum, middle);
2654         }
2655
2656       /* Do case substitution in NEWTEXT if desired.  */
2657       if (case_action == all_caps)
2658         newtext = Fupcase (newtext);
2659       else if (case_action == cap_initial)
2660         newtext = Fupcase_initials (newtext);
2661
2662       return concat3 (before, newtext, after);
2663     }
2664
2665   /* Record point, then move (quietly) to the start of the match.  */
2666   if (PT >= search_regs.end[sub])
2667     opoint = PT - ZV;
2668   else if (PT > search_regs.start[sub])
2669     opoint = search_regs.end[sub] - ZV;
2670   else
2671     opoint = PT;
2672
2673   /* If we want non-literal replacement,
2674      perform substitution on the replacement string.  */
2675   if (NILP (literal))
2676     {
2677       int length = SBYTES (newtext);
2678       unsigned char *substed;
2679       int substed_alloc_size, substed_len;
2680       int buf_multibyte = !NILP (current_buffer->enable_multibyte_characters);
2681       int str_multibyte = STRING_MULTIBYTE (newtext);
2682       Lisp_Object rev_tbl;
2683       int really_changed = 0;
2684
2685       rev_tbl = Qnil;
2686
2687       substed_alloc_size = length * 2 + 100;
2688       substed = (unsigned char *) xmalloc (substed_alloc_size + 1);
2689       substed_len = 0;
2690
2691       /* Go thru NEWTEXT, producing the actual text to insert in
2692          SUBSTED while adjusting multibyteness to that of the current
2693          buffer.  */
2694
2695       for (pos_byte = 0, pos = 0; pos_byte < length;)
2696         {
2697           unsigned char str[MAX_MULTIBYTE_LENGTH];
2698           unsigned char *add_stuff = NULL;
2699           int add_len = 0;
2700           int idx = -1;
2701
2702           if (str_multibyte)
2703             {
2704               FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte);
2705               if (!buf_multibyte)
2706                 c = multibyte_char_to_unibyte (c, rev_tbl);
2707             }
2708           else
2709             {
2710               /* Note that we don't have to increment POS.  */
2711               c = SREF (newtext, pos_byte++);
2712               if (buf_multibyte)
2713                 c = unibyte_char_to_multibyte (c);
2714             }
2715
2716           /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2717              or set IDX to a match index, which means put that part
2718              of the buffer text into SUBSTED.  */
2719
2720           if (c == '\\')
2721             {
2722               really_changed = 1;
2723
2724               if (str_multibyte)
2725                 {
2726                   FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext,
2727                                                       pos, pos_byte);
2728                   if (!buf_multibyte && !ASCII_CHAR_P (c))
2729                     c = multibyte_char_to_unibyte (c, rev_tbl);
2730                 }
2731               else
2732                 {
2733                   c = SREF (newtext, pos_byte++);
2734                   if (buf_multibyte)
2735                     c = unibyte_char_to_multibyte (c);
2736                 }
2737
2738               if (c == '&')
2739                 idx = sub;
2740               else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
2741                 {
2742                   if (search_regs.start[c - '0'] >= 1)
2743                     idx = c - '0';
2744                 }
2745               else if (c == '\\')
2746                 add_len = 1, add_stuff = "\\";
2747               else
2748                 {
2749                   xfree (substed);
2750                   error ("Invalid use of `\\' in replacement text");
2751                 }
2752             }
2753           else
2754             {
2755               add_len = CHAR_STRING (c, str);
2756               add_stuff = str;
2757             }
2758
2759           /* If we want to copy part of a previous match,
2760              set up ADD_STUFF and ADD_LEN to point to it.  */
2761           if (idx >= 0)
2762             {
2763               int begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
2764               add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
2765               if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
2766                 move_gap (search_regs.start[idx]);
2767               add_stuff = BYTE_POS_ADDR (begbyte);
2768             }
2769
2770           /* Now the stuff we want to add to SUBSTED
2771              is invariably ADD_LEN bytes starting at ADD_STUFF.  */
2772
2773           /* Make sure SUBSTED is big enough.  */
2774           if (substed_len + add_len >= substed_alloc_size)
2775             {
2776               substed_alloc_size = substed_len + add_len + 500;
2777               substed = (unsigned char *) xrealloc (substed,
2778                                                     substed_alloc_size + 1);
2779             }
2780
2781           /* Now add to the end of SUBSTED.  */
2782           if (add_stuff)
2783             {
2784               bcopy (add_stuff, substed + substed_len, add_len);
2785               substed_len += add_len;
2786             }
2787         }
2788
2789       if (really_changed)
2790         {
2791           if (buf_multibyte)
2792             {
2793               int nchars = multibyte_chars_in_text (substed, substed_len);
2794
2795               newtext = make_multibyte_string (substed, nchars, substed_len);
2796             }
2797           else
2798             newtext = make_unibyte_string (substed, substed_len);
2799         }
2800       xfree (substed);
2801     }
2802
2803   /* Replace the old text with the new in the cleanest possible way.  */
2804   replace_range (search_regs.start[sub], search_regs.end[sub],
2805                  newtext, 1, 0, 1);
2806   newpoint = search_regs.start[sub] + SCHARS (newtext);
2807
2808   if (case_action == all_caps)
2809     Fupcase_region (make_number (search_regs.start[sub]),
2810                     make_number (newpoint));
2811   else if (case_action == cap_initial)
2812     Fupcase_initials_region (make_number (search_regs.start[sub]),
2813                              make_number (newpoint));
2814
2815   /* Adjust search data for this change.  */
2816   {
2817     int oldend = search_regs.end[sub];
2818     int oldstart = search_regs.start[sub];
2819     int change = newpoint - search_regs.end[sub];
2820     int i;
2821
2822     for (i = 0; i < search_regs.num_regs; i++)
2823       {
2824         if (search_regs.start[i] >= oldend)
2825           search_regs.start[i] += change;
2826         else if (search_regs.start[i] > oldstart)
2827           search_regs.start[i] = oldstart;
2828         if (search_regs.end[i] >= oldend)
2829           search_regs.end[i] += change;
2830         else if (search_regs.end[i] > oldstart)
2831           search_regs.end[i] = oldstart;
2832       }
2833   }
2834
2835   /* Put point back where it was in the text.  */
2836   if (opoint <= 0)
2837     TEMP_SET_PT (opoint + ZV);
2838   else
2839     TEMP_SET_PT (opoint);
2840
2841   /* Now move point "officially" to the start of the inserted replacement.  */
2842   move_if_not_intangible (newpoint);
2843
2844   return Qnil;
2845 }
2846 \f
2847 static Lisp_Object
2848 match_limit (num, beginningp)
2849      Lisp_Object num;
2850      int beginningp;
2851 {
2852   register int n;
2853
2854   CHECK_NUMBER (num);
2855   n = XINT (num);
2856   if (n < 0)
2857     args_out_of_range (num, make_number (0));
2858   if (search_regs.num_regs <= 0)
2859     error ("No match data, because no search succeeded");
2860   if (n >= search_regs.num_regs
2861       || search_regs.start[n] < 0)
2862     return Qnil;
2863   return (make_number ((beginningp) ? search_regs.start[n]
2864                                     : search_regs.end[n]));
2865 }
2866
2867 DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
2868        doc: /* Return position of start of text matched by last search.
2869 SUBEXP, a number, specifies which parenthesized expression in the last
2870   regexp.
2871 Value is nil if SUBEXPth pair didn't match, or there were less than
2872   SUBEXP pairs.
2873 Zero means the entire text matched by the whole regexp or whole string.  */)
2874      (subexp)
2875      Lisp_Object subexp;
2876 {
2877   return match_limit (subexp, 1);
2878 }
2879
2880 DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
2881        doc: /* Return position of end of text matched by last search.
2882 SUBEXP, a number, specifies which parenthesized expression in the last
2883   regexp.
2884 Value is nil if SUBEXPth pair didn't match, or there were less than
2885   SUBEXP pairs.
2886 Zero means the entire text matched by the whole regexp or whole string.  */)
2887      (subexp)
2888      Lisp_Object subexp;
2889 {
2890   return match_limit (subexp, 0);
2891 }
2892
2893 DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 3, 0,
2894        doc: /* Return a list containing all info on what the last search matched.
2895 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2896 All the elements are markers or nil (nil if the Nth pair didn't match)
2897 if the last match was on a buffer; integers or nil if a string was matched.
2898 Use `store-match-data' to reinstate the data in this list.
2899
2900 If INTEGERS (the optional first argument) is non-nil, always use
2901 integers \(rather than markers) to represent buffer positions.  In
2902 this case, and if the last match was in a buffer, the buffer will get
2903 stored as one additional element at the end of the list.
2904
2905 If REUSE is a list, reuse it as part of the value.  If REUSE is long
2906 enough to hold all the values, and if INTEGERS is non-nil, no consing
2907 is done.
2908
2909 If optional third arg RESEAT is non-nil, any previous markers on the
2910 REUSE list will be modified to point to nowhere.
2911
2912 Return value is undefined if the last search failed.  */)
2913   (integers, reuse, reseat)
2914      Lisp_Object integers, reuse, reseat;
2915 {
2916   Lisp_Object tail, prev;
2917   Lisp_Object *data;
2918   int i, len;
2919
2920   if (!NILP (reseat))
2921     for (tail = reuse; CONSP (tail); tail = XCDR (tail))
2922       if (MARKERP (XCAR (tail)))
2923         {
2924           unchain_marker (XMARKER (XCAR (tail)));
2925           XSETCAR (tail, Qnil);
2926         }
2927
2928   if (NILP (last_thing_searched))
2929     return Qnil;
2930
2931   prev = Qnil;
2932
2933   data = (Lisp_Object *) alloca ((2 * search_regs.num_regs + 1)
2934                                  * sizeof (Lisp_Object));
2935
2936   len = 0;
2937   for (i = 0; i < search_regs.num_regs; i++)
2938     {
2939       int start = search_regs.start[i];
2940       if (start >= 0)
2941         {
2942           if (EQ (last_thing_searched, Qt)
2943               || ! NILP (integers))
2944             {
2945               XSETFASTINT (data[2 * i], start);
2946               XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
2947             }
2948           else if (BUFFERP (last_thing_searched))
2949             {
2950               data[2 * i] = Fmake_marker ();
2951               Fset_marker (data[2 * i],
2952                            make_number (start),
2953                            last_thing_searched);
2954               data[2 * i + 1] = Fmake_marker ();
2955               Fset_marker (data[2 * i + 1],
2956                            make_number (search_regs.end[i]),
2957                            last_thing_searched);
2958             }
2959           else
2960             /* last_thing_searched must always be Qt, a buffer, or Qnil.  */
2961             abort ();
2962
2963           len = 2 * i + 2;
2964         }
2965       else
2966         data[2 * i] = data[2 * i + 1] = Qnil;
2967     }
2968
2969   if (BUFFERP (last_thing_searched) && !NILP (integers))
2970     {
2971       data[len] = last_thing_searched;
2972       len++;
2973     }
2974
2975   /* If REUSE is not usable, cons up the values and return them.  */
2976   if (! CONSP (reuse))
2977     return Flist (len, data);
2978
2979   /* If REUSE is a list, store as many value elements as will fit
2980      into the elements of REUSE.  */
2981   for (i = 0, tail = reuse; CONSP (tail);
2982        i++, tail = XCDR (tail))
2983     {
2984       if (i < len)
2985         XSETCAR (tail, data[i]);
2986       else
2987         XSETCAR (tail, Qnil);
2988       prev = tail;
2989     }
2990
2991   /* If we couldn't fit all value elements into REUSE,
2992      cons up the rest of them and add them to the end of REUSE.  */
2993   if (i < len)
2994     XSETCDR (prev, Flist (len - i, data + i));
2995
2996   return reuse;
2997 }
2998
2999 /* We used to have an internal use variant of `reseat' described as:
3000
3001       If RESEAT is `evaporate', put the markers back on the free list
3002       immediately.  No other references to the markers must exist in this
3003       case, so it is used only internally on the unwind stack and
3004       save-match-data from Lisp.
3005
3006    But it was ill-conceived: those supposedly-internal markers get exposed via
3007    the undo-list, so freeing them here is unsafe.  */
3008
3009 DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 2, 0,
3010        doc: /* Set internal data on last search match from elements of LIST.
3011 LIST should have been created by calling `match-data' previously.
3012
3013 If optional arg RESEAT is non-nil, make markers on LIST point nowhere.  */)
3014     (list, reseat)
3015      register Lisp_Object list, reseat;
3016 {
3017   register int i;
3018   register Lisp_Object marker;
3019
3020   if (running_asynch_code)
3021     save_search_regs ();
3022
3023   CHECK_LIST (list);
3024
3025   /* Unless we find a marker with a buffer or an explicit buffer
3026      in LIST, assume that this match data came from a string.  */
3027   last_thing_searched = Qt;
3028
3029   /* Allocate registers if they don't already exist.  */
3030   {
3031     int length = XFASTINT (Flength (list)) / 2;
3032
3033     if (length > search_regs.num_regs)
3034       {
3035         if (search_regs.num_regs == 0)
3036           {
3037             search_regs.start
3038               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
3039             search_regs.end
3040               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
3041           }
3042         else
3043           {
3044             search_regs.start
3045               = (regoff_t *) xrealloc (search_regs.start,
3046                                        length * sizeof (regoff_t));
3047             search_regs.end
3048               = (regoff_t *) xrealloc (search_regs.end,
3049                                        length * sizeof (regoff_t));
3050           }
3051
3052         for (i = search_regs.num_regs; i < length; i++)
3053           search_regs.start[i] = -1;
3054
3055         search_regs.num_regs = length;
3056       }
3057
3058     for (i = 0; CONSP (list); i++)
3059       {
3060         marker = XCAR (list);
3061         if (BUFFERP (marker))
3062           {
3063             last_thing_searched = marker;
3064             break;
3065           }
3066         if (i >= length)
3067           break;
3068         if (NILP (marker))
3069           {
3070             search_regs.start[i] = -1;
3071             list = XCDR (list);
3072           }
3073         else
3074           {
3075             int from;
3076             Lisp_Object m;
3077
3078             m = marker;
3079             if (MARKERP (marker))
3080               {
3081                 if (XMARKER (marker)->buffer == 0)
3082                   XSETFASTINT (marker, 0);
3083                 else
3084                   XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
3085               }
3086
3087             CHECK_NUMBER_COERCE_MARKER (marker);
3088             from = XINT (marker);
3089
3090             if (!NILP (reseat) && MARKERP (m))
3091               {
3092                 unchain_marker (XMARKER (m));
3093                 XSETCAR (list, Qnil);
3094               }
3095
3096             if ((list = XCDR (list), !CONSP (list)))
3097               break;
3098
3099             m = marker = XCAR (list);
3100
3101             if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
3102               XSETFASTINT (marker, 0);
3103
3104             CHECK_NUMBER_COERCE_MARKER (marker);
3105             search_regs.start[i] = from;
3106             search_regs.end[i] = XINT (marker);
3107
3108             if (!NILP (reseat) && MARKERP (m))
3109               {
3110                 unchain_marker (XMARKER (m));
3111                 XSETCAR (list, Qnil);
3112               }
3113           }
3114         list = XCDR (list);
3115       }
3116
3117     for (; i < search_regs.num_regs; i++)
3118       search_regs.start[i] = -1;
3119   }
3120
3121   return Qnil;
3122 }
3123
3124 /* If non-zero the match data have been saved in saved_search_regs
3125    during the execution of a sentinel or filter. */
3126 static int search_regs_saved;
3127 static struct re_registers saved_search_regs;
3128 static Lisp_Object saved_last_thing_searched;
3129
3130 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
3131    if asynchronous code (filter or sentinel) is running. */
3132 static void
3133 save_search_regs ()
3134 {
3135   if (!search_regs_saved)
3136     {
3137       saved_search_regs.num_regs = search_regs.num_regs;
3138       saved_search_regs.start = search_regs.start;
3139       saved_search_regs.end = search_regs.end;
3140       saved_last_thing_searched = last_thing_searched;
3141       last_thing_searched = Qnil;
3142       search_regs.num_regs = 0;
3143       search_regs.start = 0;
3144       search_regs.end = 0;
3145
3146       search_regs_saved = 1;
3147     }
3148 }
3149
3150 /* Called upon exit from filters and sentinels. */
3151 void
3152 restore_search_regs ()
3153 {
3154   if (search_regs_saved)
3155     {
3156       if (search_regs.num_regs > 0)
3157         {
3158           xfree (search_regs.start);
3159           xfree (search_regs.end);
3160         }
3161       search_regs.num_regs = saved_search_regs.num_regs;
3162       search_regs.start = saved_search_regs.start;
3163       search_regs.end = saved_search_regs.end;
3164       last_thing_searched = saved_last_thing_searched;
3165       saved_last_thing_searched = Qnil;
3166       search_regs_saved = 0;
3167     }
3168 }
3169
3170 static Lisp_Object
3171 unwind_set_match_data (list)
3172      Lisp_Object list;
3173 {
3174   /* It is NOT ALWAYS safe to free (evaporate) the markers immediately.  */
3175   return Fset_match_data (list, Qt);
3176 }
3177
3178 /* Called to unwind protect the match data.  */
3179 void
3180 record_unwind_save_match_data ()
3181 {
3182   record_unwind_protect (unwind_set_match_data,
3183                          Fmatch_data (Qnil, Qnil, Qnil));
3184 }
3185
3186 /* Quote a string to inactivate reg-expr chars */
3187
3188 DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
3189        doc: /* Return a regexp string which matches exactly STRING and nothing else.  */)
3190      (string)
3191      Lisp_Object string;
3192 {
3193   register unsigned char *in, *out, *end;
3194   register unsigned char *temp;
3195   int backslashes_added = 0;
3196
3197   CHECK_STRING (string);
3198
3199   temp = (unsigned char *) alloca (SBYTES (string) * 2);
3200
3201   /* Now copy the data into the new string, inserting escapes. */
3202
3203   in = SDATA (string);
3204   end = in + SBYTES (string);
3205   out = temp;
3206
3207   for (; in != end; in++)
3208     {
3209       if (*in == '['
3210           || *in == '*' || *in == '.' || *in == '\\'
3211           || *in == '?' || *in == '+'
3212           || *in == '^' || *in == '$')
3213         *out++ = '\\', backslashes_added++;
3214       *out++ = *in;
3215     }
3216
3217   return make_specified_string (temp,
3218                                 SCHARS (string) + backslashes_added,
3219                                 out - temp,
3220                                 STRING_MULTIBYTE (string));
3221 }
3222 \f
3223 void
3224 syms_of_search ()
3225 {
3226   register int i;
3227
3228   for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
3229     {
3230       searchbufs[i].buf.allocated = 100;
3231       searchbufs[i].buf.buffer = (unsigned char *) xmalloc (100);
3232       searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
3233       searchbufs[i].regexp = Qnil;
3234       searchbufs[i].whitespace_regexp = Qnil;
3235       searchbufs[i].syntax_table = Qnil;
3236       staticpro (&searchbufs[i].regexp);
3237       staticpro (&searchbufs[i].whitespace_regexp);
3238       staticpro (&searchbufs[i].syntax_table);
3239       searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
3240     }
3241   searchbuf_head = &searchbufs[0];
3242
3243   Qsearch_failed = intern ("search-failed");
3244   staticpro (&Qsearch_failed);
3245   Qinvalid_regexp = intern ("invalid-regexp");
3246   staticpro (&Qinvalid_regexp);
3247
3248   Fput (Qsearch_failed, Qerror_conditions,
3249         Fcons (Qsearch_failed, Fcons (Qerror, Qnil)));
3250   Fput (Qsearch_failed, Qerror_message,
3251         build_string ("Search failed"));
3252
3253   Fput (Qinvalid_regexp, Qerror_conditions,
3254         Fcons (Qinvalid_regexp, Fcons (Qerror, Qnil)));
3255   Fput (Qinvalid_regexp, Qerror_message,
3256         build_string ("Invalid regexp"));
3257
3258   last_thing_searched = Qnil;
3259   staticpro (&last_thing_searched);
3260
3261   saved_last_thing_searched = Qnil;
3262   staticpro (&saved_last_thing_searched);
3263
3264   DEFVAR_LISP ("search-spaces-regexp", &Vsearch_spaces_regexp,
3265       doc: /* Regexp to substitute for bunches of spaces in regexp search.
3266 Some commands use this for user-specified regexps.
3267 Spaces that occur inside character classes or repetition operators
3268 or other such regexp constructs are not replaced with this.
3269 A value of nil (which is the normal value) means treat spaces literally.  */);
3270   Vsearch_spaces_regexp = Qnil;
3271
3272   DEFVAR_LISP ("inhibit-changing-match-data", &Vinhibit_changing_match_data,
3273       doc: /* Internal use only.
3274 If non-nil, the primitive searching and matching functions
3275 such as `looking-at', `string-match', `re-search-forward', etc.,
3276 do not set the match data.  The proper way to use this variable
3277 is to bind it with `let' around a small expression.  */);
3278   Vinhibit_changing_match_data = Qnil;
3279
3280   defsubr (&Slooking_at);
3281   defsubr (&Sposix_looking_at);
3282   defsubr (&Sstring_match);
3283   defsubr (&Sposix_string_match);
3284   defsubr (&Ssearch_forward);
3285   defsubr (&Ssearch_backward);
3286   defsubr (&Sword_search_forward);
3287   defsubr (&Sword_search_backward);
3288   defsubr (&Sword_search_forward_lax);
3289   defsubr (&Sword_search_backward_lax);
3290   defsubr (&Sre_search_forward);
3291   defsubr (&Sre_search_backward);
3292   defsubr (&Sposix_search_forward);
3293   defsubr (&Sposix_search_backward);
3294   defsubr (&Sreplace_match);
3295   defsubr (&Smatch_beginning);
3296   defsubr (&Smatch_end);
3297   defsubr (&Smatch_data);
3298   defsubr (&Sset_match_data);
3299   defsubr (&Sregexp_quote);
3300 }
3301
3302 /* arch-tag: a6059d79-0552-4f14-a2cb-d379a4e3c78f
3303    (do not change this comment) */