src/search.c

   1 /* String search routines for GNU Emacs.
   2    Copyright (C) 1985, 1986, 1987, 1993, 1994, 1997, 1998, 1999, 2001, 2002,
   3                  2003, 2004, 2005, 2006, 2007, 2008
   4                  Free Software Foundation, Inc.
   5
   6 This file is part of GNU Emacs.
   7
   8 GNU Emacs is free software; you can redistribute it and/or modify
   9 it under the terms of the GNU General Public License as published by
  10 the Free Software Foundation; either version 3, or (at your option)
  11 any later version.
  12
  13 GNU Emacs is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GNU Emacs; see the file COPYING.  If not, write to
  20 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
  21 Boston, MA 02110-1301, USA.  */
  22
  23
  24 #include <config.h>
  25 #include "lisp.h"
  26 #include "syntax.h"
  27 #include "category.h"
  28 #include "buffer.h"
  29 #include "charset.h"
  30 #include "region-cache.h"
  31 #include "commands.h"
  32 #include "blockinput.h"
  33 #include "intervals.h"
  34
  35 #include <sys/types.h>
  36 #include "regex.h"
  37
  38 #define REGEXP_CACHE_SIZE 20
  39
  40 /* If the regexp is non-nil, then the buffer contains the compiled form
  41    of that regexp, suitable for searching.  */
  42 struct regexp_cache
  43 {
  44   struct regexp_cache *next;
  45   Lisp_Object regexp, whitespace_regexp;
  46   /* Syntax table for which the regexp applies.  We need this because
  47      of character classes.  If this is t, then the compiled pattern is valid
  48      for any syntax-table.  */
  49   Lisp_Object syntax_table;
  50   struct re_pattern_buffer buf;
  51   char fastmap[0400];
  52   /* Nonzero means regexp was compiled to do full POSIX backtracking.  */
  53   char posix;
  54 };
  55
  56 /* The instances of that struct.  */
  57 struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
  58
  59 /* The head of the linked list; points to the most recently used buffer.  */
  60 struct regexp_cache *searchbuf_head;
  61
  62
  63 /* Every call to re_match, etc., must pass &search_regs as the regs
  64    argument unless you can show it is unnecessary (i.e., if re_match
  65    is certainly going to be called again before region-around-match
  66    can be called).
  67
  68    Since the registers are now dynamically allocated, we need to make
  69    sure not to refer to the Nth register before checking that it has
  70    been allocated by checking search_regs.num_regs.
  71
  72    The regex code keeps track of whether it has allocated the search
  73    buffer using bits in the re_pattern_buffer.  This means that whenever
  74    you compile a new pattern, it completely forgets whether it has
  75    allocated any registers, and will allocate new registers the next
  76    time you call a searching or matching function.  Therefore, we need
  77    to call re_set_registers after compiling a new pattern or after
  78    setting the match registers, so that the regex functions will be
  79    able to free or re-allocate it properly.  */
  80 static struct re_registers search_regs;
  81
  82 /* The buffer in which the last search was performed, or
  83    Qt if the last search was done in a string;
  84    Qnil if no searching has been done yet.  */
  85 static Lisp_Object last_thing_searched;
  86
  87 /* error condition signaled when regexp compile_pattern fails */
  88
  89 Lisp_Object Qinvalid_regexp;
  90
  91 /* Error condition used for failing searches */
  92 Lisp_Object Qsearch_failed;
  93
  94 Lisp_Object Vsearch_spaces_regexp;
  95
  96 /* If non-nil, the match data will not be changed during call to
  97    searching or matching functions.  This variable is for internal use
  98    only.  */
  99 Lisp_Object Vinhibit_changing_match_data;
 100
 101 static void set_search_regs ();
 102 static void save_search_regs ();
 103 static int simple_search ();
 104 static int boyer_moore ();
 105 static int search_buffer ();
 106 static void matcher_overflow () NO_RETURN;
 107
 108 static void
 109 matcher_overflow ()
 110 {
 111   error ("Stack overflow in regexp matcher");
 112 }
 113
 114 /* Compile a regexp and signal a Lisp error if anything goes wrong.
 115    PATTERN is the pattern to compile.
 116    CP is the place to put the result.
 117    TRANSLATE is a translation table for ignoring case, or nil for none.
 118    REGP is the structure that says where to store the "register"
 119    values that will result from matching this pattern.
 120    If it is 0, we should compile the pattern not to record any
 121    subexpression bounds.
 122    POSIX is nonzero if we want full backtracking (POSIX style)
 123    for this pattern.  0 means backtrack only enough to get a valid match.
 124    MULTIBYTE is nonzero if we want to handle multibyte characters in
 125    PATTERN.  0 means all multibyte characters are recognized just as
 126    sequences of binary data.
 127
 128    The behavior also depends on Vsearch_spaces_regexp.  */
 129
 130 static void
 131 compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte)
 132      struct regexp_cache *cp;
 133      Lisp_Object pattern;
 134      Lisp_Object translate;
 135      struct re_registers *regp;
 136      int posix;
 137      int multibyte;
 138 {
 139   unsigned char *raw_pattern;
 140   int raw_pattern_size;
 141   char *val;
 142   reg_syntax_t old;
 143
 144   /* MULTIBYTE says whether the text to be searched is multibyte.
 145      We must convert PATTERN to match that, or we will not really
 146      find things right.  */
 147
 148   if (multibyte == STRING_MULTIBYTE (pattern))
 149     {
 150       raw_pattern = (unsigned char *) SDATA (pattern);
 151       raw_pattern_size = SBYTES (pattern);
 152     }
 153   else if (multibyte)
 154     {
 155       raw_pattern_size = count_size_as_multibyte (SDATA (pattern),
 156                                                   SCHARS (pattern));
 157       raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
 158       copy_text (SDATA (pattern), raw_pattern,
 159                  SCHARS (pattern), 0, 1);
 160     }
 161   else
 162     {
 163       /* Converting multibyte to single-byte.
 164
 165          ??? Perhaps this conversion should be done in a special way
 166          by subtracting nonascii-insert-offset from each non-ASCII char,
 167          so that only the multibyte chars which really correspond to
 168          the chosen single-byte character set can possibly match.  */
 169       raw_pattern_size = SCHARS (pattern);
 170       raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
 171       copy_text (SDATA (pattern), raw_pattern,
 172                  SBYTES (pattern), 1, 0);
 173     }
 174
 175   cp->regexp = Qnil;
 176   cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
 177   cp->posix = posix;
 178   cp->buf.multibyte = multibyte;
 179   cp->whitespace_regexp = Vsearch_spaces_regexp;
 180   /* rms: I think BLOCK_INPUT is not needed here any more,
 181      because regex.c defines malloc to call xmalloc.
 182      Using BLOCK_INPUT here means the debugger won't run if an error occurs.
 183      So let's turn it off.  */
 184   /*  BLOCK_INPUT;  */
 185   old = re_set_syntax (RE_SYNTAX_EMACS
 186                        | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
 187
 188   re_set_whitespace_regexp (NILP (Vsearch_spaces_regexp) ? NULL
 189                             : SDATA (Vsearch_spaces_regexp));
 190
 191   val = (char *) re_compile_pattern ((char *)raw_pattern,
 192                                      raw_pattern_size, &cp->buf);
 193
 194   /* If the compiled pattern hard codes some of the contents of the
 195      syntax-table, it can only be reused with *this* syntax table.  */
 196   cp->syntax_table = cp->buf.used_syntax ? current_buffer->syntax_table : Qt;
 197
 198   re_set_whitespace_regexp (NULL);
 199
 200   re_set_syntax (old);
 201   /* UNBLOCK_INPUT;  */
 202   if (val)
 203     xsignal1 (Qinvalid_regexp, build_string (val));
 204
 205   cp->regexp = Fcopy_sequence (pattern);
 206 }
 207
 208 /* Shrink each compiled regexp buffer in the cache
 209    to the size actually used right now.
 210    This is called from garbage collection.  */
 211
 212 void
 213 shrink_regexp_cache ()
 214 {
 215   struct regexp_cache *cp;
 216
 217   for (cp = searchbuf_head; cp != 0; cp = cp->next)
 218     {
 219       cp->buf.allocated = cp->buf.used;
 220       cp->buf.buffer
 221         = (unsigned char *) xrealloc (cp->buf.buffer, cp->buf.used);
 222     }
 223 }
 224
 225 /* Clear the regexp cache w.r.t. a particular syntax table,
 226    because it was changed.
 227    There is no danger of memory leak here because re_compile_pattern
 228    automagically manages the memory in each re_pattern_buffer struct,
 229    based on its `allocated' and `buffer' values.  */
 230 void
 231 clear_regexp_cache ()
 232 {
 233   int i;
 234
 235   for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
 236     /* It's tempting to compare with the syntax-table we've actually changd,
 237        but it's not sufficient because char-table inheritance mewans that
 238        modifying one syntax-table can change others at the same time.  */
 239     if (!EQ (searchbufs[i].syntax_table, Qt))
 240       searchbufs[i].regexp = Qnil;
 241 }
 242
 243 /* Compile a regexp if necessary, but first check to see if there's one in
 244    the cache.
 245    PATTERN is the pattern to compile.
 246    TRANSLATE is a translation table for ignoring case, or nil for none.
 247    REGP is the structure that says where to store the "register"
 248    values that will result from matching this pattern.
 249    If it is 0, we should compile the pattern not to record any
 250    subexpression bounds.
 251    POSIX is nonzero if we want full backtracking (POSIX style)
 252    for this pattern.  0 means backtrack only enough to get a valid match.  */
 253
 254 struct re_pattern_buffer *
 255 compile_pattern (pattern, regp, translate, posix, multibyte)
 256      Lisp_Object pattern;
 257      struct re_registers *regp;
 258      Lisp_Object translate;
 259      int posix, multibyte;
 260 {
 261   struct regexp_cache *cp, **cpp;
 262
 263   for (cpp = &searchbuf_head; ; cpp = &cp->next)
 264     {
 265       cp = *cpp;
 266       /* Entries are initialized to nil, and may be set to nil by
 267          compile_pattern_1 if the pattern isn't valid.  Don't apply
 268          string accessors in those cases.  However, compile_pattern_1
 269          is only applied to the cache entry we pick here to reuse.  So
 270          nil should never appear before a non-nil entry.  */
 271       if (NILP (cp->regexp))
 272         goto compile_it;
 273       if (SCHARS (cp->regexp) == SCHARS (pattern)
 274           && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
 275           && !NILP (Fstring_equal (cp->regexp, pattern))
 276           && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
 277           && cp->posix == posix
 278           && cp->buf.multibyte == multibyte
 279           && (EQ (cp->syntax_table, Qt)
 280               || EQ (cp->syntax_table, current_buffer->syntax_table))
 281           && !NILP (Fequal (cp->whitespace_regexp, Vsearch_spaces_regexp)))
 282         break;
 283
 284       /* If we're at the end of the cache, compile into the nil cell
 285          we found, or the last (least recently used) cell with a
 286          string value.  */
 287       if (cp->next == 0)
 288         {
 289         compile_it:
 290           compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte);
 291           break;
 292         }
 293     }
 294
 295   /* When we get here, cp (aka *cpp) contains the compiled pattern,
 296      either because we found it in the cache or because we just compiled it.
 297      Move it to the front of the queue to mark it as most recently used.  */
 298   *cpp = cp->next;
 299   cp->next = searchbuf_head;
 300   searchbuf_head = cp;
 301
 302   /* Advise the searching functions about the space we have allocated
 303      for register data.  */
 304   if (regp)
 305     re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
 306
 307   return &cp->buf;
 308 }
 309
 310 \f
 311 static Lisp_Object
 312 looking_at_1 (string, posix)
 313      Lisp_Object string;
 314      int posix;
 315 {
 316   Lisp_Object val;
 317   unsigned char *p1, *p2;
 318   int s1, s2;
 319   register int i;
 320   struct re_pattern_buffer *bufp;
 321
 322   if (running_asynch_code)
 323     save_search_regs ();
 324
 325   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
 326   XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
 327     = current_buffer->case_eqv_table;
 328
 329   CHECK_STRING (string);
 330   bufp = compile_pattern (string,
 331                           (NILP (Vinhibit_changing_match_data)
 332                            ? &search_regs : NULL),
 333                           (!NILP (current_buffer->case_fold_search)
 334                            ? current_buffer->case_canon_table : Qnil),
 335                           posix,
 336                           !NILP (current_buffer->enable_multibyte_characters));
 337
 338   immediate_quit = 1;
 339   QUIT;                 /* Do a pending quit right away, to avoid paradoxical behavior */
 340
 341   /* Get pointers and sizes of the two strings
 342      that make up the visible portion of the buffer. */
 343
 344   p1 = BEGV_ADDR;
 345   s1 = GPT_BYTE - BEGV_BYTE;
 346   p2 = GAP_END_ADDR;
 347   s2 = ZV_BYTE - GPT_BYTE;
 348   if (s1 < 0)
 349     {
 350       p2 = p1;
 351       s2 = ZV_BYTE - BEGV_BYTE;
 352       s1 = 0;
 353     }
 354   if (s2 < 0)
 355     {
 356       s1 = ZV_BYTE - BEGV_BYTE;
 357       s2 = 0;
 358     }
 359
 360   re_match_object = Qnil;
 361
 362   i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
 363                   PT_BYTE - BEGV_BYTE,
 364                   (NILP (Vinhibit_changing_match_data)
 365                    ? &search_regs : NULL),
 366                   ZV_BYTE - BEGV_BYTE);
 367   immediate_quit = 0;
 368
 369   if (i == -2)
 370     matcher_overflow ();
 371
 372   val = (0 <= i ? Qt : Qnil);
 373   if (NILP (Vinhibit_changing_match_data) && i >= 0)
 374     for (i = 0; i < search_regs.num_regs; i++)
 375       if (search_regs.start[i] >= 0)
 376         {
 377           search_regs.start[i]
 378             = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
 379           search_regs.end[i]
 380             = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
 381         }
 382
 383   /* Set last_thing_searched only when match data is changed.  */
 384   if (NILP (Vinhibit_changing_match_data))
 385     XSETBUFFER (last_thing_searched, current_buffer);
 386
 387   return val;
 388 }
 389
 390 DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
 391        doc: /* Return t if text after point matches regular expression REGEXP.
 392 This function modifies the match data that `match-beginning',
 393 `match-end' and `match-data' access; save and restore the match
 394 data if you want to preserve them.  */)
 395      (regexp)
 396      Lisp_Object regexp;
 397 {
 398   return looking_at_1 (regexp, 0);
 399 }
 400
 401 DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
 402        doc: /* Return t if text after point matches regular expression REGEXP.
 403 Find the longest match, in accord with Posix regular expression rules.
 404 This function modifies the match data that `match-beginning',
 405 `match-end' and `match-data' access; save and restore the match
 406 data if you want to preserve them.  */)
 407      (regexp)
 408      Lisp_Object regexp;
 409 {
 410   return looking_at_1 (regexp, 1);
 411 }
 412 \f
 413 static Lisp_Object
 414 string_match_1 (regexp, string, start, posix)
 415      Lisp_Object regexp, string, start;
 416      int posix;
 417 {
 418   int val;
 419   struct re_pattern_buffer *bufp;
 420   int pos, pos_byte;
 421   int i;
 422
 423   if (running_asynch_code)
 424     save_search_regs ();
 425
 426   CHECK_STRING (regexp);
 427   CHECK_STRING (string);
 428
 429   if (NILP (start))
 430     pos = 0, pos_byte = 0;
 431   else
 432     {
 433       int len = SCHARS (string);
 434
 435       CHECK_NUMBER (start);
 436       pos = XINT (start);
 437       if (pos < 0 && -pos <= len)
 438         pos = len + pos;
 439       else if (0 > pos || pos > len)
 440         args_out_of_range (string, start);
 441       pos_byte = string_char_to_byte (string, pos);
 442     }
 443
 444   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
 445   XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
 446     = current_buffer->case_eqv_table;
 447
 448   bufp = compile_pattern (regexp,
 449                           (NILP (Vinhibit_changing_match_data)
 450                            ? &search_regs : NULL),
 451                           (!NILP (current_buffer->case_fold_search)
 452                            ? current_buffer->case_canon_table : Qnil),
 453                           posix,
 454                           STRING_MULTIBYTE (string));
 455   immediate_quit = 1;
 456   re_match_object = string;
 457
 458   val = re_search (bufp, (char *) SDATA (string),
 459                    SBYTES (string), pos_byte,
 460                    SBYTES (string) - pos_byte,
 461                    (NILP (Vinhibit_changing_match_data)
 462                     ? &search_regs : NULL));
 463   immediate_quit = 0;
 464
 465   /* Set last_thing_searched only when match data is changed.  */
 466   if (NILP (Vinhibit_changing_match_data))
 467     last_thing_searched = Qt;
 468
 469   if (val == -2)
 470     matcher_overflow ();
 471   if (val < 0) return Qnil;
 472
 473   if (NILP (Vinhibit_changing_match_data))
 474     for (i = 0; i < search_regs.num_regs; i++)
 475       if (search_regs.start[i] >= 0)
 476         {
 477           search_regs.start[i]
 478             = string_byte_to_char (string, search_regs.start[i]);
 479           search_regs.end[i]
 480             = string_byte_to_char (string, search_regs.end[i]);
 481         }
 482
 483   return make_number (string_byte_to_char (string, val));
 484 }
 485
 486 DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
 487        doc: /* Return index of start of first match for REGEXP in STRING, or nil.
 488 Matching ignores case if `case-fold-search' is non-nil.
 489 If third arg START is non-nil, start search at that index in STRING.
 490 For index of first char beyond the match, do (match-end 0).
 491 `match-end' and `match-beginning' also give indices of substrings
 492 matched by parenthesis constructs in the pattern.
 493
 494 You can use the function `match-string' to extract the substrings
 495 matched by the parenthesis constructions in REGEXP. */)
 496      (regexp, string, start)
 497      Lisp_Object regexp, string, start;
 498 {
 499   return string_match_1 (regexp, string, start, 0);
 500 }
 501
 502 DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
 503        doc: /* Return index of start of first match for REGEXP in STRING, or nil.
 504 Find the longest match, in accord with Posix regular expression rules.
 505 Case is ignored if `case-fold-search' is non-nil in the current buffer.
 506 If third arg START is non-nil, start search at that index in STRING.
 507 For index of first char beyond the match, do (match-end 0).
 508 `match-end' and `match-beginning' also give indices of substrings
 509 matched by parenthesis constructs in the pattern.  */)
 510      (regexp, string, start)
 511      Lisp_Object regexp, string, start;
 512 {
 513   return string_match_1 (regexp, string, start, 1);
 514 }
 515
 516 /* Match REGEXP against STRING, searching all of STRING,
 517    and return the index of the match, or negative on failure.
 518    This does not clobber the match data.  */
 519
 520 int
 521 fast_string_match (regexp, string)
 522      Lisp_Object regexp, string;
 523 {
 524   int val;
 525   struct re_pattern_buffer *bufp;
 526
 527   bufp = compile_pattern (regexp, 0, Qnil,
 528                           0, STRING_MULTIBYTE (string));
 529   immediate_quit = 1;
 530   re_match_object = string;
 531
 532   val = re_search (bufp, (char *) SDATA (string),
 533                    SBYTES (string), 0,
 534                    SBYTES (string), 0);
 535   immediate_quit = 0;
 536   return val;
 537 }
 538
 539 /* Match REGEXP against STRING, searching all of STRING ignoring case,
 540    and return the index of the match, or negative on failure.
 541    This does not clobber the match data.
 542    We assume that STRING contains single-byte characters.  */
 543
 544 extern Lisp_Object Vascii_downcase_table;
 545
 546 int
 547 fast_c_string_match_ignore_case (regexp, string)
 548      Lisp_Object regexp;
 549      const char *string;
 550 {
 551   int val;
 552   struct re_pattern_buffer *bufp;
 553   int len = strlen (string);
 554
 555   regexp = string_make_unibyte (regexp);
 556   re_match_object = Qt;
 557   bufp = compile_pattern (regexp, 0,
 558                           Vascii_canon_table, 0,
 559                           0);
 560   immediate_quit = 1;
 561   val = re_search (bufp, string, len, 0, len, 0);
 562   immediate_quit = 0;
 563   return val;
 564 }
 565
 566 /* Like fast_string_match but ignore case.  */
 567
 568 int
 569 fast_string_match_ignore_case (regexp, string)
 570      Lisp_Object regexp, string;
 571 {
 572   int val;
 573   struct re_pattern_buffer *bufp;
 574
 575   bufp = compile_pattern (regexp, 0, Vascii_canon_table,
 576                           0, STRING_MULTIBYTE (string));
 577   immediate_quit = 1;
 578   re_match_object = string;
 579
 580   val = re_search (bufp, (char *) SDATA (string),
 581                    SBYTES (string), 0,
 582                    SBYTES (string), 0);
 583   immediate_quit = 0;
 584   return val;
 585 }
 586 \f
 587 /* The newline cache: remembering which sections of text have no newlines.  */
 588
 589 /* If the user has requested newline caching, make sure it's on.
 590    Otherwise, make sure it's off.
 591    This is our cheezy way of associating an action with the change of
 592    state of a buffer-local variable.  */
 593 static void
 594 newline_cache_on_off (buf)
 595      struct buffer *buf;
 596 {
 597   if (NILP (buf->cache_long_line_scans))
 598     {
 599       /* It should be off.  */
 600       if (buf->newline_cache)
 601         {
 602           free_region_cache (buf->newline_cache);
 603           buf->newline_cache = 0;
 604         }
 605     }
 606   else
 607     {
 608       /* It should be on.  */
 609       if (buf->newline_cache == 0)
 610         buf->newline_cache = new_region_cache ();
 611     }
 612 }
 613
 614 \f
 615 /* Search for COUNT instances of the character TARGET between START and END.
 616
 617    If COUNT is positive, search forwards; END must be >= START.
 618    If COUNT is negative, search backwards for the -COUNTth instance;
 619       END must be <= START.
 620    If COUNT is zero, do anything you please; run rogue, for all I care.
 621
 622    If END is zero, use BEGV or ZV instead, as appropriate for the
 623    direction indicated by COUNT.
 624
 625    If we find COUNT instances, set *SHORTAGE to zero, and return the
 626    position past the COUNTth match.  Note that for reverse motion
 627    this is not the same as the usual convention for Emacs motion commands.
 628
 629    If we don't find COUNT instances before reaching END, set *SHORTAGE
 630    to the number of TARGETs left unfound, and return END.
 631
 632    If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
 633    except when inside redisplay.  */
 634
 635 int
 636 scan_buffer (target, start, end, count, shortage, allow_quit)
 637      register int target;
 638      int start, end;
 639      int count;
 640      int *shortage;
 641      int allow_quit;
 642 {
 643   struct region_cache *newline_cache;
 644   int direction;
 645
 646   if (count > 0)
 647     {
 648       direction = 1;
 649       if (! end) end = ZV;
 650     }
 651   else
 652     {
 653       direction = -1;
 654       if (! end) end = BEGV;
 655     }
 656
 657   newline_cache_on_off (current_buffer);
 658   newline_cache = current_buffer->newline_cache;
 659
 660   if (shortage != 0)
 661     *shortage = 0;
 662
 663   immediate_quit = allow_quit;
 664
 665   if (count > 0)
 666     while (start != end)
 667       {
 668         /* Our innermost scanning loop is very simple; it doesn't know
 669            about gaps, buffer ends, or the newline cache.  ceiling is
 670            the position of the last character before the next such
 671            obstacle --- the last character the dumb search loop should
 672            examine.  */
 673         int ceiling_byte = CHAR_TO_BYTE (end) - 1;
 674         int start_byte = CHAR_TO_BYTE (start);
 675         int tem;
 676
 677         /* If we're looking for a newline, consult the newline cache
 678            to see where we can avoid some scanning.  */
 679         if (target == '\n' && newline_cache)
 680           {
 681             int next_change;
 682             immediate_quit = 0;
 683             while (region_cache_forward
 684                    (current_buffer, newline_cache, start_byte, &next_change))
 685               start_byte = next_change;
 686             immediate_quit = allow_quit;
 687
 688             /* START should never be after END.  */
 689             if (start_byte > ceiling_byte)
 690               start_byte = ceiling_byte;
 691
 692             /* Now the text after start is an unknown region, and
 693                next_change is the position of the next known region. */
 694             ceiling_byte = min (next_change - 1, ceiling_byte);
 695           }
 696
 697         /* The dumb loop can only scan text stored in contiguous
 698            bytes. BUFFER_CEILING_OF returns the last character
 699            position that is contiguous, so the ceiling is the
 700            position after that.  */
 701         tem = BUFFER_CEILING_OF (start_byte);
 702         ceiling_byte = min (tem, ceiling_byte);
 703
 704         {
 705           /* The termination address of the dumb loop.  */
 706           register unsigned char *ceiling_addr
 707             = BYTE_POS_ADDR (ceiling_byte) + 1;
 708           register unsigned char *cursor
 709             = BYTE_POS_ADDR (start_byte);
 710           unsigned char *base = cursor;
 711
 712           while (cursor < ceiling_addr)
 713             {
 714               unsigned char *scan_start = cursor;
 715
 716               /* The dumb loop.  */
 717               while (*cursor != target && ++cursor < ceiling_addr)
 718                 ;
 719
 720               /* If we're looking for newlines, cache the fact that
 721                  the region from start to cursor is free of them. */
 722               if (target == '\n' && newline_cache)
 723                 know_region_cache (current_buffer, newline_cache,
 724                                    start_byte + scan_start - base,
 725                                    start_byte + cursor - base);
 726
 727               /* Did we find the target character?  */
 728               if (cursor < ceiling_addr)
 729                 {
 730                   if (--count == 0)
 731                     {
 732                       immediate_quit = 0;
 733                       return BYTE_TO_CHAR (start_byte + cursor - base + 1);
 734                     }
 735                   cursor++;
 736                 }
 737             }
 738
 739           start = BYTE_TO_CHAR (start_byte + cursor - base);
 740         }
 741       }
 742   else
 743     while (start > end)
 744       {
 745         /* The last character to check before the next obstacle.  */
 746         int ceiling_byte = CHAR_TO_BYTE (end);
 747         int start_byte = CHAR_TO_BYTE (start);
 748         int tem;
 749
 750         /* Consult the newline cache, if appropriate.  */
 751         if (target == '\n' && newline_cache)
 752           {
 753             int next_change;
 754             immediate_quit = 0;
 755             while (region_cache_backward
 756                    (current_buffer, newline_cache, start_byte, &next_change))
 757               start_byte = next_change;
 758             immediate_quit = allow_quit;
 759
 760             /* Start should never be at or before end.  */
 761             if (start_byte <= ceiling_byte)
 762               start_byte = ceiling_byte + 1;
 763
 764             /* Now the text before start is an unknown region, and
 765                next_change is the position of the next known region. */
 766             ceiling_byte = max (next_change, ceiling_byte);
 767           }
 768
 769         /* Stop scanning before the gap.  */
 770         tem = BUFFER_FLOOR_OF (start_byte - 1);
 771         ceiling_byte = max (tem, ceiling_byte);
 772
 773         {
 774           /* The termination address of the dumb loop.  */
 775           register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
 776           register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
 777           unsigned char *base = cursor;
 778
 779           while (cursor >= ceiling_addr)
 780             {
 781               unsigned char *scan_start = cursor;
 782
 783               while (*cursor != target && --cursor >= ceiling_addr)
 784                 ;
 785
 786               /* If we're looking for newlines, cache the fact that
 787                  the region from after the cursor to start is free of them.  */
 788               if (target == '\n' && newline_cache)
 789                 know_region_cache (current_buffer, newline_cache,
 790                                    start_byte + cursor - base,
 791                                    start_byte + scan_start - base);
 792
 793               /* Did we find the target character?  */
 794               if (cursor >= ceiling_addr)
 795                 {
 796                   if (++count >= 0)
 797                     {
 798                       immediate_quit = 0;
 799                       return BYTE_TO_CHAR (start_byte + cursor - base);
 800                     }
 801                   cursor--;
 802                 }
 803             }
 804
 805           start = BYTE_TO_CHAR (start_byte + cursor - base);
 806         }
 807       }
 808
 809   immediate_quit = 0;
 810   if (shortage != 0)
 811     *shortage = count * direction;
 812   return start;
 813 }
 814 \f
 815 /* Search for COUNT instances of a line boundary, which means either a
 816    newline or (if selective display enabled) a carriage return.
 817    Start at START.  If COUNT is negative, search backwards.
 818
 819    We report the resulting position by calling TEMP_SET_PT_BOTH.
 820
 821    If we find COUNT instances. we position after (always after,
 822    even if scanning backwards) the COUNTth match, and return 0.
 823
 824    If we don't find COUNT instances before reaching the end of the
 825    buffer (or the beginning, if scanning backwards), we return
 826    the number of line boundaries left unfound, and position at
 827    the limit we bumped up against.
 828
 829    If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
 830    except in special cases.  */
 831
 832 int
 833 scan_newline (start, start_byte, limit, limit_byte, count, allow_quit)
 834      int start, start_byte;
 835      int limit, limit_byte;
 836      register int count;
 837      int allow_quit;
 838 {
 839   int direction = ((count > 0) ? 1 : -1);
 840
 841   register unsigned char *cursor;
 842   unsigned char *base;
 843
 844   register int ceiling;
 845   register unsigned char *ceiling_addr;
 846
 847   int old_immediate_quit = immediate_quit;
 848
 849   /* The code that follows is like scan_buffer
 850      but checks for either newline or carriage return.  */
 851
 852   if (allow_quit)
 853     immediate_quit++;
 854
 855   start_byte = CHAR_TO_BYTE (start);
 856
 857   if (count > 0)
 858     {
 859       while (start_byte < limit_byte)
 860         {
 861           ceiling =  BUFFER_CEILING_OF (start_byte);
 862           ceiling = min (limit_byte - 1, ceiling);
 863           ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
 864           base = (cursor = BYTE_POS_ADDR (start_byte));
 865           while (1)
 866             {
 867               while (*cursor != '\n' && ++cursor != ceiling_addr)
 868                 ;
 869
 870               if (cursor != ceiling_addr)
 871                 {
 872                   if (--count == 0)
 873                     {
 874                       immediate_quit = old_immediate_quit;
 875                       start_byte = start_byte + cursor - base + 1;
 876                       start = BYTE_TO_CHAR (start_byte);
 877                       TEMP_SET_PT_BOTH (start, start_byte);
 878                       return 0;
 879                     }
 880                   else
 881                     if (++cursor == ceiling_addr)
 882                       break;
 883                 }
 884               else
 885                 break;
 886             }
 887           start_byte += cursor - base;
 888         }
 889     }
 890   else
 891     {
 892       while (start_byte > limit_byte)
 893         {
 894           ceiling = BUFFER_FLOOR_OF (start_byte - 1);
 895           ceiling = max (limit_byte, ceiling);
 896           ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
 897           base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
 898           while (1)
 899             {
 900               while (--cursor != ceiling_addr && *cursor != '\n')
 901                 ;
 902
 903               if (cursor != ceiling_addr)
 904                 {
 905                   if (++count == 0)
 906                     {
 907                       immediate_quit = old_immediate_quit;
 908                       /* Return the position AFTER the match we found.  */
 909                       start_byte = start_byte + cursor - base + 1;
 910                       start = BYTE_TO_CHAR (start_byte);
 911                       TEMP_SET_PT_BOTH (start, start_byte);
 912                       return 0;
 913                     }
 914                 }
 915               else
 916                 break;
 917             }
 918           /* Here we add 1 to compensate for the last decrement
 919              of CURSOR, which took it past the valid range.  */
 920           start_byte += cursor - base + 1;
 921         }
 922     }
 923
 924   TEMP_SET_PT_BOTH (limit, limit_byte);
 925   immediate_quit = old_immediate_quit;
 926
 927   return count * direction;
 928 }
 929
 930 int
 931 find_next_newline_no_quit (from, cnt)
 932      register int from, cnt;
 933 {
 934   return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0);
 935 }
 936
 937 /* Like find_next_newline, but returns position before the newline,
 938    not after, and only search up to TO.  This isn't just
 939    find_next_newline (...)-1, because you might hit TO.  */
 940
 941 int
 942 find_before_next_newline (from, to, cnt)
 943      int from, to, cnt;
 944 {
 945   int shortage;
 946   int pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);
 947
 948   if (shortage == 0)
 949     pos--;
 950
 951   return pos;
 952 }
 953 \f
 954 /* Subroutines of Lisp buffer search functions. */
 955
 956 static Lisp_Object
 957 search_command (string, bound, noerror, count, direction, RE, posix)
 958      Lisp_Object string, bound, noerror, count;
 959      int direction;
 960      int RE;
 961      int posix;
 962 {
 963   register int np;
 964   int lim, lim_byte;
 965   int n = direction;
 966
 967   if (!NILP (count))
 968     {
 969       CHECK_NUMBER (count);
 970       n *= XINT (count);
 971     }
 972
 973   CHECK_STRING (string);
 974   if (NILP (bound))
 975     {
 976       if (n > 0)
 977         lim = ZV, lim_byte = ZV_BYTE;
 978       else
 979         lim = BEGV, lim_byte = BEGV_BYTE;
 980     }
 981   else
 982     {
 983       CHECK_NUMBER_COERCE_MARKER (bound);
 984       lim = XINT (bound);
 985       if (n > 0 ? lim < PT : lim > PT)
 986         error ("Invalid search bound (wrong side of point)");
 987       if (lim > ZV)
 988         lim = ZV, lim_byte = ZV_BYTE;
 989       else if (lim < BEGV)
 990         lim = BEGV, lim_byte = BEGV_BYTE;
 991       else
 992         lim_byte = CHAR_TO_BYTE (lim);
 993     }
 994
 995   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
 996   XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
 997     = current_buffer->case_eqv_table;
 998
 999   np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
1000                       (!NILP (current_buffer->case_fold_search)
1001                        ? current_buffer->case_canon_table
1002                        : Qnil),
1003                       (!NILP (current_buffer->case_fold_search)
1004                        ? current_buffer->case_eqv_table
1005                        : Qnil),
1006                       posix);
1007   if (np <= 0)
1008     {
1009       if (NILP (noerror))
1010         xsignal1 (Qsearch_failed, string);
1011
1012       if (!EQ (noerror, Qt))
1013         {
1014           if (lim < BEGV || lim > ZV)
1015             abort ();
1016           SET_PT_BOTH (lim, lim_byte);
1017           return Qnil;
1018 #if 0 /* This would be clean, but maybe programs depend on
1019          a value of nil here.  */
1020           np = lim;
1021 #endif
1022         }
1023       else
1024         return Qnil;
1025     }
1026
1027   if (np < BEGV || np > ZV)
1028     abort ();
1029
1030   SET_PT (np);
1031
1032   return make_number (np);
1033 }
1034 \f
1035 /* Return 1 if REGEXP it matches just one constant string.  */
1036
1037 static int
1038 trivial_regexp_p (regexp)
1039      Lisp_Object regexp;
1040 {
1041   int len = SBYTES (regexp);
1042   unsigned char *s = SDATA (regexp);
1043   while (--len >= 0)
1044     {
1045       switch (*s++)
1046         {
1047         case '.': case '*': case '+': case '?': case '[': case '^': case '$':
1048           return 0;
1049         case '\\':
1050           if (--len < 0)
1051             return 0;
1052           switch (*s++)
1053             {
1054             case '|': case '(': case ')': case '`': case '\'': case 'b':
1055             case 'B': case '<': case '>': case 'w': case 'W': case 's':
1056             case 'S': case '=': case '{': case '}': case '_':
1057             case 'c': case 'C': /* for categoryspec and notcategoryspec */
1058             case '1': case '2': case '3': case '4': case '5':
1059             case '6': case '7': case '8': case '9':
1060               return 0;
1061             }
1062         }
1063     }
1064   return 1;
1065 }
1066
1067 /* Search for the n'th occurrence of STRING in the current buffer,
1068    starting at position POS and stopping at position LIM,
1069    treating STRING as a literal string if RE is false or as
1070    a regular expression if RE is true.
1071
1072    If N is positive, searching is forward and LIM must be greater than POS.
1073    If N is negative, searching is backward and LIM must be less than POS.
1074
1075    Returns -x if x occurrences remain to be found (x > 0),
1076    or else the position at the beginning of the Nth occurrence
1077    (if searching backward) or the end (if searching forward).
1078
1079    POSIX is nonzero if we want full backtracking (POSIX style)
1080    for this pattern.  0 means backtrack only enough to get a valid match.  */
1081
1082 #define TRANSLATE(out, trt, d)                  \
1083 do                                              \
1084   {                                             \
1085     if (! NILP (trt))                           \
1086       {                                         \
1087         Lisp_Object temp;                       \
1088         temp = Faref (trt, make_number (d));    \
1089         if (INTEGERP (temp))                    \
1090           out = XINT (temp);                    \
1091         else                                    \
1092           out = d;                              \
1093       }                                         \
1094     else                                        \
1095       out = d;                                  \
1096   }                                             \
1097 while (0)
1098
1099 /* Only used in search_buffer, to record the end position of the match
1100    when searching regexps and SEARCH_REGS should not be changed
1101    (i.e. Vinhibit_changing_match_data is non-nil).  */
1102 static struct re_registers search_regs_1;
1103
1104 static int
1105 search_buffer (string, pos, pos_byte, lim, lim_byte, n,
1106                RE, trt, inverse_trt, posix)
1107      Lisp_Object string;
1108      int pos;
1109      int pos_byte;
1110      int lim;
1111      int lim_byte;
1112      int n;
1113      int RE;
1114      Lisp_Object trt;
1115      Lisp_Object inverse_trt;
1116      int posix;
1117 {
1118   int len = SCHARS (string);
1119   int len_byte = SBYTES (string);
1120   register int i;
1121
1122   if (running_asynch_code)
1123     save_search_regs ();
1124
1125   /* Searching 0 times means don't move.  */
1126   /* Null string is found at starting position.  */
1127   if (len == 0 || n == 0)
1128     {
1129       set_search_regs (pos_byte, 0);
1130       return pos;
1131     }
1132
1133   if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp)))
1134     {
1135       unsigned char *p1, *p2;
1136       int s1, s2;
1137       struct re_pattern_buffer *bufp;
1138
1139       bufp = compile_pattern (string,
1140                               (NILP (Vinhibit_changing_match_data)
1141                                ? &search_regs : &search_regs_1),
1142                               trt, posix,
1143                               !NILP (current_buffer->enable_multibyte_characters));
1144
1145       immediate_quit = 1;       /* Quit immediately if user types ^G,
1146                                    because letting this function finish
1147                                    can take too long. */
1148       QUIT;                     /* Do a pending quit right away,
1149                                    to avoid paradoxical behavior */
1150       /* Get pointers and sizes of the two strings
1151          that make up the visible portion of the buffer. */
1152
1153       p1 = BEGV_ADDR;
1154       s1 = GPT_BYTE - BEGV_BYTE;
1155       p2 = GAP_END_ADDR;
1156       s2 = ZV_BYTE - GPT_BYTE;
1157       if (s1 < 0)
1158         {
1159           p2 = p1;
1160           s2 = ZV_BYTE - BEGV_BYTE;
1161           s1 = 0;
1162         }
1163       if (s2 < 0)
1164         {
1165           s1 = ZV_BYTE - BEGV_BYTE;
1166           s2 = 0;
1167         }
1168       re_match_object = Qnil;
1169
1170       while (n < 0)
1171         {
1172           int val;
1173           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1174                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1175                              (NILP (Vinhibit_changing_match_data)
1176                               ? &search_regs : &search_regs_1),
1177                              /* Don't allow match past current point */
1178                              pos_byte - BEGV_BYTE);
1179           if (val == -2)
1180             {
1181               matcher_overflow ();
1182             }
1183           if (val >= 0)
1184             {
1185               if (NILP (Vinhibit_changing_match_data))
1186                 {
1187                   pos_byte = search_regs.start[0] + BEGV_BYTE;
1188                   for (i = 0; i < search_regs.num_regs; i++)
1189                     if (search_regs.start[i] >= 0)
1190                       {
1191                         search_regs.start[i]
1192                           = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1193                         search_regs.end[i]
1194                           = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1195                       }
1196                   XSETBUFFER (last_thing_searched, current_buffer);
1197                   /* Set pos to the new position. */
1198                   pos = search_regs.start[0];
1199                 }
1200               else
1201                 {
1202                   pos_byte = search_regs_1.start[0] + BEGV_BYTE;
1203                   /* Set pos to the new position.  */
1204                   pos = BYTE_TO_CHAR (search_regs_1.start[0] + BEGV_BYTE);
1205                 }
1206             }
1207           else
1208             {
1209               immediate_quit = 0;
1210               return (n);
1211             }
1212           n++;
1213         }
1214       while (n > 0)
1215         {
1216           int val;
1217           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1218                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1219                              (NILP (Vinhibit_changing_match_data)
1220                               ? &search_regs : &search_regs_1),
1221                              lim_byte - BEGV_BYTE);
1222           if (val == -2)
1223             {
1224               matcher_overflow ();
1225             }
1226           if (val >= 0)
1227             {
1228               if (NILP (Vinhibit_changing_match_data))
1229                 {
1230                   pos_byte = search_regs.end[0] + BEGV_BYTE;
1231                   for (i = 0; i < search_regs.num_regs; i++)
1232                     if (search_regs.start[i] >= 0)
1233                       {
1234                         search_regs.start[i]
1235                           = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1236                         search_regs.end[i]
1237                           = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1238                       }
1239                   XSETBUFFER (last_thing_searched, current_buffer);
1240                   pos = search_regs.end[0];
1241                 }
1242               else
1243                 {
1244                   pos_byte = search_regs_1.end[0] + BEGV_BYTE;
1245                   pos = BYTE_TO_CHAR (search_regs_1.end[0] + BEGV_BYTE);
1246                 }
1247             }
1248           else
1249             {
1250               immediate_quit = 0;
1251               return (0 - n);
1252             }
1253           n--;
1254         }
1255       immediate_quit = 0;
1256       return (pos);
1257     }
1258   else                          /* non-RE case */
1259     {
1260       unsigned char *raw_pattern, *pat;
1261       int raw_pattern_size;
1262       int raw_pattern_size_byte;
1263       unsigned char *patbuf;
1264       int multibyte = !NILP (current_buffer->enable_multibyte_characters);
1265       unsigned char *base_pat;
1266       /* Set to positive if we find a non-ASCII char that need
1267          translation.  Otherwise set to zero later.  */
1268       int charset_base = -1;
1269       int boyer_moore_ok = 1;
1270
1271       /* MULTIBYTE says whether the text to be searched is multibyte.
1272          We must convert PATTERN to match that, or we will not really
1273          find things right.  */
1274
1275       if (multibyte == STRING_MULTIBYTE (string))
1276         {
1277           raw_pattern = (unsigned char *) SDATA (string);
1278           raw_pattern_size = SCHARS (string);
1279           raw_pattern_size_byte = SBYTES (string);
1280         }
1281       else if (multibyte)
1282         {
1283           raw_pattern_size = SCHARS (string);
1284           raw_pattern_size_byte
1285             = count_size_as_multibyte (SDATA (string),
1286                                        raw_pattern_size);
1287           raw_pattern = (unsigned char *) alloca (raw_pattern_size_byte + 1);
1288           copy_text (SDATA (string), raw_pattern,
1289                      SCHARS (string), 0, 1);
1290         }
1291       else
1292         {
1293           /* Converting multibyte to single-byte.
1294
1295              ??? Perhaps this conversion should be done in a special way
1296              by subtracting nonascii-insert-offset from each non-ASCII char,
1297              so that only the multibyte chars which really correspond to
1298              the chosen single-byte character set can possibly match.  */
1299           raw_pattern_size = SCHARS (string);
1300           raw_pattern_size_byte = SCHARS (string);
1301           raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
1302           copy_text (SDATA (string), raw_pattern,
1303                      SBYTES (string), 1, 0);
1304         }
1305
1306       /* Copy and optionally translate the pattern.  */
1307       len = raw_pattern_size;
1308       len_byte = raw_pattern_size_byte;
1309       patbuf = (unsigned char *) alloca (len_byte);
1310       pat = patbuf;
1311       base_pat = raw_pattern;
1312       if (multibyte)
1313         {
1314           /* Fill patbuf by translated characters in STRING while
1315              checking if we can use boyer-moore search.  If TRT is
1316              non-nil, we can use boyer-moore search only if TRT can be
1317              represented by the byte array of 256 elements.  For that,
1318              all non-ASCII case-equivalents of all case-senstive
1319              characters in STRING must belong to the same charset and
1320              row.  */
1321
1322           while (--len >= 0)
1323             {
1324               unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str;
1325               int c, translated, inverse;
1326               int in_charlen, charlen;
1327
1328               /* If we got here and the RE flag is set, it's because we're
1329                  dealing with a regexp known to be trivial, so the backslash
1330                  just quotes the next character.  */
1331               if (RE && *base_pat == '\\')
1332                 {
1333                   len--;
1334                   raw_pattern_size--;
1335                   len_byte--;
1336                   base_pat++;
1337                 }
1338
1339               c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen);
1340
1341               if (NILP (trt))
1342                 {
1343                   str = base_pat;
1344                   charlen = in_charlen;
1345                 }
1346               else
1347                 {
1348                   /* Translate the character.  */
1349                   TRANSLATE (translated, trt, c);
1350                   charlen = CHAR_STRING (translated, str_base);
1351                   str = str_base;
1352
1353                   /* Check if C has any other case-equivalents.  */
1354                   TRANSLATE (inverse, inverse_trt, c);
1355                   /* If so, check if we can use boyer-moore.  */
1356                   if (c != inverse && boyer_moore_ok)
1357                     {
1358                       /* Check if all equivalents belong to the same
1359                          charset & row.  Note that the check of C
1360                          itself is done by the last iteration.  Note
1361                          also that we don't have to check ASCII
1362                          characters because boyer-moore search can
1363                          always handle their translation.  */
1364                       while (1)
1365                         {
1366                           if (ASCII_BYTE_P (inverse))
1367                             {
1368                               if (charset_base > 0)
1369                                 {
1370                                   boyer_moore_ok = 0;
1371                                   break;
1372                                 }
1373                               charset_base = 0;
1374                             }
1375                           else if (SINGLE_BYTE_CHAR_P (inverse))
1376                             {
1377                               /* Boyer-moore search can't handle a
1378                                  translation of an eight-bit
1379                                  character.  */
1380                               boyer_moore_ok = 0;
1381                               break;
1382                             }
1383                           else if (charset_base < 0)
1384                             charset_base = inverse & ~CHAR_FIELD3_MASK;
1385                           else if ((inverse & ~CHAR_FIELD3_MASK)
1386                                    != charset_base)
1387                             {
1388                               boyer_moore_ok = 0;
1389                               break;
1390                             }
1391                           if (c == inverse)
1392                             break;
1393                           TRANSLATE (inverse, inverse_trt, inverse);
1394                         }
1395                     }
1396                 }
1397               if (charset_base < 0)
1398                 charset_base = 0;
1399
1400               /* Store this character into the translated pattern.  */
1401               bcopy (str, pat, charlen);
1402               pat += charlen;
1403               base_pat += in_charlen;
1404               len_byte -= in_charlen;
1405             }
1406         }
1407       else
1408         {
1409           /* Unibyte buffer.  */
1410           charset_base = 0;
1411           while (--len >= 0)
1412             {
1413               int c, translated;
1414
1415               /* If we got here and the RE flag is set, it's because we're
1416                  dealing with a regexp known to be trivial, so the backslash
1417                  just quotes the next character.  */
1418               if (RE && *base_pat == '\\')
1419                 {
1420                   len--;
1421                   raw_pattern_size--;
1422                   base_pat++;
1423                 }
1424               c = *base_pat++;
1425               TRANSLATE (translated, trt, c);
1426               *pat++ = translated;
1427             }
1428         }
1429
1430       len_byte = pat - patbuf;
1431       len = raw_pattern_size;
1432       pat = base_pat = patbuf;
1433
1434       if (boyer_moore_ok)
1435         return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
1436                             pos, pos_byte, lim, lim_byte,
1437                             charset_base);
1438       else
1439         return simple_search (n, pat, len, len_byte, trt,
1440                               pos, pos_byte, lim, lim_byte);
1441     }
1442 }
1443 \f
1444 /* Do a simple string search N times for the string PAT,
1445    whose length is LEN/LEN_BYTE,
1446    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1447    TRT is the translation table.
1448
1449    Return the character position where the match is found.
1450    Otherwise, if M matches remained to be found, return -M.
1451
1452    This kind of search works regardless of what is in PAT and
1453    regardless of what is in TRT.  It is used in cases where
1454    boyer_moore cannot work.  */
1455
1456 static int
1457 simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
1458      int n;
1459      unsigned char *pat;
1460      int len, len_byte;
1461      Lisp_Object trt;
1462      int pos, pos_byte;
1463      int lim, lim_byte;
1464 {
1465   int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1466   int forward = n > 0;
1467
1468   if (lim > pos && multibyte)
1469     while (n > 0)
1470       {
1471         while (1)
1472           {
1473             /* Try matching at position POS.  */
1474             int this_pos = pos;
1475             int this_pos_byte = pos_byte;
1476             int this_len = len;
1477             int this_len_byte = len_byte;
1478             unsigned char *p = pat;
1479             if (pos + len > lim)
1480               goto stop;
1481
1482             while (this_len > 0)
1483               {
1484                 int charlen, buf_charlen;
1485                 int pat_ch, buf_ch;
1486
1487                 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1488                 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1489                                                  ZV_BYTE - this_pos_byte,
1490                                                  buf_charlen);
1491                 TRANSLATE (buf_ch, trt, buf_ch);
1492
1493                 if (buf_ch != pat_ch)
1494                   break;
1495
1496                 this_len_byte -= charlen;
1497                 this_len--;
1498                 p += charlen;
1499
1500                 this_pos_byte += buf_charlen;
1501                 this_pos++;
1502               }
1503
1504             if (this_len == 0)
1505               {
1506                 pos += len;
1507                 pos_byte += len_byte;
1508                 break;
1509               }
1510
1511             INC_BOTH (pos, pos_byte);
1512           }
1513
1514         n--;
1515       }
1516   else if (lim > pos)
1517     while (n > 0)
1518       {
1519         while (1)
1520           {
1521             /* Try matching at position POS.  */
1522             int this_pos = pos;
1523             int this_len = len;
1524             unsigned char *p = pat;
1525
1526             if (pos + len > lim)
1527               goto stop;
1528
1529             while (this_len > 0)
1530               {
1531                 int pat_ch = *p++;
1532                 int buf_ch = FETCH_BYTE (this_pos);
1533                 TRANSLATE (buf_ch, trt, buf_ch);
1534
1535                 if (buf_ch != pat_ch)
1536                   break;
1537
1538                 this_len--;
1539                 this_pos++;
1540               }
1541
1542             if (this_len == 0)
1543               {
1544                 pos += len;
1545                 break;
1546               }
1547
1548             pos++;
1549           }
1550
1551         n--;
1552       }
1553   /* Backwards search.  */
1554   else if (lim < pos && multibyte)
1555     while (n < 0)
1556       {
1557         while (1)
1558           {
1559             /* Try matching at position POS.  */
1560             int this_pos = pos - len;
1561             int this_pos_byte = pos_byte - len_byte;
1562             int this_len = len;
1563             int this_len_byte = len_byte;
1564             unsigned char *p = pat;
1565
1566             if (this_pos < lim || this_pos_byte < lim_byte)
1567               goto stop;
1568
1569             while (this_len > 0)
1570               {
1571                 int charlen, buf_charlen;
1572                 int pat_ch, buf_ch;
1573
1574                 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1575                 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1576                                                  ZV_BYTE - this_pos_byte,
1577                                                  buf_charlen);
1578                 TRANSLATE (buf_ch, trt, buf_ch);
1579
1580                 if (buf_ch != pat_ch)
1581                   break;
1582
1583                 this_len_byte -= charlen;
1584                 this_len--;
1585                 p += charlen;
1586                 this_pos_byte += buf_charlen;
1587                 this_pos++;
1588               }
1589
1590             if (this_len == 0)
1591               {
1592                 pos -= len;
1593                 pos_byte -= len_byte;
1594                 break;
1595               }
1596
1597             DEC_BOTH (pos, pos_byte);
1598           }
1599
1600         n++;
1601       }
1602   else if (lim < pos)
1603     while (n < 0)
1604       {
1605         while (1)
1606           {
1607             /* Try matching at position POS.  */
1608             int this_pos = pos - len;
1609             int this_len = len;
1610             unsigned char *p = pat;
1611
1612             if (pos - len < lim)
1613               goto stop;
1614
1615             while (this_len > 0)
1616               {
1617                 int pat_ch = *p++;
1618                 int buf_ch = FETCH_BYTE (this_pos);
1619                 TRANSLATE (buf_ch, trt, buf_ch);
1620
1621                 if (buf_ch != pat_ch)
1622                   break;
1623                 this_len--;
1624                 this_pos++;
1625               }
1626
1627             if (this_len == 0)
1628               {
1629                 pos -= len;
1630                 break;
1631               }
1632
1633             pos--;
1634           }
1635
1636         n++;
1637       }
1638
1639  stop:
1640   if (n == 0)
1641     {
1642       if (forward)
1643         set_search_regs ((multibyte ? pos_byte : pos) - len_byte, len_byte);
1644       else
1645         set_search_regs (multibyte ? pos_byte : pos, len_byte);
1646
1647       return pos;
1648     }
1649   else if (n > 0)
1650     return -n;
1651   else
1652     return n;
1653 }
1654 \f
1655 /* Do Boyer-Moore search N times for the string BASE_PAT,
1656    whose length is LEN/LEN_BYTE,
1657    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1658    DIRECTION says which direction we search in.
1659    TRT and INVERSE_TRT are translation tables.
1660    Characters in PAT are already translated by TRT.
1661
1662    This kind of search works if all the characters in BASE_PAT that
1663    have nontrivial translation are the same aside from the last byte.
1664    This makes it possible to translate just the last byte of a
1665    character, and do so after just a simple test of the context.
1666    CHARSET_BASE is nonzero if there is such a non-ASCII character.
1667
1668    If that criterion is not satisfied, do not call this function.  */
1669
1670 static int
1671 boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
1672              pos, pos_byte, lim, lim_byte, charset_base)
1673      int n;
1674      unsigned char *base_pat;
1675      int len, len_byte;
1676      Lisp_Object trt;
1677      Lisp_Object inverse_trt;
1678      int pos, pos_byte;
1679      int lim, lim_byte;
1680      int charset_base;
1681 {
1682   int direction = ((n > 0) ? 1 : -1);
1683   register int dirlen;
1684   int infinity, limit, stride_for_teases = 0;
1685   register int *BM_tab;
1686   int *BM_tab_base;
1687   register unsigned char *cursor, *p_limit;
1688   register int i, j;
1689   unsigned char *pat, *pat_end;
1690   int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1691
1692   unsigned char simple_translate[0400];
1693   /* These are set to the preceding bytes of a byte to be translated
1694      if charset_base is nonzero.  As the maximum byte length of a
1695      multibyte character is 4, we have to check at most three previous
1696      bytes.  */
1697   int translate_prev_byte1 = 0;
1698   int translate_prev_byte2 = 0;
1699   int translate_prev_byte3 = 0;
1700
1701   BM_tab = (int *) alloca (0400 * sizeof (int));
1702
1703   /* The general approach is that we are going to maintain that we know */
1704   /* the first (closest to the present position, in whatever direction */
1705   /* we're searching) character that could possibly be the last */
1706   /* (furthest from present position) character of a valid match.  We */
1707   /* advance the state of our knowledge by looking at that character */
1708   /* and seeing whether it indeed matches the last character of the */
1709   /* pattern.  If it does, we take a closer look.  If it does not, we */
1710   /* move our pointer (to putative last characters) as far as is */
1711   /* logically possible.  This amount of movement, which I call a */
1712   /* stride, will be the length of the pattern if the actual character */
1713   /* appears nowhere in the pattern, otherwise it will be the distance */
1714   /* from the last occurrence of that character to the end of the */
1715   /* pattern. */
1716   /* As a coding trick, an enormous stride is coded into the table for */
1717   /* characters that match the last character.  This allows use of only */
1718   /* a single test, a test for having gone past the end of the */
1719   /* permissible match region, to test for both possible matches (when */
1720   /* the stride goes past the end immediately) and failure to */
1721   /* match (where you get nudged past the end one stride at a time). */
1722
1723   /* Here we make a "mickey mouse" BM table.  The stride of the search */
1724   /* is determined only by the last character of the putative match. */
1725   /* If that character does not match, we will stride the proper */
1726   /* distance to propose a match that superimposes it on the last */
1727   /* instance of a character that matches it (per trt), or misses */
1728   /* it entirely if there is none. */
1729
1730   dirlen = len_byte * direction;
1731   infinity = dirlen - (lim_byte + pos_byte + len_byte + len_byte) * direction;
1732
1733   /* Record position after the end of the pattern.  */
1734   pat_end = base_pat + len_byte;
1735   /* BASE_PAT points to a character that we start scanning from.
1736      It is the first character in a forward search,
1737      the last character in a backward search.  */
1738   if (direction < 0)
1739     base_pat = pat_end - 1;
1740
1741   BM_tab_base = BM_tab;
1742   BM_tab += 0400;
1743   j = dirlen;           /* to get it in a register */
1744   /* A character that does not appear in the pattern induces a */
1745   /* stride equal to the pattern length. */
1746   while (BM_tab_base != BM_tab)
1747     {
1748       *--BM_tab = j;
1749       *--BM_tab = j;
1750       *--BM_tab = j;
1751       *--BM_tab = j;
1752     }
1753
1754   /* We use this for translation, instead of TRT itself.
1755      We fill this in to handle the characters that actually
1756      occur in the pattern.  Others don't matter anyway!  */
1757   bzero (simple_translate, sizeof simple_translate);
1758   for (i = 0; i < 0400; i++)
1759     simple_translate[i] = i;
1760
1761   if (charset_base)
1762     {
1763       /* Setup translate_prev_byte1/2/3 from CHARSET_BASE.  Only a
1764          byte following them are the target of translation.  */
1765       int sample_char = charset_base | 0x20;
1766       unsigned char str[MAX_MULTIBYTE_LENGTH];
1767       int len = CHAR_STRING (sample_char, str);
1768
1769       translate_prev_byte1 = str[len - 2];
1770       if (len > 2)
1771         {
1772           translate_prev_byte2 = str[len - 3];
1773           if (len > 3)
1774             translate_prev_byte3 = str[len - 4];
1775         }
1776     }
1777
1778   i = 0;
1779   while (i != infinity)
1780     {
1781       unsigned char *ptr = base_pat + i;
1782       i += direction;
1783       if (i == dirlen)
1784         i = infinity;
1785       if (! NILP (trt))
1786         {
1787           /* If the byte currently looking at is the last of a
1788              character to check case-equivalents, set CH to that
1789              character.  An ASCII character and a non-ASCII character
1790              matching with CHARSET_BASE are to be checked.  */
1791           int ch = -1;
1792
1793           if (ASCII_BYTE_P (*ptr) || ! multibyte)
1794             ch = *ptr;
1795           else if (charset_base
1796                    && ((pat_end - ptr) == 1 || CHAR_HEAD_P (ptr[1])))
1797             {
1798               unsigned char *charstart = ptr - 1;
1799
1800               while (! (CHAR_HEAD_P (*charstart)))
1801                 charstart--;
1802               ch = STRING_CHAR (charstart, ptr - charstart + 1);
1803               if (charset_base != (ch & ~CHAR_FIELD3_MASK))
1804                 ch = -1;
1805             }
1806
1807           if (ch >= 0400)
1808             j = ((unsigned char) ch) | 0200;
1809           else
1810             j = *ptr;
1811
1812           if (i == infinity)
1813             stride_for_teases = BM_tab[j];
1814
1815           BM_tab[j] = dirlen - i;
1816           /* A translation table is accompanied by its inverse -- see */
1817           /* comment following downcase_table for details */
1818           if (ch >= 0)
1819             {
1820               int starting_ch = ch;
1821               int starting_j = j;
1822
1823               while (1)
1824                 {
1825                   TRANSLATE (ch, inverse_trt, ch);
1826                   if (ch >= 0400)
1827                     j = ((unsigned char) ch) | 0200;
1828                   else
1829                     j = (unsigned char) ch;
1830
1831                   /* For all the characters that map into CH,
1832                      set up simple_translate to map the last byte
1833                      into STARTING_J.  */
1834                   simple_translate[j] = starting_j;
1835                   if (ch == starting_ch)
1836                     break;
1837                   BM_tab[j] = dirlen - i;
1838                 }
1839             }
1840         }
1841       else
1842         {
1843           j = *ptr;
1844
1845           if (i == infinity)
1846             stride_for_teases = BM_tab[j];
1847           BM_tab[j] = dirlen - i;
1848         }
1849       /* stride_for_teases tells how much to stride if we get a */
1850       /* match on the far character but are subsequently */
1851       /* disappointed, by recording what the stride would have been */
1852       /* for that character if the last character had been */
1853       /* different. */
1854     }
1855   infinity = dirlen - infinity;
1856   pos_byte += dirlen - ((direction > 0) ? direction : 0);
1857   /* loop invariant - POS_BYTE points at where last char (first
1858      char if reverse) of pattern would align in a possible match.  */
1859   while (n != 0)
1860     {
1861       int tail_end;
1862       unsigned char *tail_end_ptr;
1863
1864       /* It's been reported that some (broken) compiler thinks that
1865          Boolean expressions in an arithmetic context are unsigned.
1866          Using an explicit ?1:0 prevents this.  */
1867       if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1868           < 0)
1869         return (n * (0 - direction));
1870       /* First we do the part we can by pointers (maybe nothing) */
1871       QUIT;
1872       pat = base_pat;
1873       limit = pos_byte - dirlen + direction;
1874       if (direction > 0)
1875         {
1876           limit = BUFFER_CEILING_OF (limit);
1877           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1878              can take on without hitting edge of buffer or the gap.  */
1879           limit = min (limit, pos_byte + 20000);
1880           limit = min (limit, lim_byte - 1);
1881         }
1882       else
1883         {
1884           limit = BUFFER_FLOOR_OF (limit);
1885           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1886              can take on without hitting edge of buffer or the gap.  */
1887           limit = max (limit, pos_byte - 20000);
1888           limit = max (limit, lim_byte);
1889         }
1890       tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1891       tail_end_ptr = BYTE_POS_ADDR (tail_end);
1892
1893       if ((limit - pos_byte) * direction > 20)
1894         {
1895           unsigned char *p2;
1896
1897           p_limit = BYTE_POS_ADDR (limit);
1898           p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1899           /* In this loop, pos + cursor - p2 is the surrogate for pos */
1900           while (1)             /* use one cursor setting as long as i can */
1901             {
1902               if (direction > 0) /* worth duplicating */
1903                 {
1904                   /* Use signed comparison if appropriate
1905                      to make cursor+infinity sure to be > p_limit.
1906                      Assuming that the buffer lies in a range of addresses
1907                      that are all "positive" (as ints) or all "negative",
1908                      either kind of comparison will work as long
1909                      as we don't step by infinity.  So pick the kind
1910                      that works when we do step by infinity.  */
1911                   if ((EMACS_INT) (p_limit + infinity) > (EMACS_INT) p_limit)
1912                     while ((EMACS_INT) cursor <= (EMACS_INT) p_limit)
1913                       cursor += BM_tab[*cursor];
1914                   else
1915                     while ((EMACS_UINT) cursor <= (EMACS_UINT) p_limit)
1916                       cursor += BM_tab[*cursor];
1917                 }
1918               else
1919                 {
1920                   if ((EMACS_INT) (p_limit + infinity) < (EMACS_INT) p_limit)
1921                     while ((EMACS_INT) cursor >= (EMACS_INT) p_limit)
1922                       cursor += BM_tab[*cursor];
1923                   else
1924                     while ((EMACS_UINT) cursor >= (EMACS_UINT) p_limit)
1925                       cursor += BM_tab[*cursor];
1926                 }
1927 /* If you are here, cursor is beyond the end of the searched region. */
1928 /* This can happen if you match on the far character of the pattern, */
1929 /* because the "stride" of that character is infinity, a number able */
1930 /* to throw you well beyond the end of the search.  It can also */
1931 /* happen if you fail to match within the permitted region and would */
1932 /* otherwise try a character beyond that region */
1933               if ((cursor - p_limit) * direction <= len_byte)
1934                 break;  /* a small overrun is genuine */
1935               cursor -= infinity; /* large overrun = hit */
1936               i = dirlen - direction;
1937               if (! NILP (trt))
1938                 {
1939                   while ((i -= direction) + direction != 0)
1940                     {
1941                       int ch;
1942                       cursor -= direction;
1943                       /* Translate only the last byte of a character.  */
1944                       if (! multibyte
1945                           || ((cursor == tail_end_ptr
1946                                || CHAR_HEAD_P (cursor[1]))
1947                               && (CHAR_HEAD_P (cursor[0])
1948                                   /* Check if this is the last byte of
1949                                      a translable character.  */
1950                                   || (translate_prev_byte1 == cursor[-1]
1951                                       && (CHAR_HEAD_P (translate_prev_byte1)
1952                                           || (translate_prev_byte2 == cursor[-2]
1953                                               && (CHAR_HEAD_P (translate_prev_byte2)
1954                                                   || (translate_prev_byte3 == cursor[-3]))))))))
1955                         ch = simple_translate[*cursor];
1956                       else
1957                         ch = *cursor;
1958                       if (pat[i] != ch)
1959                         break;
1960                     }
1961                 }
1962               else
1963                 {
1964                   while ((i -= direction) + direction != 0)
1965                     {
1966                       cursor -= direction;
1967                       if (pat[i] != *cursor)
1968                         break;
1969                     }
1970                 }
1971               cursor += dirlen - i - direction; /* fix cursor */
1972               if (i + direction == 0)
1973                 {
1974                   int position, start, end;
1975
1976                   cursor -= direction;
1977
1978                   position = pos_byte + cursor - p2 + ((direction > 0)
1979                                                        ? 1 - len_byte : 0);
1980                   set_search_regs (position, len_byte);
1981
1982                   if (NILP (Vinhibit_changing_match_data))
1983                     {
1984                       start = search_regs.start[0];
1985                       end = search_regs.end[0];
1986                     }
1987                   else
1988                     /* If Vinhibit_changing_match_data is non-nil,
1989                        search_regs will not be changed.  So let's
1990                        compute start and end here.  */
1991                     {
1992                       start = BYTE_TO_CHAR (position);
1993                       end = BYTE_TO_CHAR (position + len_byte);
1994                     }
1995
1996                   if ((n -= direction) != 0)
1997                     cursor += dirlen; /* to resume search */
1998                   else
1999                     return direction > 0 ? end : start;
2000                 }
2001               else
2002                 cursor += stride_for_teases; /* <sigh> we lose -  */
2003             }
2004           pos_byte += cursor - p2;
2005         }
2006       else
2007         /* Now we'll pick up a clump that has to be done the hard */
2008         /* way because it covers a discontinuity */
2009         {
2010           limit = ((direction > 0)
2011                    ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
2012                    : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
2013           limit = ((direction > 0)
2014                    ? min (limit + len_byte, lim_byte - 1)
2015                    : max (limit - len_byte, lim_byte));
2016           /* LIMIT is now the last value POS_BYTE can have
2017              and still be valid for a possible match.  */
2018           while (1)
2019             {
2020               /* This loop can be coded for space rather than */
2021               /* speed because it will usually run only once. */
2022               /* (the reach is at most len + 21, and typically */
2023               /* does not exceed len) */
2024               while ((limit - pos_byte) * direction >= 0)
2025                 pos_byte += BM_tab[FETCH_BYTE (pos_byte)];
2026               /* now run the same tests to distinguish going off the */
2027               /* end, a match or a phony match. */
2028               if ((pos_byte - limit) * direction <= len_byte)
2029                 break;  /* ran off the end */
2030               /* Found what might be a match.
2031                  Set POS_BYTE back to last (first if reverse) pos.  */
2032               pos_byte -= infinity;
2033               i = dirlen - direction;
2034               while ((i -= direction) + direction != 0)
2035                 {
2036                   int ch;
2037                   unsigned char *ptr;
2038                   pos_byte -= direction;
2039                   ptr = BYTE_POS_ADDR (pos_byte);
2040                   /* Translate only the last byte of a character.  */
2041                   if (! multibyte
2042                       || ((ptr == tail_end_ptr
2043                            || CHAR_HEAD_P (ptr[1]))
2044                           && (CHAR_HEAD_P (ptr[0])
2045                               /* Check if this is the last byte of a
2046                                  translable character.  */
2047                               || (translate_prev_byte1 == ptr[-1]
2048                                   && (CHAR_HEAD_P (translate_prev_byte1)
2049                                       || (translate_prev_byte2 == ptr[-2]
2050                                           && (CHAR_HEAD_P (translate_prev_byte2)
2051                                               || translate_prev_byte3 == ptr[-3])))))))
2052                     ch = simple_translate[*ptr];
2053                   else
2054                     ch = *ptr;
2055                   if (pat[i] != ch)
2056                     break;
2057                 }
2058               /* Above loop has moved POS_BYTE part or all the way
2059                  back to the first pos (last pos if reverse).
2060                  Set it once again at the last (first if reverse) char.  */
2061               pos_byte += dirlen - i- direction;
2062               if (i + direction == 0)
2063                 {
2064                   int position, start, end;
2065                   pos_byte -= direction;
2066
2067                   position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
2068                   set_search_regs (position, len_byte);
2069
2070                   if (NILP (Vinhibit_changing_match_data))
2071                     {
2072                       start = search_regs.start[0];
2073                       end = search_regs.end[0];
2074                     }
2075                   else
2076                     /* If Vinhibit_changing_match_data is non-nil,
2077                        search_regs will not be changed.  So let's
2078                        compute start and end here.  */
2079                     {
2080                       start = BYTE_TO_CHAR (position);
2081                       end = BYTE_TO_CHAR (position + len_byte);
2082                     }
2083
2084                   if ((n -= direction) != 0)
2085                     pos_byte += dirlen; /* to resume search */
2086                   else
2087                     return direction > 0 ? end : start;
2088                 }
2089               else
2090                 pos_byte += stride_for_teases;
2091             }
2092           }
2093       /* We have done one clump.  Can we continue? */
2094       if ((lim_byte - pos_byte) * direction < 0)
2095         return ((0 - n) * direction);
2096     }
2097   return BYTE_TO_CHAR (pos_byte);
2098 }
2099
2100 /* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
2101    for the overall match just found in the current buffer.
2102    Also clear out the match data for registers 1 and up.  */
2103
2104 static void
2105 set_search_regs (beg_byte, nbytes)
2106      int beg_byte, nbytes;
2107 {
2108   int i;
2109
2110   if (!NILP (Vinhibit_changing_match_data))
2111     return;
2112
2113   /* Make sure we have registers in which to store
2114      the match position.  */
2115   if (search_regs.num_regs == 0)
2116     {
2117       search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
2118       search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
2119       search_regs.num_regs = 2;
2120     }
2121
2122   /* Clear out the other registers.  */
2123   for (i = 1; i < search_regs.num_regs; i++)
2124     {
2125       search_regs.start[i] = -1;
2126       search_regs.end[i] = -1;
2127     }
2128
2129   search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
2130   search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
2131   XSETBUFFER (last_thing_searched, current_buffer);
2132 }
2133 \f
2134 /* Given a string of words separated by word delimiters,
2135   compute a regexp that matches those exact words
2136   separated by arbitrary punctuation.  */
2137
2138 static Lisp_Object
2139 wordify (string)
2140      Lisp_Object string;
2141 {
2142   register unsigned char *p, *o;
2143   register int i, i_byte, len, punct_count = 0, word_count = 0;
2144   Lisp_Object val;
2145   int prev_c = 0;
2146   int adjust;
2147
2148   CHECK_STRING (string);
2149   p = SDATA (string);
2150   len = SCHARS (string);
2151
2152   for (i = 0, i_byte = 0; i < len; )
2153     {
2154       int c;
2155
2156       FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
2157
2158       if (SYNTAX (c) != Sword)
2159         {
2160           punct_count++;
2161           if (i > 0 && SYNTAX (prev_c) == Sword)
2162             word_count++;
2163         }
2164
2165       prev_c = c;
2166     }
2167
2168   if (SYNTAX (prev_c) == Sword)
2169     word_count++;
2170   if (!word_count)
2171     return empty_unibyte_string;
2172
2173   adjust = - punct_count + 5 * (word_count - 1) + 4;
2174   if (STRING_MULTIBYTE (string))
2175     val = make_uninit_multibyte_string (len + adjust,
2176                                         SBYTES (string)
2177                                         + adjust);
2178   else
2179     val = make_uninit_string (len + adjust);
2180
2181   o = SDATA (val);
2182   *o++ = '\\';
2183   *o++ = 'b';
2184   prev_c = 0;
2185
2186   for (i = 0, i_byte = 0; i < len; )
2187     {
2188       int c;
2189       int i_byte_orig = i_byte;
2190
2191       FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
2192
2193       if (SYNTAX (c) == Sword)
2194         {
2195           bcopy (SDATA (string) + i_byte_orig, o,
2196                  i_byte - i_byte_orig);
2197           o += i_byte - i_byte_orig;
2198         }
2199       else if (i > 0 && SYNTAX (prev_c) == Sword && --word_count)
2200         {
2201           *o++ = '\\';
2202           *o++ = 'W';
2203           *o++ = '\\';
2204           *o++ = 'W';
2205           *o++ = '*';
2206         }
2207
2208       prev_c = c;
2209     }
2210
2211   *o++ = '\\';
2212   *o++ = 'b';
2213
2214   return val;
2215 }
2216 \f
2217 DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
2218        "MSearch backward: ",
2219        doc: /* Search backward from point for STRING.
2220 Set point to the beginning of the occurrence found, and return point.
2221 An optional second argument bounds the search; it is a buffer position.
2222 The match found must not extend before that position.
2223 Optional third argument, if t, means if fail just return nil (no error).
2224  If not nil and not t, position at limit of search and return nil.
2225 Optional fourth argument is repeat count--search for successive occurrences.
2226
2227 Search case-sensitivity is determined by the value of the variable
2228 `case-fold-search', which see.
2229
2230 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2231      (string, bound, noerror, count)
2232      Lisp_Object string, bound, noerror, count;
2233 {
2234   return search_command (string, bound, noerror, count, -1, 0, 0);
2235 }
2236
2237 DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
2238        doc: /* Search forward from point for STRING.
2239 Set point to the end of the occurrence found, and return point.
2240 An optional second argument bounds the search; it is a buffer position.
2241 The match found must not extend after that position.  A value of nil is
2242   equivalent to (point-max).
2243 Optional third argument, if t, means if fail just return nil (no error).
2244   If not nil and not t, move to limit of search and return nil.
2245 Optional fourth argument is repeat count--search for successive occurrences.
2246
2247 Search case-sensitivity is determined by the value of the variable
2248 `case-fold-search', which see.
2249
2250 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2251      (string, bound, noerror, count)
2252      Lisp_Object string, bound, noerror, count;
2253 {
2254   return search_command (string, bound, noerror, count, 1, 0, 0);
2255 }
2256
2257 DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
2258        "sWord search backward: ",
2259        doc: /* Search backward from point for STRING, ignoring differences in punctuation.
2260 Set point to the beginning of the occurrence found, and return point.
2261 An optional second argument bounds the search; it is a buffer position.
2262 The match found must not extend before that position.
2263 Optional third argument, if t, means if fail just return nil (no error).
2264   If not nil and not t, move to limit of search and return nil.
2265 Optional fourth argument is repeat count--search for successive occurrences.  */)
2266      (string, bound, noerror, count)
2267      Lisp_Object string, bound, noerror, count;
2268 {
2269   return search_command (wordify (string), bound, noerror, count, -1, 1, 0);
2270 }
2271
2272 DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
2273        "sWord search: ",
2274        doc: /* Search forward from point for STRING, ignoring differences in punctuation.
2275 Set point to the end of the occurrence found, and return point.
2276 An optional second argument bounds the search; it is a buffer position.
2277 The match found must not extend after that position.
2278 Optional third argument, if t, means if fail just return nil (no error).
2279   If not nil and not t, move to limit of search and return nil.
2280 Optional fourth argument is repeat count--search for successive occurrences.  */)
2281      (string, bound, noerror, count)
2282      Lisp_Object string, bound, noerror, count;
2283 {
2284   return search_command (wordify (string), bound, noerror, count, 1, 1, 0);
2285 }
2286
2287 DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
2288        "sRE search backward: ",
2289        doc: /* Search backward from point for match for regular expression REGEXP.
2290 Set point to the beginning of the match, and return point.
2291 The match found is the one starting last in the buffer
2292 and yet ending before the origin of the search.
2293 An optional second argument bounds the search; it is a buffer position.
2294 The match found must start at or after that position.
2295 Optional third argument, if t, means if fail just return nil (no error).
2296   If not nil and not t, move to limit of search and return nil.
2297 Optional fourth argument is repeat count--search for successive occurrences.
2298 See also the functions `match-beginning', `match-end', `match-string',
2299 and `replace-match'.  */)
2300      (regexp, bound, noerror, count)
2301      Lisp_Object regexp, bound, noerror, count;
2302 {
2303   return search_command (regexp, bound, noerror, count, -1, 1, 0);
2304 }
2305
2306 DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
2307        "sRE search: ",
2308        doc: /* Search forward from point for regular expression REGEXP.
2309 Set point to the end of the occurrence found, and return point.
2310 An optional second argument bounds the search; it is a buffer position.
2311 The match found must not extend after that position.
2312 Optional third argument, if t, means if fail just return nil (no error).
2313   If not nil and not t, move to limit of search and return nil.
2314 Optional fourth argument is repeat count--search for successive occurrences.
2315 See also the functions `match-beginning', `match-end', `match-string',
2316 and `replace-match'.  */)
2317      (regexp, bound, noerror, count)
2318      Lisp_Object regexp, bound, noerror, count;
2319 {
2320   return search_command (regexp, bound, noerror, count, 1, 1, 0);
2321 }
2322
2323 DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
2324        "sPosix search backward: ",
2325        doc: /* Search backward from point for match for regular expression REGEXP.
2326 Find the longest match in accord with Posix regular expression rules.
2327 Set point to the beginning of the match, and return point.
2328 The match found is the one starting last in the buffer
2329 and yet ending before the origin of the search.
2330 An optional second argument bounds the search; it is a buffer position.
2331 The match found must start at or after that position.
2332 Optional third argument, if t, means if fail just return nil (no error).
2333   If not nil and not t, move to limit of search and return nil.
2334 Optional fourth argument is repeat count--search for successive occurrences.
2335 See also the functions `match-beginning', `match-end', `match-string',
2336 and `replace-match'.  */)
2337      (regexp, bound, noerror, count)
2338      Lisp_Object regexp, bound, noerror, count;
2339 {
2340   return search_command (regexp, bound, noerror, count, -1, 1, 1);
2341 }
2342
2343 DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
2344        "sPosix search: ",
2345        doc: /* Search forward from point for regular expression REGEXP.
2346 Find the longest match in accord with Posix regular expression rules.
2347 Set point to the end of the occurrence found, and return point.
2348 An optional second argument bounds the search; it is a buffer position.
2349 The match found must not extend after that position.
2350 Optional third argument, if t, means if fail just return nil (no error).
2351   If not nil and not t, move to limit of search and return nil.
2352 Optional fourth argument is repeat count--search for successive occurrences.
2353 See also the functions `match-beginning', `match-end', `match-string',
2354 and `replace-match'.  */)
2355      (regexp, bound, noerror, count)
2356      Lisp_Object regexp, bound, noerror, count;
2357 {
2358   return search_command (regexp, bound, noerror, count, 1, 1, 1);
2359 }
2360 \f
2361 DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
2362        doc: /* Replace text matched by last search with NEWTEXT.
2363 Leave point at the end of the replacement text.
2364
2365 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
2366 Otherwise maybe capitalize the whole text, or maybe just word initials,
2367 based on the replaced text.
2368 If the replaced text has only capital letters
2369 and has at least one multiletter word, convert NEWTEXT to all caps.
2370 Otherwise if all words are capitalized in the replaced text,
2371 capitalize each word in NEWTEXT.
2372
2373 If third arg LITERAL is non-nil, insert NEWTEXT literally.
2374 Otherwise treat `\\' as special:
2375   `\\&' in NEWTEXT means substitute original matched text.
2376   `\\N' means substitute what matched the Nth `\\(...\\)'.
2377        If Nth parens didn't match, substitute nothing.
2378   `\\\\' means insert one `\\'.
2379 Case conversion does not apply to these substitutions.
2380
2381 FIXEDCASE and LITERAL are optional arguments.
2382
2383 The optional fourth argument STRING can be a string to modify.
2384 This is meaningful when the previous match was done against STRING,
2385 using `string-match'.  When used this way, `replace-match'
2386 creates and returns a new string made by copying STRING and replacing
2387 the part of STRING that was matched.
2388
2389 The optional fifth argument SUBEXP specifies a subexpression;
2390 it says to replace just that subexpression with NEWTEXT,
2391 rather than replacing the entire matched text.
2392 This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
2393 `\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
2394 NEWTEXT in place of subexp N.
2395 This is useful only after a regular expression search or match,
2396 since only regular expressions have distinguished subexpressions.  */)
2397      (newtext, fixedcase, literal, string, subexp)
2398      Lisp_Object newtext, fixedcase, literal, string, subexp;
2399 {
2400   enum { nochange, all_caps, cap_initial } case_action;
2401   register int pos, pos_byte;
2402   int some_multiletter_word;
2403   int some_lowercase;
2404   int some_uppercase;
2405   int some_nonuppercase_initial;
2406   register int c, prevc;
2407   int sub;
2408   int opoint, newpoint;
2409
2410   CHECK_STRING (newtext);
2411
2412   if (! NILP (string))
2413     CHECK_STRING (string);
2414
2415   case_action = nochange;       /* We tried an initialization */
2416                                 /* but some C compilers blew it */
2417
2418   if (search_regs.num_regs <= 0)
2419     error ("`replace-match' called before any match found");
2420
2421   if (NILP (subexp))
2422     sub = 0;
2423   else
2424     {
2425       CHECK_NUMBER (subexp);
2426       sub = XINT (subexp);
2427       if (sub < 0 || sub >= search_regs.num_regs)
2428         args_out_of_range (subexp, make_number (search_regs.num_regs));
2429     }
2430
2431   if (NILP (string))
2432     {
2433       if (search_regs.start[sub] < BEGV
2434           || search_regs.start[sub] > search_regs.end[sub]
2435           || search_regs.end[sub] > ZV)
2436         args_out_of_range (make_number (search_regs.start[sub]),
2437                            make_number (search_regs.end[sub]));
2438     }
2439   else
2440     {
2441       if (search_regs.start[sub] < 0
2442           || search_regs.start[sub] > search_regs.end[sub]
2443           || search_regs.end[sub] > SCHARS (string))
2444         args_out_of_range (make_number (search_regs.start[sub]),
2445                            make_number (search_regs.end[sub]));
2446     }
2447
2448   if (NILP (fixedcase))
2449     {
2450       /* Decide how to casify by examining the matched text. */
2451       int last;
2452
2453       pos = search_regs.start[sub];
2454       last = search_regs.end[sub];
2455
2456       if (NILP (string))
2457         pos_byte = CHAR_TO_BYTE (pos);
2458       else
2459         pos_byte = string_char_to_byte (string, pos);
2460
2461       prevc = '\n';
2462       case_action = all_caps;
2463
2464       /* some_multiletter_word is set nonzero if any original word
2465          is more than one letter long. */
2466       some_multiletter_word = 0;
2467       some_lowercase = 0;
2468       some_nonuppercase_initial = 0;
2469       some_uppercase = 0;
2470
2471       while (pos < last)
2472         {
2473           if (NILP (string))
2474             {
2475               c = FETCH_CHAR (pos_byte);
2476               INC_BOTH (pos, pos_byte);
2477             }
2478           else
2479             FETCH_STRING_CHAR_ADVANCE (c, string, pos, pos_byte);
2480
2481           if (LOWERCASEP (c))
2482             {
2483               /* Cannot be all caps if any original char is lower case */
2484
2485               some_lowercase = 1;
2486               if (SYNTAX (prevc) != Sword)
2487                 some_nonuppercase_initial = 1;
2488               else
2489                 some_multiletter_word = 1;
2490             }
2491           else if (UPPERCASEP (c))
2492             {
2493               some_uppercase = 1;
2494               if (SYNTAX (prevc) != Sword)
2495                 ;
2496               else
2497                 some_multiletter_word = 1;
2498             }
2499           else
2500             {
2501               /* If the initial is a caseless word constituent,
2502                  treat that like a lowercase initial.  */
2503               if (SYNTAX (prevc) != Sword)
2504                 some_nonuppercase_initial = 1;
2505             }
2506
2507           prevc = c;
2508         }
2509
2510       /* Convert to all caps if the old text is all caps
2511          and has at least one multiletter word.  */
2512       if (! some_lowercase && some_multiletter_word)
2513         case_action = all_caps;
2514       /* Capitalize each word, if the old text has all capitalized words.  */
2515       else if (!some_nonuppercase_initial && some_multiletter_word)
2516         case_action = cap_initial;
2517       else if (!some_nonuppercase_initial && some_uppercase)
2518         /* Should x -> yz, operating on X, give Yz or YZ?
2519            We'll assume the latter.  */
2520         case_action = all_caps;
2521       else
2522         case_action = nochange;
2523     }
2524
2525   /* Do replacement in a string.  */
2526   if (!NILP (string))
2527     {
2528       Lisp_Object before, after;
2529
2530       before = Fsubstring (string, make_number (0),
2531                            make_number (search_regs.start[sub]));
2532       after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
2533
2534       /* Substitute parts of the match into NEWTEXT
2535          if desired.  */
2536       if (NILP (literal))
2537         {
2538           int lastpos = 0;
2539           int lastpos_byte = 0;
2540           /* We build up the substituted string in ACCUM.  */
2541           Lisp_Object accum;
2542           Lisp_Object middle;
2543           int length = SBYTES (newtext);
2544
2545           accum = Qnil;
2546
2547           for (pos_byte = 0, pos = 0; pos_byte < length;)
2548             {
2549               int substart = -1;
2550               int subend = 0;
2551               int delbackslash = 0;
2552
2553               FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2554
2555               if (c == '\\')
2556                 {
2557                   FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2558
2559                   if (c == '&')
2560                     {
2561                       substart = search_regs.start[sub];
2562                       subend = search_regs.end[sub];
2563                     }
2564                   else if (c >= '1' && c <= '9')
2565                     {
2566                       if (search_regs.start[c - '0'] >= 0
2567                           && c <= search_regs.num_regs + '0')
2568                         {
2569                           substart = search_regs.start[c - '0'];
2570                           subend = search_regs.end[c - '0'];
2571                         }
2572                       else
2573                         {
2574                           /* If that subexp did not match,
2575                              replace \\N with nothing.  */
2576                           substart = 0;
2577                           subend = 0;
2578                         }
2579                     }
2580                   else if (c == '\\')
2581                     delbackslash = 1;
2582                   else
2583                     error ("Invalid use of `\\' in replacement text");
2584                 }
2585               if (substart >= 0)
2586                 {
2587                   if (pos - 2 != lastpos)
2588                     middle = substring_both (newtext, lastpos,
2589                                              lastpos_byte,
2590                                              pos - 2, pos_byte - 2);
2591                   else
2592                     middle = Qnil;
2593                   accum = concat3 (accum, middle,
2594                                    Fsubstring (string,
2595                                                make_number (substart),
2596                                                make_number (subend)));
2597                   lastpos = pos;
2598                   lastpos_byte = pos_byte;
2599                 }
2600               else if (delbackslash)
2601                 {
2602                   middle = substring_both (newtext, lastpos,
2603                                            lastpos_byte,
2604                                            pos - 1, pos_byte - 1);
2605
2606                   accum = concat2 (accum, middle);
2607                   lastpos = pos;
2608                   lastpos_byte = pos_byte;
2609                 }
2610             }
2611
2612           if (pos != lastpos)
2613             middle = substring_both (newtext, lastpos,
2614                                      lastpos_byte,
2615                                      pos, pos_byte);
2616           else
2617             middle = Qnil;
2618
2619           newtext = concat2 (accum, middle);
2620         }
2621
2622       /* Do case substitution in NEWTEXT if desired.  */
2623       if (case_action == all_caps)
2624         newtext = Fupcase (newtext);
2625       else if (case_action == cap_initial)
2626         newtext = Fupcase_initials (newtext);
2627
2628       return concat3 (before, newtext, after);
2629     }
2630
2631   /* Record point, then move (quietly) to the start of the match.  */
2632   if (PT >= search_regs.end[sub])
2633     opoint = PT - ZV;
2634   else if (PT > search_regs.start[sub])
2635     opoint = search_regs.end[sub] - ZV;
2636   else
2637     opoint = PT;
2638
2639   /* If we want non-literal replacement,
2640      perform substitution on the replacement string.  */
2641   if (NILP (literal))
2642     {
2643       int length = SBYTES (newtext);
2644       unsigned char *substed;
2645       int substed_alloc_size, substed_len;
2646       int buf_multibyte = !NILP (current_buffer->enable_multibyte_characters);
2647       int str_multibyte = STRING_MULTIBYTE (newtext);
2648       Lisp_Object rev_tbl;
2649       int really_changed = 0;
2650
2651       rev_tbl= (!buf_multibyte && CHAR_TABLE_P (Vnonascii_translation_table)
2652                 ? Fchar_table_extra_slot (Vnonascii_translation_table,
2653                                           make_number (0))
2654                 : Qnil);
2655
2656       substed_alloc_size = length * 2 + 100;
2657       substed = (unsigned char *) xmalloc (substed_alloc_size + 1);
2658       substed_len = 0;
2659
2660       /* Go thru NEWTEXT, producing the actual text to insert in
2661          SUBSTED while adjusting multibyteness to that of the current
2662          buffer.  */
2663
2664       for (pos_byte = 0, pos = 0; pos_byte < length;)
2665         {
2666           unsigned char str[MAX_MULTIBYTE_LENGTH];
2667           unsigned char *add_stuff = NULL;
2668           int add_len = 0;
2669           int idx = -1;
2670
2671           if (str_multibyte)
2672             {
2673               FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte);
2674               if (!buf_multibyte)
2675                 c = multibyte_char_to_unibyte (c, rev_tbl);
2676             }
2677           else
2678             {
2679               /* Note that we don't have to increment POS.  */
2680               c = SREF (newtext, pos_byte++);
2681               if (buf_multibyte)
2682                 c = unibyte_char_to_multibyte (c);
2683             }
2684
2685           /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2686              or set IDX to a match index, which means put that part
2687              of the buffer text into SUBSTED.  */
2688
2689           if (c == '\\')
2690             {
2691               really_changed = 1;
2692
2693               if (str_multibyte)
2694                 {
2695                   FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext,
2696                                                       pos, pos_byte);
2697                   if (!buf_multibyte && !SINGLE_BYTE_CHAR_P (c))
2698                     c = multibyte_char_to_unibyte (c, rev_tbl);
2699                 }
2700               else
2701                 {
2702                   c = SREF (newtext, pos_byte++);
2703                   if (buf_multibyte)
2704                     c = unibyte_char_to_multibyte (c);
2705                 }
2706
2707               if (c == '&')
2708                 idx = sub;
2709               else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
2710                 {
2711                   if (search_regs.start[c - '0'] >= 1)
2712                     idx = c - '0';
2713                 }
2714               else if (c == '\\')
2715                 add_len = 1, add_stuff = "\\";
2716               else
2717                 {
2718                   xfree (substed);
2719                   error ("Invalid use of `\\' in replacement text");
2720                 }
2721             }
2722           else
2723             {
2724               add_len = CHAR_STRING (c, str);
2725               add_stuff = str;
2726             }
2727
2728           /* If we want to copy part of a previous match,
2729              set up ADD_STUFF and ADD_LEN to point to it.  */
2730           if (idx >= 0)
2731             {
2732               int begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
2733               add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
2734               if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
2735                 move_gap (search_regs.start[idx]);
2736               add_stuff = BYTE_POS_ADDR (begbyte);
2737             }
2738
2739           /* Now the stuff we want to add to SUBSTED
2740              is invariably ADD_LEN bytes starting at ADD_STUFF.  */
2741
2742           /* Make sure SUBSTED is big enough.  */
2743           if (substed_len + add_len >= substed_alloc_size)
2744             {
2745               substed_alloc_size = substed_len + add_len + 500;
2746               substed = (unsigned char *) xrealloc (substed,
2747                                                     substed_alloc_size + 1);
2748             }
2749
2750           /* Now add to the end of SUBSTED.  */
2751           if (add_stuff)
2752             {
2753               bcopy (add_stuff, substed + substed_len, add_len);
2754               substed_len += add_len;
2755             }
2756         }
2757
2758       if (really_changed)
2759         {
2760           if (buf_multibyte)
2761             {
2762               int nchars = multibyte_chars_in_text (substed, substed_len);
2763
2764               newtext = make_multibyte_string (substed, nchars, substed_len);
2765             }
2766           else
2767             newtext = make_unibyte_string (substed, substed_len);
2768         }
2769       xfree (substed);
2770     }
2771
2772   /* Replace the old text with the new in the cleanest possible way.  */
2773   replace_range (search_regs.start[sub], search_regs.end[sub],
2774                  newtext, 1, 0, 1);
2775   newpoint = search_regs.start[sub] + SCHARS (newtext);
2776
2777   if (case_action == all_caps)
2778     Fupcase_region (make_number (search_regs.start[sub]),
2779                     make_number (newpoint));
2780   else if (case_action == cap_initial)
2781     Fupcase_initials_region (make_number (search_regs.start[sub]),
2782                              make_number (newpoint));
2783
2784   /* Adjust search data for this change.  */
2785   {
2786     int oldend = search_regs.end[sub];
2787     int oldstart = search_regs.start[sub];
2788     int change = newpoint - search_regs.end[sub];
2789     int i;
2790
2791     for (i = 0; i < search_regs.num_regs; i++)
2792       {
2793         if (search_regs.start[i] >= oldend)
2794           search_regs.start[i] += change;
2795         else if (search_regs.start[i] > oldstart)
2796           search_regs.start[i] = oldstart;
2797         if (search_regs.end[i] >= oldend)
2798           search_regs.end[i] += change;
2799         else if (search_regs.end[i] > oldstart)
2800           search_regs.end[i] = oldstart;
2801       }
2802   }
2803
2804   /* Put point back where it was in the text.  */
2805   if (opoint <= 0)
2806     TEMP_SET_PT (opoint + ZV);
2807   else
2808     TEMP_SET_PT (opoint);
2809
2810   /* Now move point "officially" to the start of the inserted replacement.  */
2811   move_if_not_intangible (newpoint);
2812
2813   return Qnil;
2814 }
2815 \f
2816 static Lisp_Object
2817 match_limit (num, beginningp)
2818      Lisp_Object num;
2819      int beginningp;
2820 {
2821   register int n;
2822
2823   CHECK_NUMBER (num);
2824   n = XINT (num);
2825   if (n < 0)
2826     args_out_of_range (num, make_number (0));
2827   if (search_regs.num_regs <= 0)
2828     error ("No match data, because no search succeeded");
2829   if (n >= search_regs.num_regs
2830       || search_regs.start[n] < 0)
2831     return Qnil;
2832   return (make_number ((beginningp) ? search_regs.start[n]
2833                                     : search_regs.end[n]));
2834 }
2835
2836 DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
2837        doc: /* Return position of start of text matched by last search.
2838 SUBEXP, a number, specifies which parenthesized expression in the last
2839   regexp.
2840 Value is nil if SUBEXPth pair didn't match, or there were less than
2841   SUBEXP pairs.
2842 Zero means the entire text matched by the whole regexp or whole string.  */)
2843      (subexp)
2844      Lisp_Object subexp;
2845 {
2846   return match_limit (subexp, 1);
2847 }
2848
2849 DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
2850        doc: /* Return position of end of text matched by last search.
2851 SUBEXP, a number, specifies which parenthesized expression in the last
2852   regexp.
2853 Value is nil if SUBEXPth pair didn't match, or there were less than
2854   SUBEXP pairs.
2855 Zero means the entire text matched by the whole regexp or whole string.  */)
2856      (subexp)
2857      Lisp_Object subexp;
2858 {
2859   return match_limit (subexp, 0);
2860 }
2861
2862 DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 3, 0,
2863        doc: /* Return a list containing all info on what the last search matched.
2864 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2865 All the elements are markers or nil (nil if the Nth pair didn't match)
2866 if the last match was on a buffer; integers or nil if a string was matched.
2867 Use `store-match-data' to reinstate the data in this list.
2868
2869 If INTEGERS (the optional first argument) is non-nil, always use
2870 integers \(rather than markers) to represent buffer positions.  In
2871 this case, and if the last match was in a buffer, the buffer will get
2872 stored as one additional element at the end of the list.
2873
2874 If REUSE is a list, reuse it as part of the value.  If REUSE is long
2875 enough to hold all the values, and if INTEGERS is non-nil, no consing
2876 is done.
2877
2878 If optional third arg RESEAT is non-nil, any previous markers on the
2879 REUSE list will be modified to point to nowhere.
2880
2881 Return value is undefined if the last search failed.  */)
2882   (integers, reuse, reseat)
2883      Lisp_Object integers, reuse, reseat;
2884 {
2885   Lisp_Object tail, prev;
2886   Lisp_Object *data;
2887   int i, len;
2888
2889   if (!NILP (reseat))
2890     for (tail = reuse; CONSP (tail); tail = XCDR (tail))
2891       if (MARKERP (XCAR (tail)))
2892         {
2893           unchain_marker (XMARKER (XCAR (tail)));
2894           XSETCAR (tail, Qnil);
2895         }
2896
2897   if (NILP (last_thing_searched))
2898     return Qnil;
2899
2900   prev = Qnil;
2901
2902   data = (Lisp_Object *) alloca ((2 * search_regs.num_regs + 1)
2903                                  * sizeof (Lisp_Object));
2904
2905   len = 0;
2906   for (i = 0; i < search_regs.num_regs; i++)
2907     {
2908       int start = search_regs.start[i];
2909       if (start >= 0)
2910         {
2911           if (EQ (last_thing_searched, Qt)
2912               || ! NILP (integers))
2913             {
2914               XSETFASTINT (data[2 * i], start);
2915               XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
2916             }
2917           else if (BUFFERP (last_thing_searched))
2918             {
2919               data[2 * i] = Fmake_marker ();
2920               Fset_marker (data[2 * i],
2921                            make_number (start),
2922                            last_thing_searched);
2923               data[2 * i + 1] = Fmake_marker ();
2924               Fset_marker (data[2 * i + 1],
2925                            make_number (search_regs.end[i]),
2926                            last_thing_searched);
2927             }
2928           else
2929             /* last_thing_searched must always be Qt, a buffer, or Qnil.  */
2930             abort ();
2931
2932           len = 2 * i + 2;
2933         }
2934       else
2935         data[2 * i] = data[2 * i + 1] = Qnil;
2936     }
2937
2938   if (BUFFERP (last_thing_searched) && !NILP (integers))
2939     {
2940       data[len] = last_thing_searched;
2941       len++;
2942     }
2943
2944   /* If REUSE is not usable, cons up the values and return them.  */
2945   if (! CONSP (reuse))
2946     return Flist (len, data);
2947
2948   /* If REUSE is a list, store as many value elements as will fit
2949      into the elements of REUSE.  */
2950   for (i = 0, tail = reuse; CONSP (tail);
2951        i++, tail = XCDR (tail))
2952     {
2953       if (i < len)
2954         XSETCAR (tail, data[i]);
2955       else
2956         XSETCAR (tail, Qnil);
2957       prev = tail;
2958     }
2959
2960   /* If we couldn't fit all value elements into REUSE,
2961      cons up the rest of them and add them to the end of REUSE.  */
2962   if (i < len)
2963     XSETCDR (prev, Flist (len - i, data + i));
2964
2965   return reuse;
2966 }
2967
2968 /* We used to have an internal use variant of `reseat' described as:
2969
2970       If RESEAT is `evaporate', put the markers back on the free list
2971       immediately.  No other references to the markers must exist in this
2972       case, so it is used only internally on the unwind stack and
2973       save-match-data from Lisp.
2974
2975    But it was ill-conceived: those supposedly-internal markers get exposed via
2976    the undo-list, so freeing them here is unsafe.  */
2977
2978 DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 2, 0,
2979        doc: /* Set internal data on last search match from elements of LIST.
2980 LIST should have been created by calling `match-data' previously.
2981
2982 If optional arg RESEAT is non-nil, make markers on LIST point nowhere.  */)
2983     (list, reseat)
2984      register Lisp_Object list, reseat;
2985 {
2986   register int i;
2987   register Lisp_Object marker;
2988
2989   if (running_asynch_code)
2990     save_search_regs ();
2991
2992   CHECK_LIST (list);
2993
2994   /* Unless we find a marker with a buffer or an explicit buffer
2995      in LIST, assume that this match data came from a string.  */
2996   last_thing_searched = Qt;
2997
2998   /* Allocate registers if they don't already exist.  */
2999   {
3000     int length = XFASTINT (Flength (list)) / 2;
3001
3002     if (length > search_regs.num_regs)
3003       {
3004         if (search_regs.num_regs == 0)
3005           {
3006             search_regs.start
3007               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
3008             search_regs.end
3009               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
3010           }
3011         else
3012           {
3013             search_regs.start
3014               = (regoff_t *) xrealloc (search_regs.start,
3015                                        length * sizeof (regoff_t));
3016             search_regs.end
3017               = (regoff_t *) xrealloc (search_regs.end,
3018                                        length * sizeof (regoff_t));
3019           }
3020
3021         for (i = search_regs.num_regs; i < length; i++)
3022           search_regs.start[i] = -1;
3023
3024         search_regs.num_regs = length;
3025       }
3026
3027     for (i = 0; CONSP (list); i++)
3028       {
3029         marker = XCAR (list);
3030         if (BUFFERP (marker))
3031           {
3032             last_thing_searched = marker;
3033             break;
3034           }
3035         if (i >= length)
3036           break;
3037         if (NILP (marker))
3038           {
3039             search_regs.start[i] = -1;
3040             list = XCDR (list);
3041           }
3042         else
3043           {
3044             int from;
3045             Lisp_Object m;
3046
3047             m = marker;
3048             if (MARKERP (marker))
3049               {
3050                 if (XMARKER (marker)->buffer == 0)
3051                   XSETFASTINT (marker, 0);
3052                 else
3053                   XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
3054               }
3055
3056             CHECK_NUMBER_COERCE_MARKER (marker);
3057             from = XINT (marker);
3058
3059             if (!NILP (reseat) && MARKERP (m))
3060               {
3061                 unchain_marker (XMARKER (m));
3062                 XSETCAR (list, Qnil);
3063               }
3064
3065             if ((list = XCDR (list), !CONSP (list)))
3066               break;
3067
3068             m = marker = XCAR (list);
3069
3070             if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
3071               XSETFASTINT (marker, 0);
3072
3073             CHECK_NUMBER_COERCE_MARKER (marker);
3074             search_regs.start[i] = from;
3075             search_regs.end[i] = XINT (marker);
3076
3077             if (!NILP (reseat) && MARKERP (m))
3078               {
3079                 unchain_marker (XMARKER (m));
3080                 XSETCAR (list, Qnil);
3081               }
3082           }
3083         list = XCDR (list);
3084       }
3085
3086     for (; i < search_regs.num_regs; i++)
3087       search_regs.start[i] = -1;
3088   }
3089
3090   return Qnil;
3091 }
3092
3093 /* If non-zero the match data have been saved in saved_search_regs
3094    during the execution of a sentinel or filter. */
3095 static int search_regs_saved;
3096 static struct re_registers saved_search_regs;
3097 static Lisp_Object saved_last_thing_searched;
3098
3099 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
3100    if asynchronous code (filter or sentinel) is running. */
3101 static void
3102 save_search_regs ()
3103 {
3104   if (!search_regs_saved)
3105     {
3106       saved_search_regs.num_regs = search_regs.num_regs;
3107       saved_search_regs.start = search_regs.start;
3108       saved_search_regs.end = search_regs.end;
3109       saved_last_thing_searched = last_thing_searched;
3110       last_thing_searched = Qnil;
3111       search_regs.num_regs = 0;
3112       search_regs.start = 0;
3113       search_regs.end = 0;
3114
3115       search_regs_saved = 1;
3116     }
3117 }
3118
3119 /* Called upon exit from filters and sentinels. */
3120 void
3121 restore_search_regs ()
3122 {
3123   if (search_regs_saved)
3124     {
3125       if (search_regs.num_regs > 0)
3126         {
3127           xfree (search_regs.start);
3128           xfree (search_regs.end);
3129         }
3130       search_regs.num_regs = saved_search_regs.num_regs;
3131       search_regs.start = saved_search_regs.start;
3132       search_regs.end = saved_search_regs.end;
3133       last_thing_searched = saved_last_thing_searched;
3134       saved_last_thing_searched = Qnil;
3135       search_regs_saved = 0;
3136     }
3137 }
3138
3139 static Lisp_Object
3140 unwind_set_match_data (list)
3141      Lisp_Object list;
3142 {
3143   /* It is NOT ALWAYS safe to free (evaporate) the markers immediately.  */
3144   return Fset_match_data (list, Qt);
3145 }
3146
3147 /* Called to unwind protect the match data.  */
3148 void
3149 record_unwind_save_match_data ()
3150 {
3151   record_unwind_protect (unwind_set_match_data,
3152                          Fmatch_data (Qnil, Qnil, Qnil));
3153 }
3154
3155 /* Quote a string to inactivate reg-expr chars */
3156
3157 DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
3158        doc: /* Return a regexp string which matches exactly STRING and nothing else.  */)
3159      (string)
3160      Lisp_Object string;
3161 {
3162   register unsigned char *in, *out, *end;
3163   register unsigned char *temp;
3164   int backslashes_added = 0;
3165
3166   CHECK_STRING (string);
3167
3168   temp = (unsigned char *) alloca (SBYTES (string) * 2);
3169
3170   /* Now copy the data into the new string, inserting escapes. */
3171
3172   in = SDATA (string);
3173   end = in + SBYTES (string);
3174   out = temp;
3175
3176   for (; in != end; in++)
3177     {
3178       if (*in == '['
3179           || *in == '*' || *in == '.' || *in == '\\'
3180           || *in == '?' || *in == '+'
3181           || *in == '^' || *in == '$')
3182         *out++ = '\\', backslashes_added++;
3183       *out++ = *in;
3184     }
3185
3186   return make_specified_string (temp,
3187                                 SCHARS (string) + backslashes_added,
3188                                 out - temp,
3189                                 STRING_MULTIBYTE (string));
3190 }
3191 \f
3192 void
3193 syms_of_search ()
3194 {
3195   register int i;
3196
3197   for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
3198     {
3199       searchbufs[i].buf.allocated = 100;
3200       searchbufs[i].buf.buffer = (unsigned char *) xmalloc (100);
3201       searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
3202       searchbufs[i].regexp = Qnil;
3203       searchbufs[i].whitespace_regexp = Qnil;
3204       searchbufs[i].syntax_table = Qnil;
3205       staticpro (&searchbufs[i].regexp);
3206       staticpro (&searchbufs[i].whitespace_regexp);
3207       staticpro (&searchbufs[i].syntax_table);
3208       searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
3209     }
3210   searchbuf_head = &searchbufs[0];
3211
3212   Qsearch_failed = intern ("search-failed");
3213   staticpro (&Qsearch_failed);
3214   Qinvalid_regexp = intern ("invalid-regexp");
3215   staticpro (&Qinvalid_regexp);
3216
3217   Fput (Qsearch_failed, Qerror_conditions,
3218         Fcons (Qsearch_failed, Fcons (Qerror, Qnil)));
3219   Fput (Qsearch_failed, Qerror_message,
3220         build_string ("Search failed"));
3221
3222   Fput (Qinvalid_regexp, Qerror_conditions,
3223         Fcons (Qinvalid_regexp, Fcons (Qerror, Qnil)));
3224   Fput (Qinvalid_regexp, Qerror_message,
3225         build_string ("Invalid regexp"));
3226
3227   last_thing_searched = Qnil;
3228   staticpro (&last_thing_searched);
3229
3230   saved_last_thing_searched = Qnil;
3231   staticpro (&saved_last_thing_searched);
3232
3233   DEFVAR_LISP ("search-spaces-regexp", &Vsearch_spaces_regexp,
3234       doc: /* Regexp to substitute for bunches of spaces in regexp search.
3235 Some commands use this for user-specified regexps.
3236 Spaces that occur inside character classes or repetition operators
3237 or other such regexp constructs are not replaced with this.
3238 A value of nil (which is the normal value) means treat spaces literally.  */);
3239   Vsearch_spaces_regexp = Qnil;
3240
3241   DEFVAR_LISP ("inhibit-changing-match-data", &Vinhibit_changing_match_data,
3242       doc: /* Internal use only.
3243 If non-nil, the primitive searching and matching functions
3244 such as `looking-at', `string-match', `re-search-forward', etc.,
3245 do not set the match data.  The proper way to use this variable
3246 is to bind it with `let' around a small expression.  */);
3247   Vinhibit_changing_match_data = Qnil;
3248
3249   defsubr (&Slooking_at);
3250   defsubr (&Sposix_looking_at);
3251   defsubr (&Sstring_match);
3252   defsubr (&Sposix_string_match);
3253   defsubr (&Ssearch_forward);
3254   defsubr (&Ssearch_backward);
3255   defsubr (&Sword_search_forward);
3256   defsubr (&Sword_search_backward);
3257   defsubr (&Sre_search_forward);
3258   defsubr (&Sre_search_backward);
3259   defsubr (&Sposix_search_forward);
3260   defsubr (&Sposix_search_backward);
3261   defsubr (&Sreplace_match);
3262   defsubr (&Smatch_beginning);
3263   defsubr (&Smatch_end);
3264   defsubr (&Smatch_data);
3265   defsubr (&Sset_match_data);
3266   defsubr (&Sregexp_quote);
3267 }
3268
3269 /* arch-tag: a6059d79-0552-4f14-a2cb-d379a4e3c78f
3270    (do not change this comment) */