src/search.c

   1 /* String search routines for GNU Emacs.
   2    Copyright (C) 1985, 1986, 1987, 1993, 1994, 1997, 1998, 1999, 2001, 2002,
   3                  2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   4                  Free Software Foundation, Inc.
   5
   6 This file is part of GNU Emacs.
   7
   8 GNU Emacs is free software: you can redistribute it and/or modify
   9 it under the terms of the GNU General Public License as published by
  10 the Free Software Foundation, either version 3 of the License, or
  11 (at your option) any later version.
  12
  13 GNU Emacs is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
  20
  21
  22 #include <config.h>
  23 #include <setjmp.h>
  24 #include "lisp.h"
  25 #include "syntax.h"
  26 #include "category.h"
  27 #include "buffer.h"
  28 #include "character.h"
  29 #include "charset.h"
  30 #include "region-cache.h"
  31 #include "commands.h"
  32 #include "blockinput.h"
  33 #include "intervals.h"
  34
  35 #include <sys/types.h>
  36 #include "regex.h"
  37
  38 #define REGEXP_CACHE_SIZE 20
  39
  40 /* If the regexp is non-nil, then the buffer contains the compiled form
  41    of that regexp, suitable for searching.  */
  42 struct regexp_cache
  43 {
  44   struct regexp_cache *next;
  45   Lisp_Object regexp, whitespace_regexp;
  46   /* Syntax table for which the regexp applies.  We need this because
  47      of character classes.  If this is t, then the compiled pattern is valid
  48      for any syntax-table.  */
  49   Lisp_Object syntax_table;
  50   struct re_pattern_buffer buf;
  51   char fastmap[0400];
  52   /* Nonzero means regexp was compiled to do full POSIX backtracking.  */
  53   char posix;
  54 };
  55
/* The instances of that struct: a statically allocated array of
   REGEXP_CACHE_SIZE cache slots.  */
struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];

/* The head of the linked list; points to the most recently used buffer.
   compile_pattern moves the slot it hands out to this position.  */
struct regexp_cache *searchbuf_head;


/* Every call to re_match, etc., must pass &search_regs as the regs
   argument unless you can show it is unnecessary (i.e., if re_match
   is certainly going to be called again before region-around-match
   can be called).

   Since the registers are now dynamically allocated, we need to make
   sure not to refer to the Nth register before checking that it has
   been allocated by checking search_regs.num_regs.

   The regex code keeps track of whether it has allocated the search
   buffer using bits in the re_pattern_buffer.  This means that whenever
   you compile a new pattern, it completely forgets whether it has
   allocated any registers, and will allocate new registers the next
   time you call a searching or matching function.  Therefore, we need
   to call re_set_registers after compiling a new pattern or after
   setting the match registers, so that the regex functions will be
   able to free or re-allocate it properly.  */
static struct re_registers search_regs;

/* The buffer in which the last search was performed, or
   Qt if the last search was done in a string;
   Qnil if no searching has been done yet.  */
static Lisp_Object last_thing_searched;

/* Error condition signaled when compiling a regexp fails; see the
   xsignal1 call in compile_pattern_1.  */

Lisp_Object Qinvalid_regexp;

/* Error condition used for failing searches.  */
Lisp_Object Qsearch_failed;

/* When this is a string, compile_pattern_1 passes its contents to
   re_set_whitespace_regexp for the duration of the compilation and
   records the string in the cache slot; any other value disables
   that.  */
Lisp_Object Vsearch_spaces_regexp;

/* If non-nil, the match data will not be changed during call to
   searching or matching functions.  This variable is for internal use
   only.  */
Lisp_Object Vinhibit_changing_match_data;
 100
 101 static void set_search_regs (EMACS_INT, EMACS_INT);
 102 static void save_search_regs (void);
 103 static EMACS_INT simple_search (int, unsigned char *, int, int,
 104                                 Lisp_Object, EMACS_INT, EMACS_INT,
 105                                 EMACS_INT, EMACS_INT);
 106 static EMACS_INT boyer_moore (int, unsigned char *, int, int,
 107                               Lisp_Object, Lisp_Object,
 108                               EMACS_INT, EMACS_INT,
 109                               EMACS_INT, EMACS_INT, int);
 110 static EMACS_INT search_buffer (Lisp_Object, EMACS_INT, EMACS_INT,
 111                                 EMACS_INT, EMACS_INT, int, int,
 112                                 Lisp_Object, Lisp_Object, int);
/* Signal a Lisp error reporting that the regexp matcher overflowed its
   stack.  The matching functions in this file call it when re_match_2
   or re_search returns -2.  Declared NO_RETURN.  */
static void matcher_overflow (void) NO_RETURN;

static void
matcher_overflow (void)
{
  error ("Stack overflow in regexp matcher");
}
 120
 121 /* Compile a regexp and signal a Lisp error if anything goes wrong.
 122    PATTERN is the pattern to compile.
 123    CP is the place to put the result.
 124    TRANSLATE is a translation table for ignoring case, or nil for none.
 125    REGP is the structure that says where to store the "register"
 126    values that will result from matching this pattern.
 127    If it is 0, we should compile the pattern not to record any
 128    subexpression bounds.
 129    POSIX is nonzero if we want full backtracking (POSIX style)
 130    for this pattern.  0 means backtrack only enough to get a valid match.
 131
 132    The behavior also depends on Vsearch_spaces_regexp.  */
 133
 134 static void
 135 compile_pattern_1 (struct regexp_cache *cp, Lisp_Object pattern, Lisp_Object translate, struct re_registers *regp, int posix)
 136 {
 137   char *val;
 138   reg_syntax_t old;
 139
 140   cp->regexp = Qnil;
 141   cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
 142   cp->posix = posix;
 143   cp->buf.multibyte = STRING_MULTIBYTE (pattern);
 144   cp->buf.charset_unibyte = charset_unibyte;
 145   if (STRINGP (Vsearch_spaces_regexp))
 146     cp->whitespace_regexp = Vsearch_spaces_regexp;
 147   else
 148     cp->whitespace_regexp = Qnil;
 149
 150   /* rms: I think BLOCK_INPUT is not needed here any more,
 151      because regex.c defines malloc to call xmalloc.
 152      Using BLOCK_INPUT here means the debugger won't run if an error occurs.
 153      So let's turn it off.  */
 154   /*  BLOCK_INPUT;  */
 155   old = re_set_syntax (RE_SYNTAX_EMACS
 156                        | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
 157
 158   if (STRINGP (Vsearch_spaces_regexp))
 159     re_set_whitespace_regexp (SDATA (Vsearch_spaces_regexp));
 160   else
 161     re_set_whitespace_regexp (NULL);
 162
 163   val = (char *) re_compile_pattern ((char *) SDATA (pattern),
 164                                      SBYTES (pattern), &cp->buf);
 165
 166   /* If the compiled pattern hard codes some of the contents of the
 167      syntax-table, it can only be reused with *this* syntax table.  */
 168   cp->syntax_table = cp->buf.used_syntax ? current_buffer->syntax_table : Qt;
 169
 170   re_set_whitespace_regexp (NULL);
 171
 172   re_set_syntax (old);
 173   /* UNBLOCK_INPUT;  */
 174   if (val)
 175     xsignal1 (Qinvalid_regexp, build_string (val));
 176
 177   cp->regexp = Fcopy_sequence (pattern);
 178 }
 179
 180 /* Shrink each compiled regexp buffer in the cache
 181    to the size actually used right now.
 182    This is called from garbage collection.  */
 183
 184 void
 185 shrink_regexp_cache (void)
 186 {
 187   struct regexp_cache *cp;
 188
 189   for (cp = searchbuf_head; cp != 0; cp = cp->next)
 190     {
 191       cp->buf.allocated = cp->buf.used;
 192       cp->buf.buffer
 193         = (unsigned char *) xrealloc (cp->buf.buffer, cp->buf.used);
 194     }
 195 }
 196
 197 /* Clear the regexp cache w.r.t. a particular syntax table,
 198    because it was changed.
 199    There is no danger of memory leak here because re_compile_pattern
 200    automagically manages the memory in each re_pattern_buffer struct,
 201    based on its `allocated' and `buffer' values.  */
 202 void
 203 clear_regexp_cache (void)
 204 {
 205   int i;
 206
 207   for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
 208     /* It's tempting to compare with the syntax-table we've actually changed,
 209        but it's not sufficient because char-table inheritance means that
 210        modifying one syntax-table can change others at the same time.  */
 211     if (!EQ (searchbufs[i].syntax_table, Qt))
 212       searchbufs[i].regexp = Qnil;
 213 }
 214
 215 /* Compile a regexp if necessary, but first check to see if there's one in
 216    the cache.
 217    PATTERN is the pattern to compile.
 218    TRANSLATE is a translation table for ignoring case, or nil for none.
 219    REGP is the structure that says where to store the "register"
 220    values that will result from matching this pattern.
 221    If it is 0, we should compile the pattern not to record any
 222    subexpression bounds.
 223    POSIX is nonzero if we want full backtracking (POSIX style)
 224    for this pattern.  0 means backtrack only enough to get a valid match.  */
 225
 226 struct re_pattern_buffer *
 227 compile_pattern (Lisp_Object pattern, struct re_registers *regp, Lisp_Object translate, int posix, int multibyte)
 228 {
 229   struct regexp_cache *cp, **cpp;
 230
 231   for (cpp = &searchbuf_head; ; cpp = &cp->next)
 232     {
 233       cp = *cpp;
 234       /* Entries are initialized to nil, and may be set to nil by
 235          compile_pattern_1 if the pattern isn't valid.  Don't apply
 236          string accessors in those cases.  However, compile_pattern_1
 237          is only applied to the cache entry we pick here to reuse.  So
 238          nil should never appear before a non-nil entry.  */
 239       if (NILP (cp->regexp))
 240         goto compile_it;
 241       if (SCHARS (cp->regexp) == SCHARS (pattern)
 242           && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
 243           && !NILP (Fstring_equal (cp->regexp, pattern))
 244           && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
 245           && cp->posix == posix
 246           && (EQ (cp->syntax_table, Qt)
 247               || EQ (cp->syntax_table, current_buffer->syntax_table))
 248           && !NILP (Fequal (cp->whitespace_regexp, Vsearch_spaces_regexp))
 249           && cp->buf.charset_unibyte == charset_unibyte)
 250         break;
 251
 252       /* If we're at the end of the cache, compile into the nil cell
 253          we found, or the last (least recently used) cell with a
 254          string value.  */
 255       if (cp->next == 0)
 256         {
 257         compile_it:
 258           compile_pattern_1 (cp, pattern, translate, regp, posix);
 259           break;
 260         }
 261     }
 262
 263   /* When we get here, cp (aka *cpp) contains the compiled pattern,
 264      either because we found it in the cache or because we just compiled it.
 265      Move it to the front of the queue to mark it as most recently used.  */
 266   *cpp = cp->next;
 267   cp->next = searchbuf_head;
 268   searchbuf_head = cp;
 269
 270   /* Advise the searching functions about the space we have allocated
 271      for register data.  */
 272   if (regp)
 273     re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
 274
 275   /* The compiled pattern can be used both for multibyte and unibyte
 276      target.  But, we have to tell which the pattern is used for. */
 277   cp->buf.target_multibyte = multibyte;
 278
 279   return &cp->buf;
 280 }
 281
 282 \f
 283 static Lisp_Object
 284 looking_at_1 (Lisp_Object string, int posix)
 285 {
 286   Lisp_Object val;
 287   unsigned char *p1, *p2;
 288   EMACS_INT s1, s2;
 289   register int i;
 290   struct re_pattern_buffer *bufp;
 291
 292   if (running_asynch_code)
 293     save_search_regs ();
 294
 295   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
 296   XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
 297     = current_buffer->case_eqv_table;
 298
 299   CHECK_STRING (string);
 300   bufp = compile_pattern (string,
 301                           (NILP (Vinhibit_changing_match_data)
 302                            ? &search_regs : NULL),
 303                           (!NILP (current_buffer->case_fold_search)
 304                            ? current_buffer->case_canon_table : Qnil),
 305                           posix,
 306                           !NILP (current_buffer->enable_multibyte_characters));
 307
 308   immediate_quit = 1;
 309   QUIT;                 /* Do a pending quit right away, to avoid paradoxical behavior */
 310
 311   /* Get pointers and sizes of the two strings
 312      that make up the visible portion of the buffer. */
 313
 314   p1 = BEGV_ADDR;
 315   s1 = GPT_BYTE - BEGV_BYTE;
 316   p2 = GAP_END_ADDR;
 317   s2 = ZV_BYTE - GPT_BYTE;
 318   if (s1 < 0)
 319     {
 320       p2 = p1;
 321       s2 = ZV_BYTE - BEGV_BYTE;
 322       s1 = 0;
 323     }
 324   if (s2 < 0)
 325     {
 326       s1 = ZV_BYTE - BEGV_BYTE;
 327       s2 = 0;
 328     }
 329
 330   re_match_object = Qnil;
 331
 332   i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
 333                   PT_BYTE - BEGV_BYTE,
 334                   (NILP (Vinhibit_changing_match_data)
 335                    ? &search_regs : NULL),
 336                   ZV_BYTE - BEGV_BYTE);
 337   immediate_quit = 0;
 338
 339   if (i == -2)
 340     matcher_overflow ();
 341
 342   val = (0 <= i ? Qt : Qnil);
 343   if (NILP (Vinhibit_changing_match_data) && i >= 0)
 344     for (i = 0; i < search_regs.num_regs; i++)
 345       if (search_regs.start[i] >= 0)
 346         {
 347           search_regs.start[i]
 348             = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
 349           search_regs.end[i]
 350             = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
 351         }
 352
 353   /* Set last_thing_searched only when match data is changed.  */
 354   if (NILP (Vinhibit_changing_match_data))
 355     XSETBUFFER (last_thing_searched, current_buffer);
 356
 357   return val;
 358 }
 359
/* Lisp primitive `looking-at': looking_at_1 with POSIX == 0.  */
DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
       doc: /* Return t if text after point matches regular expression REGEXP.
This function modifies the match data that `match-beginning',
`match-end' and `match-data' access; save and restore the match
data if you want to preserve them.  */)
  (Lisp_Object regexp)
{
  return looking_at_1 (regexp, 0);
}
 369
/* Lisp primitive `posix-looking-at': looking_at_1 with POSIX == 1.  */
DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
       doc: /* Return t if text after point matches regular expression REGEXP.
Find the longest match, in accord with Posix regular expression rules.
This function modifies the match data that `match-beginning',
`match-end' and `match-data' access; save and restore the match
data if you want to preserve them.  */)
  (Lisp_Object regexp)
{
  return looking_at_1 (regexp, 1);
}
 380 \f
 381 static Lisp_Object
 382 string_match_1 (Lisp_Object regexp, Lisp_Object string, Lisp_Object start, int posix)
 383 {
 384   int val;
 385   struct re_pattern_buffer *bufp;
 386   EMACS_INT pos, pos_byte;
 387   int i;
 388
 389   if (running_asynch_code)
 390     save_search_regs ();
 391
 392   CHECK_STRING (regexp);
 393   CHECK_STRING (string);
 394
 395   if (NILP (start))
 396     pos = 0, pos_byte = 0;
 397   else
 398     {
 399       int len = SCHARS (string);
 400
 401       CHECK_NUMBER (start);
 402       pos = XINT (start);
 403       if (pos < 0 && -pos <= len)
 404         pos = len + pos;
 405       else if (0 > pos || pos > len)
 406         args_out_of_range (string, start);
 407       pos_byte = string_char_to_byte (string, pos);
 408     }
 409
 410   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
 411   XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
 412     = current_buffer->case_eqv_table;
 413
 414   bufp = compile_pattern (regexp,
 415                           (NILP (Vinhibit_changing_match_data)
 416                            ? &search_regs : NULL),
 417                           (!NILP (current_buffer->case_fold_search)
 418                            ? current_buffer->case_canon_table : Qnil),
 419                           posix,
 420                           STRING_MULTIBYTE (string));
 421   immediate_quit = 1;
 422   re_match_object = string;
 423
 424   val = re_search (bufp, (char *) SDATA (string),
 425                    SBYTES (string), pos_byte,
 426                    SBYTES (string) - pos_byte,
 427                    (NILP (Vinhibit_changing_match_data)
 428                     ? &search_regs : NULL));
 429   immediate_quit = 0;
 430
 431   /* Set last_thing_searched only when match data is changed.  */
 432   if (NILP (Vinhibit_changing_match_data))
 433     last_thing_searched = Qt;
 434
 435   if (val == -2)
 436     matcher_overflow ();
 437   if (val < 0) return Qnil;
 438
 439   if (NILP (Vinhibit_changing_match_data))
 440     for (i = 0; i < search_regs.num_regs; i++)
 441       if (search_regs.start[i] >= 0)
 442         {
 443           search_regs.start[i]
 444             = string_byte_to_char (string, search_regs.start[i]);
 445           search_regs.end[i]
 446             = string_byte_to_char (string, search_regs.end[i]);
 447         }
 448
 449   return make_number (string_byte_to_char (string, val));
 450 }
 451
/* Lisp primitive `string-match': string_match_1 with POSIX == 0.  */
DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
       doc: /* Return index of start of first match for REGEXP in STRING, or nil.
Matching ignores case if `case-fold-search' is non-nil.
If third arg START is non-nil, start search at that index in STRING.
For index of first char beyond the match, do (match-end 0).
`match-end' and `match-beginning' also give indices of substrings
matched by parenthesis constructs in the pattern.

You can use the function `match-string' to extract the substrings
matched by the parenthesis constructions in REGEXP. */)
  (Lisp_Object regexp, Lisp_Object string, Lisp_Object start)
{
  return string_match_1 (regexp, string, start, 0);
}
 466
/* Lisp primitive `posix-string-match': string_match_1 with POSIX == 1.  */
DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
       doc: /* Return index of start of first match for REGEXP in STRING, or nil.
Find the longest match, in accord with Posix regular expression rules.
Case is ignored if `case-fold-search' is non-nil in the current buffer.
If third arg START is non-nil, start search at that index in STRING.
For index of first char beyond the match, do (match-end 0).
`match-end' and `match-beginning' also give indices of substrings
matched by parenthesis constructs in the pattern.  */)
  (Lisp_Object regexp, Lisp_Object string, Lisp_Object start)
{
  return string_match_1 (regexp, string, start, 1);
}
 479
 480 /* Match REGEXP against STRING, searching all of STRING,
 481    and return the index of the match, or negative on failure.
 482    This does not clobber the match data.  */
 483
 484 int
 485 fast_string_match (Lisp_Object regexp, Lisp_Object string)
 486 {
 487   int val;
 488   struct re_pattern_buffer *bufp;
 489
 490   bufp = compile_pattern (regexp, 0, Qnil,
 491                           0, STRING_MULTIBYTE (string));
 492   immediate_quit = 1;
 493   re_match_object = string;
 494
 495   val = re_search (bufp, (char *) SDATA (string),
 496                    SBYTES (string), 0,
 497                    SBYTES (string), 0);
 498   immediate_quit = 0;
 499   return val;
 500 }
 501
 502 /* Match REGEXP against STRING, searching all of STRING ignoring case,
 503    and return the index of the match, or negative on failure.
 504    This does not clobber the match data.
 505    We assume that STRING contains single-byte characters.  */
 506
 507 int
 508 fast_c_string_match_ignore_case (Lisp_Object regexp, const char *string)
 509 {
 510   int val;
 511   struct re_pattern_buffer *bufp;
 512   int len = strlen (string);
 513
 514   regexp = string_make_unibyte (regexp);
 515   re_match_object = Qt;
 516   bufp = compile_pattern (regexp, 0,
 517                           Vascii_canon_table, 0,
 518                           0);
 519   immediate_quit = 1;
 520   val = re_search (bufp, string, len, 0, len, 0);
 521   immediate_quit = 0;
 522   return val;
 523 }
 524
 525 /* Like fast_string_match but ignore case.  */
 526
 527 int
 528 fast_string_match_ignore_case (Lisp_Object regexp, Lisp_Object string)
 529 {
 530   int val;
 531   struct re_pattern_buffer *bufp;
 532
 533   bufp = compile_pattern (regexp, 0, Vascii_canon_table,
 534                           0, STRING_MULTIBYTE (string));
 535   immediate_quit = 1;
 536   re_match_object = string;
 537
 538   val = re_search (bufp, (char *) SDATA (string),
 539                    SBYTES (string), 0,
 540                    SBYTES (string), 0);
 541   immediate_quit = 0;
 542   return val;
 543 }
 544 \f
 545 /* Match REGEXP against the characters after POS to LIMIT, and return
 546    the number of matched characters.  If STRING is non-nil, match
 547    against the characters in it.  In that case, POS and LIMIT are
 548    indices into the string.  This function doesn't modify the match
 549    data.  */
 550
 551 EMACS_INT
 552 fast_looking_at (Lisp_Object regexp, EMACS_INT pos, EMACS_INT pos_byte, EMACS_INT limit, EMACS_INT limit_byte, Lisp_Object string)
 553 {
 554   int multibyte;
 555   struct re_pattern_buffer *buf;
 556   unsigned char *p1, *p2;
 557   EMACS_INT s1, s2;
 558   EMACS_INT len;
 559
 560   if (STRINGP (string))
 561     {
 562       if (pos_byte < 0)
 563         pos_byte = string_char_to_byte (string, pos);
 564       if (limit_byte < 0)
 565         limit_byte = string_char_to_byte (string, limit);
 566       p1 = NULL;
 567       s1 = 0;
 568       p2 = SDATA (string);
 569       s2 = SBYTES (string);
 570       re_match_object = string;
 571       multibyte = STRING_MULTIBYTE (string);
 572     }
 573   else
 574     {
 575       if (pos_byte < 0)
 576         pos_byte = CHAR_TO_BYTE (pos);
 577       if (limit_byte < 0)
 578         limit_byte = CHAR_TO_BYTE (limit);
 579       pos_byte -= BEGV_BYTE;
 580       limit_byte -= BEGV_BYTE;
 581       p1 = BEGV_ADDR;
 582       s1 = GPT_BYTE - BEGV_BYTE;
 583       p2 = GAP_END_ADDR;
 584       s2 = ZV_BYTE - GPT_BYTE;
 585       if (s1 < 0)
 586         {
 587           p2 = p1;
 588           s2 = ZV_BYTE - BEGV_BYTE;
 589           s1 = 0;
 590         }
 591       if (s2 < 0)
 592         {
 593           s1 = ZV_BYTE - BEGV_BYTE;
 594           s2 = 0;
 595         }
 596       re_match_object = Qnil;
 597       multibyte = ! NILP (current_buffer->enable_multibyte_characters);
 598     }
 599
 600   buf = compile_pattern (regexp, 0, Qnil, 0, multibyte);
 601   immediate_quit = 1;
 602   len = re_match_2 (buf, (char *) p1, s1, (char *) p2, s2,
 603                     pos_byte, NULL, limit_byte);
 604   immediate_quit = 0;
 605
 606   return len;
 607 }
 608
 609 \f
 610 /* The newline cache: remembering which sections of text have no newlines.  */
 611
 612 /* If the user has requested newline caching, make sure it's on.
 613    Otherwise, make sure it's off.
 614    This is our cheezy way of associating an action with the change of
 615    state of a buffer-local variable.  */
 616 static void
 617 newline_cache_on_off (struct buffer *buf)
 618 {
 619   if (NILP (buf->cache_long_line_scans))
 620     {
 621       /* It should be off.  */
 622       if (buf->newline_cache)
 623         {
 624           free_region_cache (buf->newline_cache);
 625           buf->newline_cache = 0;
 626         }
 627     }
 628   else
 629     {
 630       /* It should be on.  */
 631       if (buf->newline_cache == 0)
 632         buf->newline_cache = new_region_cache ();
 633     }
 634 }
 635
 636 \f
 637 /* Search for COUNT instances of the character TARGET between START and END.
 638
 639    If COUNT is positive, search forwards; END must be >= START.
 640    If COUNT is negative, search backwards for the -COUNTth instance;
 641       END must be <= START.
 642    If COUNT is zero, do anything you please; run rogue, for all I care.
 643
 644    If END is zero, use BEGV or ZV instead, as appropriate for the
 645    direction indicated by COUNT.
 646
 647    If we find COUNT instances, set *SHORTAGE to zero, and return the
 648    position past the COUNTth match.  Note that for reverse motion
 649    this is not the same as the usual convention for Emacs motion commands.
 650
 651    If we don't find COUNT instances before reaching END, set *SHORTAGE
 652    to the number of TARGETs left unfound, and return END.
 653
 654    If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
 655    except when inside redisplay.  */
 656
int
scan_buffer (register int target, EMACS_INT start, EMACS_INT end, int count, int *shortage, int allow_quit)
{
  struct region_cache *newline_cache;
  /* +1 for a forward scan, -1 for a backward one.  Used only at the
     end, to turn the leftover COUNT into a non-negative shortage.  */
  int direction;

  /* Pick the direction from the sign of COUNT and default a zero END
     to the matching end of the accessible region.  */
  if (count > 0)
    {
      direction = 1;
      if (! end) end = ZV;
    }
  else
    {
      direction = -1;
      if (! end) end = BEGV;
    }

  /* Create or free the buffer's newline cache as its settings demand,
     then pick up whatever cache (possibly none) is now in place.  */
  newline_cache_on_off (current_buffer);
  newline_cache = current_buffer->newline_cache;

  /* Assume success until we fall out of the loops below.  */
  if (shortage != 0)
    *shortage = 0;

  immediate_quit = allow_quit;

  if (count > 0)
    while (start != end)
      {
        /* Our innermost scanning loop is very simple; it doesn't know
           about gaps, buffer ends, or the newline cache.  ceiling is
           the position of the last character before the next such
           obstacle --- the last character the dumb search loop should
           examine.  */
        EMACS_INT ceiling_byte = CHAR_TO_BYTE (end) - 1;
        EMACS_INT start_byte = CHAR_TO_BYTE (start);
        EMACS_INT tem;

        /* If we're looking for a newline, consult the newline cache
           to see where we can avoid some scanning.  */
        if (target == '\n' && newline_cache)
          {
            int next_change;
            /* immediate_quit is switched off while the cache is being
               consulted and restored to ALLOW_QUIT afterwards.  */
            immediate_quit = 0;
            while (region_cache_forward
                   (current_buffer, newline_cache, start_byte, &next_change))
              start_byte = next_change;
            immediate_quit = allow_quit;

            /* START should never be after END.  */
            if (start_byte > ceiling_byte)
              start_byte = ceiling_byte;

            /* Now the text after start is an unknown region, and
               next_change is the position of the next known region. */
            ceiling_byte = min (next_change - 1, ceiling_byte);
          }

        /* The dumb loop can only scan text stored in contiguous
           bytes. BUFFER_CEILING_OF returns the last character
           position that is contiguous, so the ceiling is the
           position after that.  */
        tem = BUFFER_CEILING_OF (start_byte);
        ceiling_byte = min (tem, ceiling_byte);

        {
          /* The termination address of the dumb loop.  */
          register unsigned char *ceiling_addr
            = BYTE_POS_ADDR (ceiling_byte) + 1;
          register unsigned char *cursor
            = BYTE_POS_ADDR (start_byte);
          /* BASE addresses byte START_BYTE, so START_BYTE + P - BASE
             is the byte position of address P.  */
          unsigned char *base = cursor;

          while (cursor < ceiling_addr)
            {
              unsigned char *scan_start = cursor;

              /* The dumb loop.  */
              while (*cursor != target && ++cursor < ceiling_addr)
                ;

              /* If we're looking for newlines, cache the fact that
                 the region from start to cursor is free of them. */
              if (target == '\n' && newline_cache)
                know_region_cache (current_buffer, newline_cache,
                                   start_byte + scan_start - base,
                                   start_byte + cursor - base);

              /* Did we find the target character?  */
              if (cursor < ceiling_addr)
                {
                  if (--count == 0)
                    {
                      immediate_quit = 0;
                      /* The + 1 yields the position just past the
                         matched character.  */
                      return BYTE_TO_CHAR (start_byte + cursor - base + 1);
                    }
                  cursor++;
                }
            }

          /* This stretch is exhausted; resume the outer loop from
             where the cursor stopped.  */
          start = BYTE_TO_CHAR (start_byte + cursor - base);
        }
      }
  else
    while (start > end)
      {
        /* The last character to check before the next obstacle.  */
        EMACS_INT ceiling_byte = CHAR_TO_BYTE (end);
        EMACS_INT start_byte = CHAR_TO_BYTE (start);
        EMACS_INT tem;

        /* Consult the newline cache, if appropriate.  */
        if (target == '\n' && newline_cache)
          {
            int next_change;
            /* As in the forward case, immediate_quit is off while the
               cache is being consulted.  */
            immediate_quit = 0;
            while (region_cache_backward
                   (current_buffer, newline_cache, start_byte, &next_change))
              start_byte = next_change;
            immediate_quit = allow_quit;

            /* Start should never be at or before end.  */
            if (start_byte <= ceiling_byte)
              start_byte = ceiling_byte + 1;

            /* Now the text before start is an unknown region, and
               next_change is the position of the next known region. */
            ceiling_byte = max (next_change, ceiling_byte);
          }

        /* Stop scanning before the gap.  */
        tem = BUFFER_FLOOR_OF (start_byte - 1);
        ceiling_byte = max (tem, ceiling_byte);

        {
          /* The termination address of the dumb loop.  */
          register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
          register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
          /* BASE addresses byte START_BYTE - 1, so START_BYTE + P - BASE
             is the byte position just after the character at address P.  */
          unsigned char *base = cursor;

          while (cursor >= ceiling_addr)
            {
              unsigned char *scan_start = cursor;

              /* The dumb loop, running backwards.  */
              while (*cursor != target && --cursor >= ceiling_addr)
                ;

              /* If we're looking for newlines, cache the fact that
                 the region from after the cursor to start is free of them.  */
              if (target == '\n' && newline_cache)
                know_region_cache (current_buffer, newline_cache,
                                   start_byte + cursor - base,
                                   start_byte + scan_start - base);

              /* Did we find the target character?  */
              if (cursor >= ceiling_addr)
                {
                  /* COUNT is negative here and climbs towards zero.  */
                  if (++count >= 0)
                    {
                      immediate_quit = 0;
                      return BYTE_TO_CHAR (start_byte + cursor - base);
                    }
                  cursor--;
                }
            }

          /* This stretch is exhausted; resume the outer loop from
             where the cursor stopped.  */
          start = BYTE_TO_CHAR (start_byte + cursor - base);
        }
      }

  /* We reached END without finding COUNT instances: report how many
     are still missing (as a non-negative number) and return where the
     scan stopped.  */
  immediate_quit = 0;
  if (shortage != 0)
    *shortage = count * direction;
  return start;
}
 831 \f
 832 /* Search for COUNT instances of a line boundary, which means either a
 833    newline or (if selective display enabled) a carriage return.
        NOTE(review): the loops in this function compare bytes against
        '\n' only; no carriage-return test is visible here.
 834    Start at START.  If COUNT is negative, search backwards.
        The START_BYTE argument is overwritten with CHAR_TO_BYTE (START)
        before scanning, so the value passed in is not used.  LIMIT and
        LIMIT_BYTE are the char and byte position at which scanning stops.
 835
 836    We report the resulting position by calling TEMP_SET_PT_BOTH.
 837
 838    If we find COUNT instances, we position after (always after,
 839    even if scanning backwards) the COUNTth match, and return 0.
 840
 841    If we don't find COUNT instances before reaching the end of the
 842    buffer (or the beginning, if scanning backwards), we return
 843    the number of line boundaries left unfound, and position at
 844    the limit we bumped up against.
 845
 846    If ALLOW_QUIT is non-zero, increment immediate_quit while scanning;
 847    the previous value is restored before returning.  That's good to do
        except in special cases.  */
 848
 849 int
 850 scan_newline (EMACS_INT start, EMACS_INT start_byte, EMACS_INT limit, EMACS_INT limit_byte, register int count, int allow_quit)
 851 {
       /* +1 for a forward scan (COUNT > 0), -1 otherwise; used only to
          make the "left unfound" return value non-negative.  */
 852   int direction = ((count > 0) ? 1 : -1);
 853
 854   register unsigned char *cursor;
 855   unsigned char *base;
 856
 857   EMACS_INT ceiling;
 858   register unsigned char *ceiling_addr;
 859
       /* Saved so that every exit path can put immediate_quit back.  */
 860   int old_immediate_quit = immediate_quit;
 861
 862   /* The code that follows is like scan_buffer
 863      but checks for either newline or carriage return.
          NOTE(review): only '\n' is actually tested below.  */
 864
 865   if (allow_quit)
 866     immediate_quit++;
 867
       /* Recompute the byte position from START, replacing whatever the
          caller passed as START_BYTE.  */
 868   start_byte = CHAR_TO_BYTE (start);
 869
 870   if (count > 0)
 871     {
           /* Forward scan, one contiguous stretch of bytes at a time.  */
 872       while (start_byte < limit_byte)
 873         {
               /* CEILING is the last byte position to examine in this
                  stretch: BUFFER_CEILING_OF (start_byte), clipped to the
                  byte just before LIMIT_BYTE.  CEILING_ADDR is one past
                  its address.  */
 874           ceiling =  BUFFER_CEILING_OF (start_byte);
 875           ceiling = min (limit_byte - 1, ceiling);
 876           ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
               /* BASE remembers where the stretch began so that pointer
                  offsets can be turned back into byte positions.  */
 877           base = (cursor = BYTE_POS_ADDR (start_byte));
 878           while (1)
 879             {
                   /* Advance to the next newline or to the end of the
                      stretch, whichever comes first.  */
 880               while (*cursor != '\n' && ++cursor != ceiling_addr)
 881                 ;
 882
 883               if (cursor != ceiling_addr)
 884                 {
                       /* CURSOR is on a newline.  */
 885                   if (--count == 0)
 886                     {
 887                       immediate_quit = old_immediate_quit;
                           /* The + 1 puts the result just after the
                              newline.  */
 888                       start_byte = start_byte + cursor - base + 1;
 889                       start = BYTE_TO_CHAR (start_byte);
 890                       TEMP_SET_PT_BOTH (start, start_byte);
 891                       return 0;
 892                     }
 893                   else
                         /* More to find: step over this newline, and leave
                            the stretch if that exhausts it.  */
 894                     if (++cursor == ceiling_addr)
 895                       break;
 896                 }
 897               else
 898                 break;
 899             }
               /* Account for the bytes consumed in this stretch.  */
 900           start_byte += cursor - base;
 901         }
 902     }
 903   else
 904     {
           /* Backward scan (also taken when COUNT is 0).  */
 905       while (start_byte > limit_byte)
 906         {
               /* Here CEILING is the lowest byte position to examine:
                  BUFFER_FLOOR_OF (start_byte - 1), clipped to LIMIT_BYTE.
                  CEILING_ADDR is one before its address.  */
 907           ceiling = BUFFER_FLOOR_OF (start_byte - 1);
 908           ceiling = max (limit_byte, ceiling);
 909           ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
               /* Start one past the byte before START_BYTE; the inner loop
                  pre-decrements before looking at a byte.  */
 910           base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
 911           while (1)
 912             {
                   /* Step back to the previous newline or off the low end
                      of the stretch.  */
 913               while (--cursor != ceiling_addr && *cursor != '\n')
 914                 ;
 915
 916               if (cursor != ceiling_addr)
 917                 {
 918                   if (++count == 0)
 919                     {
 920                       immediate_quit = old_immediate_quit;
 921                       /* Return the position AFTER the match we found.  */
 922                       start_byte = start_byte + cursor - base + 1;
 923                       start = BYTE_TO_CHAR (start_byte);
 924                       TEMP_SET_PT_BOTH (start, start_byte);
 925                       return 0;
 926                     }
 927                 }
 928               else
 929                 break;
 930             }
 931           /* Here we add 1 to compensate for the last decrement
 932              of CURSOR, which took it past the valid range.  */
 933           start_byte += cursor - base + 1;
 934         }
 935     }
 936
       /* Ran into the limit before finding COUNT newlines: leave point at
          the limit and report how many are still missing.  */
 937   TEMP_SET_PT_BOTH (limit, limit_byte);
 938   immediate_quit = old_immediate_quit;
 939
 940   return count * direction;
 941 }
 942
 943 int
 944 find_next_newline_no_quit (EMACS_INT from, int cnt)
 945 {
       /* Scan from FROM for CNT newlines via scan_buffer, passing 0 as
          the end bound, a null pointer where a shortage could be stored,
          and 0 as the final flag (judging by this function's name, that
          flag is the one that would allow quitting -- confirm against
          scan_buffer).  The value of scan_buffer is returned as is.  */
       int *no_shortage = (int *) 0;
       int result;

       result = scan_buffer ('\n', from, 0, cnt, no_shortage, 0);
       return result;
 947 }
 948
 949 /* Like find_next_newline, except that the scan only goes up to TO and
        the value is the position before the newline instead of after it.
        Subtracting 1 from find_next_newline's value would not do, because
        the scan may hit TO first; in that case (scan_buffer reports a
        nonzero shortage) the position is returned without adjustment.  */
 952
 953 int
 954 find_before_next_newline (EMACS_INT from, EMACS_INT to, int cnt)
 955 {
       int missing;
       int where = scan_buffer ('\n', from, to, cnt, &missing, 1);

       /* Nothing missing means all CNT newlines were found, so step back
          over the last one.  */
       return missing == 0 ? where - 1 : where;
 963 }
 964 \f
 965 /* Subroutines of Lisp buffer search functions. */
 966
     /* Search from point for STRING and move point according to the
        outcome.

        STRING must be a string.  COUNT is nil or an integer; the signed
        repeat count handed to search_buffer is DIRECTION, multiplied by
        COUNT when COUNT is non-nil.  BOUND is nil, or a number or marker
        giving the search limit; it must not lie on the wrong side of
        point for the search direction, and it is clipped to the range
        BEGV..ZV.  With a nil BOUND the limit is ZV for a positive repeat
        count and BEGV otherwise.  RE and POSIX are passed through to
        search_buffer, as are the buffer's case tables when its
        case_fold_search is non-nil.

        If search_buffer returns a positive position, point is moved
        there and the position is returned as a Lisp integer.  Otherwise:
        a nil NOERROR signals Qsearch_failed with STRING; a NOERROR of t
        returns nil leaving point alone; any other NOERROR moves point to
        the limit and returns nil.  */
 967 static Lisp_Object
 968 search_command (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count, int direction, int RE, int posix)
 969 {
       /* Buffer positions are kept in EMACS_INT, matching the parameter
          and return types of search_buffer; a plain int could truncate
          them where EMACS_INT is wider than int.  */
 970   register EMACS_INT np;
 971   EMACS_INT lim, lim_byte;
 972   int n = direction;
 973
 974   if (!NILP (count))
 975     {
 976       CHECK_NUMBER (count);
 977       n *= XINT (count);
 978     }
 979
 980   CHECK_STRING (string);
 981   if (NILP (bound))
 982     {
           /* No explicit bound: search to the end of the accessible
              region in the direction of travel.  */
 983       if (n > 0)
 984         lim = ZV, lim_byte = ZV_BYTE;
 985       else
 986         lim = BEGV, lim_byte = BEGV_BYTE;
 987     }
 988   else
 989     {
 990       CHECK_NUMBER_COERCE_MARKER (bound);
 991       lim = XINT (bound);
           /* The bound has to be ahead of point in the search direction.  */
 992       if (n > 0 ? lim < PT : lim > PT)
 993         error ("Invalid search bound (wrong side of point)");
           /* Clip the bound to the accessible region.  */
 994       if (lim > ZV)
 995         lim = ZV, lim_byte = ZV_BYTE;
 996       else if (lim < BEGV)
 997         lim = BEGV, lim_byte = BEGV_BYTE;
 998       else
 999         lim_byte = CHAR_TO_BYTE (lim);
1000     }
1001
1002   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
1003   XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
1004     = current_buffer->case_eqv_table;
1005
       /* The translation tables are passed only when case folding is on;
          otherwise both are Qnil.  */
1006   np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
1007                       (!NILP (current_buffer->case_fold_search)
1008                        ? current_buffer->case_canon_table
1009                        : Qnil),
1010                       (!NILP (current_buffer->case_fold_search)
1011                        ? current_buffer->case_eqv_table
1012                        : Qnil),
1013                       posix);
       /* A value that is not positive means the search failed.  */
1014   if (np <= 0)
1015     {
1016       if (NILP (noerror))
1017         xsignal1 (Qsearch_failed, string);
1018
1019       if (!EQ (noerror, Qt))
1020         {
               /* NOERROR is neither nil nor t: go to the limit.  */
1021           if (lim < BEGV || lim > ZV)
1022             abort ();
1023           SET_PT_BOTH (lim, lim_byte);
1024           return Qnil;
1025 #if 0 /* This would be clean, but maybe programs depend on
1026          a value of nil here.  */
1027           np = lim;
1028 #endif
1029         }
1030       else
1031         return Qnil;
1032     }
1033
       /* Sanity check: the position found must be inside the accessible
          region.  */
1034   if (np < BEGV || np > ZV)
1035     abort ();
1036
1037   SET_PT (np);
1038
1039   return make_number (np);
1040 }
1041 \f
1042 /* Return 1 if REGEXP contains no regexp construct, so that it matches
        just one constant string; return 0 otherwise.  A backslash that is
        followed by a character other than the special ones listed below
        only quotes that character.  A backslash at the very end of REGEXP
        makes it non-trivial.  */
1043
1044 static int
1045 trivial_regexp_p (Lisp_Object regexp)
1046 {
       const unsigned char *text = SDATA (regexp);
       int nbytes = SBYTES (regexp);
       int i = 0;

       while (i < nbytes)
         {
           switch (text[i++])
             {
             /* Unquoted operators.  */
             case '.': case '*': case '+': case '?':
             case '[': case '^': case '$':
               return 0;

             case '\\':
               /* Nothing left for the backslash to apply to.  */
               if (i >= nbytes)
                 return 0;

               switch (text[i++])
                 {
                 case '|': case '(': case ')': case '{': case '}':
                 case '`': case '\'': case '=': case '_':
                 case 'b': case 'B': case '<': case '>':
                 case 'w': case 'W': case 's': case 'S':
                 case 'c': case 'C': /* for categoryspec and notcategoryspec */
                 case '1': case '2': case '3': case '4': case '5':
                 case '6': case '7': case '8': case '9':
                   return 0;

                 default:
                   /* Plain quoted character.  */
                   break;
                 }
               break;

             default:
               break;
             }
         }

       return 1;
1071 }
1072
1073 /* Search for the n'th occurrence of STRING in the current buffer,
1074    starting at position POS and stopping at position LIM,
1075    treating STRING as a literal string if RE is false or as
1076    a regular expression if RE is true.
1077
1078    If N is positive, searching is forward and LIM must be greater than POS.
1079    If N is negative, searching is backward and LIM must be less than POS.
1080
1081    Returns -x if x occurrences remain to be found (x > 0),
1082    or else the position at the beginning of the Nth occurrence
1083    (if searching backward) or the end (if searching forward).
1084
1085    POSIX is nonzero if we want full backtracking (POSIX style)
1086    for this pattern.  0 means backtrack only enough to get a valid match.  */
1087
1088 #define TRANSLATE(out, trt, d)                  \
1089 do                                              \
1090   {                                             \
1091     if (! NILP (trt))                           \
1092       {                                         \
1093         Lisp_Object temp;                       \
1094         temp = Faref (trt, make_number (d));    \
1095         if (INTEGERP (temp))                    \
1096           out = XINT (temp);                    \
1097         else                                    \
1098           out = d;                              \
1099       }                                         \
1100     else                                        \
1101       out = d;                                  \
1102   }                                             \
1103 while (0)
1104
1105 /* Only used in search_buffer, to record the end position of the match
1106    when searching regexps and SEARCH_REGS should not be changed
1107    (i.e. Vinhibit_changing_match_data is non-nil).
        In that case search_buffer passes the address of this variable,
        instead of that of search_regs, to compile_pattern and
        re_search_2, and afterwards reads start[0] (backward search) or
        end[0] (forward search) from it to compute the new position.  */
1108 static struct re_registers search_regs_1;
1109
1110 static EMACS_INT
1111 search_buffer (Lisp_Object string, EMACS_INT pos, EMACS_INT pos_byte,
1112                EMACS_INT lim, EMACS_INT lim_byte, int n,
1113                int RE, Lisp_Object trt, Lisp_Object inverse_trt, int posix)
1114 {
       /* Search the current buffer for the Nth occurrence of STRING,
          starting at POS/POS_BYTE and stopping at LIM/LIM_BYTE; a negative
          N searches backward.  A non-trivial regexp (RE nonzero) is run
          through re_search_2; everything else is handed to boyer_moore or
          simple_search after the pattern has been converted to the
          buffer's representation and translated through TRT.  The regexp
          branch returns the new position, or N (backward) or -N (forward)
          for the occurrences still unfound; the other branch returns
          whatever boyer_moore or simple_search returns.  */
1115   int len = SCHARS (string);
1116   int len_byte = SBYTES (string);
1117   register int i;
1118
1119   if (running_asynch_code)
1120     save_search_regs ();
1121
1122   /* Searching 0 times means don't move.  */
1123   /* Null string is found at starting position.  */
1124   if (len == 0 || n == 0)
1125     {
1126       set_search_regs (pos_byte, 0);
1127       return pos;
1128     }
1129
       /* Use the regexp matcher unless the regexp is trivial and
          Vsearch_spaces_regexp is nil; in that latter case the pattern is
          treated as a plain string below.  */
1130   if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp)))
1131     {
1132       unsigned char *p1, *p2;
1133       int s1, s2;
1134       struct re_pattern_buffer *bufp;
1135
           /* Match data goes to search_regs, or to the private
              search_regs_1 when Vinhibit_changing_match_data is non-nil.  */
1136       bufp = compile_pattern (string,
1137                               (NILP (Vinhibit_changing_match_data)
1138                                ? &search_regs : &search_regs_1),
1139                               trt, posix,
1140                               !NILP (current_buffer->enable_multibyte_characters));
1141
1142       immediate_quit = 1;       /* Quit immediately if user types ^G,
1143                                    because letting this function finish
1144                                    can take too long. */
1145       QUIT;                     /* Do a pending quit right away,
1146                                    to avoid paradoxical behavior */
1147       /* Get pointers and sizes of the two strings
1148          that make up the visible portion of the buffer. */
1149
           /* P1/S1 describe the text from BEGV up to the gap, P2/S2 the
              text from the end of the gap up to ZV.  */
1150       p1 = BEGV_ADDR;
1151       s1 = GPT_BYTE - BEGV_BYTE;
1152       p2 = GAP_END_ADDR;
1153       s2 = ZV_BYTE - GPT_BYTE;
           /* A negative S1 means the gap position is before BEGV: the whole
              visible text is then described by P2/S2.  */
1154       if (s1 < 0)
1155         {
1156           p2 = p1;
1157           s2 = ZV_BYTE - BEGV_BYTE;
1158           s1 = 0;
1159         }
           /* A negative S2 means the gap position is after ZV: the whole
              visible text is then described by P1/S1.  */
1160       if (s2 < 0)
1161         {
1162           s1 = ZV_BYTE - BEGV_BYTE;
1163           s2 = 0;
1164         }
1165       re_match_object = Qnil;
1166
           /* Backward search: one re_search_2 call per occurrence.  The
              start offset is relative to BEGV_BYTE, and the range
              LIM_BYTE - POS_BYTE is not positive here when LIM precedes
              POS.  */
1167       while (n < 0)
1168         {
1169           int val;
1170           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1171                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1172                              (NILP (Vinhibit_changing_match_data)
1173                               ? &search_regs : &search_regs_1),
1174                              /* Don't allow match past current point */
1175                              pos_byte - BEGV_BYTE);
1176           if (val == -2)
1177             {
1178               matcher_overflow ();
1179             }
1180           if (val >= 0)
1181             {
1182               if (NILP (Vinhibit_changing_match_data))
1183                 {
                       /* Continue from the start of this match, and turn
                          every register that is set from a byte offset
                          relative to BEGV_BYTE into a char position.  */
1184                   pos_byte = search_regs.start[0] + BEGV_BYTE;
1185                   for (i = 0; i < search_regs.num_regs; i++)
1186                     if (search_regs.start[i] >= 0)
1187                       {
1188                         search_regs.start[i]
1189                           = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1190                         search_regs.end[i]
1191                           = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1192                       }
1193                   XSETBUFFER (last_thing_searched, current_buffer);
1194                   /* Set pos to the new position. */
1195                   pos = search_regs.start[0];
1196                 }
1197               else
1198                 {
                       /* Match data must stay untouched: only derive the
                          new position from the private registers.  */
1199                   pos_byte = search_regs_1.start[0] + BEGV_BYTE;
1200                   /* Set pos to the new position.  */
1201                   pos = BYTE_TO_CHAR (search_regs_1.start[0] + BEGV_BYTE);
1202                 }
1203             }
1204           else
1205             {
                   /* No further match: N (negative) is the number of
                      occurrences still unfound.  */
1206               immediate_quit = 0;
1207               return (n);
1208             }
1209           n++;
1210         }
           /* Forward search: same scheme, but matches may extend up to
              LIM_BYTE and the search continues from the end of each
              match.  */
1211       while (n > 0)
1212         {
1213           int val;
1214           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1215                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1216                              (NILP (Vinhibit_changing_match_data)
1217                               ? &search_regs : &search_regs_1),
1218                              lim_byte - BEGV_BYTE);
1219           if (val == -2)
1220             {
1221               matcher_overflow ();
1222             }
1223           if (val >= 0)
1224             {
1225               if (NILP (Vinhibit_changing_match_data))
1226                 {
1227                   pos_byte = search_regs.end[0] + BEGV_BYTE;
1228                   for (i = 0; i < search_regs.num_regs; i++)
1229                     if (search_regs.start[i] >= 0)
1230                       {
1231                         search_regs.start[i]
1232                           = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1233                         search_regs.end[i]
1234                           = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1235                       }
1236                   XSETBUFFER (last_thing_searched, current_buffer);
1237                   pos = search_regs.end[0];
1238                 }
1239               else
1240                 {
1241                   pos_byte = search_regs_1.end[0] + BEGV_BYTE;
1242                   pos = BYTE_TO_CHAR (search_regs_1.end[0] + BEGV_BYTE);
1243                 }
1244             }
1245           else
1246             {
                   /* Report the number of unfound occurrences as a
                      negative value.  */
1247               immediate_quit = 0;
1248               return (0 - n);
1249             }
1250           n--;
1251         }
1252       immediate_quit = 0;
1253       return (pos);
1254     }
1255   else                          /* non-RE case */
1256     {
1257       unsigned char *raw_pattern, *pat;
1258       int raw_pattern_size;
1259       int raw_pattern_size_byte;
1260       unsigned char *patbuf;
1261       int multibyte = !NILP (current_buffer->enable_multibyte_characters);
1262       unsigned char *base_pat;
1263       /* Set to positive if we find a non-ASCII char that needs
1264          translation.  Otherwise set to zero later.  */
1265       int char_base = -1;
1266       int boyer_moore_ok = 1;
1267
1268       /* MULTIBYTE says whether the text to be searched is multibyte.
1269          We must convert PATTERN to match that, or we will not really
1270          find things right.  */
1271
1272       if (multibyte == STRING_MULTIBYTE (string))
1273         {
               /* Same representation: use the string's data directly.  */
1274           raw_pattern = (unsigned char *) SDATA (string);
1275           raw_pattern_size = SCHARS (string);
1276           raw_pattern_size_byte = SBYTES (string);
1277         }
1278       else if (multibyte)
1279         {
               /* Unibyte string, multibyte buffer: make a converted copy
                  on the stack.  */
1280           raw_pattern_size = SCHARS (string);
1281           raw_pattern_size_byte
1282             = count_size_as_multibyte (SDATA (string),
1283                                        raw_pattern_size);
1284           raw_pattern = (unsigned char *) alloca (raw_pattern_size_byte + 1);
1285           copy_text (SDATA (string), raw_pattern,
1286                      SCHARS (string), 0, 1);
1287         }
1288       else
1289         {
1290           /* Converting multibyte to single-byte.
1291
1292              ??? Perhaps this conversion should be done in a special way
1293              by subtracting nonascii-insert-offset from each non-ASCII char,
1294              so that only the multibyte chars which really correspond to
1295              the chosen single-byte character set can possibly match.  */
1296           raw_pattern_size = SCHARS (string);
1297           raw_pattern_size_byte = SCHARS (string);
1298           raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
1299           copy_text (SDATA (string), raw_pattern,
1300                      SBYTES (string), 1, 0);
1301         }
1302
1303       /* Copy and optionally translate the pattern.  */
1304       len = raw_pattern_size;
1305       len_byte = raw_pattern_size_byte;
           /* Room for every pattern character at its maximum encoded
              length.  */
1306       patbuf = (unsigned char *) alloca (len * MAX_MULTIBYTE_LENGTH);
1307       pat = patbuf;
1308       base_pat = raw_pattern;
1309       if (multibyte)
1310         {
1311           /* Fill patbuf by translated characters in STRING while
1312              checking if we can use boyer-moore search.  If TRT is
1313              non-nil, we can use boyer-moore search only if TRT can be
1314              represented by the byte array of 256 elements.  For that,
1315              all non-ASCII case-equivalents of all case-sensitive
1316              characters in STRING must belong to the same charset and
1317              row.  */
1318
1319           while (--len >= 0)
1320             {
1321               unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str;
1322               int c, translated, inverse;
1323               int in_charlen, charlen;
1324
1325               /* If we got here and the RE flag is set, it's because we're
1326                  dealing with a regexp known to be trivial, so the backslash
1327                  just quotes the next character.  */
1328               if (RE && *base_pat == '\\')
1329                 {
                       /* Drop the backslash: it counts neither as a pattern
                          character nor as a pattern byte.  */
1330                   len--;
1331                   raw_pattern_size--;
1332                   len_byte--;
1333                   base_pat++;
1334                 }
1335
1336               c = STRING_CHAR_AND_LENGTH (base_pat, in_charlen);
1337
1338               if (NILP (trt))
1339                 {
                       /* No translation: copy the character's bytes as
                          they are.  */
1340                   str = base_pat;
1341                   charlen = in_charlen;
1342                 }
1343               else
1344                 {
1345                   /* Translate the character.  */
1346                   TRANSLATE (translated, trt, c);
1347                   charlen = CHAR_STRING (translated, str_base);
1348                   str = str_base;
1349
1350                   /* Check if C has any other case-equivalents.  */
1351                   TRANSLATE (inverse, inverse_trt, c);
1352                   /* If so, check if we can use boyer-moore.  */
1353                   if (c != inverse && boyer_moore_ok)
1354                     {
1355                       /* Check if all equivalents belong to the same
1356                          group of characters.  Note that the check of C
1357                          itself is done by the last iteration.  */
1358                       int this_char_base = -1;
1359
                           /* Follow INVERSE_TRT from equivalent to
                              equivalent until it comes back to C.  */
1360                       while (boyer_moore_ok)
1361                         {
1362                           if (ASCII_BYTE_P (inverse))
1363                             {
                                   /* An ASCII equivalent is acceptable only
                                      if no non-ASCII group was recorded for
                                      this character.  */
1364                               if (this_char_base > 0)
1365                                 boyer_moore_ok = 0;
1366                               else
1367                                 this_char_base = 0;
1368                             }
1369                           else if (CHAR_BYTE8_P (inverse))
1370                             /* Boyer-moore search can't handle a
1371                                translation of an eight-bit
1372                                character.  */
1373                             boyer_moore_ok = 0;
1374                           else if (this_char_base < 0)
1375                             {
                                   /* First non-ASCII equivalent: its code
                                      with the low six bits cleared names
                                      the group, which must agree with the
                                      group seen for earlier characters.  */
1376                               this_char_base = inverse & ~0x3F;
1377                               if (char_base < 0)
1378                                 char_base = this_char_base;
1379                               else if (this_char_base != char_base)
1380                                 boyer_moore_ok = 0;
1381                             }
1382                           else if ((inverse & ~0x3F) != this_char_base)
1383                             boyer_moore_ok = 0;
1384                           if (c == inverse)
1385                             break;
1386                           TRANSLATE (inverse, inverse_trt, inverse);
1387                         }
1388                     }
1389                 }
1390
1391               /* Store this character into the translated pattern.  */
1392               memcpy (pat, str, charlen);
1393               pat += charlen;
1394               base_pat += in_charlen;
1395               len_byte -= in_charlen;
1396             }
1397
1398           /* If char_base is still negative we didn't find any translated
1399              non-ASCII characters.  */
1400           if (char_base < 0)
1401             char_base = 0;
1402         }
1403       else
1404         {
1405           /* Unibyte buffer.  */
1406           char_base = 0;
1407           while (--len >= 0)
1408             {
1409               int c, translated;
1410
1411               /* If we got here and the RE flag is set, it's because we're
1412                  dealing with a regexp known to be trivial, so the backslash
1413                  just quotes the next character.  */
1414               if (RE && *base_pat == '\\')
1415                 {
1416                   len--;
1417                   raw_pattern_size--;
1418                   base_pat++;
1419                 }
1420               c = *base_pat++;
1421               TRANSLATE (translated, trt, c);
1422               *pat++ = translated;
1423             }
1424         }
1425
           /* From here on LEN and LEN_BYTE describe the translated pattern
              in PATBUF: its byte length is what was actually stored, and
              its character count excludes any quoting backslashes.  */
1426       len_byte = pat - patbuf;
1427       len = raw_pattern_size;
1428       pat = base_pat = patbuf;
1429
1430       if (boyer_moore_ok)
1431         return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
1432                             pos, pos_byte, lim, lim_byte,
1433                             char_base);
1434       else
1435         return simple_search (n, pat, len, len_byte, trt,
1436                               pos, pos_byte, lim, lim_byte);
1437     }
1438 }
1439 \f
1440 /* Do a simple string search N times for the string PAT,
1441    whose length is LEN/LEN_BYTE,
1442    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1443    TRT is the translation table.
        Each buffer character is passed through TRT before it is compared
        with the pattern character; the pattern itself is compared as
        given.  A positive N searches forward, a negative N backward.
1444
1445    Return the character position where the match is found.
        That is the position after the last match when searching forward
        and the position of its start when searching backward; in that
        case the match is also recorded with set_search_regs.
1446    Otherwise, if M matches remained to be found, return -M.
1447
1448    This kind of search works regardless of what is in PAT and
1449    regardless of what is in TRT.  It is used in cases where
1450    boyer_moore cannot work.  */
1451
1452 static EMACS_INT
1453 simple_search (int n, unsigned char *pat, int len, int len_byte, Lisp_Object trt, EMACS_INT pos, EMACS_INT pos_byte, EMACS_INT lim, EMACS_INT lim_byte)
1454 {
1455   int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1456   int forward = n > 0;
1457   /* Number of buffer bytes matched.  Note that this may be different
1458      from len_byte in a multibyte buffer.
          Starts at 0 so that the read at `stop' is well defined even when
          none of the loops below runs (N equal to 0 on entry).  */
1459   int match_byte = 0;
1460
       /* Forward search in a multibyte buffer.  */
1461   if (lim > pos && multibyte)
1462     while (n > 0)
1463       {
1464         while (1)
1465           {
1466             /* Try matching at position POS.  */
1467             EMACS_INT this_pos = pos;
1468             EMACS_INT this_pos_byte = pos_byte;
1469             int this_len = len;
1470             unsigned char *p = pat;
                 /* Give up once the pattern no longer fits before the
                    limit.  */
1471             if (pos + len > lim || pos_byte + len_byte > lim_byte)
1472               goto stop;
1473
1474             while (this_len > 0)
1475               {
1476                 int charlen, buf_charlen;
1477                 int pat_ch, buf_ch;
1478
1479                 pat_ch = STRING_CHAR_AND_LENGTH (p, charlen);
1480                 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1481                                                  buf_charlen);
1482                 TRANSLATE (buf_ch, trt, buf_ch);
1483
1484                 if (buf_ch != pat_ch)
1485                   break;
1486
1487                 this_len--;
1488                 p += charlen;
1489
1490                 this_pos_byte += buf_charlen;
1491                 this_pos++;
1492               }
1493
1494             if (this_len == 0)
1495               {
                     /* Whole pattern matched: move past the match.  */
1496                 match_byte = this_pos_byte - pos_byte;
1497                 pos += len;
1498                 pos_byte += match_byte;
1499                 break;
1500               }
1501
                 /* Mismatch: retry one character further on.  */
1502             INC_BOTH (pos, pos_byte);
1503           }
1504
1505         n--;
1506       }
       /* Forward search in a unibyte buffer; POS_BYTE is not maintained
          in this branch.  */
1507   else if (lim > pos)
1508     while (n > 0)
1509       {
1510         while (1)
1511           {
1512             /* Try matching at position POS.  */
1513             EMACS_INT this_pos = pos;
1514             int this_len = len;
1515             unsigned char *p = pat;
1516
1517             if (pos + len > lim)
1518               goto stop;
1519
1520             while (this_len > 0)
1521               {
1522                 int pat_ch = *p++;
1523                 int buf_ch = FETCH_BYTE (this_pos);
1524                 TRANSLATE (buf_ch, trt, buf_ch);
1525
1526                 if (buf_ch != pat_ch)
1527                   break;
1528
1529                 this_len--;
1530                 this_pos++;
1531               }
1532
1533             if (this_len == 0)
1534               {
1535                 match_byte = len;
1536                 pos += len;
1537                 break;
1538               }
1539
1540             pos++;
1541           }
1542
1543         n--;
1544       }
1545   /* Backwards search.  */
1546   else if (lim < pos && multibyte)
1547     while (n < 0)
1548       {
1549         while (1)
1550           {
1551             /* Try matching at position POS.  */
1552             EMACS_INT this_pos = pos;
1553             EMACS_INT this_pos_byte = pos_byte;
1554             int this_len = len;
                 /* The pattern is walked from its end towards its
                    start.  */
1555             const unsigned char *p = pat + len_byte;
1556
1557             if (this_pos - len < lim || (pos_byte - len_byte) < lim_byte)
1558               goto stop;
1559
1560             while (this_len > 0)
1561               {
1563                 int pat_ch, buf_ch;
1564
                     /* Step both the buffer position and the pattern
                        pointer back by one character before comparing.  */
1565                 DEC_BOTH (this_pos, this_pos_byte);
1566                 PREV_CHAR_BOUNDARY (p, pat);
1567                 pat_ch = STRING_CHAR (p);
1568                 buf_ch = STRING_CHAR (BYTE_POS_ADDR (this_pos_byte));
1569                 TRANSLATE (buf_ch, trt, buf_ch);
1570
1571                 if (buf_ch != pat_ch)
1572                   break;
1573
1574                 this_len--;
1575               }
1576
1577             if (this_len == 0)
1578               {
                     /* Whole pattern matched: move to the start of the
                        match.  */
1579                 match_byte = pos_byte - this_pos_byte;
1580                 pos = this_pos;
1581                 pos_byte = this_pos_byte;
1582                 break;
1583               }
1584
1585             DEC_BOTH (pos, pos_byte);
1586           }
1587
1588         n++;
1589       }
       /* Backward search in a unibyte buffer.  */
1590   else if (lim < pos)
1591     while (n < 0)
1592       {
1593         while (1)
1594           {
1595             /* Try matching at position POS.  */
1596             EMACS_INT this_pos = pos - len;
1597             int this_len = len;
1598             unsigned char *p = pat;
1599
1600             if (this_pos < lim)
1601               goto stop;
1602
1603             while (this_len > 0)
1604               {
1605                 int pat_ch = *p++;
1606                 int buf_ch = FETCH_BYTE (this_pos);
1607                 TRANSLATE (buf_ch, trt, buf_ch);
1608
1609                 if (buf_ch != pat_ch)
1610                   break;
1611                 this_len--;
1612                 this_pos++;
1613               }
1614
1615             if (this_len == 0)
1616               {
1617                 match_byte = len;
1618                 pos -= len;
1619                 break;
1620               }
1621
1622             pos--;
1623           }
1624
1625         n++;
1626       }
1627
1628  stop:
1629   if (n == 0)
1630     {
           /* Record the last match.  After a forward search POS (or
              POS_BYTE) is at its end, so its start lies MATCH_BYTE bytes
              back; after a backward search it is already at the start.  */
1631       if (forward)
1632         set_search_regs ((multibyte ? pos_byte : pos) - match_byte, match_byte);
1633       else
1634         set_search_regs (multibyte ? pos_byte : pos, match_byte);
1635
1636       return pos;
1637     }
       /* Some occurrences are still unfound: report how many, negated.  */
1638   else if (n > 0)
1639     return -n;
1640   else
1641     return n;
1642 }
1643 \f
1644 /* Do Boyer-Moore search N times for the string BASE_PAT,
1645    whose length is LEN/LEN_BYTE,
1646    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1647    The sign of N says which direction we search in.
1648    TRT and INVERSE_TRT are translation tables.
1649    Characters in PAT are already translated by TRT.
1650
1651    This kind of search works if all the characters in BASE_PAT that
1652    have nontrivial translation are the same aside from the last byte.
1653    This makes it possible to translate just the last byte of a
1654    character, and do so after just a simple test of the context.
1655    CHAR_BASE is nonzero if there is such a non-ASCII character.
1656
1657    If that criterion is not satisfied, do not call this function.  */
1658
1659 static EMACS_INT
1660 boyer_moore (int n, unsigned char *base_pat, int len, int len_byte,
1661              Lisp_Object trt, Lisp_Object inverse_trt,
1662              EMACS_INT pos, EMACS_INT pos_byte,
1663              EMACS_INT lim, EMACS_INT lim_byte, int char_base)
1664 {
1665   int direction = ((n > 0) ? 1 : -1);
1666   register int dirlen;
1667   EMACS_INT limit;
1668   int stride_for_teases = 0;
1669   int BM_tab[0400];
1670   register unsigned char *cursor, *p_limit;
1671   register int i, j;
1672   unsigned char *pat, *pat_end;
1673   int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1674
1675   unsigned char simple_translate[0400];
1676   /* These are set to the preceding bytes of a byte to be translated
1677      if char_base is nonzero.  As the maximum byte length of a
1678      multibyte character is 5, we have to check at most four previous
1679      bytes.  */
1680   int translate_prev_byte1 = 0;
1681   int translate_prev_byte2 = 0;
1682   int translate_prev_byte3 = 0;
1683   int translate_prev_byte4 = 0;
1684
1685   /* The general approach is that we are going to maintain that we know
1686      the first (closest to the present position, in whatever direction
1687      we're searching) character that could possibly be the last
1688      (furthest from present position) character of a valid match.  We
1689      advance the state of our knowledge by looking at that character
1690      and seeing whether it indeed matches the last character of the
1691      pattern.  If it does, we take a closer look.  If it does not, we
1692      move our pointer (to putative last characters) as far as is
1693      logically possible.  This amount of movement, which I call a
1694      stride, will be the length of the pattern if the actual character
1695      appears nowhere in the pattern, otherwise it will be the distance
1696      from the last occurrence of that character to the end of the
1697      pattern.  If the amount is zero we have a possible match.  */
1698
1699   /* Here we make a "mickey mouse" BM table.  The stride of the search
1700      is determined only by the last character of the putative match.
1701      If that character does not match, we will stride the proper
1702      distance to propose a match that superimposes it on the last
1703      instance of a character that matches it (per trt), or misses
1704      it entirely if there is none. */
1705
1706   dirlen = len_byte * direction;
1707
1708   /* Record position after the end of the pattern.  */
1709   pat_end = base_pat + len_byte;
1710   /* BASE_PAT points to a character that we start scanning from.
1711      It is the first character in a forward search,
1712      the last character in a backward search.  */
1713   if (direction < 0)
1714     base_pat = pat_end - 1;
1715
1716   /* A character that does not appear in the pattern induces a
1717      stride equal to the pattern length.  */
1718   for (i = 0; i < 0400; i++)
1719     BM_tab[i] = dirlen;
1720
1721   /* We use this for translation, instead of TRT itself.
1722      We fill this in to handle the characters that actually
1723      occur in the pattern.  Others don't matter anyway!  */
1724   for (i = 0; i < 0400; i++)
1725     simple_translate[i] = i;
1726
1727   if (char_base)
1728     {
1729       /* Setup translate_prev_byte1/2/3/4 from CHAR_BASE.  Only a
1730          byte following them are the target of translation.  */
1731       unsigned char str[MAX_MULTIBYTE_LENGTH];
1732       int len = CHAR_STRING (char_base, str);
1733
1734       translate_prev_byte1 = str[len - 2];
1735       if (len > 2)
1736         {
1737           translate_prev_byte2 = str[len - 3];
1738           if (len > 3)
1739             {
1740               translate_prev_byte3 = str[len - 4];
1741               if (len > 4)
1742                 translate_prev_byte4 = str[len - 5];
1743             }
1744         }
1745     }
1746
1747   i = 0;
1748   while (i != dirlen)
1749     {
1750       unsigned char *ptr = base_pat + i;
1751       i += direction;
1752       if (! NILP (trt))
1753         {
1754           /* If the byte currently looking at is the last of a
1755              character to check case-equivalents, set CH to that
1756              character.  An ASCII character and a non-ASCII character
1757              matching with CHAR_BASE are to be checked.  */
1758           int ch = -1;
1759
1760           if (ASCII_BYTE_P (*ptr) || ! multibyte)
1761             ch = *ptr;
1762           else if (char_base
1763                    && ((pat_end - ptr) == 1 || CHAR_HEAD_P (ptr[1])))
1764             {
1765               unsigned char *charstart = ptr - 1;
1766
1767               while (! (CHAR_HEAD_P (*charstart)))
1768                 charstart--;
1769               ch = STRING_CHAR (charstart);
1770               if (char_base != (ch & ~0x3F))
1771                 ch = -1;
1772             }
1773
1774           if (ch >= 0200)
1775             j = (ch & 0x3F) | 0200;
1776           else
1777             j = *ptr;
1778
1779           if (i == dirlen)
1780             stride_for_teases = BM_tab[j];
1781
1782           BM_tab[j] = dirlen - i;
1783           /* A translation table is accompanied by its inverse -- see */
1784           /* comment following downcase_table for details */
1785           if (ch >= 0)
1786             {
1787               int starting_ch = ch;
1788               int starting_j = j;
1789
1790               while (1)
1791                 {
1792                   TRANSLATE (ch, inverse_trt, ch);
1793                   if (ch >= 0200)
1794                     j = (ch & 0x3F) | 0200;
1795                   else
1796                     j = ch;
1797
1798                   /* For all the characters that map into CH,
1799                      set up simple_translate to map the last byte
1800                      into STARTING_J.  */
1801                   simple_translate[j] = starting_j;
1802                   if (ch == starting_ch)
1803                     break;
1804                   BM_tab[j] = dirlen - i;
1805                 }
1806             }
1807         }
1808       else
1809         {
1810           j = *ptr;
1811
1812           if (i == dirlen)
1813             stride_for_teases = BM_tab[j];
1814           BM_tab[j] = dirlen - i;
1815         }
1816       /* stride_for_teases tells how much to stride if we get a
1817          match on the far character but are subsequently
1818          disappointed, by recording what the stride would have been
1819          for that character if the last character had been
1820          different.  */
1821     }
1822   pos_byte += dirlen - ((direction > 0) ? direction : 0);
1823   /* loop invariant - POS_BYTE points at where last char (first
1824      char if reverse) of pattern would align in a possible match.  */
1825   while (n != 0)
1826     {
1827       EMACS_INT tail_end;
1828       unsigned char *tail_end_ptr;
1829
1830       /* It's been reported that some (broken) compiler thinks that
1831          Boolean expressions in an arithmetic context are unsigned.
1832          Using an explicit ?1:0 prevents this.  */
1833       if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1834           < 0)
1835         return (n * (0 - direction));
1836       /* First we do the part we can by pointers (maybe nothing) */
1837       QUIT;
1838       pat = base_pat;
1839       limit = pos_byte - dirlen + direction;
1840       if (direction > 0)
1841         {
1842           limit = BUFFER_CEILING_OF (limit);
1843           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1844              can take on without hitting edge of buffer or the gap.  */
1845           limit = min (limit, pos_byte + 20000);
1846           limit = min (limit, lim_byte - 1);
1847         }
1848       else
1849         {
1850           limit = BUFFER_FLOOR_OF (limit);
1851           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1852              can take on without hitting edge of buffer or the gap.  */
1853           limit = max (limit, pos_byte - 20000);
1854           limit = max (limit, lim_byte);
1855         }
1856       tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1857       tail_end_ptr = BYTE_POS_ADDR (tail_end);
1858
1859       if ((limit - pos_byte) * direction > 20)
1860         {
1861           unsigned char *p2;
1862
1863           p_limit = BYTE_POS_ADDR (limit);
1864           p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1865           /* In this loop, pos + cursor - p2 is the surrogate for pos.  */
1866           while (1)             /* use one cursor setting as long as i can */
1867             {
1868               if (direction > 0) /* worth duplicating */
1869                 {
1870                   while (cursor <= p_limit)
1871                     {
1872                       if (BM_tab[*cursor] == 0)
1873                         goto hit;
1874                       cursor += BM_tab[*cursor];
1875                     }
1876                 }
1877               else
1878                 {
1879                   while (cursor >= p_limit)
1880                     {
1881                       if (BM_tab[*cursor] == 0)
1882                         goto hit;
1883                       cursor += BM_tab[*cursor];
1884                     }
1885                 }
1886               /* If you are here, cursor is beyond the end of the
1887                  searched region.  You fail to match within the
1888                  permitted region and would otherwise try a character
1889                  beyond that region.  */
1890               break;
1891
1892             hit:
1893               i = dirlen - direction;
1894               if (! NILP (trt))
1895                 {
1896                   while ((i -= direction) + direction != 0)
1897                     {
1898                       int ch;
1899                       cursor -= direction;
1900                       /* Translate only the last byte of a character.  */
1901                       if (! multibyte
1902                           || ((cursor == tail_end_ptr
1903                                || CHAR_HEAD_P (cursor[1]))
1904                               && (CHAR_HEAD_P (cursor[0])
1905                                   /* Check if this is the last byte of
1906                                      a translable character.  */
1907                                   || (translate_prev_byte1 == cursor[-1]
1908                                       && (CHAR_HEAD_P (translate_prev_byte1)
1909                                           || (translate_prev_byte2 == cursor[-2]
1910                                               && (CHAR_HEAD_P (translate_prev_byte2)
1911                                                   || (translate_prev_byte3 == cursor[-3]))))))))
1912                         ch = simple_translate[*cursor];
1913                       else
1914                         ch = *cursor;
1915                       if (pat[i] != ch)
1916                         break;
1917                     }
1918                 }
1919               else
1920                 {
1921                   while ((i -= direction) + direction != 0)
1922                     {
1923                       cursor -= direction;
1924                       if (pat[i] != *cursor)
1925                         break;
1926                     }
1927                 }
1928               cursor += dirlen - i - direction; /* fix cursor */
1929               if (i + direction == 0)
1930                 {
1931                   EMACS_INT position, start, end;
1932
1933                   cursor -= direction;
1934
1935                   position = pos_byte + cursor - p2 + ((direction > 0)
1936                                                        ? 1 - len_byte : 0);
1937                   set_search_regs (position, len_byte);
1938
1939                   if (NILP (Vinhibit_changing_match_data))
1940                     {
1941                       start = search_regs.start[0];
1942                       end = search_regs.end[0];
1943                     }
1944                   else
1945                     /* If Vinhibit_changing_match_data is non-nil,
1946                        search_regs will not be changed.  So let's
1947                        compute start and end here.  */
1948                     {
1949                       start = BYTE_TO_CHAR (position);
1950                       end = BYTE_TO_CHAR (position + len_byte);
1951                     }
1952
1953                   if ((n -= direction) != 0)
1954                     cursor += dirlen; /* to resume search */
1955                   else
1956                     return direction > 0 ? end : start;
1957                 }
1958               else
1959                 cursor += stride_for_teases; /* <sigh> we lose -  */
1960             }
1961           pos_byte += cursor - p2;
1962         }
1963       else
1964         /* Now we'll pick up a clump that has to be done the hard
1965            way because it covers a discontinuity.  */
1966         {
1967           limit = ((direction > 0)
1968                    ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
1969                    : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
1970           limit = ((direction > 0)
1971                    ? min (limit + len_byte, lim_byte - 1)
1972                    : max (limit - len_byte, lim_byte));
1973           /* LIMIT is now the last value POS_BYTE can have
1974              and still be valid for a possible match.  */
1975           while (1)
1976             {
1977               /* This loop can be coded for space rather than
1978                  speed because it will usually run only once.
1979                  (the reach is at most len + 21, and typically
1980                  does not exceed len).  */
1981               while ((limit - pos_byte) * direction >= 0)
1982                 {
1983                   int ch = FETCH_BYTE (pos_byte);
1984                   if (BM_tab[ch] == 0)
1985                     goto hit2;
1986                   pos_byte += BM_tab[ch];
1987                 }
1988               break;    /* ran off the end */
1989
1990             hit2:
1991               /* Found what might be a match.  */
1992               i = dirlen - direction;
1993               while ((i -= direction) + direction != 0)
1994                 {
1995                   int ch;
1996                   unsigned char *ptr;
1997                   pos_byte -= direction;
1998                   ptr = BYTE_POS_ADDR (pos_byte);
1999                   /* Translate only the last byte of a character.  */
2000                   if (! multibyte
2001                       || ((ptr == tail_end_ptr
2002                            || CHAR_HEAD_P (ptr[1]))
2003                           && (CHAR_HEAD_P (ptr[0])
2004                               /* Check if this is the last byte of a
2005                                  translable character.  */
2006                               || (translate_prev_byte1 == ptr[-1]
2007                                   && (CHAR_HEAD_P (translate_prev_byte1)
2008                                       || (translate_prev_byte2 == ptr[-2]
2009                                           && (CHAR_HEAD_P (translate_prev_byte2)
2010                                               || translate_prev_byte3 == ptr[-3])))))))
2011                     ch = simple_translate[*ptr];
2012                   else
2013                     ch = *ptr;
2014                   if (pat[i] != ch)
2015                     break;
2016                 }
2017               /* Above loop has moved POS_BYTE part or all the way
2018                  back to the first pos (last pos if reverse).
2019                  Set it once again at the last (first if reverse) char.  */
2020               pos_byte += dirlen - i - direction;
2021               if (i + direction == 0)
2022                 {
2023                   EMACS_INT position, start, end;
2024                   pos_byte -= direction;
2025
2026                   position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
2027                   set_search_regs (position, len_byte);
2028
2029                   if (NILP (Vinhibit_changing_match_data))
2030                     {
2031                       start = search_regs.start[0];
2032                       end = search_regs.end[0];
2033                     }
2034                   else
2035                     /* If Vinhibit_changing_match_data is non-nil,
2036                        search_regs will not be changed.  So let's
2037                        compute start and end here.  */
2038                     {
2039                       start = BYTE_TO_CHAR (position);
2040                       end = BYTE_TO_CHAR (position + len_byte);
2041                     }
2042
2043                   if ((n -= direction) != 0)
2044                     pos_byte += dirlen; /* to resume search */
2045                   else
2046                     return direction > 0 ? end : start;
2047                 }
2048               else
2049                 pos_byte += stride_for_teases;
2050             }
2051           }
2052       /* We have done one clump.  Can we continue? */
2053       if ((lim_byte - pos_byte) * direction < 0)
2054         return ((0 - n) * direction);
2055     }
2056   return BYTE_TO_CHAR (pos_byte);
2057 }
2058
2059 /* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
2060    for the overall match just found in the current buffer.
2061    Also clear out the match data for registers 1 and up.  */
2062
2063 static void
2064 set_search_regs (EMACS_INT beg_byte, EMACS_INT nbytes)
2065 {
2066   int i;
2067
2068   if (!NILP (Vinhibit_changing_match_data))
2069     return;
2070
2071   /* Make sure we have registers in which to store
2072      the match position.  */
2073   if (search_regs.num_regs == 0)
2074     {
2075       search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
2076       search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
2077       search_regs.num_regs = 2;
2078     }
2079
2080   /* Clear out the other registers.  */
2081   for (i = 1; i < search_regs.num_regs; i++)
2082     {
2083       search_regs.start[i] = -1;
2084       search_regs.end[i] = -1;
2085     }
2086
2087   search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
2088   search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
2089   XSETBUFFER (last_thing_searched, current_buffer);
2090 }
2091 \f
2092 /* Given STRING, a string of words separated by word delimiters,
2093    compute a regexp that matches those exact words separated by
2094    arbitrary punctuation.  If LAX is nonzero, the end of the string
2095    need not match a word boundary unless it ends in whitespace.  */
2096
2097 static Lisp_Object
2098 wordify (Lisp_Object string, int lax)
2099 {
2100   register unsigned char *p, *o;
2101   register int i, i_byte, len, punct_count = 0, word_count = 0;
2102   Lisp_Object val;
2103   int prev_c = 0;
2104   int adjust, whitespace_at_end;
2105
2106   CHECK_STRING (string);
2107   p = SDATA (string);
2108   len = SCHARS (string);
2109
2110   for (i = 0, i_byte = 0; i < len; )
2111     {
2112       int c;
2113
2114       FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, i, i_byte);
2115
2116       if (SYNTAX (c) != Sword)
2117         {
2118           punct_count++;
2119           if (i > 0 && SYNTAX (prev_c) == Sword)
2120             word_count++;
2121         }
2122
2123       prev_c = c;
2124     }
2125
2126   if (SYNTAX (prev_c) == Sword)
2127     {
2128       word_count++;
2129       whitespace_at_end = 0;
2130     }
2131   else
2132     whitespace_at_end = 1;
2133
2134   if (!word_count)
2135     return empty_unibyte_string;
2136
2137   adjust = - punct_count + 5 * (word_count - 1)
2138     + ((lax && !whitespace_at_end) ? 2 : 4);
2139   if (STRING_MULTIBYTE (string))
2140     val = make_uninit_multibyte_string (len + adjust,
2141                                         SBYTES (string)
2142                                         + adjust);
2143   else
2144     val = make_uninit_string (len + adjust);
2145
2146   o = SDATA (val);
2147   *o++ = '\\';
2148   *o++ = 'b';
2149   prev_c = 0;
2150
2151   for (i = 0, i_byte = 0; i < len; )
2152     {
2153       int c;
2154       int i_byte_orig = i_byte;
2155
2156       FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, i, i_byte);
2157
2158       if (SYNTAX (c) == Sword)
2159         {
2160           memcpy (o, SDATA (string) + i_byte_orig, i_byte - i_byte_orig);
2161           o += i_byte - i_byte_orig;
2162         }
2163       else if (i > 0 && SYNTAX (prev_c) == Sword && --word_count)
2164         {
2165           *o++ = '\\';
2166           *o++ = 'W';
2167           *o++ = '\\';
2168           *o++ = 'W';
2169           *o++ = '*';
2170         }
2171
2172       prev_c = c;
2173     }
2174
2175   if (!lax || whitespace_at_end)
2176     {
2177       *o++ = '\\';
2178       *o++ = 'b';
2179     }
2180
2181   return val;
2182 }
2183 \f
2184 DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
2185        "MSearch backward: ",
2186        doc: /* Search backward from point for STRING.
2187 Set point to the beginning of the occurrence found, and return point.
2188 An optional second argument bounds the search; it is a buffer position.
2189 The match found must not extend before that position.
2190 Optional third argument, if t, means if fail just return nil (no error).
2191  If not nil and not t, position at limit of search and return nil.
2192 Optional fourth argument is repeat count--search for successive occurrences.
2193
2194 Search case-sensitivity is determined by the value of the variable
2195 `case-fold-search', which see.
2196
2197 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2198   (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2199 {
2200   return search_command (string, bound, noerror, count, -1, 0, 0);
2201 }
2202
2203 DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
2204        doc: /* Search forward from point for STRING.
2205 Set point to the end of the occurrence found, and return point.
2206 An optional second argument bounds the search; it is a buffer position.
2207 The match found must not extend after that position.  A value of nil is
2208   equivalent to (point-max).
2209 Optional third argument, if t, means if fail just return nil (no error).
2210   If not nil and not t, move to limit of search and return nil.
2211 Optional fourth argument is repeat count--search for successive occurrences.
2212
2213 Search case-sensitivity is determined by the value of the variable
2214 `case-fold-search', which see.
2215
2216 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2217   (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2218 {
2219   return search_command (string, bound, noerror, count, 1, 0, 0);
2220 }
2221
2222 DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
2223        "sWord search backward: ",
2224        doc: /* Search backward from point for STRING, ignoring differences in punctuation.
2225 Set point to the beginning of the occurrence found, and return point.
2226 An optional second argument bounds the search; it is a buffer position.
2227 The match found must not extend before that position.
2228 Optional third argument, if t, means if fail just return nil (no error).
2229   If not nil and not t, move to limit of search and return nil.
2230 Optional fourth argument is repeat count--search for successive occurrences.  */)
2231   (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2232 {
2233   return search_command (wordify (string, 0), bound, noerror, count, -1, 1, 0);
2234 }
2235
2236 DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
2237        "sWord search: ",
2238        doc: /* Search forward from point for STRING, ignoring differences in punctuation.
2239 Set point to the end of the occurrence found, and return point.
2240 An optional second argument bounds the search; it is a buffer position.
2241 The match found must not extend after that position.
2242 Optional third argument, if t, means if fail just return nil (no error).
2243   If not nil and not t, move to limit of search and return nil.
2244 Optional fourth argument is repeat count--search for successive occurrences.  */)
2245   (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2246 {
2247   return search_command (wordify (string, 0), bound, noerror, count, 1, 1, 0);
2248 }
2249
2250 DEFUN ("word-search-backward-lax", Fword_search_backward_lax, Sword_search_backward_lax, 1, 4,
2251        "sWord search backward: ",
2252        doc: /* Search backward from point for STRING, ignoring differences in punctuation.
2253 Set point to the beginning of the occurrence found, and return point.
2254
2255 Unlike `word-search-backward', the end of STRING need not match a word
2256 boundary unless it ends in whitespace.
2257
2258 An optional second argument bounds the search; it is a buffer position.
2259 The match found must not extend before that position.
2260 Optional third argument, if t, means if fail just return nil (no error).
2261   If not nil and not t, move to limit of search and return nil.
2262 Optional fourth argument is repeat count--search for successive occurrences.  */)
2263   (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2264 {
2265   return search_command (wordify (string, 1), bound, noerror, count, -1, 1, 0);
2266 }
2267
2268 DEFUN ("word-search-forward-lax", Fword_search_forward_lax, Sword_search_forward_lax, 1, 4,
2269        "sWord search: ",
2270        doc: /* Search forward from point for STRING, ignoring differences in punctuation.
2271 Set point to the end of the occurrence found, and return point.
2272
2273 Unlike `word-search-forward', the end of STRING need not match a word
2274 boundary unless it ends in whitespace.
2275
2276 An optional second argument bounds the search; it is a buffer position.
2277 The match found must not extend after that position.
2278 Optional third argument, if t, means if fail just return nil (no error).
2279   If not nil and not t, move to limit of search and return nil.
2280 Optional fourth argument is repeat count--search for successive occurrences.  */)
2281   (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2282 {
2283   return search_command (wordify (string, 1), bound, noerror, count, 1, 1, 0);
2284 }
2285
2286 DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
2287        "sRE search backward: ",
2288        doc: /* Search backward from point for match for regular expression REGEXP.
2289 Set point to the beginning of the match, and return point.
2290 The match found is the one starting last in the buffer
2291 and yet ending before the origin of the search.
2292 An optional second argument bounds the search; it is a buffer position.
2293 The match found must start at or after that position.
2294 Optional third argument, if t, means if fail just return nil (no error).
2295   If not nil and not t, move to limit of search and return nil.
2296 Optional fourth argument is repeat count--search for successive occurrences.
2297 See also the functions `match-beginning', `match-end', `match-string',
2298 and `replace-match'.  */)
2299   (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2300 {
2301   return search_command (regexp, bound, noerror, count, -1, 1, 0);
2302 }
2303
2304 DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
2305        "sRE search: ",
2306        doc: /* Search forward from point for regular expression REGEXP.
2307 Set point to the end of the occurrence found, and return point.
2308 An optional second argument bounds the search; it is a buffer position.
2309 The match found must not extend after that position.
2310 Optional third argument, if t, means if fail just return nil (no error).
2311   If not nil and not t, move to limit of search and return nil.
2312 Optional fourth argument is repeat count--search for successive occurrences.
2313 See also the functions `match-beginning', `match-end', `match-string',
2314 and `replace-match'.  */)
2315   (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2316 {
2317   return search_command (regexp, bound, noerror, count, 1, 1, 0);
2318 }
2319
2320 DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
2321        "sPosix search backward: ",
2322        doc: /* Search backward from point for match for regular expression REGEXP.
2323 Find the longest match in accord with Posix regular expression rules.
2324 Set point to the beginning of the match, and return point.
2325 The match found is the one starting last in the buffer
2326 and yet ending before the origin of the search.
2327 An optional second argument bounds the search; it is a buffer position.
2328 The match found must start at or after that position.
2329 Optional third argument, if t, means if fail just return nil (no error).
2330   If not nil and not t, move to limit of search and return nil.
2331 Optional fourth argument is repeat count--search for successive occurrences.
2332 See also the functions `match-beginning', `match-end', `match-string',
2333 and `replace-match'.  */)
2334   (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2335 {
2336   return search_command (regexp, bound, noerror, count, -1, 1, 1);
2337 }
2338
2339 DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
2340        "sPosix search: ",
2341        doc: /* Search forward from point for regular expression REGEXP.
2342 Find the longest match in accord with Posix regular expression rules.
2343 Set point to the end of the occurrence found, and return point.
2344 An optional second argument bounds the search; it is a buffer position.
2345 The match found must not extend after that position.
2346 Optional third argument, if t, means if fail just return nil (no error).
2347   If not nil and not t, move to limit of search and return nil.
2348 Optional fourth argument is repeat count--search for successive occurrences.
2349 See also the functions `match-beginning', `match-end', `match-string',
2350 and `replace-match'.  */)
2351   (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2352 {
2353   return search_command (regexp, bound, noerror, count, 1, 1, 1);
2354 }
2355 \f
2356 DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
2357        doc: /* Replace text matched by last search with NEWTEXT.
2358 Leave point at the end of the replacement text.
2359
2360 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
2361 Otherwise maybe capitalize the whole text, or maybe just word initials,
2362 based on the replaced text.
2363 If the replaced text has only capital letters
2364 and has at least one multiletter word, convert NEWTEXT to all caps.
2365 Otherwise if all words are capitalized in the replaced text,
2366 capitalize each word in NEWTEXT.
2367
2368 If third arg LITERAL is non-nil, insert NEWTEXT literally.
2369 Otherwise treat `\\' as special:
2370   `\\&' in NEWTEXT means substitute original matched text.
2371   `\\N' means substitute what matched the Nth `\\(...\\)'.
2372        If Nth parens didn't match, substitute nothing.
2373   `\\\\' means insert one `\\'.
2374 Case conversion does not apply to these substitutions.
2375
2376 FIXEDCASE and LITERAL are optional arguments.
2377
2378 The optional fourth argument STRING can be a string to modify.
2379 This is meaningful when the previous match was done against STRING,
2380 using `string-match'.  When used this way, `replace-match'
2381 creates and returns a new string made by copying STRING and replacing
2382 the part of STRING that was matched.
2383
2384 The optional fifth argument SUBEXP specifies a subexpression;
2385 it says to replace just that subexpression with NEWTEXT,
2386 rather than replacing the entire matched text.
2387 This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
2388 `\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
2389 NEWTEXT in place of subexp N.
2390 This is useful only after a regular expression search or match,
2391 since only regular expressions have distinguished subexpressions.  */)
2392   (Lisp_Object newtext, Lisp_Object fixedcase, Lisp_Object literal, Lisp_Object string, Lisp_Object subexp)
2393 {
2394   enum { nochange, all_caps, cap_initial } case_action;
2395   register int pos, pos_byte;
2396   int some_multiletter_word;
2397   int some_lowercase;
2398   int some_uppercase;
2399   int some_nonuppercase_initial;
2400   register int c, prevc;
2401   int sub;
2402   EMACS_INT opoint, newpoint;
2403
2404   CHECK_STRING (newtext);
2405
2406   if (! NILP (string))
2407     CHECK_STRING (string);
2408
2409   case_action = nochange;       /* We tried an initialization */
2410                                 /* but some C compilers blew it */
2411
2412   if (search_regs.num_regs <= 0)
2413     error ("`replace-match' called before any match found");
2414
2415   if (NILP (subexp))
2416     sub = 0;
2417   else
2418     {
2419       CHECK_NUMBER (subexp);
2420       sub = XINT (subexp);
2421       if (sub < 0 || sub >= search_regs.num_regs)
2422         args_out_of_range (subexp, make_number (search_regs.num_regs));
2423     }
2424
2425   if (NILP (string))
2426     {
2427       if (search_regs.start[sub] < BEGV
2428           || search_regs.start[sub] > search_regs.end[sub]
2429           || search_regs.end[sub] > ZV)
2430         args_out_of_range (make_number (search_regs.start[sub]),
2431                            make_number (search_regs.end[sub]));
2432     }
2433   else
2434     {
2435       if (search_regs.start[sub] < 0
2436           || search_regs.start[sub] > search_regs.end[sub]
2437           || search_regs.end[sub] > SCHARS (string))
2438         args_out_of_range (make_number (search_regs.start[sub]),
2439                            make_number (search_regs.end[sub]));
2440     }
2441
2442   if (NILP (fixedcase))
2443     {
2444       /* Decide how to casify by examining the matched text. */
2445       EMACS_INT last;
2446
2447       pos = search_regs.start[sub];
2448       last = search_regs.end[sub];
2449
2450       if (NILP (string))
2451         pos_byte = CHAR_TO_BYTE (pos);
2452       else
2453         pos_byte = string_char_to_byte (string, pos);
2454
2455       prevc = '\n';
2456       case_action = all_caps;
2457
2458       /* some_multiletter_word is set nonzero if any original word
2459          is more than one letter long. */
2460       some_multiletter_word = 0;
2461       some_lowercase = 0;
2462       some_nonuppercase_initial = 0;
2463       some_uppercase = 0;
2464
2465       while (pos < last)
2466         {
2467           if (NILP (string))
2468             {
2469               c = FETCH_CHAR_AS_MULTIBYTE (pos_byte);
2470               INC_BOTH (pos, pos_byte);
2471             }
2472           else
2473             FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, pos, pos_byte);
2474
2475           if (LOWERCASEP (c))
2476             {
2477               /* Cannot be all caps if any original char is lower case */
2478
2479               some_lowercase = 1;
2480               if (SYNTAX (prevc) != Sword)
2481                 some_nonuppercase_initial = 1;
2482               else
2483                 some_multiletter_word = 1;
2484             }
2485           else if (UPPERCASEP (c))
2486             {
2487               some_uppercase = 1;
2488               if (SYNTAX (prevc) != Sword)
2489                 ;
2490               else
2491                 some_multiletter_word = 1;
2492             }
2493           else
2494             {
2495               /* If the initial is a caseless word constituent,
2496                  treat that like a lowercase initial.  */
2497               if (SYNTAX (prevc) != Sword)
2498                 some_nonuppercase_initial = 1;
2499             }
2500
2501           prevc = c;
2502         }
2503
2504       /* Convert to all caps if the old text is all caps
2505          and has at least one multiletter word.  */
2506       if (! some_lowercase && some_multiletter_word)
2507         case_action = all_caps;
2508       /* Capitalize each word, if the old text has all capitalized words.  */
2509       else if (!some_nonuppercase_initial && some_multiletter_word)
2510         case_action = cap_initial;
2511       else if (!some_nonuppercase_initial && some_uppercase)
2512         /* Should x -> yz, operating on X, give Yz or YZ?
2513            We'll assume the latter.  */
2514         case_action = all_caps;
2515       else
2516         case_action = nochange;
2517     }
2518
2519   /* Do replacement in a string.  */
2520   if (!NILP (string))
2521     {
2522       Lisp_Object before, after;
2523
2524       before = Fsubstring (string, make_number (0),
2525                            make_number (search_regs.start[sub]));
2526       after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
2527
2528       /* Substitute parts of the match into NEWTEXT
2529          if desired.  */
2530       if (NILP (literal))
2531         {
2532           EMACS_INT lastpos = 0;
2533           EMACS_INT lastpos_byte = 0;
2534           /* We build up the substituted string in ACCUM.  */
2535           Lisp_Object accum;
2536           Lisp_Object middle;
2537           int length = SBYTES (newtext);
2538
2539           accum = Qnil;
2540
2541           for (pos_byte = 0, pos = 0; pos_byte < length;)
2542             {
2543               int substart = -1;
2544               int subend = 0;
2545               int delbackslash = 0;
2546
2547               FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2548
2549               if (c == '\\')
2550                 {
2551                   FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2552
2553                   if (c == '&')
2554                     {
2555                       substart = search_regs.start[sub];
2556                       subend = search_regs.end[sub];
2557                     }
2558                   else if (c >= '1' && c <= '9')
2559                     {
2560                       if (search_regs.start[c - '0'] >= 0
2561                           && c <= search_regs.num_regs + '0')
2562                         {
2563                           substart = search_regs.start[c - '0'];
2564                           subend = search_regs.end[c - '0'];
2565                         }
2566                       else
2567                         {
2568                           /* If that subexp did not match,
2569                              replace \\N with nothing.  */
2570                           substart = 0;
2571                           subend = 0;
2572                         }
2573                     }
2574                   else if (c == '\\')
2575                     delbackslash = 1;
2576                   else
2577                     error ("Invalid use of `\\' in replacement text");
2578                 }
2579               if (substart >= 0)
2580                 {
2581                   if (pos - 2 != lastpos)
2582                     middle = substring_both (newtext, lastpos,
2583                                              lastpos_byte,
2584                                              pos - 2, pos_byte - 2);
2585                   else
2586                     middle = Qnil;
2587                   accum = concat3 (accum, middle,
2588                                    Fsubstring (string,
2589                                                make_number (substart),
2590                                                make_number (subend)));
2591                   lastpos = pos;
2592                   lastpos_byte = pos_byte;
2593                 }
2594               else if (delbackslash)
2595                 {
2596                   middle = substring_both (newtext, lastpos,
2597                                            lastpos_byte,
2598                                            pos - 1, pos_byte - 1);
2599
2600                   accum = concat2 (accum, middle);
2601                   lastpos = pos;
2602                   lastpos_byte = pos_byte;
2603                 }
2604             }
2605
2606           if (pos != lastpos)
2607             middle = substring_both (newtext, lastpos,
2608                                      lastpos_byte,
2609                                      pos, pos_byte);
2610           else
2611             middle = Qnil;
2612
2613           newtext = concat2 (accum, middle);
2614         }
2615
2616       /* Do case substitution in NEWTEXT if desired.  */
2617       if (case_action == all_caps)
2618         newtext = Fupcase (newtext);
2619       else if (case_action == cap_initial)
2620         newtext = Fupcase_initials (newtext);
2621
2622       return concat3 (before, newtext, after);
2623     }
2624
2625   /* Record point, then move (quietly) to the start of the match.  */
2626   if (PT >= search_regs.end[sub])
2627     opoint = PT - ZV;
2628   else if (PT > search_regs.start[sub])
2629     opoint = search_regs.end[sub] - ZV;
2630   else
2631     opoint = PT;
2632
2633   /* If we want non-literal replacement,
2634      perform substitution on the replacement string.  */
2635   if (NILP (literal))
2636     {
2637       int length = SBYTES (newtext);
2638       unsigned char *substed;
2639       int substed_alloc_size, substed_len;
2640       int buf_multibyte = !NILP (current_buffer->enable_multibyte_characters);
2641       int str_multibyte = STRING_MULTIBYTE (newtext);
2642       Lisp_Object rev_tbl;
2643       int really_changed = 0;
2644
2645       rev_tbl = Qnil;
2646
2647       substed_alloc_size = length * 2 + 100;
2648       substed = (unsigned char *) xmalloc (substed_alloc_size + 1);
2649       substed_len = 0;
2650
2651       /* Go thru NEWTEXT, producing the actual text to insert in
2652          SUBSTED while adjusting multibyteness to that of the current
2653          buffer.  */
2654
2655       for (pos_byte = 0, pos = 0; pos_byte < length;)
2656         {
2657           unsigned char str[MAX_MULTIBYTE_LENGTH];
2658           const unsigned char *add_stuff = NULL;
2659           int add_len = 0;
2660           int idx = -1;
2661
2662           if (str_multibyte)
2663             {
2664               FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte);
2665               if (!buf_multibyte)
2666                 c = multibyte_char_to_unibyte (c, rev_tbl);
2667             }
2668           else
2669             {
2670               /* Note that we don't have to increment POS.  */
2671               c = SREF (newtext, pos_byte++);
2672               if (buf_multibyte)
2673                 MAKE_CHAR_MULTIBYTE (c);
2674             }
2675
2676           /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2677              or set IDX to a match index, which means put that part
2678              of the buffer text into SUBSTED.  */
2679
2680           if (c == '\\')
2681             {
2682               really_changed = 1;
2683
2684               if (str_multibyte)
2685                 {
2686                   FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext,
2687                                                       pos, pos_byte);
2688                   if (!buf_multibyte && !ASCII_CHAR_P (c))
2689                     c = multibyte_char_to_unibyte (c, rev_tbl);
2690                 }
2691               else
2692                 {
2693                   c = SREF (newtext, pos_byte++);
2694                   if (buf_multibyte)
2695                     MAKE_CHAR_MULTIBYTE (c);
2696                 }
2697
2698               if (c == '&')
2699                 idx = sub;
2700               else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
2701                 {
2702                   if (search_regs.start[c - '0'] >= 1)
2703                     idx = c - '0';
2704                 }
2705               else if (c == '\\')
2706                 add_len = 1, add_stuff = "\\";
2707               else
2708                 {
2709                   xfree (substed);
2710                   error ("Invalid use of `\\' in replacement text");
2711                 }
2712             }
2713           else
2714             {
2715               add_len = CHAR_STRING (c, str);
2716               add_stuff = str;
2717             }
2718
2719           /* If we want to copy part of a previous match,
2720              set up ADD_STUFF and ADD_LEN to point to it.  */
2721           if (idx >= 0)
2722             {
2723               EMACS_INT begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
2724               add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
2725               if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
2726                 move_gap (search_regs.start[idx]);
2727               add_stuff = BYTE_POS_ADDR (begbyte);
2728             }
2729
2730           /* Now the stuff we want to add to SUBSTED
2731              is invariably ADD_LEN bytes starting at ADD_STUFF.  */
2732
2733           /* Make sure SUBSTED is big enough.  */
2734           if (substed_len + add_len >= substed_alloc_size)
2735             {
2736               substed_alloc_size = substed_len + add_len + 500;
2737               substed = (unsigned char *) xrealloc (substed,
2738                                                     substed_alloc_size + 1);
2739             }
2740
2741           /* Now add to the end of SUBSTED.  */
2742           if (add_stuff)
2743             {
2744               memcpy (substed + substed_len, add_stuff, add_len);
2745               substed_len += add_len;
2746             }
2747         }
2748
2749       if (really_changed)
2750         {
2751           if (buf_multibyte)
2752             {
2753               int nchars = multibyte_chars_in_text (substed, substed_len);
2754
2755               newtext = make_multibyte_string (substed, nchars, substed_len);
2756             }
2757           else
2758             newtext = make_unibyte_string (substed, substed_len);
2759         }
2760       xfree (substed);
2761     }
2762
2763   /* Replace the old text with the new in the cleanest possible way.  */
2764   replace_range (search_regs.start[sub], search_regs.end[sub],
2765                  newtext, 1, 0, 1);
2766   newpoint = search_regs.start[sub] + SCHARS (newtext);
2767
2768   if (case_action == all_caps)
2769     Fupcase_region (make_number (search_regs.start[sub]),
2770                     make_number (newpoint));
2771   else if (case_action == cap_initial)
2772     Fupcase_initials_region (make_number (search_regs.start[sub]),
2773                              make_number (newpoint));
2774
2775   /* Adjust search data for this change.  */
2776   {
2777     EMACS_INT oldend = search_regs.end[sub];
2778     EMACS_INT oldstart = search_regs.start[sub];
2779     EMACS_INT change = newpoint - search_regs.end[sub];
2780     int i;
2781
2782     for (i = 0; i < search_regs.num_regs; i++)
2783       {
2784         if (search_regs.start[i] >= oldend)
2785           search_regs.start[i] += change;
2786         else if (search_regs.start[i] > oldstart)
2787           search_regs.start[i] = oldstart;
2788         if (search_regs.end[i] >= oldend)
2789           search_regs.end[i] += change;
2790         else if (search_regs.end[i] > oldstart)
2791           search_regs.end[i] = oldstart;
2792       }
2793   }
2794
2795   /* Put point back where it was in the text.  */
2796   if (opoint <= 0)
2797     TEMP_SET_PT (opoint + ZV);
2798   else
2799     TEMP_SET_PT (opoint);
2800
2801   /* Now move point "officially" to the start of the inserted replacement.  */
2802   move_if_not_intangible (newpoint);
2803
2804   return Qnil;
2805 }
2806 \f
2807 static Lisp_Object
2808 match_limit (Lisp_Object num, int beginningp)
2809 {
2810   register int n;
2811
2812   CHECK_NUMBER (num);
2813   n = XINT (num);
2814   if (n < 0)
2815     args_out_of_range (num, make_number (0));
2816   if (search_regs.num_regs <= 0)
2817     error ("No match data, because no search succeeded");
2818   if (n >= search_regs.num_regs
2819       || search_regs.start[n] < 0)
2820     return Qnil;
2821   return (make_number ((beginningp) ? search_regs.start[n]
2822                                     : search_regs.end[n]));
2823 }
2824
2825 DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
2826        doc: /* Return position of start of text matched by last search.
2827 SUBEXP, a number, specifies which parenthesized expression in the last
2828   regexp.
2829 Value is nil if SUBEXPth pair didn't match, or there were less than
2830   SUBEXP pairs.
2831 Zero means the entire text matched by the whole regexp or whole string.  */)
2832   (Lisp_Object subexp)
2833 {
2834   return match_limit (subexp, 1);
2835 }
2836
2837 DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
2838        doc: /* Return position of end of text matched by last search.
2839 SUBEXP, a number, specifies which parenthesized expression in the last
2840   regexp.
2841 Value is nil if SUBEXPth pair didn't match, or there were less than
2842   SUBEXP pairs.
2843 Zero means the entire text matched by the whole regexp or whole string.  */)
2844   (Lisp_Object subexp)
2845 {
2846   return match_limit (subexp, 0);
2847 }
2848
2849 DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 3, 0,
2850        doc: /* Return a list containing all info on what the last search matched.
2851 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2852 All the elements are markers or nil (nil if the Nth pair didn't match)
2853 if the last match was on a buffer; integers or nil if a string was matched.
2854 Use `set-match-data' to reinstate the data in this list.
2855
2856 If INTEGERS (the optional first argument) is non-nil, always use
2857 integers \(rather than markers) to represent buffer positions.  In
2858 this case, and if the last match was in a buffer, the buffer will get
2859 stored as one additional element at the end of the list.
2860
2861 If REUSE is a list, reuse it as part of the value.  If REUSE is long
2862 enough to hold all the values, and if INTEGERS is non-nil, no consing
2863 is done.
2864
2865 If optional third arg RESEAT is non-nil, any previous markers on the
2866 REUSE list will be modified to point to nowhere.
2867
2868 Return value is undefined if the last search failed.  */)
2869   (Lisp_Object integers, Lisp_Object reuse, Lisp_Object reseat)
2870 {
2871   Lisp_Object tail, prev;
2872   Lisp_Object *data;
2873   int i, len;
2874
2875   if (!NILP (reseat))
2876     for (tail = reuse; CONSP (tail); tail = XCDR (tail))
2877       if (MARKERP (XCAR (tail)))
2878         {
2879           unchain_marker (XMARKER (XCAR (tail)));
2880           XSETCAR (tail, Qnil);
2881         }
2882
2883   if (NILP (last_thing_searched))
2884     return Qnil;
2885
2886   prev = Qnil;
2887
2888   data = (Lisp_Object *) alloca ((2 * search_regs.num_regs + 1)
2889                                  * sizeof (Lisp_Object));
2890
2891   len = 0;
2892   for (i = 0; i < search_regs.num_regs; i++)
2893     {
2894       int start = search_regs.start[i];
2895       if (start >= 0)
2896         {
2897           if (EQ (last_thing_searched, Qt)
2898               || ! NILP (integers))
2899             {
2900               XSETFASTINT (data[2 * i], start);
2901               XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
2902             }
2903           else if (BUFFERP (last_thing_searched))
2904             {
2905               data[2 * i] = Fmake_marker ();
2906               Fset_marker (data[2 * i],
2907                            make_number (start),
2908                            last_thing_searched);
2909               data[2 * i + 1] = Fmake_marker ();
2910               Fset_marker (data[2 * i + 1],
2911                            make_number (search_regs.end[i]),
2912                            last_thing_searched);
2913             }
2914           else
2915             /* last_thing_searched must always be Qt, a buffer, or Qnil.  */
2916             abort ();
2917
2918           len = 2 * i + 2;
2919         }
2920       else
2921         data[2 * i] = data[2 * i + 1] = Qnil;
2922     }
2923
2924   if (BUFFERP (last_thing_searched) && !NILP (integers))
2925     {
2926       data[len] = last_thing_searched;
2927       len++;
2928     }
2929
2930   /* If REUSE is not usable, cons up the values and return them.  */
2931   if (! CONSP (reuse))
2932     return Flist (len, data);
2933
2934   /* If REUSE is a list, store as many value elements as will fit
2935      into the elements of REUSE.  */
2936   for (i = 0, tail = reuse; CONSP (tail);
2937        i++, tail = XCDR (tail))
2938     {
2939       if (i < len)
2940         XSETCAR (tail, data[i]);
2941       else
2942         XSETCAR (tail, Qnil);
2943       prev = tail;
2944     }
2945
2946   /* If we couldn't fit all value elements into REUSE,
2947      cons up the rest of them and add them to the end of REUSE.  */
2948   if (i < len)
2949     XSETCDR (prev, Flist (len - i, data + i));
2950
2951   return reuse;
2952 }
2953
2954 /* We used to have an internal use variant of `reseat' described as:
2955
2956       If RESEAT is `evaporate', put the markers back on the free list
2957       immediately.  No other references to the markers must exist in this
2958       case, so it is used only internally on the unwind stack and
2959       save-match-data from Lisp.
2960
2961    But it was ill-conceived: those supposedly-internal markers get exposed via
2962    the undo-list, so freeing them here is unsafe.  */
2963
2964 DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 2, 0,
2965        doc: /* Set internal data on last search match from elements of LIST.
2966 LIST should have been created by calling `match-data' previously.
2967
2968 If optional arg RESEAT is non-nil, make markers on LIST point nowhere.  */)
2969   (register Lisp_Object list, Lisp_Object reseat)
2970 {
2971   register int i;
2972   register Lisp_Object marker;
2973
2974   if (running_asynch_code)
2975     save_search_regs ();
2976
2977   CHECK_LIST (list);
2978
2979   /* Unless we find a marker with a buffer or an explicit buffer
2980      in LIST, assume that this match data came from a string.  */
2981   last_thing_searched = Qt;
2982
2983   /* Allocate registers if they don't already exist.  */
2984   {
2985     int length = XFASTINT (Flength (list)) / 2;
2986
2987     if (length > search_regs.num_regs)
2988       {
2989         if (search_regs.num_regs == 0)
2990           {
2991             search_regs.start
2992               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2993             search_regs.end
2994               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2995           }
2996         else
2997           {
2998             search_regs.start
2999               = (regoff_t *) xrealloc (search_regs.start,
3000                                        length * sizeof (regoff_t));
3001             search_regs.end
3002               = (regoff_t *) xrealloc (search_regs.end,
3003                                        length * sizeof (regoff_t));
3004           }
3005
3006         for (i = search_regs.num_regs; i < length; i++)
3007           search_regs.start[i] = -1;
3008
3009         search_regs.num_regs = length;
3010       }
3011
3012     for (i = 0; CONSP (list); i++)
3013       {
3014         marker = XCAR (list);
3015         if (BUFFERP (marker))
3016           {
3017             last_thing_searched = marker;
3018             break;
3019           }
3020         if (i >= length)
3021           break;
3022         if (NILP (marker))
3023           {
3024             search_regs.start[i] = -1;
3025             list = XCDR (list);
3026           }
3027         else
3028           {
3029             EMACS_INT from;
3030             Lisp_Object m;
3031
3032             m = marker;
3033             if (MARKERP (marker))
3034               {
3035                 if (XMARKER (marker)->buffer == 0)
3036                   XSETFASTINT (marker, 0);
3037                 else
3038                   XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
3039               }
3040
3041             CHECK_NUMBER_COERCE_MARKER (marker);
3042             from = XINT (marker);
3043
3044             if (!NILP (reseat) && MARKERP (m))
3045               {
3046                 unchain_marker (XMARKER (m));
3047                 XSETCAR (list, Qnil);
3048               }
3049
3050             if ((list = XCDR (list), !CONSP (list)))
3051               break;
3052
3053             m = marker = XCAR (list);
3054
3055             if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
3056               XSETFASTINT (marker, 0);
3057
3058             CHECK_NUMBER_COERCE_MARKER (marker);
3059             search_regs.start[i] = from;
3060             search_regs.end[i] = XINT (marker);
3061
3062             if (!NILP (reseat) && MARKERP (m))
3063               {
3064                 unchain_marker (XMARKER (m));
3065                 XSETCAR (list, Qnil);
3066               }
3067           }
3068         list = XCDR (list);
3069       }
3070
3071     for (; i < search_regs.num_regs; i++)
3072       search_regs.start[i] = -1;
3073   }
3074
3075   return Qnil;
3076 }
3077
3078 /* If non-zero the match data have been saved in saved_search_regs
3079    during the execution of a sentinel or filter. */
3080 static int search_regs_saved;
3081 static struct re_registers saved_search_regs;
3082 static Lisp_Object saved_last_thing_searched;
3083
3084 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
3085    if asynchronous code (filter or sentinel) is running. */
3086 static void
3087 save_search_regs (void)
3088 {
3089   if (!search_regs_saved)
3090     {
3091       saved_search_regs.num_regs = search_regs.num_regs;
3092       saved_search_regs.start = search_regs.start;
3093       saved_search_regs.end = search_regs.end;
3094       saved_last_thing_searched = last_thing_searched;
3095       last_thing_searched = Qnil;
3096       search_regs.num_regs = 0;
3097       search_regs.start = 0;
3098       search_regs.end = 0;
3099
3100       search_regs_saved = 1;
3101     }
3102 }
3103
3104 /* Called upon exit from filters and sentinels. */
3105 void
3106 restore_search_regs (void)
3107 {
3108   if (search_regs_saved)
3109     {
3110       if (search_regs.num_regs > 0)
3111         {
3112           xfree (search_regs.start);
3113           xfree (search_regs.end);
3114         }
3115       search_regs.num_regs = saved_search_regs.num_regs;
3116       search_regs.start = saved_search_regs.start;
3117       search_regs.end = saved_search_regs.end;
3118       last_thing_searched = saved_last_thing_searched;
3119       saved_last_thing_searched = Qnil;
3120       search_regs_saved = 0;
3121     }
3122 }
3123
3124 static Lisp_Object
3125 unwind_set_match_data (Lisp_Object list)
3126 {
3127   /* It is NOT ALWAYS safe to free (evaporate) the markers immediately.  */
3128   return Fset_match_data (list, Qt);
3129 }
3130
3131 /* Called to unwind protect the match data.  */
3132 void
3133 record_unwind_save_match_data (void)
3134 {
3135   record_unwind_protect (unwind_set_match_data,
3136                          Fmatch_data (Qnil, Qnil, Qnil));
3137 }
3138
3139 /* Quote a string to inactivate reg-expr chars */
3140
3141 DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
3142        doc: /* Return a regexp string which matches exactly STRING and nothing else.  */)
3143   (Lisp_Object string)
3144 {
3145   register unsigned char *in, *out, *end;
3146   register unsigned char *temp;
3147   int backslashes_added = 0;
3148
3149   CHECK_STRING (string);
3150
3151   temp = (unsigned char *) alloca (SBYTES (string) * 2);
3152
3153   /* Now copy the data into the new string, inserting escapes. */
3154
3155   in = SDATA (string);
3156   end = in + SBYTES (string);
3157   out = temp;
3158
3159   for (; in != end; in++)
3160     {
3161       if (*in == '['
3162           || *in == '*' || *in == '.' || *in == '\\'
3163           || *in == '?' || *in == '+'
3164           || *in == '^' || *in == '$')
3165         *out++ = '\\', backslashes_added++;
3166       *out++ = *in;
3167     }
3168
3169   return make_specified_string (temp,
3170                                 SCHARS (string) + backslashes_added,
3171                                 out - temp,
3172                                 STRING_MULTIBYTE (string));
3173 }
3174 \f
3175 void
3176 syms_of_search (void)
3177 {
3178   register int i;
3179
3180   for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
3181     {
3182       searchbufs[i].buf.allocated = 100;
3183       searchbufs[i].buf.buffer = (unsigned char *) xmalloc (100);
3184       searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
3185       searchbufs[i].regexp = Qnil;
3186       searchbufs[i].whitespace_regexp = Qnil;
3187       searchbufs[i].syntax_table = Qnil;
3188       staticpro (&searchbufs[i].regexp);
3189       staticpro (&searchbufs[i].whitespace_regexp);
3190       staticpro (&searchbufs[i].syntax_table);
3191       searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
3192     }
3193   searchbuf_head = &searchbufs[0];
3194
3195   Qsearch_failed = intern_c_string ("search-failed");
3196   staticpro (&Qsearch_failed);
3197   Qinvalid_regexp = intern_c_string ("invalid-regexp");
3198   staticpro (&Qinvalid_regexp);
3199
3200   Fput (Qsearch_failed, Qerror_conditions,
3201         pure_cons (Qsearch_failed, pure_cons (Qerror, Qnil)));
3202   Fput (Qsearch_failed, Qerror_message,
3203         make_pure_c_string ("Search failed"));
3204
3205   Fput (Qinvalid_regexp, Qerror_conditions,
3206         pure_cons (Qinvalid_regexp, pure_cons (Qerror, Qnil)));
3207   Fput (Qinvalid_regexp, Qerror_message,
3208         make_pure_c_string ("Invalid regexp"));
3209
3210   last_thing_searched = Qnil;
3211   staticpro (&last_thing_searched);
3212
3213   saved_last_thing_searched = Qnil;
3214   staticpro (&saved_last_thing_searched);
3215
3216   DEFVAR_LISP ("search-spaces-regexp", &Vsearch_spaces_regexp,
3217       doc: /* Regexp to substitute for bunches of spaces in regexp search.
3218 Some commands use this for user-specified regexps.
3219 Spaces that occur inside character classes or repetition operators
3220 or other such regexp constructs are not replaced with this.
3221 A value of nil (which is the normal value) means treat spaces literally.  */);
3222   Vsearch_spaces_regexp = Qnil;
3223
3224   DEFVAR_LISP ("inhibit-changing-match-data", &Vinhibit_changing_match_data,
3225       doc: /* Internal use only.
3226 If non-nil, the primitive searching and matching functions
3227 such as `looking-at', `string-match', `re-search-forward', etc.,
3228 do not set the match data.  The proper way to use this variable
3229 is to bind it with `let' around a small expression.  */);
3230   Vinhibit_changing_match_data = Qnil;
3231
3232   defsubr (&Slooking_at);
3233   defsubr (&Sposix_looking_at);
3234   defsubr (&Sstring_match);
3235   defsubr (&Sposix_string_match);
3236   defsubr (&Ssearch_forward);
3237   defsubr (&Ssearch_backward);
3238   defsubr (&Sword_search_forward);
3239   defsubr (&Sword_search_backward);
3240   defsubr (&Sword_search_forward_lax);
3241   defsubr (&Sword_search_backward_lax);
3242   defsubr (&Sre_search_forward);
3243   defsubr (&Sre_search_backward);
3244   defsubr (&Sposix_search_forward);
3245   defsubr (&Sposix_search_backward);
3246   defsubr (&Sreplace_match);
3247   defsubr (&Smatch_beginning);
3248   defsubr (&Smatch_end);
3249   defsubr (&Smatch_data);
3250   defsubr (&Sset_match_data);
3251   defsubr (&Sregexp_quote);
3252 }
3253
3254 /* arch-tag: a6059d79-0552-4f14-a2cb-d379a4e3c78f
3255    (do not change this comment) */