src/search.c

   1 /* String search routines for GNU Emacs.
   2    Copyright (C) 1985, 1986, 1987, 1993, 1994, 1997, 1998, 1999, 2001, 2002,
   3                  2003, 2004, 2005, 2006, 2007, 2008, 2009
   4                  Free Software Foundation, Inc.
   5
   6 This file is part of GNU Emacs.
   7
   8 GNU Emacs is free software: you can redistribute it and/or modify
   9 it under the terms of the GNU General Public License as published by
  10 the Free Software Foundation, either version 3 of the License, or
  11 (at your option) any later version.
  12
  13 GNU Emacs is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
  20
  21
  22 #include <config.h>
  23 #include "lisp.h"
  24 #include "syntax.h"
  25 #include "category.h"
  26 #include "buffer.h"
  27 #include "character.h"
  28 #include "charset.h"
  29 #include "region-cache.h"
  30 #include "commands.h"
  31 #include "blockinput.h"
  32 #include "intervals.h"
  33
  34 #include <sys/types.h>
  35 #include "regex.h"
  36
   /* Number of entries in the compiled-regexp cache; this is the size of
      the searchbufs array.  */
   37 #define REGEXP_CACHE_SIZE 20
  38
  39 /* If the regexp is non-nil, then the buffer contains the compiled form
  40    of that regexp, suitable for searching.  */
  41 struct regexp_cache
  42 {
  43   struct regexp_cache *next;
  44   Lisp_Object regexp, whitespace_regexp;
  45   /* Syntax table for which the regexp applies.  We need this because
  46      of character classes.  If this is t, then the compiled pattern is valid
  47      for any syntax-table.  */
  48   Lisp_Object syntax_table;
  49   struct re_pattern_buffer buf;
  50   char fastmap[0400];
  51   /* Nonzero means regexp was compiled to do full POSIX backtracking.  */
  52   char posix;
  53 };
  54
   55 /* The cache entries themselves: REGEXP_CACHE_SIZE instances of
         struct regexp_cache.  */
   56 struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
   57
   58 /* The head of the linked list; points to the most recently used buffer.  */
   59 struct regexp_cache *searchbuf_head;
   60
   61
   62 /* Every call to re_match, etc., must pass &search_regs as the regs
   63    argument unless you can show it is unnecessary (i.e., if re_match
   64    is certainly going to be called again before region-around-match
   65    can be called).
   66
   67    Since the registers are now dynamically allocated, we need to make
   68    sure not to refer to the Nth register before checking that it has
   69    been allocated by checking search_regs.num_regs.
   70
   71    The regex code keeps track of whether it has allocated the search
   72    buffer using bits in the re_pattern_buffer.  This means that whenever
   73    you compile a new pattern, it completely forgets whether it has
   74    allocated any registers, and will allocate new registers the next
   75    time you call a searching or matching function.  Therefore, we need
   76    to call re_set_registers after compiling a new pattern or after
   77    setting the match registers, so that the regex functions will be
   78    able to free or re-allocate it properly.  */
   79 static struct re_registers search_regs;
   80
   81 /* The buffer in which the last search was performed, or
   82    Qt if the last search was done in a string;
   83    Qnil if no searching has been done yet.
         The matching functions in this file update it only when
         Vinhibit_changing_match_data is nil.  */
   84 static Lisp_Object last_thing_searched;
   85
   86 /* Error condition signaled when compiling a regexp fails.  */
   87
   88 Lisp_Object Qinvalid_regexp;
   89
   90 /* Error condition used for failing searches.  */
   91 Lisp_Object Qsearch_failed;
   92
      /* When this is a string, compile_pattern_1 installs its text with
         re_set_whitespace_regexp for the duration of pattern compilation
         and records it in the cache entry; with any other value no
         whitespace regexp is installed.  */
   93 Lisp_Object Vsearch_spaces_regexp;
   94
   95 /* If non-nil, the match data will not be changed during call to
   96    searching or matching functions.  This variable is for internal use
   97    only.  */
   98 Lisp_Object Vinhibit_changing_match_data;
   99
      /* Forward declarations of file-local functions (old-style, without
         parameter lists).  */
  100 static void set_search_regs ();
  101 static void save_search_regs ();
  102 static int simple_search ();
  103 static int boyer_moore ();
  104 static int search_buffer ();
  105 static void matcher_overflow () NO_RETURN;
 106
  107 static void
  108 matcher_overflow ()
  109 {
        /* Report a stack overflow in the regexp matcher.  Callers in this
           file invoke this when a match function returns -2.  The forward
           declaration marks this function NO_RETURN, so `error' is
           expected not to return control here.  */
  110   error ("Stack overflow in regexp matcher");
  111 }
 112
 113 /* Compile a regexp and signal a Lisp error if anything goes wrong.
 114    PATTERN is the pattern to compile.
 115    CP is the place to put the result.
 116    TRANSLATE is a translation table for ignoring case, or nil for none.
 117    REGP is the structure that says where to store the "register"
 118    values that will result from matching this pattern.
 119    If it is 0, we should compile the pattern not to record any
 120    subexpression bounds.
 121    POSIX is nonzero if we want full backtracking (POSIX style)
 122    for this pattern.  0 means backtrack only enough to get a valid match.
 123
 124    The behavior also depends on Vsearch_spaces_regexp.  */
 125
 126 static void
 127 compile_pattern_1 (cp, pattern, translate, regp, posix)
 128      struct regexp_cache *cp;
 129      Lisp_Object pattern;
 130      Lisp_Object translate;
 131      struct re_registers *regp;
 132      int posix;
 133 {
 134   char *val;
 135   reg_syntax_t old;
 136
 137   cp->regexp = Qnil;
 138   cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
 139   cp->posix = posix;
 140   cp->buf.multibyte = STRING_MULTIBYTE (pattern);
 141   cp->buf.charset_unibyte = charset_unibyte;
 142   if (STRINGP (Vsearch_spaces_regexp))
 143     cp->whitespace_regexp = Vsearch_spaces_regexp;
 144   else
 145     cp->whitespace_regexp = Qnil;
 146
 147   /* rms: I think BLOCK_INPUT is not needed here any more,
 148      because regex.c defines malloc to call xmalloc.
 149      Using BLOCK_INPUT here means the debugger won't run if an error occurs.
 150      So let's turn it off.  */
 151   /*  BLOCK_INPUT;  */
 152   old = re_set_syntax (RE_SYNTAX_EMACS
 153                        | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
 154
 155   if (STRINGP (Vsearch_spaces_regexp))
 156     re_set_whitespace_regexp (SDATA (Vsearch_spaces_regexp));
 157   else
 158     re_set_whitespace_regexp (NULL);
 159
 160   val = (char *) re_compile_pattern ((char *) SDATA (pattern),
 161                                      SBYTES (pattern), &cp->buf);
 162
 163   /* If the compiled pattern hard codes some of the contents of the
 164      syntax-table, it can only be reused with *this* syntax table.  */
 165   cp->syntax_table = cp->buf.used_syntax ? current_buffer->syntax_table : Qt;
 166
 167   re_set_whitespace_regexp (NULL);
 168
 169   re_set_syntax (old);
 170   /* UNBLOCK_INPUT;  */
 171   if (val)
 172     xsignal1 (Qinvalid_regexp, build_string (val));
 173
 174   cp->regexp = Fcopy_sequence (pattern);
 175 }
 176
 177 /* Shrink each compiled regexp buffer in the cache
 178    to the size actually used right now.
 179    This is called from garbage collection.  */
 180
 181 void
 182 shrink_regexp_cache ()
 183 {
 184   struct regexp_cache *cp;
 185
 186   for (cp = searchbuf_head; cp != 0; cp = cp->next)
 187     {
 188       cp->buf.allocated = cp->buf.used;
 189       cp->buf.buffer
 190         = (unsigned char *) xrealloc (cp->buf.buffer, cp->buf.used);
 191     }
 192 }
 193
 194 /* Clear the regexp cache w.r.t. a particular syntax table,
 195    because it was changed.
 196    There is no danger of memory leak here because re_compile_pattern
 197    automagically manages the memory in each re_pattern_buffer struct,
 198    based on its `allocated' and `buffer' values.  */
 199 void
 200 clear_regexp_cache ()
 201 {
 202   int i;
 203
 204   for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
 205     /* It's tempting to compare with the syntax-table we've actually changd,
 206        but it's not sufficient because char-table inheritance mewans that
 207        modifying one syntax-table can change others at the same time.  */
 208     if (!EQ (searchbufs[i].syntax_table, Qt))
 209       searchbufs[i].regexp = Qnil;
 210 }
 211
 212 /* Compile a regexp if necessary, but first check to see if there's one in
 213    the cache.
 214    PATTERN is the pattern to compile.
 215    TRANSLATE is a translation table for ignoring case, or nil for none.
 216    REGP is the structure that says where to store the "register"
 217    values that will result from matching this pattern.
 218    If it is 0, we should compile the pattern not to record any
 219    subexpression bounds.
 220    POSIX is nonzero if we want full backtracking (POSIX style)
 221    for this pattern.  0 means backtrack only enough to get a valid match.  */
 222
 223 struct re_pattern_buffer *
 224 compile_pattern (pattern, regp, translate, posix, multibyte)
 225      Lisp_Object pattern;
 226      struct re_registers *regp;
 227      Lisp_Object translate;
 228      int posix, multibyte;
 229 {
 230   struct regexp_cache *cp, **cpp;
 231
 232   for (cpp = &searchbuf_head; ; cpp = &cp->next)
 233     {
 234       cp = *cpp;
 235       /* Entries are initialized to nil, and may be set to nil by
 236          compile_pattern_1 if the pattern isn't valid.  Don't apply
 237          string accessors in those cases.  However, compile_pattern_1
 238          is only applied to the cache entry we pick here to reuse.  So
 239          nil should never appear before a non-nil entry.  */
 240       if (NILP (cp->regexp))
 241         goto compile_it;
 242       if (SCHARS (cp->regexp) == SCHARS (pattern)
 243           && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
 244           && !NILP (Fstring_equal (cp->regexp, pattern))
 245           && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
 246           && cp->posix == posix
 247           && (EQ (cp->syntax_table, Qt)
 248               || EQ (cp->syntax_table, current_buffer->syntax_table))
 249           && !NILP (Fequal (cp->whitespace_regexp, Vsearch_spaces_regexp))
 250           && cp->buf.charset_unibyte == charset_unibyte)
 251         break;
 252
 253       /* If we're at the end of the cache, compile into the nil cell
 254          we found, or the last (least recently used) cell with a
 255          string value.  */
 256       if (cp->next == 0)
 257         {
 258         compile_it:
 259           compile_pattern_1 (cp, pattern, translate, regp, posix);
 260           break;
 261         }
 262     }
 263
 264   /* When we get here, cp (aka *cpp) contains the compiled pattern,
 265      either because we found it in the cache or because we just compiled it.
 266      Move it to the front of the queue to mark it as most recently used.  */
 267   *cpp = cp->next;
 268   cp->next = searchbuf_head;
 269   searchbuf_head = cp;
 270
 271   /* Advise the searching functions about the space we have allocated
 272      for register data.  */
 273   if (regp)
 274     re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
 275
 276   /* The compiled pattern can be used both for mulitbyte and unibyte
 277      target.  But, we have to tell which the pattern is used for. */
 278   cp->buf.target_multibyte = multibyte;
 279
 280   return &cp->buf;
 281 }
 282
 283 \f
 284 static Lisp_Object
 285 looking_at_1 (string, posix)
 286      Lisp_Object string;
 287      int posix;
 288 {
 289   Lisp_Object val;
 290   unsigned char *p1, *p2;
 291   int s1, s2;
 292   register int i;
 293   struct re_pattern_buffer *bufp;
 294
 295   if (running_asynch_code)
 296     save_search_regs ();
 297
 298   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
 299   XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
 300     = current_buffer->case_eqv_table;
 301
 302   CHECK_STRING (string);
 303   bufp = compile_pattern (string,
 304                           (NILP (Vinhibit_changing_match_data)
 305                            ? &search_regs : NULL),
 306                           (!NILP (current_buffer->case_fold_search)
 307                            ? current_buffer->case_canon_table : Qnil),
 308                           posix,
 309                           !NILP (current_buffer->enable_multibyte_characters));
 310
 311   immediate_quit = 1;
 312   QUIT;                 /* Do a pending quit right away, to avoid paradoxical behavior */
 313
 314   /* Get pointers and sizes of the two strings
 315      that make up the visible portion of the buffer. */
 316
 317   p1 = BEGV_ADDR;
 318   s1 = GPT_BYTE - BEGV_BYTE;
 319   p2 = GAP_END_ADDR;
 320   s2 = ZV_BYTE - GPT_BYTE;
 321   if (s1 < 0)
 322     {
 323       p2 = p1;
 324       s2 = ZV_BYTE - BEGV_BYTE;
 325       s1 = 0;
 326     }
 327   if (s2 < 0)
 328     {
 329       s1 = ZV_BYTE - BEGV_BYTE;
 330       s2 = 0;
 331     }
 332
 333   re_match_object = Qnil;
 334
 335   i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
 336                   PT_BYTE - BEGV_BYTE,
 337                   (NILP (Vinhibit_changing_match_data)
 338                    ? &search_regs : NULL),
 339                   ZV_BYTE - BEGV_BYTE);
 340   immediate_quit = 0;
 341
 342   if (i == -2)
 343     matcher_overflow ();
 344
 345   val = (0 <= i ? Qt : Qnil);
 346   if (NILP (Vinhibit_changing_match_data) && i >= 0)
 347     for (i = 0; i < search_regs.num_regs; i++)
 348       if (search_regs.start[i] >= 0)
 349         {
 350           search_regs.start[i]
 351             = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
 352           search_regs.end[i]
 353             = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
 354         }
 355
 356   /* Set last_thing_searched only when match data is changed.  */
 357   if (NILP (Vinhibit_changing_match_data))
 358     XSETBUFFER (last_thing_searched, current_buffer);
 359
 360   return val;
 361 }
 362
  363 DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
  364        doc: /* Return t if text after point matches regular expression REGEXP.
  365 This function modifies the match data that `match-beginning',
  366 `match-end' and `match-data' access; save and restore the match
  367 data if you want to preserve them.  */)
  368      (regexp)
  369      Lisp_Object regexp;
  370 {
        /* The second argument is looking_at_1's POSIX flag; 0 selects the
           non-POSIX variant.  */
  371   return looking_at_1 (regexp, 0);
  372 }
 373
  374 DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
  375        doc: /* Return t if text after point matches regular expression REGEXP.
  376 Find the longest match, in accord with Posix regular expression rules.
  377 This function modifies the match data that `match-beginning',
  378 `match-end' and `match-data' access; save and restore the match
  379 data if you want to preserve them.  */)
  380      (regexp)
  381      Lisp_Object regexp;
  382 {
        /* The second argument is looking_at_1's POSIX flag; 1 requests
           full POSIX backtracking.  */
  383   return looking_at_1 (regexp, 1);
  384 }
 385 \f
 386 static Lisp_Object
 387 string_match_1 (regexp, string, start, posix)
 388      Lisp_Object regexp, string, start;
 389      int posix;
 390 {
 391   int val;
 392   struct re_pattern_buffer *bufp;
 393   int pos, pos_byte;
 394   int i;
 395
 396   if (running_asynch_code)
 397     save_search_regs ();
 398
 399   CHECK_STRING (regexp);
 400   CHECK_STRING (string);
 401
 402   if (NILP (start))
 403     pos = 0, pos_byte = 0;
 404   else
 405     {
 406       int len = SCHARS (string);
 407
 408       CHECK_NUMBER (start);
 409       pos = XINT (start);
 410       if (pos < 0 && -pos <= len)
 411         pos = len + pos;
 412       else if (0 > pos || pos > len)
 413         args_out_of_range (string, start);
 414       pos_byte = string_char_to_byte (string, pos);
 415     }
 416
 417   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
 418   XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
 419     = current_buffer->case_eqv_table;
 420
 421   bufp = compile_pattern (regexp,
 422                           (NILP (Vinhibit_changing_match_data)
 423                            ? &search_regs : NULL),
 424                           (!NILP (current_buffer->case_fold_search)
 425                            ? current_buffer->case_canon_table : Qnil),
 426                           posix,
 427                           STRING_MULTIBYTE (string));
 428   immediate_quit = 1;
 429   re_match_object = string;
 430
 431   val = re_search (bufp, (char *) SDATA (string),
 432                    SBYTES (string), pos_byte,
 433                    SBYTES (string) - pos_byte,
 434                    (NILP (Vinhibit_changing_match_data)
 435                     ? &search_regs : NULL));
 436   immediate_quit = 0;
 437
 438   /* Set last_thing_searched only when match data is changed.  */
 439   if (NILP (Vinhibit_changing_match_data))
 440     last_thing_searched = Qt;
 441
 442   if (val == -2)
 443     matcher_overflow ();
 444   if (val < 0) return Qnil;
 445
 446   if (NILP (Vinhibit_changing_match_data))
 447     for (i = 0; i < search_regs.num_regs; i++)
 448       if (search_regs.start[i] >= 0)
 449         {
 450           search_regs.start[i]
 451             = string_byte_to_char (string, search_regs.start[i]);
 452           search_regs.end[i]
 453             = string_byte_to_char (string, search_regs.end[i]);
 454         }
 455
 456   return make_number (string_byte_to_char (string, val));
 457 }
 458
  459 DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
  460        doc: /* Return index of start of first match for REGEXP in STRING, or nil.
  461 Matching ignores case if `case-fold-search' is non-nil.
  462 If third arg START is non-nil, start search at that index in STRING.
  463 For index of first char beyond the match, do (match-end 0).
  464 `match-end' and `match-beginning' also give indices of substrings
  465 matched by parenthesis constructs in the pattern.
  466
  467 You can use the function `match-string' to extract the substrings
  468 matched by the parenthesis constructions in REGEXP. */)
  469      (regexp, string, start)
  470      Lisp_Object regexp, string, start;
  471 {
        /* The last argument is string_match_1's POSIX flag; 0 selects the
           non-POSIX variant.  */
  472   return string_match_1 (regexp, string, start, 0);
  473 }
 474
  475 DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
  476        doc: /* Return index of start of first match for REGEXP in STRING, or nil.
  477 Find the longest match, in accord with Posix regular expression rules.
  478 Case is ignored if `case-fold-search' is non-nil in the current buffer.
  479 If third arg START is non-nil, start search at that index in STRING.
  480 For index of first char beyond the match, do (match-end 0).
  481 `match-end' and `match-beginning' also give indices of substrings
  482 matched by parenthesis constructs in the pattern.  */)
  483      (regexp, string, start)
  484      Lisp_Object regexp, string, start;
  485 {
        /* The last argument is string_match_1's POSIX flag; 1 requests
           full POSIX backtracking.  */
  486   return string_match_1 (regexp, string, start, 1);
  487 }
 488
 489 /* Match REGEXP against STRING, searching all of STRING,
 490    and return the index of the match, or negative on failure.
 491    This does not clobber the match data.  */
 492
 493 int
 494 fast_string_match (regexp, string)
 495      Lisp_Object regexp, string;
 496 {
 497   int val;
 498   struct re_pattern_buffer *bufp;
 499
 500   bufp = compile_pattern (regexp, 0, Qnil,
 501                           0, STRING_MULTIBYTE (string));
 502   immediate_quit = 1;
 503   re_match_object = string;
 504
 505   val = re_search (bufp, (char *) SDATA (string),
 506                    SBYTES (string), 0,
 507                    SBYTES (string), 0);
 508   immediate_quit = 0;
 509   return val;
 510 }
 511
 512 /* Match REGEXP against STRING, searching all of STRING ignoring case,
 513    and return the index of the match, or negative on failure.
 514    This does not clobber the match data.
 515    We assume that STRING contains single-byte characters.  */
 516
 517 extern Lisp_Object Vascii_downcase_table;
 518
 519 int
 520 fast_c_string_match_ignore_case (regexp, string)
 521      Lisp_Object regexp;
 522      const char *string;
 523 {
 524   int val;
 525   struct re_pattern_buffer *bufp;
 526   int len = strlen (string);
 527
 528   regexp = string_make_unibyte (regexp);
 529   re_match_object = Qt;
 530   bufp = compile_pattern (regexp, 0,
 531                           Vascii_canon_table, 0,
 532                           0);
 533   immediate_quit = 1;
 534   val = re_search (bufp, string, len, 0, len, 0);
 535   immediate_quit = 0;
 536   return val;
 537 }
 538
 539 /* Like fast_string_match but ignore case.  */
 540
 541 int
 542 fast_string_match_ignore_case (regexp, string)
 543      Lisp_Object regexp, string;
 544 {
 545   int val;
 546   struct re_pattern_buffer *bufp;
 547
 548   bufp = compile_pattern (regexp, 0, Vascii_canon_table,
 549                           0, STRING_MULTIBYTE (string));
 550   immediate_quit = 1;
 551   re_match_object = string;
 552
 553   val = re_search (bufp, (char *) SDATA (string),
 554                    SBYTES (string), 0,
 555                    SBYTES (string), 0);
 556   immediate_quit = 0;
 557   return val;
 558 }
 559 \f
 560 /* Match REGEXP atainst the characters after POS to LIMIT, and return
 561    the number of matched characters.  If STRING is non-nil, match
 562    against the characters in it.  In that case, POS and LIMIT are
 563    indices into the string.  This function doesn't modify the match
 564    data.  */
 565
 566 EMACS_INT
 567 fast_looking_at (regexp, pos, pos_byte, limit, limit_byte, string)
 568      Lisp_Object regexp;
 569      EMACS_INT pos, pos_byte, limit, limit_byte;
 570      Lisp_Object string;
 571 {
 572   int multibyte;
 573   struct re_pattern_buffer *buf;
 574   unsigned char *p1, *p2;
 575   int s1, s2;
 576   EMACS_INT len;
 577
 578   if (STRINGP (string))
 579     {
 580       if (pos_byte < 0)
 581         pos_byte = string_char_to_byte (string, pos);
 582       if (limit_byte < 0)
 583         limit_byte = string_char_to_byte (string, limit);
 584       p1 = NULL;
 585       s1 = 0;
 586       p2 = SDATA (string);
 587       s2 = SBYTES (string);
 588       re_match_object = string;
 589       multibyte = STRING_MULTIBYTE (string);
 590     }
 591   else
 592     {
 593       if (pos_byte < 0)
 594         pos_byte = CHAR_TO_BYTE (pos);
 595       if (limit_byte < 0)
 596         limit_byte = CHAR_TO_BYTE (limit);
 597       pos_byte -= BEGV_BYTE;
 598       limit_byte -= BEGV_BYTE;
 599       p1 = BEGV_ADDR;
 600       s1 = GPT_BYTE - BEGV_BYTE;
 601       p2 = GAP_END_ADDR;
 602       s2 = ZV_BYTE - GPT_BYTE;
 603       if (s1 < 0)
 604         {
 605           p2 = p1;
 606           s2 = ZV_BYTE - BEGV_BYTE;
 607           s1 = 0;
 608         }
 609       if (s2 < 0)
 610         {
 611           s1 = ZV_BYTE - BEGV_BYTE;
 612           s2 = 0;
 613         }
 614       re_match_object = Qnil;
 615       multibyte = ! NILP (current_buffer->enable_multibyte_characters);
 616     }
 617
 618   buf = compile_pattern (regexp, 0, Qnil, 0, multibyte);
 619   immediate_quit = 1;
 620   len = re_match_2 (buf, (char *) p1, s1, (char *) p2, s2,
 621                     pos_byte, NULL, limit_byte);
 622   immediate_quit = 0;
 623
 624   return len;
 625 }
 626
 627 \f
 628 /* The newline cache: remembering which sections of text have no newlines.  */
 629
 630 /* If the user has requested newline caching, make sure it's on.
 631    Otherwise, make sure it's off.
 632    This is our cheezy way of associating an action with the change of
 633    state of a buffer-local variable.  */
 634 static void
 635 newline_cache_on_off (buf)
 636      struct buffer *buf;
 637 {
 638   if (NILP (buf->cache_long_line_scans))
 639     {
 640       /* It should be off.  */
 641       if (buf->newline_cache)
 642         {
 643           free_region_cache (buf->newline_cache);
 644           buf->newline_cache = 0;
 645         }
 646     }
 647   else
 648     {
 649       /* It should be on.  */
 650       if (buf->newline_cache == 0)
 651         buf->newline_cache = new_region_cache ();
 652     }
 653 }
 654
 655 \f
  656 /* Search for COUNT instances of the character TARGET between START and END.
  657
  658    If COUNT is positive, search forwards; END must be >= START.
  659    If COUNT is negative, search backwards for the -COUNTth instance;
  660       END must be <= START.
  661    If COUNT is zero, do anything you please; run rogue, for all I care.
  662
  663    If END is zero, use BEGV or ZV instead, as appropriate for the
  664    direction indicated by COUNT.
  665
  666    If we find COUNT instances, set *SHORTAGE to zero, and return the
  667    position past the COUNTth match.  Note that for reverse motion
  668    this is not the same as the usual convention for Emacs motion commands.
  669
  670    If we don't find COUNT instances before reaching END, set *SHORTAGE
  671    to the number of TARGETs left unfound, and return END.
  672
  673    If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
  674    except when inside redisplay.  */
      /* Additional notes:
         TARGET is compared against individual bytes of buffer text.
         START and END are character positions; the scan itself works on
         byte positions obtained with CHAR_TO_BYTE.
         SHORTAGE may be a null pointer, in which case no shortage is
         reported.
         immediate_quit is reset to 0 before returning.  */
  675
  676 int
  677 scan_buffer (target, start, end, count, shortage, allow_quit)
  678      register int target;
  679      int start, end;
  680      int count;
  681      int *shortage;
  682      int allow_quit;
  683 {
  684   struct region_cache *newline_cache;
  685   int direction;
  686
  687   if (count > 0)
  688     {
  689       direction = 1;
  690       if (! end) end = ZV;
  691     }
  692   else
  693     {
  694       direction = -1;
  695       if (! end) end = BEGV;
  696     }
  697
        /* Create or free the current buffer's newline cache as its
           settings require, then fetch it (it may be null).  */
  698   newline_cache_on_off (current_buffer);
  699   newline_cache = current_buffer->newline_cache;
  700
  701   if (shortage != 0)
  702     *shortage = 0;
  703
  704   immediate_quit = allow_quit;
  705
  706   if (count > 0)
  707     while (start != end)
  708       {
  709         /* Our innermost scanning loop is very simple; it doesn't know
  710            about gaps, buffer ends, or the newline cache.  ceiling is
  711            the position of the last character before the next such
  712            obstacle --- the last character the dumb search loop should
  713            examine.  */
  714         int ceiling_byte = CHAR_TO_BYTE (end) - 1;
  715         int start_byte = CHAR_TO_BYTE (start);
  716         int tem;
  717
  718         /* If we're looking for a newline, consult the newline cache
  719            to see where we can avoid some scanning.  */
  720         if (target == '\n' && newline_cache)
  721           {
  722             int next_change;
                  /* Quitting is switched off around the cache lookups
                     and restored to ALLOW_QUIT afterwards.  */
  723             immediate_quit = 0;
  724             while (region_cache_forward
  725                    (current_buffer, newline_cache, start_byte, &next_change))
  726               start_byte = next_change;
  727             immediate_quit = allow_quit;
  728
  729             /* START should never be after END.  */
  730             if (start_byte > ceiling_byte)
  731               start_byte = ceiling_byte;
  732
  733             /* Now the text after start is an unknown region, and
  734                next_change is the position of the next known region. */
  735             ceiling_byte = min (next_change - 1, ceiling_byte);
  736           }
  737
  738         /* The dumb loop can only scan text stored in contiguous
  739            bytes. BUFFER_CEILING_OF returns the last character
  740            position that is contiguous, so the ceiling is the
  741            position after that.  */
  742         tem = BUFFER_CEILING_OF (start_byte);
  743         ceiling_byte = min (tem, ceiling_byte);
  744
  745         {
  746           /* The termination address of the dumb loop.  */
  747           register unsigned char *ceiling_addr
  748             = BYTE_POS_ADDR (ceiling_byte) + 1;
  749           register unsigned char *cursor
  750             = BYTE_POS_ADDR (start_byte);
                /* BASE addresses byte START_BYTE, so the byte position of
                   CURSOR is START_BYTE + CURSOR - BASE.  */
  751           unsigned char *base = cursor;
  752
  753           while (cursor < ceiling_addr)
  754             {
  755               unsigned char *scan_start = cursor;
  756
  757               /* The dumb loop.  */
  758               while (*cursor != target && ++cursor < ceiling_addr)
  759                 ;
  760
  761               /* If we're looking for newlines, cache the fact that
  762                  the region from start to cursor is free of them. */
  763               if (target == '\n' && newline_cache)
  764                 know_region_cache (current_buffer, newline_cache,
  765                                    start_byte + scan_start - base,
  766                                    start_byte + cursor - base);
  767
  768               /* Did we find the target character?  */
  769               if (cursor < ceiling_addr)
  770                 {
  771                   if (--count == 0)
  772                     {
  773                       immediate_quit = 0;
                            /* The + 1 yields the position just past the
                               matching byte.  */
  774                       return BYTE_TO_CHAR (start_byte + cursor - base + 1);
  775                     }
  776                   cursor++;
  777                 }
  778             }
  779
                /* This stretch is exhausted; resume the outer loop from
                   where the cursor stopped.  */
  780           start = BYTE_TO_CHAR (start_byte + cursor - base);
  781         }
  782       }
  783   else
  784     while (start > end)
  785       {
  786         /* The last character to check before the next obstacle.  */
  787         int ceiling_byte = CHAR_TO_BYTE (end);
  788         int start_byte = CHAR_TO_BYTE (start);
  789         int tem;
  790
  791         /* Consult the newline cache, if appropriate.  */
  792         if (target == '\n' && newline_cache)
  793           {
  794             int next_change;
                  /* Quitting is switched off around the cache lookups
                     and restored to ALLOW_QUIT afterwards.  */
  795             immediate_quit = 0;
  796             while (region_cache_backward
  797                    (current_buffer, newline_cache, start_byte, &next_change))
  798               start_byte = next_change;
  799             immediate_quit = allow_quit;
  800
  801             /* Start should never be at or before end.  */
  802             if (start_byte <= ceiling_byte)
  803               start_byte = ceiling_byte + 1;
  804
  805             /* Now the text before start is an unknown region, and
  806                next_change is the position of the next known region. */
  807             ceiling_byte = max (next_change, ceiling_byte);
  808           }
  809
  810         /* Stop scanning before the gap.  */
  811         tem = BUFFER_FLOOR_OF (start_byte - 1);
  812         ceiling_byte = max (tem, ceiling_byte);
  813
  814         {
  815           /* The termination address of the dumb loop.  */
  816           register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
  817           register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
                /* BASE addresses byte START_BYTE - 1, so
                   START_BYTE + CURSOR - BASE is the position just after
                   the byte at CURSOR.  */
  818           unsigned char *base = cursor;
  819
  820           while (cursor >= ceiling_addr)
  821             {
  822               unsigned char *scan_start = cursor;
  823
  824               while (*cursor != target && --cursor >= ceiling_addr)
  825                 ;
  826
  827               /* If we're looking for newlines, cache the fact that
  828                  the region from after the cursor to start is free of them.  */
  829               if (target == '\n' && newline_cache)
  830                 know_region_cache (current_buffer, newline_cache,
  831                                    start_byte + cursor - base,
  832                                    start_byte + scan_start - base);
  833
  834               /* Did we find the target character?  */
  835               if (cursor >= ceiling_addr)
  836                 {
                        /* COUNT is not positive in this branch and is
                           incremented once per match found.  */
  837                   if (++count >= 0)
  838                     {
  839                       immediate_quit = 0;
  840                       return BYTE_TO_CHAR (start_byte + cursor - base);
  841                     }
  842                   cursor--;
  843                 }
  844             }
  845
                /* This stretch is exhausted; resume the outer loop from
                   where the cursor stopped.  */
  846           start = BYTE_TO_CHAR (start_byte + cursor - base);
  847         }
  848       }
  849
        /* The scan reached END without finding enough matches.  COUNT
           times DIRECTION is the number of instances still unfound.  */
  850   immediate_quit = 0;
  851   if (shortage != 0)
  852     *shortage = count * direction;
  853   return start;
  854 }
 855 \f
 856 /* Search for COUNT instances of a line boundary, which means either a
 857    newline or (if selective display enabled) a carriage return.
 858    Start at START.  If COUNT is negative, search backwards.
 859
 860    We report the resulting position by calling TEMP_SET_PT_BOTH.
 861
 862    If we find COUNT instances. we position after (always after,
 863    even if scanning backwards) the COUNTth match, and return 0.
 864
 865    If we don't find COUNT instances before reaching the end of the
 866    buffer (or the beginning, if scanning backwards), we return
 867    the number of line boundaries left unfound, and position at
 868    the limit we bumped up against.
 869
 870    If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
 871    except in special cases.  */
 872
 873 int
 874 scan_newline (start, start_byte, limit, limit_byte, count, allow_quit)
 875      int start, start_byte;
 876      int limit, limit_byte;
 877      register int count;
 878      int allow_quit;
 879 {
 880   int direction = ((count > 0) ? 1 : -1);
 881
 882   register unsigned char *cursor;
 883   unsigned char *base;
 884
 885   register int ceiling;
 886   register unsigned char *ceiling_addr;
 887
 888   int old_immediate_quit = immediate_quit;
 889
 890   /* The code that follows is like scan_buffer
 891      but checks for either newline or carriage return.  */
 892
 893   if (allow_quit)
 894     immediate_quit++;
 895
 896   start_byte = CHAR_TO_BYTE (start);
 897
 898   if (count > 0)
 899     {
 900       while (start_byte < limit_byte)
 901         {
 902           ceiling =  BUFFER_CEILING_OF (start_byte);
 903           ceiling = min (limit_byte - 1, ceiling);
 904           ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
 905           base = (cursor = BYTE_POS_ADDR (start_byte));
 906           while (1)
 907             {
 908               while (*cursor != '\n' && ++cursor != ceiling_addr)
 909                 ;
 910
 911               if (cursor != ceiling_addr)
 912                 {
 913                   if (--count == 0)
 914                     {
 915                       immediate_quit = old_immediate_quit;
 916                       start_byte = start_byte + cursor - base + 1;
 917                       start = BYTE_TO_CHAR (start_byte);
 918                       TEMP_SET_PT_BOTH (start, start_byte);
 919                       return 0;
 920                     }
 921                   else
 922                     if (++cursor == ceiling_addr)
 923                       break;
 924                 }
 925               else
 926                 break;
 927             }
 928           start_byte += cursor - base;
 929         }
 930     }
 931   else
 932     {
 933       while (start_byte > limit_byte)
 934         {
 935           ceiling = BUFFER_FLOOR_OF (start_byte - 1);
 936           ceiling = max (limit_byte, ceiling);
 937           ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
 938           base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
 939           while (1)
 940             {
 941               while (--cursor != ceiling_addr && *cursor != '\n')
 942                 ;
 943
 944               if (cursor != ceiling_addr)
 945                 {
 946                   if (++count == 0)
 947                     {
 948                       immediate_quit = old_immediate_quit;
 949                       /* Return the position AFTER the match we found.  */
 950                       start_byte = start_byte + cursor - base + 1;
 951                       start = BYTE_TO_CHAR (start_byte);
 952                       TEMP_SET_PT_BOTH (start, start_byte);
 953                       return 0;
 954                     }
 955                 }
 956               else
 957                 break;
 958             }
 959           /* Here we add 1 to compensate for the last decrement
 960              of CURSOR, which took it past the valid range.  */
 961           start_byte += cursor - base + 1;
 962         }
 963     }
 964
 965   TEMP_SET_PT_BOTH (limit, limit_byte);
 966   immediate_quit = old_immediate_quit;
 967
 968   return count * direction;
 969 }
 970
 971 int
 972 find_next_newline_no_quit (from, cnt)
 973      register int from, cnt;
 974 {
 975   return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0);
 976 }
 977
 978 /* Like find_next_newline, but returns position before the newline,
 979    not after, and only search up to TO.  This isn't just
 980    find_next_newline (...)-1, because you might hit TO.  */
 981
 982 int
 983 find_before_next_newline (from, to, cnt)
 984      int from, to, cnt;
 985 {
 986   int shortage;
 987   int pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);
 988
 989   if (shortage == 0)
 990     pos--;
 991
 992   return pos;
 993 }
 994 \f
 995 /* Subroutines of Lisp buffer search functions. */
 996
 997 static Lisp_Object
 998 search_command (string, bound, noerror, count, direction, RE, posix)
 999      Lisp_Object string, bound, noerror, count;
1000      int direction;
1001      int RE;
1002      int posix;
1003 {
1004   register int np;
1005   int lim, lim_byte;
1006   int n = direction;
1007
1008   if (!NILP (count))
1009     {
1010       CHECK_NUMBER (count);
1011       n *= XINT (count);
1012     }
1013
1014   CHECK_STRING (string);
1015   if (NILP (bound))
1016     {
1017       if (n > 0)
1018         lim = ZV, lim_byte = ZV_BYTE;
1019       else
1020         lim = BEGV, lim_byte = BEGV_BYTE;
1021     }
1022   else
1023     {
1024       CHECK_NUMBER_COERCE_MARKER (bound);
1025       lim = XINT (bound);
1026       if (n > 0 ? lim < PT : lim > PT)
1027         error ("Invalid search bound (wrong side of point)");
1028       if (lim > ZV)
1029         lim = ZV, lim_byte = ZV_BYTE;
1030       else if (lim < BEGV)
1031         lim = BEGV, lim_byte = BEGV_BYTE;
1032       else
1033         lim_byte = CHAR_TO_BYTE (lim);
1034     }
1035
1036   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
1037   XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
1038     = current_buffer->case_eqv_table;
1039
1040   np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
1041                       (!NILP (current_buffer->case_fold_search)
1042                        ? current_buffer->case_canon_table
1043                        : Qnil),
1044                       (!NILP (current_buffer->case_fold_search)
1045                        ? current_buffer->case_eqv_table
1046                        : Qnil),
1047                       posix);
1048   if (np <= 0)
1049     {
1050       if (NILP (noerror))
1051         xsignal1 (Qsearch_failed, string);
1052
1053       if (!EQ (noerror, Qt))
1054         {
1055           if (lim < BEGV || lim > ZV)
1056             abort ();
1057           SET_PT_BOTH (lim, lim_byte);
1058           return Qnil;
1059 #if 0 /* This would be clean, but maybe programs depend on
1060          a value of nil here.  */
1061           np = lim;
1062 #endif
1063         }
1064       else
1065         return Qnil;
1066     }
1067
1068   if (np < BEGV || np > ZV)
1069     abort ();
1070
1071   SET_PT (np);
1072
1073   return make_number (np);
1074 }
1075 \f
1076 /* Return 1 if REGEXP it matches just one constant string.  */
1077
1078 static int
1079 trivial_regexp_p (regexp)
1080      Lisp_Object regexp;
1081 {
1082   int len = SBYTES (regexp);
1083   unsigned char *s = SDATA (regexp);
1084   while (--len >= 0)
1085     {
1086       switch (*s++)
1087         {
1088         case '.': case '*': case '+': case '?': case '[': case '^': case '$':
1089           return 0;
1090         case '\\':
1091           if (--len < 0)
1092             return 0;
1093           switch (*s++)
1094             {
1095             case '|': case '(': case ')': case '`': case '\'': case 'b':
1096             case 'B': case '<': case '>': case 'w': case 'W': case 's':
1097             case 'S': case '=': case '{': case '}': case '_':
1098             case 'c': case 'C': /* for categoryspec and notcategoryspec */
1099             case '1': case '2': case '3': case '4': case '5':
1100             case '6': case '7': case '8': case '9':
1101               return 0;
1102             }
1103         }
1104     }
1105   return 1;
1106 }
1107
1108 /* Search for the n'th occurrence of STRING in the current buffer,
1109    starting at position POS and stopping at position LIM,
1110    treating STRING as a literal string if RE is false or as
1111    a regular expression if RE is true.
1112
1113    If N is positive, searching is forward and LIM must be greater than POS.
1114    If N is negative, searching is backward and LIM must be less than POS.
1115
1116    Returns -x if x occurrences remain to be found (x > 0),
1117    or else the position at the beginning of the Nth occurrence
1118    (if searching backward) or the end (if searching forward).
1119
1120    POSIX is nonzero if we want full backtracking (POSIX style)
1121    for this pattern.  0 means backtrack only enough to get a valid match.  */
1122
/* Store in OUT the translation of character D according to the
   translation table TRT.

   If TRT is nil, or the entry Faref returns for D is not an integer,
   OUT is simply set to D.  OUT must be an lvalue; OUT and D may be
   the same variable.

   D can be evaluated twice (once to look it up and once to copy it
   into OUT), so it must not have side effects.  The macro arguments
   are parenthesized, and the internal temporary has a name callers
   are unlikely to use, so that an argument expression cannot be
   reparsed or captured by the expansion.  */

#define TRANSLATE(out, trt, d)                                  \
do                                                              \
  {                                                             \
    if (! NILP (trt))                                           \
      {                                                         \
        Lisp_Object translate_entry_;                           \
        translate_entry_ = Faref ((trt), make_number (d));      \
        if (INTEGERP (translate_entry_))                        \
          (out) = XINT (translate_entry_);                      \
        else                                                    \
          (out) = (d);                                          \
      }                                                         \
    else                                                        \
      (out) = (d);                                              \
  }                                                             \
while (0)
1139
1140 /* Only used in search_buffer, to record the start or end position of
1141    the match when searching regexps and SEARCH_REGS should not be
1142    changed (i.e. Vinhibit_changing_match_data is non-nil).  */
1143 static struct re_registers search_regs_1;
1144
     /* Search the current buffer for the N'th occurrence of STRING,
        starting at POS/POS_BYTE and going no further than LIM/LIM_BYTE.
        N > 0 searches forward, N < 0 backward.

        If RE is nonzero STRING is a regexp, otherwise a literal string.
        A regexp that trivial_regexp_p accepts is searched as a literal
        string too, provided Vsearch_spaces_regexp is nil.

        TRT is the translation table applied to the text (nil for none)
        and INVERSE_TRT the table used to enumerate the characters that
        translate alike; POSIX is passed on to compile_pattern.

        Returns the character position of the end of the last match
        (forward) or of its beginning (backward); if matches are still
        missing, returns minus the number of matches not found.  */
1145 static int
1146 search_buffer (string, pos, pos_byte, lim, lim_byte, n,
1147                RE, trt, inverse_trt, posix)
1148      Lisp_Object string;
1149      int pos;
1150      int pos_byte;
1151      int lim;
1152      int lim_byte;
1153      int n;
1154      int RE;
1155      Lisp_Object trt;
1156      Lisp_Object inverse_trt;
1157      int posix;
1158 {
1159   int len = SCHARS (string);
1160   int len_byte = SBYTES (string);
1161   register int i;
1162
1163   if (running_asynch_code)
1164     save_search_regs ();
1165
1166   /* Searching 0 times means don't move.  */
1167   /* Null string is found at starting position.  */
1168   if (len == 0 || n == 0)
1169     {
1170       set_search_regs (pos_byte, 0);
1171       return pos;
1172     }
1173
       /* Use the regexp matcher unless STRING is a trivial regexp and
          Vsearch_spaces_regexp is nil; in that case, and for literal
          strings, the else-branch below does a plain string search.  */
1174   if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp)))
1175     {
1176       unsigned char *p1, *p2;
1177       int s1, s2;
1178       struct re_pattern_buffer *bufp;
1179
           /* The registers handed to the matcher are SEARCH_REGS, or the
              private SEARCH_REGS_1 when the match data must not change.  */
1180       bufp = compile_pattern (string,
1181                               (NILP (Vinhibit_changing_match_data)
1182                                ? &search_regs : &search_regs_1),
1183                               trt, posix,
1184                               !NILP (current_buffer->enable_multibyte_characters));
1185
1186       immediate_quit = 1;       /* Quit immediately if user types ^G,
1187                                    because letting this function finish
1188                                    can take too long. */
1189       QUIT;                     /* Do a pending quit right away,
1190                                    to avoid paradoxical behavior */
1191       /* Get pointers and sizes of the two strings
1192          that make up the visible portion of the buffer. */
1193
1194       p1 = BEGV_ADDR;
1195       s1 = GPT_BYTE - BEGV_BYTE;
1196       p2 = GAP_END_ADDR;
1197       s2 = ZV_BYTE - GPT_BYTE;
           /* S1 < 0 means GPT_BYTE lies before BEGV_BYTE: all of the
              visible text is then described by the second string.  */
1198       if (s1 < 0)
1199         {
1200           p2 = p1;
1201           s2 = ZV_BYTE - BEGV_BYTE;
1202           s1 = 0;
1203         }
           /* S2 < 0 means GPT_BYTE lies after ZV_BYTE: all of the
              visible text is then described by the first string.  */
1204       if (s2 < 0)
1205         {
1206           s1 = ZV_BYTE - BEGV_BYTE;
1207           s2 = 0;
1208         }
1209       re_match_object = Qnil;
1210
           /* Backward search: one re_search_2 call per match wanted.
              The range argument LIM_BYTE - POS_BYTE is negative here.  */
1211       while (n < 0)
1212         {
1213           int val;
1214           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1215                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1216                              (NILP (Vinhibit_changing_match_data)
1217                               ? &search_regs : &search_regs_1),
1218                              /* Don't allow match past current point */
1219                              pos_byte - BEGV_BYTE);
1220           if (val == -2)
1221             {
1222               matcher_overflow ();
1223             }
1224           if (val >= 0)
1225             {
1226               if (NILP (Vinhibit_changing_match_data))
1227                 {
                       /* The registers hold byte offsets relative to
                          BEGV_BYTE.  Take the new byte position from
                          them first, then convert every register that
                          is in use to a character position.  */
1228                   pos_byte = search_regs.start[0] + BEGV_BYTE;
1229                   for (i = 0; i < search_regs.num_regs; i++)
1230                     if (search_regs.start[i] >= 0)
1231                       {
1232                         search_regs.start[i]
1233                           = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1234                         search_regs.end[i]
1235                           = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1236                       }
1237                   XSETBUFFER (last_thing_searched, current_buffer);
1238                   /* Set pos to the new position. */
1239                   pos = search_regs.start[0];
1240                 }
1241               else
1242                 {
                       /* SEARCH_REGS_1 is left in byte offsets; only
                          the start of the match is needed.  */
1243                   pos_byte = search_regs_1.start[0] + BEGV_BYTE;
1244                   /* Set pos to the new position.  */
1245                   pos = BYTE_TO_CHAR (search_regs_1.start[0] + BEGV_BYTE);
1246                 }
1247             }
1248           else
1249             {
                   /* No match.  N is negative here, so returning it
                      reports -N matches as still missing.  */
1250               immediate_quit = 0;
1251               return (n);
1252             }
1253           n++;
1254         }
           /* Forward search; mirrors the loop above but continues from
              the end of each match.  */
1255       while (n > 0)
1256         {
1257           int val;
1258           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1259                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1260                              (NILP (Vinhibit_changing_match_data)
1261                               ? &search_regs : &search_regs_1),
1262                              lim_byte - BEGV_BYTE);
1263           if (val == -2)
1264             {
1265               matcher_overflow ();
1266             }
1267           if (val >= 0)
1268             {
1269               if (NILP (Vinhibit_changing_match_data))
1270                 {
1271                   pos_byte = search_regs.end[0] + BEGV_BYTE;
1272                   for (i = 0; i < search_regs.num_regs; i++)
1273                     if (search_regs.start[i] >= 0)
1274                       {
1275                         search_regs.start[i]
1276                           = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1277                         search_regs.end[i]
1278                           = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1279                       }
1280                   XSETBUFFER (last_thing_searched, current_buffer);
1281                   pos = search_regs.end[0];
1282                 }
1283               else
1284                 {
1285                   pos_byte = search_regs_1.end[0] + BEGV_BYTE;
1286                   pos = BYTE_TO_CHAR (search_regs_1.end[0] + BEGV_BYTE);
1287                 }
1288             }
1289           else
1290             {
                   /* No match: report the N matches still missing as a
                      negative number.  */
1291               immediate_quit = 0;
1292               return (0 - n);
1293             }
1294           n--;
1295         }
1296       immediate_quit = 0;
1297       return (pos);
1298     }
1299   else                          /* non-RE case */
1300     {
1301       unsigned char *raw_pattern, *pat;
1302       int raw_pattern_size;
1303       int raw_pattern_size_byte;
1304       unsigned char *patbuf;
1305       int multibyte = !NILP (current_buffer->enable_multibyte_characters);
1306       unsigned char *base_pat;
1307       /* Set to positive if we find a non-ASCII char that need
1308          translation.  Otherwise set to zero later.  */
1309       int char_base = -1;
1310       int boyer_moore_ok = 1;
1311
1312       /* MULTIBYTE says whether the text to be searched is multibyte.
1313          We must convert PATTERN to match that, or we will not really
1314          find things right.  */
1315
1316       if (multibyte == STRING_MULTIBYTE (string))
1317         {
               /* Same representation as the buffer: use STRING's data
                  directly, without copying.  */
1318           raw_pattern = (unsigned char *) SDATA (string);
1319           raw_pattern_size = SCHARS (string);
1320           raw_pattern_size_byte = SBYTES (string);
1321         }
1322       else if (multibyte)
1323         {
               /* Unibyte pattern, multibyte buffer: convert a copy of
                  the pattern to multibyte.  */
1324           raw_pattern_size = SCHARS (string);
1325           raw_pattern_size_byte
1326             = count_size_as_multibyte (SDATA (string),
1327                                        raw_pattern_size);
1328           raw_pattern = (unsigned char *) alloca (raw_pattern_size_byte + 1);
1329           copy_text (SDATA (string), raw_pattern,
1330                      SCHARS (string), 0, 1);
1331         }
1332       else
1333         {
1334           /* Converting multibyte to single-byte.
1335
1336              ??? Perhaps this conversion should be done in a special way
1337              by subtracting nonascii-insert-offset from each non-ASCII char,
1338              so that only the multibyte chars which really correspond to
1339              the chosen single-byte character set can possibly match.  */
1340           raw_pattern_size = SCHARS (string);
1341           raw_pattern_size_byte = SCHARS (string);
1342           raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
1343           copy_text (SDATA (string), raw_pattern,
1344                      SBYTES (string), 1, 0);
1345         }
1346
1347       /* Copy and optionally translate the pattern.  */
1348       len = raw_pattern_size;
1349       len_byte = raw_pattern_size_byte;
           /* Room for LEN characters of the longest possible encoding,
              since a translated character may need more bytes than
              the original one.  */
1350       patbuf = (unsigned char *) alloca (len * MAX_MULTIBYTE_LENGTH);
1351       pat = patbuf;
1352       base_pat = raw_pattern;
1353       if (multibyte)
1354         {
1355           /* Fill patbuf by translated characters in STRING while
1356              checking if we can use boyer-moore search.  If TRT is
1357              non-nil, we can use boyer-moore search only if TRT can be
1358              represented by the byte array of 256 elements.  For that,
1359              all non-ASCII case-equivalents of all case-sensitive
1360              characters in STRING must belong to the same charset and
1361              row.  */
1362
1363           while (--len >= 0)
1364             {
1365               unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str;
1366               int c, translated, inverse;
1367               int in_charlen, charlen;
1368
1369               /* If we got here and the RE flag is set, it's because we're
1370                  dealing with a regexp known to be trivial, so the backslash
1371                  just quotes the next character.  */
1372               if (RE && *base_pat == '\\')
1373                 {
                       /* Drop the backslash: it counts neither as a
                          character nor as a byte of the pattern.  */
1374                   len--;
1375                   raw_pattern_size--;
1376                   len_byte--;
1377                   base_pat++;
1378                 }
1379
1380               c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen);
1381
1382               if (NILP (trt))
1383                 {
1384                   str = base_pat;
1385                   charlen = in_charlen;
1386                 }
1387               else
1388                 {
1389                   /* Translate the character.  */
1390                   TRANSLATE (translated, trt, c);
1391                   charlen = CHAR_STRING (translated, str_base);
1392                   str = str_base;
1393
1394                   /* Check if C has any other case-equivalents.  */
1395                   TRANSLATE (inverse, inverse_trt, c);
1396                   /* If so, check if we can use boyer-moore.  */
1397                   if (c != inverse && boyer_moore_ok)
1398                     {
1399                       /* Check if all equivalents belong to the same
1400                          group of characters.  Note that the check of C
1401                          itself is done by the last iteration.  */
1402                       int this_char_base = -1;
1403
                           /* Follow the chain of equivalents through
                              INVERSE_TRT until it comes back to C.
                              Characters are grouped by their value
                              with the low 6 bits cleared.  */
1404                       while (boyer_moore_ok)
1405                         {
1406                           if (ASCII_BYTE_P (inverse))
1407                             {
1408                               if (this_char_base > 0)
1409                                 boyer_moore_ok = 0;
1410                               else
1411                                 this_char_base = 0;
1412                             }
1413                           else if (CHAR_BYTE8_P (inverse))
1414                             /* Boyer-moore search can't handle a
1415                                translation of an eight-bit
1416                                character.  */
1417                             boyer_moore_ok = 0;
1418                           else if (this_char_base < 0)
1419                             {
1420                               this_char_base = inverse & ~0x3F;
1421                               if (char_base < 0)
1422                                 char_base = this_char_base;
1423                               else if (this_char_base != char_base)
1424                                 boyer_moore_ok = 0;
1425                             }
1426                           else if ((inverse & ~0x3F) != this_char_base)
1427                             boyer_moore_ok = 0;
1428                           if (c == inverse)
1429                             break;
1430                           TRANSLATE (inverse, inverse_trt, inverse);
1431                         }
1432                     }
1433                 }
1434
1435               /* Store this character into the translated pattern.  */
1436               bcopy (str, pat, charlen);
1437               pat += charlen;
1438               base_pat += in_charlen;
1439               len_byte -= in_charlen;
1440             }
1441
1442           /* If char_base is still negative we didn't find any translated
1443              non-ASCII characters.  */
1444           if (char_base < 0)
1445             char_base = 0;
1446         }
1447       else
1448         {
1449           /* Unibyte buffer.  */
1450           char_base = 0;
1451           while (--len >= 0)
1452             {
1453               int c, translated;
1454
1455               /* If we got here and the RE flag is set, it's because we're
1456                  dealing with a regexp known to be trivial, so the backslash
1457                  just quotes the next character.  */
1458               if (RE && *base_pat == '\\')
1459                 {
1460                   len--;
1461                   raw_pattern_size--;
1462                   base_pat++;
1463                 }
1464               c = *base_pat++;
1465               TRANSLATE (translated, trt, c);
1466               *pat++ = translated;
1467             }
1468         }
1469
           /* From here on LEN_BYTE is the size of the translated
              pattern in PATBUF and LEN its length in characters, with
              the quoting backslashes already removed.  */
1470       len_byte = pat - patbuf;
1471       len = raw_pattern_size;
1472       pat = base_pat = patbuf;
1473
1474       if (boyer_moore_ok)
1475         return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
1476                             pos, pos_byte, lim, lim_byte,
1477                             char_base);
1478       else
1479         return simple_search (n, pat, len, len_byte, trt,
1480                               pos, pos_byte, lim, lim_byte);
1481     }
1482 }
1483 \f
1484 /* Do a simple string search N times for the string PAT,
1485    whose length is LEN/LEN_BYTE,
1486    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1487    TRT is the translation table.
1488
1489    Return the character position where the match is found.
1490    Otherwise, if M matches remained to be found, return -M.
1491
1492    This kind of search works regardless of what is in PAT and
1493    regardless of what is in TRT.  It is used in cases where
1494    boyer_moore cannot work.  */
1495
1496 static int
1497 simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
1498      int n;
1499      unsigned char *pat;
1500      int len, len_byte;
1501      Lisp_Object trt;
1502      int pos, pos_byte;
1503      int lim, lim_byte;
1504 {
1505   int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1506   int forward = n > 0;
1507   /* Number of buffer bytes matched.  Note that this may be different
1508      from len_byte in a multibyte buffer.  */
1509   int match_byte;
1510
1511   if (lim > pos && multibyte)
1512     while (n > 0)
1513       {
1514         while (1)
1515           {
1516             /* Try matching at position POS.  */
1517             int this_pos = pos;
1518             int this_pos_byte = pos_byte;
1519             int this_len = len;
1520             int this_len_byte = len_byte;
1521             unsigned char *p = pat;
1522             if (pos + len > lim || pos_byte + len_byte > lim_byte)
1523               goto stop;
1524
1525             while (this_len > 0)
1526               {
1527                 int charlen, buf_charlen;
1528                 int pat_ch, buf_ch;
1529
1530                 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1531                 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1532                                                  ZV_BYTE - this_pos_byte,
1533                                                  buf_charlen);
1534                 TRANSLATE (buf_ch, trt, buf_ch);
1535
1536                 if (buf_ch != pat_ch)
1537                   break;
1538
1539                 this_len_byte -= charlen;
1540                 this_len--;
1541                 p += charlen;
1542
1543                 this_pos_byte += buf_charlen;
1544                 this_pos++;
1545               }
1546
1547             if (this_len == 0)
1548               {
1549                 match_byte = this_pos_byte - pos_byte;
1550                 pos += len;
1551                 pos_byte += match_byte;
1552                 break;
1553               }
1554
1555             INC_BOTH (pos, pos_byte);
1556           }
1557
1558         n--;
1559       }
1560   else if (lim > pos)
1561     while (n > 0)
1562       {
1563         while (1)
1564           {
1565             /* Try matching at position POS.  */
1566             int this_pos = pos;
1567             int this_len = len;
1568             unsigned char *p = pat;
1569
1570             if (pos + len > lim)
1571               goto stop;
1572
1573             while (this_len > 0)
1574               {
1575                 int pat_ch = *p++;
1576                 int buf_ch = FETCH_BYTE (this_pos);
1577                 TRANSLATE (buf_ch, trt, buf_ch);
1578
1579                 if (buf_ch != pat_ch)
1580                   break;
1581
1582                 this_len--;
1583                 this_pos++;
1584               }
1585
1586             if (this_len == 0)
1587               {
1588                 match_byte = len;
1589                 pos += len;
1590                 break;
1591               }
1592
1593             pos++;
1594           }
1595
1596         n--;
1597       }
1598   /* Backwards search.  */
1599   else if (lim < pos && multibyte)
1600     while (n < 0)
1601       {
1602         while (1)
1603           {
1604             /* Try matching at position POS.  */
1605             int this_pos = pos - len;
1606             int this_pos_byte;
1607             int this_len = len;
1608             int this_len_byte = len_byte;
1609             unsigned char *p = pat;
1610
1611             if (this_pos < lim || (pos_byte - len_byte) < lim_byte)
1612               goto stop;
1613             this_pos_byte = CHAR_TO_BYTE (this_pos);
1614             match_byte = pos_byte - this_pos_byte;
1615
1616             while (this_len > 0)
1617               {
1618                 int charlen, buf_charlen;
1619                 int pat_ch, buf_ch;
1620
1621                 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1622                 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1623                                                  ZV_BYTE - this_pos_byte,
1624                                                  buf_charlen);
1625                 TRANSLATE (buf_ch, trt, buf_ch);
1626
1627                 if (buf_ch != pat_ch)
1628                   break;
1629
1630                 this_len_byte -= charlen;
1631                 this_len--;
1632                 p += charlen;
1633                 this_pos_byte += buf_charlen;
1634                 this_pos++;
1635               }
1636
1637             if (this_len == 0)
1638               {
1639                 pos -= len;
1640                 pos_byte -= match_byte;
1641                 break;
1642               }
1643
1644             DEC_BOTH (pos, pos_byte);
1645           }
1646
1647         n++;
1648       }
1649   else if (lim < pos)
1650     while (n < 0)
1651       {
1652         while (1)
1653           {
1654             /* Try matching at position POS.  */
1655             int this_pos = pos - len;
1656             int this_len = len;
1657             unsigned char *p = pat;
1658
1659             if (this_pos < lim)
1660               goto stop;
1661
1662             while (this_len > 0)
1663               {
1664                 int pat_ch = *p++;
1665                 int buf_ch = FETCH_BYTE (this_pos);
1666                 TRANSLATE (buf_ch, trt, buf_ch);
1667
1668                 if (buf_ch != pat_ch)
1669                   break;
1670                 this_len--;
1671                 this_pos++;
1672               }
1673
1674             if (this_len == 0)
1675               {
1676                 match_byte = len;
1677                 pos -= len;
1678                 break;
1679               }
1680
1681             pos--;
1682           }
1683
1684         n++;
1685       }
1686
1687  stop:
1688   if (n == 0)
1689     {
1690       if (forward)
1691         set_search_regs ((multibyte ? pos_byte : pos) - match_byte, match_byte);
1692       else
1693         set_search_regs (multibyte ? pos_byte : pos, match_byte);
1694
1695       return pos;
1696     }
1697   else if (n > 0)
1698     return -n;
1699   else
1700     return n;
1701 }
1702 \f
1703 /* Do Boyer-Moore search N times for the string BASE_PAT,
1704    whose length is LEN/LEN_BYTE,
1705    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1706    The sign of N says which direction we search in (see DIRECTION below).
1707    TRT and INVERSE_TRT are translation tables.
1708    Characters in BASE_PAT are already translated by TRT.
1709
1710    This kind of search works if all the characters in BASE_PAT that
1711    have nontrivial translation are the same aside from the last byte.
1712    This makes it possible to translate just the last byte of a
1713    character, and do so after just a simple test of the context.
1714    CHAR_BASE is nonzero if there is such a non-ASCII character.
1715
1716    If that criterion is not satisfied, do not call this function.

        If all the requested occurrences are found, return the character
        position of the end (the beginning, when searching backward) of
        the last one; each match is recorded through set_search_regs.
        Otherwise return the number of occurrences still missing,
        negated, so that a failure is always a negative value.  */
1717
1718 static int
1719 boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
1720              pos, pos_byte, lim, lim_byte, char_base)
1721      int n;
1722      unsigned char *base_pat;
1723      int len, len_byte;
1724      Lisp_Object trt;
1725      Lisp_Object inverse_trt;
1726      int pos, pos_byte;
1727      int lim, lim_byte;
1728      int char_base;
1729 {
       /* N == 0 also yields -1 here; in that case the search loop below
          is never entered.  */
1730   int direction = ((n > 0) ? 1 : -1);
1731   register int dirlen;
1732   int infinity, limit, stride_for_teases = 0;
1733   register int *BM_tab;
1734   int *BM_tab_base;
1735   register unsigned char *cursor, *p_limit;
1736   register int i, j;
1737   unsigned char *pat, *pat_end;
1738   int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1739
1740   unsigned char simple_translate[0400];
1741   /* These are set to the preceding bytes of a byte to be translated
1742      if char_base is nonzero.  As the maximum byte length of a
1743      multibyte character is 5, we have to check at most four previous
1744      bytes.  */
1745   int translate_prev_byte1 = 0;
1746   int translate_prev_byte2 = 0;
1747   int translate_prev_byte3 = 0;
1748   int translate_prev_byte4 = 0;
       /* NOTE(review): translate_prev_byte4 is assigned below, but none of
          the checks in the two match loops reads it.  */
1749
       /* One stride per possible byte value (0400 == 256 entries).  */
1750   BM_tab = (int *) alloca (0400 * sizeof (int));
1751
1752   /* The general approach is that we are going to maintain that we know */
1753   /* the first (closest to the present position, in whatever direction */
1754   /* we're searching) character that could possibly be the last */
1755   /* (furthest from present position) character of a valid match.  We */
1756   /* advance the state of our knowledge by looking at that character */
1757   /* and seeing whether it indeed matches the last character of the */
1758   /* pattern.  If it does, we take a closer look.  If it does not, we */
1759   /* move our pointer (to putative last characters) as far as is */
1760   /* logically possible.  This amount of movement, which I call a */
1761   /* stride, will be the length of the pattern if the actual character */
1762   /* appears nowhere in the pattern, otherwise it will be the distance */
1763   /* from the last occurrence of that character to the end of the */
1764   /* pattern. */
1765   /* As a coding trick, an enormous stride is coded into the table for */
1766   /* characters that match the last character.  This allows use of only */
1767   /* a single test, a test for having gone past the end of the */
1768   /* permissible match region, to test for both possible matches (when */
1769   /* the stride goes past the end immediately) and failure to */
1770   /* match (where you get nudged past the end one stride at a time). */
1771
1772   /* Here we make a "mickey mouse" BM table.  The stride of the search */
1773   /* is determined only by the last character of the putative match. */
1774   /* If that character does not match, we will stride the proper */
1775   /* distance to propose a match that superimposes it on the last */
1776   /* instance of a character that matches it (per trt), or misses */
1777   /* it entirely if there is none. */
1778
1779   dirlen = len_byte * direction;
       /* For now INFINITY is only a sentinel: the table-building loop
          below forces I to it after the final pattern byte, so that byte
          gets the stride DIRLEN - INFINITY, i.e. (LIM_BYTE + POS_BYTE
          + 2 * LEN_BYTE) * DIRECTION.  INFINITY itself is turned into
          that stride once the table is complete.  */
1780   infinity = dirlen - (lim_byte + pos_byte + len_byte + len_byte) * direction;
1781
1782   /* Record position after the end of the pattern.  */
1783   pat_end = base_pat + len_byte;
1784   /* BASE_PAT points to a character that we start scanning from.
1785      It is the first character in a forward search,
1786      the last character in a backward search.  */
1787   if (direction < 0)
1788     base_pat = pat_end - 1;
1789
1790   BM_tab_base = BM_tab;
1791   BM_tab += 0400;
1792   j = dirlen;           /* to get it in a register */
1793   /* A character that does not appear in the pattern induces a */
1794   /* stride equal to the pattern length. */
       /* The fill is unrolled four entries at a time; 0400 is a multiple
          of 4, so BM_tab ends exactly on BM_tab_base.  */
1795   while (BM_tab_base != BM_tab)
1796     {
1797       *--BM_tab = j;
1798       *--BM_tab = j;
1799       *--BM_tab = j;
1800       *--BM_tab = j;
1801     }
1802
1803   /* We use this for translation, instead of TRT itself.
1804      We fill this in to handle the characters that actually
1805      occur in the pattern.  Others don't matter anyway!  */
       /* NOTE(review): the identity fill just below overwrites every
          entry that the bzero clears.  */
1806   bzero (simple_translate, sizeof simple_translate);
1807   for (i = 0; i < 0400; i++)
1808     simple_translate[i] = i;
1809
1810   if (char_base)
1811     {
1812       /* Setup translate_prev_byte1/2/3/4 from CHAR_BASE.  Only a
1813          byte following them are the target of translation.  */
1814       unsigned char str[MAX_MULTIBYTE_LENGTH];
           /* NOTE(review): this LEN shadows the parameter LEN inside this
              block.  STR[LEN - 2] presumes CHAR_STRING returned at least 2,
              i.e. that CHAR_BASE is not an ASCII character -- confirm.  */
1815       int len = CHAR_STRING (char_base, str);
1816
1817       translate_prev_byte1 = str[len - 2];
1818       if (len > 2)
1819         {
1820           translate_prev_byte2 = str[len - 3];
1821           if (len > 3)
1822             {
1823               translate_prev_byte3 = str[len - 4];
1824               if (len > 4)
1825                 translate_prev_byte4 = str[len - 5];
1826             }
1827         }
1828     }
1829
1830   i = 0;
1831   while (i != infinity)
1832     {
1833       unsigned char *ptr = base_pat + i;
1834       i += direction;
1835       if (i == dirlen)
1836         i = infinity;
1837       if (! NILP (trt))
1838         {
1839           /* If the byte currently looking at is the last of a
1840              character to check case-equivalents, set CH to that
1841              character.  An ASCII character and a non-ASCII character
1842              matching with CHAR_BASE are to be checked.  */
1843           int ch = -1;
1844
1845           if (ASCII_BYTE_P (*ptr) || ! multibyte)
1846             ch = *ptr;
1847           else if (char_base
1848                    && ((pat_end - ptr) == 1 || CHAR_HEAD_P (ptr[1])))
1849             {
1850               unsigned char *charstart = ptr - 1;
1851
1852               while (! (CHAR_HEAD_P (*charstart)))
1853                 charstart--;
1854               ch = STRING_CHAR (charstart, ptr - charstart + 1);
1855               if (char_base != (ch & ~0x3F))
1856                 ch = -1;
1857             }
1858
               /* J indexes BM_tab and simple_translate by byte value.  For
                  CH >= 0200 it is built from the low six bits of CH plus
                  0200 -- presumably the last byte of CH's multibyte form.  */
1859           if (ch >= 0200)
1860             j = (ch & 0x3F) | 0200;
1861           else
1862             j = *ptr;
1863
1864           if (i == infinity)
1865             stride_for_teases = BM_tab[j];
1866
1867           BM_tab[j] = dirlen - i;
1868           /* A translation table is accompanied by its inverse -- see */
1869           /* comment following downcase_table for details */
1870           if (ch >= 0)
1871             {
1872               int starting_ch = ch;
1873               int starting_j = j;
1874
1875               while (1)
1876                 {
1877                   TRANSLATE (ch, inverse_trt, ch);
1878                   if (ch >= 0200)
1879                     j = (ch & 0x3F) | 0200;
1880                   else
1881                     j = ch;
1882
1883                   /* For all the characters that map into CH,
1884                      set up simple_translate to map the last byte
1885                      into STARTING_J.  */
1886                   simple_translate[j] = starting_j;
1887                   if (ch == starting_ch)
1888                     break;
1889                   BM_tab[j] = dirlen - i;
1890                 }
1891             }
1892         }
1893       else
1894         {
1895           j = *ptr;
1896
1897           if (i == infinity)
1898             stride_for_teases = BM_tab[j];
1899           BM_tab[j] = dirlen - i;
1900         }
1901       /* stride_for_teases tells how much to stride if we get a */
1902       /* match on the far character but are subsequently */
1903       /* disappointed, by recording what the stride would have been */
1904       /* for that character if the last character had been */
1905       /* different. */
1906     }
       /* From here on INFINITY is the stride that was stored in BM_tab for
          the final pattern byte.  */
1907   infinity = dirlen - infinity;
1908   pos_byte += dirlen - ((direction > 0) ? direction : 0);
1909   /* loop invariant - POS_BYTE points at where last char (first
1910      char if reverse) of pattern would align in a possible match.  */
1911   while (n != 0)
1912     {
1913       int tail_end;
1914       unsigned char *tail_end_ptr;
1915
1916       /* It's been reported that some (broken) compiler thinks that
1917          Boolean expressions in an arithmetic context are unsigned.
1918          Using an explicit ?1:0 prevents this.  */
1919       if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1920           < 0)
1921         return (n * (0 - direction));
1922       /* First we do the part we can by pointers (maybe nothing) */
1923       QUIT;
1924       pat = base_pat;
1925       limit = pos_byte - dirlen + direction;
1926       if (direction > 0)
1927         {
1928           limit = BUFFER_CEILING_OF (limit);
1929           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1930              can take on without hitting edge of buffer or the gap.  */
1931           limit = min (limit, pos_byte + 20000);
1932           limit = min (limit, lim_byte - 1);
1933         }
1934       else
1935         {
1936           limit = BUFFER_FLOOR_OF (limit);
1937           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1938              can take on without hitting edge of buffer or the gap.  */
1939           limit = max (limit, pos_byte - 20000);
1940           limit = max (limit, lim_byte);
1941         }
1942       tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1943       tail_end_ptr = BYTE_POS_ADDR (tail_end);
1944
           /* Use the pointer-based loop only when more than 20 bytes can be
              covered before LIMIT; otherwise take the position-based loop
              in the else branch.  */
1945       if ((limit - pos_byte) * direction > 20)
1946         {
1947           unsigned char *p2;
1948
1949           p_limit = BYTE_POS_ADDR (limit);
1950           p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1951           /* In this loop, pos + cursor - p2 is the surrogate for pos */
1952           while (1)             /* use one cursor setting as long as i can */
1953             {
1954               if (direction > 0) /* worth duplicating */
1955                 {
1956                   /* Use signed comparison if appropriate
1957                      to make cursor+infinity sure to be > p_limit.
1958                      Assuming that the buffer lies in a range of addresses
1959                      that are all "positive" (as ints) or all "negative",
1960                      either kind of comparison will work as long
1961                      as we don't step by infinity.  So pick the kind
1962                      that works when we do step by infinity.  */
1963                   if ((EMACS_INT) (p_limit + infinity) > (EMACS_INT) p_limit)
1964                     while ((EMACS_INT) cursor <= (EMACS_INT) p_limit)
1965                       cursor += BM_tab[*cursor];
1966                   else
1967                     while ((EMACS_UINT) cursor <= (EMACS_UINT) p_limit)
1968                       cursor += BM_tab[*cursor];
1969                 }
1970               else
1971                 {
1972                   if ((EMACS_INT) (p_limit + infinity) < (EMACS_INT) p_limit)
1973                     while ((EMACS_INT) cursor >= (EMACS_INT) p_limit)
1974                       cursor += BM_tab[*cursor];
1975                   else
1976                     while ((EMACS_UINT) cursor >= (EMACS_UINT) p_limit)
1977                       cursor += BM_tab[*cursor];
1978                 }
1979 /* If you are here, cursor is beyond the end of the searched region. */
1980 /* This can happen if you match on the far character of the pattern, */
1981 /* because the "stride" of that character is infinity, a number able */
1982 /* to throw you well beyond the end of the search.  It can also */
1983 /* happen if you fail to match within the permitted region and would */
1984 /* otherwise try a character beyond that region */
1985               if ((cursor - p_limit) * direction <= len_byte)
1986                 break;  /* a small overrun is genuine */
1987               cursor -= infinity; /* large overrun = hit */
1988               i = dirlen - direction;
1989               if (! NILP (trt))
1990                 {
1991                   while ((i -= direction) + direction != 0)
1992                     {
1993                       int ch;
1994                       cursor -= direction;
1995                       /* Translate only the last byte of a character.  */
1996                       if (! multibyte
1997                           || ((cursor == tail_end_ptr
1998                                || CHAR_HEAD_P (cursor[1]))
1999                               && (CHAR_HEAD_P (cursor[0])
2000                                   /* Check if this is the last byte of
2001                                      a translable character.  */
2002                                   || (translate_prev_byte1 == cursor[-1]
2003                                       && (CHAR_HEAD_P (translate_prev_byte1)
2004                                           || (translate_prev_byte2 == cursor[-2]
2005                                               && (CHAR_HEAD_P (translate_prev_byte2)
2006                                                   || (translate_prev_byte3 == cursor[-3]))))))))
2007                         ch = simple_translate[*cursor];
2008                       else
2009                         ch = *cursor;
2010                       if (pat[i] != ch)
2011                         break;
2012                     }
2013                 }
2014               else
2015                 {
2016                   while ((i -= direction) + direction != 0)
2017                     {
2018                       cursor -= direction;
2019                       if (pat[i] != *cursor)
2020                         break;
2021                     }
2022                 }
2023               cursor += dirlen - i - direction; /* fix cursor */
2024               if (i + direction == 0)
2025                 {
2026                   int position, start, end;
2027
2028                   cursor -= direction;
2029
2030                   position = pos_byte + cursor - p2 + ((direction > 0)
2031                                                        ? 1 - len_byte : 0);
2032                   set_search_regs (position, len_byte);
2033
2034                   if (NILP (Vinhibit_changing_match_data))
2035                     {
2036                       start = search_regs.start[0];
2037                       end = search_regs.end[0];
2038                     }
2039                   else
2040                     /* If Vinhibit_changing_match_data is non-nil,
2041                        search_regs will not be changed.  So let's
2042                        compute start and end here.  */
2043                     {
2044                       start = BYTE_TO_CHAR (position);
2045                       end = BYTE_TO_CHAR (position + len_byte);
2046                     }
2047
2048                   if ((n -= direction) != 0)
2049                     cursor += dirlen; /* to resume search */
2050                   else
2051                     return direction > 0 ? end : start;
2052                 }
2053               else
2054                 cursor += stride_for_teases; /* <sigh> we lose -  */
2055             }
2056           pos_byte += cursor - p2;
2057         }
2058       else
2059         /* Now we'll pick up a clump that has to be done the hard */
2060         /* way because it covers a discontinuity */
2061         {
2062           limit = ((direction > 0)
2063                    ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
2064                    : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
2065           limit = ((direction > 0)
2066                    ? min (limit + len_byte, lim_byte - 1)
2067                    : max (limit - len_byte, lim_byte));
2068           /* LIMIT is now the last value POS_BYTE can have
2069              and still be valid for a possible match.  */
2070           while (1)
2071             {
2072               /* This loop can be coded for space rather than */
2073               /* speed because it will usually run only once. */
2074               /* (the reach is at most len + 21, and typically */
2075               /* does not exceed len) */
2076               while ((limit - pos_byte) * direction >= 0)
2077                 pos_byte += BM_tab[FETCH_BYTE (pos_byte)];
2078               /* now run the same tests to distinguish going off the */
2079               /* end, a match or a phony match. */
2080               if ((pos_byte - limit) * direction <= len_byte)
2081                 break;  /* ran off the end */
2082               /* Found what might be a match.
2083                  Set POS_BYTE back to last (first if reverse) pos.  */
2084               pos_byte -= infinity;
2085               i = dirlen - direction;
2086               while ((i -= direction) + direction != 0)
2087                 {
2088                   int ch;
2089                   unsigned char *ptr;
2090                   pos_byte -= direction;
2091                   ptr = BYTE_POS_ADDR (pos_byte);
2092                   /* Translate only the last byte of a character.  */
2093                   if (! multibyte
2094                       || ((ptr == tail_end_ptr
2095                            || CHAR_HEAD_P (ptr[1]))
2096                           && (CHAR_HEAD_P (ptr[0])
2097                               /* Check if this is the last byte of a
2098                                  translable character.  */
2099                               || (translate_prev_byte1 == ptr[-1]
2100                                   && (CHAR_HEAD_P (translate_prev_byte1)
2101                                       || (translate_prev_byte2 == ptr[-2]
2102                                           && (CHAR_HEAD_P (translate_prev_byte2)
2103                                               || translate_prev_byte3 == ptr[-3])))))))
2104                     ch = simple_translate[*ptr];
2105                   else
2106                     ch = *ptr;
2107                   if (pat[i] != ch)
2108                     break;
2109                 }
2110               /* Above loop has moved POS_BYTE part or all the way
2111                  back to the first pos (last pos if reverse).
2112                  Set it once again at the last (first if reverse) char.  */
2113               pos_byte += dirlen - i- direction;
2114               if (i + direction == 0)
2115                 {
2116                   int position, start, end;
2117                   pos_byte -= direction;
2118
2119                   position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
2120                   set_search_regs (position, len_byte);
2121
2122                   if (NILP (Vinhibit_changing_match_data))
2123                     {
2124                       start = search_regs.start[0];
2125                       end = search_regs.end[0];
2126                     }
2127                   else
2128                     /* If Vinhibit_changing_match_data is non-nil,
2129                        search_regs will not be changed.  So let's
2130                        compute start and end here.  */
2131                     {
2132                       start = BYTE_TO_CHAR (position);
2133                       end = BYTE_TO_CHAR (position + len_byte);
2134                     }
2135
2136                   if ((n -= direction) != 0)
2137                     pos_byte += dirlen; /* to resume search */
2138                   else
2139                     return direction > 0 ? end : start;
2140                 }
2141               else
2142                 pos_byte += stride_for_teases;
2143             }
2144           }
2145       /* We have done one clump.  Can we continue? */
2146       if ((lim_byte - pos_byte) * direction < 0)
2147         return ((0 - n) * direction);
2148     }
       /* Reached only if N was 0 on entry: every exit from inside the loop
          above is a return.  */
2149   return BYTE_TO_CHAR (pos_byte);
2150 }
2151
2152 /* Note the overall match just found in the current buffer: it begins
2153    at byte position BEG_BYTE and covers NBYTES bytes.  Register 0 gets
2154    the corresponding character positions and every higher register is
2155    reset to -1.  Does nothing if Vinhibit_changing_match_data is non-nil.  */
2156
2157 static void
2158 set_search_regs (beg_byte, nbytes)
2159      int beg_byte, nbytes;
2160 {
2161   int reg = 1;
2162
2163   if (NILP (Vinhibit_changing_match_data))
2164     {
2165       /* The first use has to allocate the two register vectors.  */
2166       if (search_regs.num_regs == 0)
2167         {
2168           search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
2169           search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
2170           search_regs.num_regs = 2;
2171         }
2172       /* Only the whole match is known: wipe every register above 0.  */
2173       while (reg < search_regs.num_regs)
2174         {
2175           search_regs.end[reg] = -1;
2176           search_regs.start[reg] = -1;
2177           reg++;
2178         }
2179
2180       search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
2181       search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
2182       XSETBUFFER (last_thing_searched, current_buffer);
2183     }
2184 }
2185 \f
2186 /* Given STRING, a string of words separated by word delimiters,
2187    compute a regexp that matches those exact words separated by
2188    arbitrary punctuation: "\b", the words joined by "\W\W*", then
2189    "\b".  If LAX is nonzero, the closing "\b" is left out unless STRING
2190    ends in a character without word syntax.  If STRING contains no
2191    word-constituent character, return empty_unibyte_string.  */
2192
2193 static Lisp_Object
2194 wordify (string, lax)
2195      Lisp_Object string;
2196      int lax;
2197 {
2198   register unsigned char *o;
2199   register int i, i_byte, len, punct_count = 0, word_count = 0;
2200   /* Bytes taken by the characters counted in PUNCT_COUNT; larger than
2201      PUNCT_COUNT as soon as one of them is a multibyte character.  */
2202   int punct_bytes = 0;
2203   Lisp_Object val;
2204   int prev_c = 0, extra, whitespace_at_end;
2205
2206   CHECK_STRING (string);
2207   len = SCHARS (string);
2208
2209   for (i = 0, i_byte = 0; i < len; )
2210     {
2211       int c;
2212       int i_byte_orig = i_byte;
2213
2214       FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, i, i_byte);
2215       if (SYNTAX (c) != Sword)
2216         {
2217           punct_count++;
2218           punct_bytes += i_byte - i_byte_orig;
2219           if (i > 0 && SYNTAX (prev_c) == Sword)
2220             word_count++;
2221         }
2222       prev_c = c;
2223     }
2224
2225   if (SYNTAX (prev_c) == Sword)
2226     {
2227       word_count++;
2228       whitespace_at_end = 0;
2229     }
2230   else
2231     whitespace_at_end = 1;
2232
2233   if (!word_count)
2234     return empty_unibyte_string;
2235
2236   /* EXTRA is the text added around the words: "\W\W*" (5 bytes) between
2237      neighbors, "\b" in front and, unless left out, "\b" at the end.  It
2238      is all ASCII, but the text dropped need not be: hence PUNCT_BYTES.  */
2239   extra = 5 * (word_count - 1) + ((lax && !whitespace_at_end) ? 2 : 4);
2240   if (STRING_MULTIBYTE (string))
2241     val = make_uninit_multibyte_string (len - punct_count + extra,
2242                                         SBYTES (string) - punct_bytes
2243                                         + extra);
2244   else
2245     val = make_uninit_string (len - punct_count + extra);
2246
2247   o = SDATA (val);
2248   *o++ = '\\';
2249   *o++ = 'b';
2250   prev_c = 0;
2251
2252   for (i = 0, i_byte = 0; i < len; )
2253     {
2254       int c;
2255       int i_byte_orig = i_byte;
2256
2257       FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, i, i_byte);
2258       if (SYNTAX (c) == Sword)
2259         {
2260           bcopy (SDATA (string) + i_byte_orig, o,
2261                  i_byte - i_byte_orig);
2262           o += i_byte - i_byte_orig;
2263         }
2264       else if (i > 0 && SYNTAX (prev_c) == Sword && --word_count)
2265         {
2266           bcopy ("\\W\\W*", o, 5);
2267           o += 5;
2268         }
2269       prev_c = c;
2270     }
2271
2272   if (!lax || whitespace_at_end)
2273     {
2274       *o++ = '\\';
2275       *o++ = 'b';
2276     }
2277
2278   return val;
2279 }
2280 \f
2281 DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
2282        "MSearch backward: ",
2283        doc: /* Search backward from point for STRING.
2284 Set point to the beginning of the occurrence found, and return point.
2285 An optional second argument bounds the search; it is a buffer position.
2286 The match found must not extend before that position.
2287 Optional third argument, if t, means if fail just return nil (no error).
2288  If not nil and not t, position at limit of search and return nil.
2289 Optional fourth argument is repeat count--search for successive occurrences.
2290
2291 Search case-sensitivity is determined by the value of the variable
2292 `case-fold-search', which see.
2293
2294 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2295      (string, bound, noerror, count)
2296      Lisp_Object string, bound, noerror, count;
2297 {
       /* Literal string search, backward: -1 here where `search-forward'
          passes 1, and both trailing flags 0.  NOTE(review): search_command
          is defined outside this view; the flags presumably select regexp
          and POSIX matching -- confirm there.  */
2298   return search_command (string, bound, noerror, count, -1, 0, 0);
2299 }
2300
2301 DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
2302        doc: /* Search forward from point for STRING.
2303 Set point to the end of the occurrence found, and return point.
2304 An optional second argument bounds the search; it is a buffer position.
2305 The match found must not extend after that position.  A value of nil is
2306   equivalent to (point-max).
2307 Optional third argument, if t, means if fail just return nil (no error).
2308   If not nil and not t, move to limit of search and return nil.
2309 Optional fourth argument is repeat count--search for successive occurrences.
2310
2311 Search case-sensitivity is determined by the value of the variable
2312 `case-fold-search', which see.
2313
2314 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2315      (string, bound, noerror, count)
2316      Lisp_Object string, bound, noerror, count;
2317 {
       /* Literal string search, forward: 1 here where `search-backward'
          passes -1, and both trailing flags 0.  NOTE(review): search_command
          is defined outside this view; the flags presumably select regexp
          and POSIX matching -- confirm there.  */
2318   return search_command (string, bound, noerror, count, 1, 0, 0);
2319 }
2320
2321 DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
2322        "sWord search backward: ",
2323        doc: /* Search backward from point for STRING, ignoring differences in punctuation.
2324 Set point to the beginning of the occurrence found, and return point.
2325 An optional second argument bounds the search; it is a buffer position.
2326 The match found must not extend before that position.
2327 Optional third argument, if t, means if fail just return nil (no error).
2328   If not nil and not t, move to limit of search and return nil.
2329 Optional fourth argument is repeat count--search for successive occurrences.  */)
2330      (string, bound, noerror, count)
2331      Lisp_Object string, bound, noerror, count;
2332 {
       /* wordify turns STRING into a regexp (second argument 0: not lax),
          which is why the sixth argument is 1 as in `re-search-backward'.
          NOTE(review): search_command is defined outside this view; -1 is
          presumably the direction -- confirm there.  */
2333   return search_command (wordify (string, 0), bound, noerror, count, -1, 1, 0);
2334 }
2335
2336 DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
2337        "sWord search: ",
2338        doc: /* Search forward from point for STRING, ignoring differences in punctuation.
2339 Set point to the end of the occurrence found, and return point.
2340 An optional second argument bounds the search; it is a buffer position.
2341 The match found must not extend after that position.
2342 Optional third argument, if t, means if fail just return nil (no error).
2343   If not nil and not t, move to limit of search and return nil.
2344 Optional fourth argument is repeat count--search for successive occurrences.  */)
2345      (string, bound, noerror, count)
2346      Lisp_Object string, bound, noerror, count;
2347 {
       /* wordify turns STRING into a regexp (second argument 0: not lax),
          which is why the sixth argument is 1 as in `re-search-forward'.
          NOTE(review): search_command is defined outside this view; 1 is
          presumably the direction -- confirm there.  */
2348   return search_command (wordify (string, 0), bound, noerror, count, 1, 1, 0);
2349 }
2350
2351 DEFUN ("word-search-backward-lax", Fword_search_backward_lax, Sword_search_backward_lax, 1, 4,
2352        "sWord search backward: ",
2353        doc: /* Search backward from point for STRING, ignoring differences in punctuation.
2354 Set point to the beginning of the occurrence found, and return point.
2355
2356 Unlike `word-search-backward', the end of STRING need not match a word
2357 boundary unless it ends in whitespace.
2358
2359 An optional second argument bounds the search; it is a buffer position.
2360 The match found must not extend before that position.
2361 Optional third argument, if t, means if fail just return nil (no error).
2362   If not nil and not t, move to limit of search and return nil.
2363 Optional fourth argument is repeat count--search for successive occurrences.  */)
2364      (string, bound, noerror, count)
2365      Lisp_Object string, bound, noerror, count;
2366 {
       /* Identical to `word-search-backward' except that wordify is called
          with LAX set to 1.  NOTE(review): search_command is defined
          outside this view; -1 is presumably the direction and the 1 after
          it the regexp flag -- confirm there.  */
2367   return search_command (wordify (string, 1), bound, noerror, count, -1, 1, 0);
2368 }
2369
2370 DEFUN ("word-search-forward-lax", Fword_search_forward_lax, Sword_search_forward_lax, 1, 4,
2371        "sWord search: ",
2372        doc: /* Search forward from point for STRING, ignoring differences in punctuation.
2373 Set point to the end of the occurrence found, and return point.
2374
2375 Unlike `word-search-forward', the end of STRING need not match a word
2376 boundary unless it ends in whitespace.
2377
2378 An optional second argument bounds the search; it is a buffer position.
2379 The match found must not extend after that position.
2380 Optional third argument, if t, means if fail just return nil (no error).
2381   If not nil and not t, move to limit of search and return nil.
2382 Optional fourth argument is repeat count--search for successive occurrences.  */)
2383      (string, bound, noerror, count)
2384      Lisp_Object string, bound, noerror, count;
2385 {
       /* Identical to `word-search-forward' except that wordify is called
          with LAX set to 1.  NOTE(review): search_command is defined
          outside this view; the first 1 is presumably the direction and
          the second the regexp flag -- confirm there.  */
2386   return search_command (wordify (string, 1), bound, noerror, count, 1, 1, 0);
2387 }
2388
2389 DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
2390        "sRE search backward: ",
2391        doc: /* Search backward from point for match for regular expression REGEXP.
2392 Set point to the beginning of the match, and return point.
2393 The match found is the one starting last in the buffer
2394 and yet ending before the origin of the search.
2395 An optional second argument bounds the search; it is a buffer position.
2396 The match found must start at or after that position.
2397 Optional third argument, if t, means if fail just return nil (no error).
2398   If not nil and not t, move to limit of search and return nil.
2399 Optional fourth argument is repeat count--search for successive occurrences.
2400 See also the functions `match-beginning', `match-end', `match-string',
2401 and `replace-match'.  */)
2402      (regexp, bound, noerror, count)
2403      Lisp_Object regexp, bound, noerror, count;
2404 {
       /* Same call as `search-backward' except that the sixth argument is 1
          instead of 0.  NOTE(review): presumably the "treat the string as a
          regexp" flag; search_command is defined outside this view.  */
2405   return search_command (regexp, bound, noerror, count, -1, 1, 0);
2406 }
2407
2408 DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
2409        "sRE search: ",
2410        doc: /* Search forward from point for regular expression REGEXP.
2411 Set point to the end of the occurrence found, and return point.
2412 An optional second argument bounds the search; it is a buffer position.
2413 The match found must not extend after that position.
2414 Optional third argument, if t, means if fail just return nil (no error).
2415   If not nil and not t, move to limit of search and return nil.
2416 Optional fourth argument is repeat count--search for successive occurrences.
2417 See also the functions `match-beginning', `match-end', `match-string',
2418 and `replace-match'.  */)
2419      (regexp, bound, noerror, count)
2420      Lisp_Object regexp, bound, noerror, count;
2421 {
       /* Same call as `search-forward' except that the sixth argument is 1
          instead of 0.  NOTE(review): presumably the "treat the string as a
          regexp" flag; search_command is defined outside this view.  */
2422   return search_command (regexp, bound, noerror, count, 1, 1, 0);
2423 }
2424
2425 DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
2426        "sPosix search backward: ",
2427        doc: /* Search backward from point for match for regular expression REGEXP.
2428 Find the longest match in accord with Posix regular expression rules.
2429 Set point to the beginning of the match, and return point.
2430 The match found is the one starting last in the buffer
2431 and yet ending before the origin of the search.
2432 An optional second argument bounds the search; it is a buffer position.
2433 The match found must start at or after that position.
2434 Optional third argument, if t, means if fail just return nil (no error).
2435   If not nil and not t, move to limit of search and return nil.
2436 Optional fourth argument is repeat count--search for successive occurrences.
2437 See also the functions `match-beginning', `match-end', `match-string',
2438 and `replace-match'.  */)
2439      (regexp, bound, noerror, count)
2440      Lisp_Object regexp, bound, noerror, count;
2441 {
       /* Same call as `re-search-backward' except that the last argument
          is 1.  NOTE(review): presumably this requests the POSIX
          longest-match behavior the doc string describes; search_command
          is defined outside this view.  */
2442   return search_command (regexp, bound, noerror, count, -1, 1, 1);
2443 }
2444
2445 DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
2446        "sPosix search: ",
2447        doc: /* Search forward from point for regular expression REGEXP.
2448 Find the longest match in accord with Posix regular expression rules.
2449 Set point to the end of the occurrence found, and return point.
2450 An optional second argument bounds the search; it is a buffer position.
2451 The match found must not extend after that position.
2452 Optional third argument, if t, means if fail just return nil (no error).
2453   If not nil and not t, move to limit of search and return nil.
2454 Optional fourth argument is repeat count--search for successive occurrences.
2455 See also the functions `match-beginning', `match-end', `match-string',
2456 and `replace-match'.  */)
2457      (regexp, bound, noerror, count)
2458      Lisp_Object regexp, bound, noerror, count;
2459 {
       /* Same call as `re-search-forward' except that the last argument
          is 1.  NOTE(review): presumably this requests the POSIX
          longest-match behavior the doc string describes; search_command
          is defined outside this view.  */
2460   return search_command (regexp, bound, noerror, count, 1, 1, 1);
2461 }
2462 \f
2463 DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
2464        doc: /* Replace text matched by last search with NEWTEXT.
2465 Leave point at the end of the replacement text.
2466
2467 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
2468 Otherwise maybe capitalize the whole text, or maybe just word initials,
2469 based on the replaced text.
2470 If the replaced text has only capital letters
2471 and has at least one multiletter word, convert NEWTEXT to all caps.
2472 Otherwise if all words are capitalized in the replaced text,
2473 capitalize each word in NEWTEXT.
2474
2475 If third arg LITERAL is non-nil, insert NEWTEXT literally.
2476 Otherwise treat `\\' as special:
2477   `\\&' in NEWTEXT means substitute original matched text.
2478   `\\N' means substitute what matched the Nth `\\(...\\)'.
2479        If Nth parens didn't match, substitute nothing.
2480   `\\\\' means insert one `\\'.
2481 Case conversion does not apply to these substitutions.
2482
2483 FIXEDCASE and LITERAL are optional arguments.
2484
2485 The optional fourth argument STRING can be a string to modify.
2486 This is meaningful when the previous match was done against STRING,
2487 using `string-match'.  When used this way, `replace-match'
2488 creates and returns a new string made by copying STRING and replacing
2489 the part of STRING that was matched.
2490
2491 The optional fifth argument SUBEXP specifies a subexpression;
2492 it says to replace just that subexpression with NEWTEXT,
2493 rather than replacing the entire matched text.
2494 This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
2495 `\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
2496 NEWTEXT in place of subexp N.
2497 This is useful only after a regular expression search or match,
2498 since only regular expressions have distinguished subexpressions.  */)
2499      (newtext, fixedcase, literal, string, subexp)
2500      Lisp_Object newtext, fixedcase, literal, string, subexp;
2501 {
2502   enum { nochange, all_caps, cap_initial } case_action;
2503   register int pos, pos_byte;
2504   int some_multiletter_word;
2505   int some_lowercase;
2506   int some_uppercase;
2507   int some_nonuppercase_initial;
2508   register int c, prevc;
2509   int sub;
2510   int opoint, newpoint;
2511
2512   CHECK_STRING (newtext);
2513
2514   if (! NILP (string))
2515     CHECK_STRING (string);
2516
2517   case_action = nochange;       /* We tried an initialization */
2518                                 /* but some C compilers blew it */
2519
2520   if (search_regs.num_regs <= 0)
2521     error ("`replace-match' called before any match found");
2522
2523   if (NILP (subexp))
2524     sub = 0;
2525   else
2526     {
2527       CHECK_NUMBER (subexp);
2528       sub = XINT (subexp);
2529       if (sub < 0 || sub >= search_regs.num_regs)
2530         args_out_of_range (subexp, make_number (search_regs.num_regs));
2531     }
2532
2533   if (NILP (string))
2534     {
2535       if (search_regs.start[sub] < BEGV
2536           || search_regs.start[sub] > search_regs.end[sub]
2537           || search_regs.end[sub] > ZV)
2538         args_out_of_range (make_number (search_regs.start[sub]),
2539                            make_number (search_regs.end[sub]));
2540     }
2541   else
2542     {
2543       if (search_regs.start[sub] < 0
2544           || search_regs.start[sub] > search_regs.end[sub]
2545           || search_regs.end[sub] > SCHARS (string))
2546         args_out_of_range (make_number (search_regs.start[sub]),
2547                            make_number (search_regs.end[sub]));
2548     }
2549
2550   if (NILP (fixedcase))
2551     {
2552       /* Decide how to casify by examining the matched text. */
2553       int last;
2554
2555       pos = search_regs.start[sub];
2556       last = search_regs.end[sub];
2557
2558       if (NILP (string))
2559         pos_byte = CHAR_TO_BYTE (pos);
2560       else
2561         pos_byte = string_char_to_byte (string, pos);
2562
2563       prevc = '\n';
2564       case_action = all_caps;
2565
2566       /* some_multiletter_word is set nonzero if any original word
2567          is more than one letter long. */
2568       some_multiletter_word = 0;
2569       some_lowercase = 0;
2570       some_nonuppercase_initial = 0;
2571       some_uppercase = 0;
2572
2573       while (pos < last)
2574         {
2575           if (NILP (string))
2576             {
2577               c = FETCH_CHAR_AS_MULTIBYTE (pos_byte);
2578               INC_BOTH (pos, pos_byte);
2579             }
2580           else
2581             FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, pos, pos_byte);
2582
2583           if (LOWERCASEP (c))
2584             {
2585               /* Cannot be all caps if any original char is lower case */
2586
2587               some_lowercase = 1;
2588               if (SYNTAX (prevc) != Sword)
2589                 some_nonuppercase_initial = 1;
2590               else
2591                 some_multiletter_word = 1;
2592             }
2593           else if (UPPERCASEP (c))
2594             {
2595               some_uppercase = 1;
2596               if (SYNTAX (prevc) != Sword)
2597                 ;
2598               else
2599                 some_multiletter_word = 1;
2600             }
2601           else
2602             {
2603               /* If the initial is a caseless word constituent,
2604                  treat that like a lowercase initial.  */
2605               if (SYNTAX (prevc) != Sword)
2606                 some_nonuppercase_initial = 1;
2607             }
2608
2609           prevc = c;
2610         }
2611
2612       /* Convert to all caps if the old text is all caps
2613          and has at least one multiletter word.  */
2614       if (! some_lowercase && some_multiletter_word)
2615         case_action = all_caps;
2616       /* Capitalize each word, if the old text has all capitalized words.  */
2617       else if (!some_nonuppercase_initial && some_multiletter_word)
2618         case_action = cap_initial;
2619       else if (!some_nonuppercase_initial && some_uppercase)
2620         /* Should x -> yz, operating on X, give Yz or YZ?
2621            We'll assume the latter.  */
2622         case_action = all_caps;
2623       else
2624         case_action = nochange;
2625     }
2626
2627   /* Do replacement in a string.  */
2628   if (!NILP (string))
2629     {
2630       Lisp_Object before, after;
2631
2632       before = Fsubstring (string, make_number (0),
2633                            make_number (search_regs.start[sub]));
2634       after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
2635
2636       /* Substitute parts of the match into NEWTEXT
2637          if desired.  */
2638       if (NILP (literal))
2639         {
2640           int lastpos = 0;
2641           int lastpos_byte = 0;
2642           /* We build up the substituted string in ACCUM.  */
2643           Lisp_Object accum;
2644           Lisp_Object middle;
2645           int length = SBYTES (newtext);
2646
2647           accum = Qnil;
2648
2649           for (pos_byte = 0, pos = 0; pos_byte < length;)
2650             {
2651               int substart = -1;
2652               int subend = 0;
2653               int delbackslash = 0;
2654
2655               FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2656
2657               if (c == '\\')
2658                 {
2659                   FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2660
2661                   if (c == '&')
2662                     {
2663                       substart = search_regs.start[sub];
2664                       subend = search_regs.end[sub];
2665                     }
2666                   else if (c >= '1' && c <= '9')
2667                     {
2668                       if (search_regs.start[c - '0'] >= 0
2669                           && c <= search_regs.num_regs + '0')
2670                         {
2671                           substart = search_regs.start[c - '0'];
2672                           subend = search_regs.end[c - '0'];
2673                         }
2674                       else
2675                         {
2676                           /* If that subexp did not match,
2677                              replace \\N with nothing.  */
2678                           substart = 0;
2679                           subend = 0;
2680                         }
2681                     }
2682                   else if (c == '\\')
2683                     delbackslash = 1;
2684                   else
2685                     error ("Invalid use of `\\' in replacement text");
2686                 }
2687               if (substart >= 0)
2688                 {
2689                   if (pos - 2 != lastpos)
2690                     middle = substring_both (newtext, lastpos,
2691                                              lastpos_byte,
2692                                              pos - 2, pos_byte - 2);
2693                   else
2694                     middle = Qnil;
2695                   accum = concat3 (accum, middle,
2696                                    Fsubstring (string,
2697                                                make_number (substart),
2698                                                make_number (subend)));
2699                   lastpos = pos;
2700                   lastpos_byte = pos_byte;
2701                 }
2702               else if (delbackslash)
2703                 {
2704                   middle = substring_both (newtext, lastpos,
2705                                            lastpos_byte,
2706                                            pos - 1, pos_byte - 1);
2707
2708                   accum = concat2 (accum, middle);
2709                   lastpos = pos;
2710                   lastpos_byte = pos_byte;
2711                 }
2712             }
2713
2714           if (pos != lastpos)
2715             middle = substring_both (newtext, lastpos,
2716                                      lastpos_byte,
2717                                      pos, pos_byte);
2718           else
2719             middle = Qnil;
2720
2721           newtext = concat2 (accum, middle);
2722         }
2723
2724       /* Do case substitution in NEWTEXT if desired.  */
2725       if (case_action == all_caps)
2726         newtext = Fupcase (newtext);
2727       else if (case_action == cap_initial)
2728         newtext = Fupcase_initials (newtext);
2729
2730       return concat3 (before, newtext, after);
2731     }
2732
2733   /* Record point, then move (quietly) to the start of the match.  */
2734   if (PT >= search_regs.end[sub])
2735     opoint = PT - ZV;
2736   else if (PT > search_regs.start[sub])
2737     opoint = search_regs.end[sub] - ZV;
2738   else
2739     opoint = PT;
2740
2741   /* If we want non-literal replacement,
2742      perform substitution on the replacement string.  */
2743   if (NILP (literal))
2744     {
2745       int length = SBYTES (newtext);
2746       unsigned char *substed;
2747       int substed_alloc_size, substed_len;
2748       int buf_multibyte = !NILP (current_buffer->enable_multibyte_characters);
2749       int str_multibyte = STRING_MULTIBYTE (newtext);
2750       Lisp_Object rev_tbl;
2751       int really_changed = 0;
2752
2753       rev_tbl = Qnil;
2754
2755       substed_alloc_size = length * 2 + 100;
2756       substed = (unsigned char *) xmalloc (substed_alloc_size + 1);
2757       substed_len = 0;
2758
2759       /* Go thru NEWTEXT, producing the actual text to insert in
2760          SUBSTED while adjusting multibyteness to that of the current
2761          buffer.  */
2762
2763       for (pos_byte = 0, pos = 0; pos_byte < length;)
2764         {
2765           unsigned char str[MAX_MULTIBYTE_LENGTH];
2766           unsigned char *add_stuff = NULL;
2767           int add_len = 0;
2768           int idx = -1;
2769
2770           if (str_multibyte)
2771             {
2772               FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte);
2773               if (!buf_multibyte)
2774                 c = multibyte_char_to_unibyte (c, rev_tbl);
2775             }
2776           else
2777             {
2778               /* Note that we don't have to increment POS.  */
2779               c = SREF (newtext, pos_byte++);
2780               if (buf_multibyte)
2781                 c = unibyte_char_to_multibyte (c);
2782             }
2783
2784           /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2785              or set IDX to a match index, which means put that part
2786              of the buffer text into SUBSTED.  */
2787
2788           if (c == '\\')
2789             {
2790               really_changed = 1;
2791
2792               if (str_multibyte)
2793                 {
2794                   FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext,
2795                                                       pos, pos_byte);
2796                   if (!buf_multibyte && !ASCII_CHAR_P (c))
2797                     c = multibyte_char_to_unibyte (c, rev_tbl);
2798                 }
2799               else
2800                 {
2801                   c = SREF (newtext, pos_byte++);
2802                   if (buf_multibyte)
2803                     c = unibyte_char_to_multibyte (c);
2804                 }
2805
2806               if (c == '&')
2807                 idx = sub;
2808               else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
2809                 {
2810                   if (search_regs.start[c - '0'] >= 1)
2811                     idx = c - '0';
2812                 }
2813               else if (c == '\\')
2814                 add_len = 1, add_stuff = "\\";
2815               else
2816                 {
2817                   xfree (substed);
2818                   error ("Invalid use of `\\' in replacement text");
2819                 }
2820             }
2821           else
2822             {
2823               add_len = CHAR_STRING (c, str);
2824               add_stuff = str;
2825             }
2826
2827           /* If we want to copy part of a previous match,
2828              set up ADD_STUFF and ADD_LEN to point to it.  */
2829           if (idx >= 0)
2830             {
2831               int begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
2832               add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
2833               if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
2834                 move_gap (search_regs.start[idx]);
2835               add_stuff = BYTE_POS_ADDR (begbyte);
2836             }
2837
2838           /* Now the stuff we want to add to SUBSTED
2839              is invariably ADD_LEN bytes starting at ADD_STUFF.  */
2840
2841           /* Make sure SUBSTED is big enough.  */
2842           if (substed_len + add_len >= substed_alloc_size)
2843             {
2844               substed_alloc_size = substed_len + add_len + 500;
2845               substed = (unsigned char *) xrealloc (substed,
2846                                                     substed_alloc_size + 1);
2847             }
2848
2849           /* Now add to the end of SUBSTED.  */
2850           if (add_stuff)
2851             {
2852               bcopy (add_stuff, substed + substed_len, add_len);
2853               substed_len += add_len;
2854             }
2855         }
2856
2857       if (really_changed)
2858         {
2859           if (buf_multibyte)
2860             {
2861               int nchars = multibyte_chars_in_text (substed, substed_len);
2862
2863               newtext = make_multibyte_string (substed, nchars, substed_len);
2864             }
2865           else
2866             newtext = make_unibyte_string (substed, substed_len);
2867         }
2868       xfree (substed);
2869     }
2870
2871   /* Replace the old text with the new in the cleanest possible way.  */
2872   replace_range (search_regs.start[sub], search_regs.end[sub],
2873                  newtext, 1, 0, 1);
2874   newpoint = search_regs.start[sub] + SCHARS (newtext);
2875
2876   if (case_action == all_caps)
2877     Fupcase_region (make_number (search_regs.start[sub]),
2878                     make_number (newpoint));
2879   else if (case_action == cap_initial)
2880     Fupcase_initials_region (make_number (search_regs.start[sub]),
2881                              make_number (newpoint));
2882
2883   /* Adjust search data for this change.  */
2884   {
2885     int oldend = search_regs.end[sub];
2886     int oldstart = search_regs.start[sub];
2887     int change = newpoint - search_regs.end[sub];
2888     int i;
2889
2890     for (i = 0; i < search_regs.num_regs; i++)
2891       {
2892         if (search_regs.start[i] >= oldend)
2893           search_regs.start[i] += change;
2894         else if (search_regs.start[i] > oldstart)
2895           search_regs.start[i] = oldstart;
2896         if (search_regs.end[i] >= oldend)
2897           search_regs.end[i] += change;
2898         else if (search_regs.end[i] > oldstart)
2899           search_regs.end[i] = oldstart;
2900       }
2901   }
2902
2903   /* Put point back where it was in the text.  */
2904   if (opoint <= 0)
2905     TEMP_SET_PT (opoint + ZV);
2906   else
2907     TEMP_SET_PT (opoint);
2908
2909   /* Now move point "officially" to the start of the inserted replacement.  */
2910   move_if_not_intangible (newpoint);
2911
2912   return Qnil;
2913 }
2914 \f
2915 static Lisp_Object
2916 match_limit (num, beginningp)
2917      Lisp_Object num;
2918      int beginningp;
2919 {
2920   register int n;
2921
2922   CHECK_NUMBER (num);
2923   n = XINT (num);
2924   if (n < 0)
2925     args_out_of_range (num, make_number (0));
2926   if (search_regs.num_regs <= 0)
2927     error ("No match data, because no search succeeded");
2928   if (n >= search_regs.num_regs
2929       || search_regs.start[n] < 0)
2930     return Qnil;
2931   return (make_number ((beginningp) ? search_regs.start[n]
2932                                     : search_regs.end[n]));
2933 }
2934
2935 DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
2936        doc: /* Return position of start of text matched by last search.
2937 SUBEXP, a number, specifies which parenthesized expression in the last
2938   regexp.
2939 Value is nil if SUBEXPth pair didn't match, or there were less than
2940   SUBEXP pairs.
2941 Zero means the entire text matched by the whole regexp or whole string.  */)
2942      (subexp)
2943      Lisp_Object subexp;
2944 {
2945   return match_limit (subexp, 1);
2946 }
2947
2948 DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
2949        doc: /* Return position of end of text matched by last search.
2950 SUBEXP, a number, specifies which parenthesized expression in the last
2951   regexp.
2952 Value is nil if SUBEXPth pair didn't match, or there were less than
2953   SUBEXP pairs.
2954 Zero means the entire text matched by the whole regexp or whole string.  */)
2955      (subexp)
2956      Lisp_Object subexp;
2957 {
2958   return match_limit (subexp, 0);
2959 }
2960
2961 DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 3, 0,
2962        doc: /* Return a list containing all info on what the last search matched.
2963 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2964 All the elements are markers or nil (nil if the Nth pair didn't match)
2965 if the last match was on a buffer; integers or nil if a string was matched.
2966 Use `store-match-data' to reinstate the data in this list.
2967
2968 If INTEGERS (the optional first argument) is non-nil, always use
2969 integers \(rather than markers) to represent buffer positions.  In
2970 this case, and if the last match was in a buffer, the buffer will get
2971 stored as one additional element at the end of the list.
2972
2973 If REUSE is a list, reuse it as part of the value.  If REUSE is long
2974 enough to hold all the values, and if INTEGERS is non-nil, no consing
2975 is done.
2976
2977 If optional third arg RESEAT is non-nil, any previous markers on the
2978 REUSE list will be modified to point to nowhere.
2979
2980 Return value is undefined if the last search failed.  */)
2981   (integers, reuse, reseat)
2982      Lisp_Object integers, reuse, reseat;
2983 {
2984   Lisp_Object tail, prev;
2985   Lisp_Object *data;
2986   int i, len;
2987
2988   if (!NILP (reseat))
2989     for (tail = reuse; CONSP (tail); tail = XCDR (tail))
2990       if (MARKERP (XCAR (tail)))
2991         {
2992           unchain_marker (XMARKER (XCAR (tail)));
2993           XSETCAR (tail, Qnil);
2994         }
2995
2996   if (NILP (last_thing_searched))
2997     return Qnil;
2998
2999   prev = Qnil;
3000
3001   data = (Lisp_Object *) alloca ((2 * search_regs.num_regs + 1)
3002                                  * sizeof (Lisp_Object));
3003
3004   len = 0;
3005   for (i = 0; i < search_regs.num_regs; i++)
3006     {
3007       int start = search_regs.start[i];
3008       if (start >= 0)
3009         {
3010           if (EQ (last_thing_searched, Qt)
3011               || ! NILP (integers))
3012             {
3013               XSETFASTINT (data[2 * i], start);
3014               XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
3015             }
3016           else if (BUFFERP (last_thing_searched))
3017             {
3018               data[2 * i] = Fmake_marker ();
3019               Fset_marker (data[2 * i],
3020                            make_number (start),
3021                            last_thing_searched);
3022               data[2 * i + 1] = Fmake_marker ();
3023               Fset_marker (data[2 * i + 1],
3024                            make_number (search_regs.end[i]),
3025                            last_thing_searched);
3026             }
3027           else
3028             /* last_thing_searched must always be Qt, a buffer, or Qnil.  */
3029             abort ();
3030
3031           len = 2 * i + 2;
3032         }
3033       else
3034         data[2 * i] = data[2 * i + 1] = Qnil;
3035     }
3036
3037   if (BUFFERP (last_thing_searched) && !NILP (integers))
3038     {
3039       data[len] = last_thing_searched;
3040       len++;
3041     }
3042
3043   /* If REUSE is not usable, cons up the values and return them.  */
3044   if (! CONSP (reuse))
3045     return Flist (len, data);
3046
3047   /* If REUSE is a list, store as many value elements as will fit
3048      into the elements of REUSE.  */
3049   for (i = 0, tail = reuse; CONSP (tail);
3050        i++, tail = XCDR (tail))
3051     {
3052       if (i < len)
3053         XSETCAR (tail, data[i]);
3054       else
3055         XSETCAR (tail, Qnil);
3056       prev = tail;
3057     }
3058
3059   /* If we couldn't fit all value elements into REUSE,
3060      cons up the rest of them and add them to the end of REUSE.  */
3061   if (i < len)
3062     XSETCDR (prev, Flist (len - i, data + i));
3063
3064   return reuse;
3065 }
3066
3067 /* We used to have an internal use variant of `reseat' described as:
3068
3069       If RESEAT is `evaporate', put the markers back on the free list
3070       immediately.  No other references to the markers must exist in this
3071       case, so it is used only internally on the unwind stack and
3072       save-match-data from Lisp.
3073
3074    But it was ill-conceived: those supposedly-internal markers get exposed via
3075    the undo-list, so freeing them here is unsafe.  */
3076
3077 DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 2, 0,
3078        doc: /* Set internal data on last search match from elements of LIST.
3079 LIST should have been created by calling `match-data' previously.
3080
3081 If optional arg RESEAT is non-nil, make markers on LIST point nowhere.  */)
3082     (list, reseat)
3083      register Lisp_Object list, reseat;
3084 {
3085   register int i;
3086   register Lisp_Object marker;
3087
3088   if (running_asynch_code)
3089     save_search_regs ();
3090
3091   CHECK_LIST (list);
3092
3093   /* Unless we find a marker with a buffer or an explicit buffer
3094      in LIST, assume that this match data came from a string.  */
3095   last_thing_searched = Qt;
3096
3097   /* Allocate registers if they don't already exist.  */
3098   {
3099     int length = XFASTINT (Flength (list)) / 2;
3100
3101     if (length > search_regs.num_regs)
3102       {
3103         if (search_regs.num_regs == 0)
3104           {
3105             search_regs.start
3106               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
3107             search_regs.end
3108               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
3109           }
3110         else
3111           {
3112             search_regs.start
3113               = (regoff_t *) xrealloc (search_regs.start,
3114                                        length * sizeof (regoff_t));
3115             search_regs.end
3116               = (regoff_t *) xrealloc (search_regs.end,
3117                                        length * sizeof (regoff_t));
3118           }
3119
3120         for (i = search_regs.num_regs; i < length; i++)
3121           search_regs.start[i] = -1;
3122
3123         search_regs.num_regs = length;
3124       }
3125
3126     for (i = 0; CONSP (list); i++)
3127       {
3128         marker = XCAR (list);
3129         if (BUFFERP (marker))
3130           {
3131             last_thing_searched = marker;
3132             break;
3133           }
3134         if (i >= length)
3135           break;
3136         if (NILP (marker))
3137           {
3138             search_regs.start[i] = -1;
3139             list = XCDR (list);
3140           }
3141         else
3142           {
3143             int from;
3144             Lisp_Object m;
3145
3146             m = marker;
3147             if (MARKERP (marker))
3148               {
3149                 if (XMARKER (marker)->buffer == 0)
3150                   XSETFASTINT (marker, 0);
3151                 else
3152                   XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
3153               }
3154
3155             CHECK_NUMBER_COERCE_MARKER (marker);
3156             from = XINT (marker);
3157
3158             if (!NILP (reseat) && MARKERP (m))
3159               {
3160                 unchain_marker (XMARKER (m));
3161                 XSETCAR (list, Qnil);
3162               }
3163
3164             if ((list = XCDR (list), !CONSP (list)))
3165               break;
3166
3167             m = marker = XCAR (list);
3168
3169             if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
3170               XSETFASTINT (marker, 0);
3171
3172             CHECK_NUMBER_COERCE_MARKER (marker);
3173             search_regs.start[i] = from;
3174             search_regs.end[i] = XINT (marker);
3175
3176             if (!NILP (reseat) && MARKERP (m))
3177               {
3178                 unchain_marker (XMARKER (m));
3179                 XSETCAR (list, Qnil);
3180               }
3181           }
3182         list = XCDR (list);
3183       }
3184
3185     for (; i < search_regs.num_regs; i++)
3186       search_regs.start[i] = -1;
3187   }
3188
3189   return Qnil;
3190 }
3191
3192 /* If non-zero the match data have been saved in saved_search_regs
3193    during the execution of a sentinel or filter. */
3194 static int search_regs_saved;
3195 static struct re_registers saved_search_regs;
3196 static Lisp_Object saved_last_thing_searched;
3197
3198 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
3199    if asynchronous code (filter or sentinel) is running. */
3200 static void
3201 save_search_regs ()
3202 {
3203   if (!search_regs_saved)
3204     {
3205       saved_search_regs.num_regs = search_regs.num_regs;
3206       saved_search_regs.start = search_regs.start;
3207       saved_search_regs.end = search_regs.end;
3208       saved_last_thing_searched = last_thing_searched;
3209       last_thing_searched = Qnil;
3210       search_regs.num_regs = 0;
3211       search_regs.start = 0;
3212       search_regs.end = 0;
3213
3214       search_regs_saved = 1;
3215     }
3216 }
3217
3218 /* Called upon exit from filters and sentinels. */
3219 void
3220 restore_search_regs ()
3221 {
3222   if (search_regs_saved)
3223     {
3224       if (search_regs.num_regs > 0)
3225         {
3226           xfree (search_regs.start);
3227           xfree (search_regs.end);
3228         }
3229       search_regs.num_regs = saved_search_regs.num_regs;
3230       search_regs.start = saved_search_regs.start;
3231       search_regs.end = saved_search_regs.end;
3232       last_thing_searched = saved_last_thing_searched;
3233       saved_last_thing_searched = Qnil;
3234       search_regs_saved = 0;
3235     }
3236 }
3237
3238 static Lisp_Object
3239 unwind_set_match_data (list)
3240      Lisp_Object list;
3241 {
3242   /* It is NOT ALWAYS safe to free (evaporate) the markers immediately.  */
3243   return Fset_match_data (list, Qt);
3244 }
3245
3246 /* Called to unwind protect the match data.  */
3247 void
3248 record_unwind_save_match_data ()
3249 {
3250   record_unwind_protect (unwind_set_match_data,
3251                          Fmatch_data (Qnil, Qnil, Qnil));
3252 }
3253
3254 /* Quote a string to inactivate reg-expr chars */
3255
3256 DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
3257        doc: /* Return a regexp string which matches exactly STRING and nothing else.  */)
3258      (string)
3259      Lisp_Object string;
3260 {
3261   register unsigned char *in, *out, *end;
3262   register unsigned char *temp;
3263   int backslashes_added = 0;
3264
3265   CHECK_STRING (string);
3266
3267   temp = (unsigned char *) alloca (SBYTES (string) * 2);
3268
3269   /* Now copy the data into the new string, inserting escapes. */
3270
3271   in = SDATA (string);
3272   end = in + SBYTES (string);
3273   out = temp;
3274
3275   for (; in != end; in++)
3276     {
3277       if (*in == '['
3278           || *in == '*' || *in == '.' || *in == '\\'
3279           || *in == '?' || *in == '+'
3280           || *in == '^' || *in == '$')
3281         *out++ = '\\', backslashes_added++;
3282       *out++ = *in;
3283     }
3284
3285   return make_specified_string (temp,
3286                                 SCHARS (string) + backslashes_added,
3287                                 out - temp,
3288                                 STRING_MULTIBYTE (string));
3289 }
3290 \f
/* Set up this file's state and its Lisp-visible definitions:
   initialize the compiled-regexp cache, create the `search-failed'
   and `invalid-regexp' error symbols, declare the Lisp variables
   `search-spaces-regexp' and `inhibit-changing-match-data', and pass
   each search/match primitive to defsubr.  Takes no arguments and
   returns nothing.  */
3291 void
3292 syms_of_search ()
3293 {
3294   register int i;
3295
       /* Prepare every entry of the regexp cache: give its pattern buffer
          an initial 100-byte allocation, point the pattern buffer's
          fastmap at the fastmap array stored in the same entry, and set
          the three Lisp fields to nil (a nil regexp means the entry does
          not yet hold a compiled pattern).  The address of each Lisp
          field is handed to staticpro -- presumably so the garbage
          collector keeps whatever is later stored there alive; confirm
          against staticpro's definition.  */
3296   for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
3297     {
3298       searchbufs[i].buf.allocated = 100;
3299       searchbufs[i].buf.buffer = (unsigned char *) xmalloc (100);
3300       searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
3301       searchbufs[i].regexp = Qnil;
3302       searchbufs[i].whitespace_regexp = Qnil;
3303       searchbufs[i].syntax_table = Qnil;
3304       staticpro (&searchbufs[i].regexp);
3305       staticpro (&searchbufs[i].whitespace_regexp);
3306       staticpro (&searchbufs[i].syntax_table);
           /* Chain each entry to the following array element; the last
              entry ends the list with a null pointer.  */
3307       searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
3308     }
       /* The list initially starts at the first array element.  */
3309   searchbuf_head = &searchbufs[0];
3310
       /* Create the two error symbols used by this file.  */
3311   Qsearch_failed = intern ("search-failed");
3312   staticpro (&Qsearch_failed);
3313   Qinvalid_regexp = intern ("invalid-regexp");
3314   staticpro (&Qinvalid_regexp);
3315
       /* Give each error symbol its condition list (the symbol itself
          followed by `error') and its message text.  */
3316   Fput (Qsearch_failed, Qerror_conditions,
3317         Fcons (Qsearch_failed, Fcons (Qerror, Qnil)));
3318   Fput (Qsearch_failed, Qerror_message,
3319         build_string ("Search failed"));
3320
3321   Fput (Qinvalid_regexp, Qerror_conditions,
3322         Fcons (Qinvalid_regexp, Fcons (Qerror, Qnil)));
3323   Fput (Qinvalid_regexp, Qerror_message,
3324         build_string ("Invalid regexp"));
3325
       /* Both "last thing searched" records start out as nil.  */
3326   last_thing_searched = Qnil;
3327   staticpro (&last_thing_searched);
3328
3329   saved_last_thing_searched = Qnil;
3330   staticpro (&saved_last_thing_searched);
3331
       /* Lisp variables.  Each one is declared and then given its
          initial value of nil.  The doc comments inside the declarations
          are left exactly as written.  */
3332   DEFVAR_LISP ("search-spaces-regexp", &Vsearch_spaces_regexp,
3333       doc: /* Regexp to substitute for bunches of spaces in regexp search.
3334 Some commands use this for user-specified regexps.
3335 Spaces that occur inside character classes or repetition operators
3336 or other such regexp constructs are not replaced with this.
3337 A value of nil (which is the normal value) means treat spaces literally.  */);
3338   Vsearch_spaces_regexp = Qnil;
3339
3340   DEFVAR_LISP ("inhibit-changing-match-data", &Vinhibit_changing_match_data,
3341       doc: /* Internal use only.
3342 If non-nil, the primitive searching and matching functions
3343 such as `looking-at', `string-match', `re-search-forward', etc.,
3344 do not set the match data.  The proper way to use this variable
3345 is to bind it with `let' around a small expression.  */);
3346   Vinhibit_changing_match_data = Qnil;
3347
       /* Pass the subr object of every search, match-data and
          regexp-quoting primitive to defsubr -- presumably this is what
          makes them callable from Lisp; confirm against defsubr's
          definition.  */
3348   defsubr (&Slooking_at);
3349   defsubr (&Sposix_looking_at);
3350   defsubr (&Sstring_match);
3351   defsubr (&Sposix_string_match);
3352   defsubr (&Ssearch_forward);
3353   defsubr (&Ssearch_backward);
3354   defsubr (&Sword_search_forward);
3355   defsubr (&Sword_search_backward);
3356   defsubr (&Sword_search_forward_lax);
3357   defsubr (&Sword_search_backward_lax);
3358   defsubr (&Sre_search_forward);
3359   defsubr (&Sre_search_backward);
3360   defsubr (&Sposix_search_forward);
3361   defsubr (&Sposix_search_backward);
3362   defsubr (&Sreplace_match);
3363   defsubr (&Smatch_beginning);
3364   defsubr (&Smatch_end);
3365   defsubr (&Smatch_data);
3366   defsubr (&Sset_match_data);
3367   defsubr (&Sregexp_quote);
3368 }
3369
3370 /* arch-tag: a6059d79-0552-4f14-a2cb-d379a4e3c78f
3371    (do not change this comment) */