src/search.c

   1 /* String search routines for GNU Emacs.
   2    Copyright (C) 1985, 1986, 1987, 1993, 1994, 1997, 1998, 1999, 2002, 2003,
   3                  2004, 2005, 2006 Free Software Foundation, Inc.
   4
   5 This file is part of GNU Emacs.
   6
   7 GNU Emacs is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2, or (at your option)
  10 any later version.
  11
  12 GNU Emacs is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU Emacs; see the file COPYING.  If not, write to
  19 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
  20 Boston, MA 02110-1301, USA.  */
  21
  22
  23 #include <config.h>
  24 #include "lisp.h"
  25 #include "syntax.h"
  26 #include "category.h"
  27 #include "buffer.h"
  28 #include "charset.h"
  29 #include "region-cache.h"
  30 #include "commands.h"
  31 #include "blockinput.h"
  32 #include "intervals.h"
  33
  34 #include <sys/types.h>
  35 #include "regex.h"
  36
  37 #define REGEXP_CACHE_SIZE 20
  38
  39 /* If the regexp is non-nil, then the buffer contains the compiled form
  40    of that regexp, suitable for searching.  */
  41 struct regexp_cache
  42 {
  43   struct regexp_cache *next;
  44   Lisp_Object regexp, whitespace_regexp;
  45   struct re_pattern_buffer buf;
  46   char fastmap[0400];
  47   /* Nonzero means regexp was compiled to do full POSIX backtracking.  */
  48   char posix;
  49 };
  50
  51 /* The instances of that struct.  */
  52 struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
  53
  54 /* The head of the linked list; points to the most recently used buffer.  */
  55 struct regexp_cache *searchbuf_head;
  56
  57
  58 /* Every call to re_match, etc., must pass &search_regs as the regs
  59    argument unless you can show it is unnecessary (i.e., if re_match
  60    is certainly going to be called again before region-around-match
  61    can be called).
  62
  63    Since the registers are now dynamically allocated, we need to make
  64    sure not to refer to the Nth register before checking that it has
  65    been allocated by checking search_regs.num_regs.
  66
  67    The regex code keeps track of whether it has allocated the search
  68    buffer using bits in the re_pattern_buffer.  This means that whenever
  69    you compile a new pattern, it completely forgets whether it has
  70    allocated any registers, and will allocate new registers the next
  71    time you call a searching or matching function.  Therefore, we need
  72    to call re_set_registers after compiling a new pattern or after
  73    setting the match registers, so that the regex functions will be
  74    able to free or re-allocate it properly.  */
  75 static struct re_registers search_regs;
  76
  77 /* The buffer in which the last search was performed, or
  78    Qt if the last search was done in a string;
  79    Qnil if no searching has been done yet.  */
  80 static Lisp_Object last_thing_searched;
  81
  82 /* error condition signaled when regexp compile_pattern fails */
  83
  84 Lisp_Object Qinvalid_regexp;
  85
  86 Lisp_Object Vsearch_spaces_regexp;
  87
  88 static void set_search_regs ();
  89 static void save_search_regs ();
  90 static int simple_search ();
  91 static int boyer_moore ();
  92 static int search_buffer ();
  93
  94 static void
  95 matcher_overflow ()
  96 {
  97   error ("Stack overflow in regexp matcher");
  98 }
  99
 100 /* Compile a regexp and signal a Lisp error if anything goes wrong.
 101    PATTERN is the pattern to compile.
 102    CP is the place to put the result.
 103    TRANSLATE is a translation table for ignoring case, or nil for none.
 104    REGP is the structure that says where to store the "register"
 105    values that will result from matching this pattern.
 106    If it is 0, we should compile the pattern not to record any
 107    subexpression bounds.
 108    POSIX is nonzero if we want full backtracking (POSIX style)
 109    for this pattern.  0 means backtrack only enough to get a valid match.
 110    MULTIBYTE is nonzero if we want to handle multibyte characters in
 111    PATTERN.  0 means all multibyte characters are recognized just as
 112    sequences of binary data.
 113
 114    The behavior also depends on Vsearch_spaces_regexp.  */
 115
 116 static void
 117 compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte)
 118      struct regexp_cache *cp;
 119      Lisp_Object pattern;
 120      Lisp_Object translate;
 121      struct re_registers *regp;
 122      int posix;
 123      int multibyte;
 124 {
 125   unsigned char *raw_pattern;
 126   int raw_pattern_size;
 127   char *val;
 128   reg_syntax_t old;
 129
 130   /* MULTIBYTE says whether the text to be searched is multibyte.
 131      We must convert PATTERN to match that, or we will not really
 132      find things right.  */
 133
 134   if (multibyte == STRING_MULTIBYTE (pattern))
 135     {
 136       raw_pattern = (unsigned char *) SDATA (pattern);
 137       raw_pattern_size = SBYTES (pattern);
 138     }
 139   else if (multibyte)
 140     {
 141       raw_pattern_size = count_size_as_multibyte (SDATA (pattern),
 142                                                   SCHARS (pattern));
 143       raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
 144       copy_text (SDATA (pattern), raw_pattern,
 145                  SCHARS (pattern), 0, 1);
 146     }
 147   else
 148     {
 149       /* Converting multibyte to single-byte.
 150
 151          ??? Perhaps this conversion should be done in a special way
 152          by subtracting nonascii-insert-offset from each non-ASCII char,
 153          so that only the multibyte chars which really correspond to
 154          the chosen single-byte character set can possibly match.  */
 155       raw_pattern_size = SCHARS (pattern);
 156       raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
 157       copy_text (SDATA (pattern), raw_pattern,
 158                  SBYTES (pattern), 1, 0);
 159     }
 160
 161   cp->regexp = Qnil;
 162   cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
 163   cp->posix = posix;
 164   cp->buf.multibyte = multibyte;
 165   cp->whitespace_regexp = Vsearch_spaces_regexp;
 166   BLOCK_INPUT;
 167   old = re_set_syntax (RE_SYNTAX_EMACS
 168                        | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
 169
 170   re_set_whitespace_regexp (NILP (Vsearch_spaces_regexp) ? NULL
 171                             : SDATA (Vsearch_spaces_regexp));
 172
 173   val = (char *) re_compile_pattern ((char *)raw_pattern,
 174                                      raw_pattern_size, &cp->buf);
 175
 176   re_set_whitespace_regexp (NULL);
 177
 178   re_set_syntax (old);
 179   UNBLOCK_INPUT;
 180   if (val)
 181     Fsignal (Qinvalid_regexp, Fcons (build_string (val), Qnil));
 182
 183   cp->regexp = Fcopy_sequence (pattern);
 184 }
 185
 186 /* Shrink each compiled regexp buffer in the cache
 187    to the size actually used right now.
 188    This is called from garbage collection.  */
 189
 190 void
 191 shrink_regexp_cache ()
 192 {
 193   struct regexp_cache *cp;
 194
 195   for (cp = searchbuf_head; cp != 0; cp = cp->next)
 196     {
 197       cp->buf.allocated = cp->buf.used;
 198       cp->buf.buffer
 199         = (unsigned char *) xrealloc (cp->buf.buffer, cp->buf.used);
 200     }
 201 }
 202
 203 /* Compile a regexp if necessary, but first check to see if there's one in
 204    the cache.
 205    PATTERN is the pattern to compile.
 206    TRANSLATE is a translation table for ignoring case, or nil for none.
 207    REGP is the structure that says where to store the "register"
 208    values that will result from matching this pattern.
 209    If it is 0, we should compile the pattern not to record any
 210    subexpression bounds.
 211    POSIX is nonzero if we want full backtracking (POSIX style)
 212    for this pattern.  0 means backtrack only enough to get a valid match.  */
 213
 214 struct re_pattern_buffer *
 215 compile_pattern (pattern, regp, translate, posix, multibyte)
 216      Lisp_Object pattern;
 217      struct re_registers *regp;
 218      Lisp_Object translate;
 219      int posix, multibyte;
 220 {
 221   struct regexp_cache *cp, **cpp;
 222
 223   for (cpp = &searchbuf_head; ; cpp = &cp->next)
 224     {
 225       cp = *cpp;
 226       /* Entries are initialized to nil, and may be set to nil by
 227          compile_pattern_1 if the pattern isn't valid.  Don't apply
 228          string accessors in those cases.  However, compile_pattern_1
 229          is only applied to the cache entry we pick here to reuse.  So
 230          nil should never appear before a non-nil entry.  */
 231       if (NILP (cp->regexp))
 232         goto compile_it;
 233       if (SCHARS (cp->regexp) == SCHARS (pattern)
 234           && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
 235           && !NILP (Fstring_equal (cp->regexp, pattern))
 236           && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
 237           && cp->posix == posix
 238           && cp->buf.multibyte == multibyte
 239           && !NILP (Fequal (cp->whitespace_regexp, Vsearch_spaces_regexp)))
 240         break;
 241
 242       /* If we're at the end of the cache, compile into the nil cell
 243          we found, or the last (least recently used) cell with a
 244          string value.  */
 245       if (cp->next == 0)
 246         {
 247         compile_it:
 248           compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte);
 249           break;
 250         }
 251     }
 252
 253   /* When we get here, cp (aka *cpp) contains the compiled pattern,
 254      either because we found it in the cache or because we just compiled it.
 255      Move it to the front of the queue to mark it as most recently used.  */
 256   *cpp = cp->next;
 257   cp->next = searchbuf_head;
 258   searchbuf_head = cp;
 259
 260   /* Advise the searching functions about the space we have allocated
 261      for register data.  */
 262   if (regp)
 263     re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
 264
 265   return &cp->buf;
 266 }
 267
 268 /* Error condition used for failing searches */
 269 Lisp_Object Qsearch_failed;
 270
 271 Lisp_Object
 272 signal_failure (arg)
 273      Lisp_Object arg;
 274 {
 275   Fsignal (Qsearch_failed, Fcons (arg, Qnil));
 276   return Qnil;
 277 }
 278 \f
 279 static Lisp_Object
 280 looking_at_1 (string, posix)
 281      Lisp_Object string;
 282      int posix;
 283 {
 284   Lisp_Object val;
 285   unsigned char *p1, *p2;
 286   int s1, s2;
 287   register int i;
 288   struct re_pattern_buffer *bufp;
 289
 290   if (running_asynch_code)
 291     save_search_regs ();
 292
 293   CHECK_STRING (string);
 294   bufp = compile_pattern (string, &search_regs,
 295                           (!NILP (current_buffer->case_fold_search)
 296                            ? current_buffer->case_canon_table : Qnil),
 297                           posix,
 298                           !NILP (current_buffer->enable_multibyte_characters));
 299
 300   immediate_quit = 1;
 301   QUIT;                 /* Do a pending quit right away, to avoid paradoxical behavior */
 302
 303   /* Get pointers and sizes of the two strings
 304      that make up the visible portion of the buffer. */
 305
 306   p1 = BEGV_ADDR;
 307   s1 = GPT_BYTE - BEGV_BYTE;
 308   p2 = GAP_END_ADDR;
 309   s2 = ZV_BYTE - GPT_BYTE;
 310   if (s1 < 0)
 311     {
 312       p2 = p1;
 313       s2 = ZV_BYTE - BEGV_BYTE;
 314       s1 = 0;
 315     }
 316   if (s2 < 0)
 317     {
 318       s1 = ZV_BYTE - BEGV_BYTE;
 319       s2 = 0;
 320     }
 321
 322   re_match_object = Qnil;
 323
 324   i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
 325                   PT_BYTE - BEGV_BYTE, &search_regs,
 326                   ZV_BYTE - BEGV_BYTE);
 327   immediate_quit = 0;
 328
 329   if (i == -2)
 330     matcher_overflow ();
 331
 332   val = (0 <= i ? Qt : Qnil);
 333   if (i >= 0)
 334     for (i = 0; i < search_regs.num_regs; i++)
 335       if (search_regs.start[i] >= 0)
 336         {
 337           search_regs.start[i]
 338             = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
 339           search_regs.end[i]
 340             = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
 341         }
 342   XSETBUFFER (last_thing_searched, current_buffer);
 343   return val;
 344 }
 345
 346 DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
 347        doc: /* Return t if text after point matches regular expression REGEXP.
 348 This function modifies the match data that `match-beginning',
 349 `match-end' and `match-data' access; save and restore the match
 350 data if you want to preserve them.  */)
 351      (regexp)
 352      Lisp_Object regexp;
 353 {
 354   return looking_at_1 (regexp, 0);
 355 }
 356
 357 DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
 358        doc: /* Return t if text after point matches regular expression REGEXP.
 359 Find the longest match, in accord with Posix regular expression rules.
 360 This function modifies the match data that `match-beginning',
 361 `match-end' and `match-data' access; save and restore the match
 362 data if you want to preserve them.  */)
 363      (regexp)
 364      Lisp_Object regexp;
 365 {
 366   return looking_at_1 (regexp, 1);
 367 }
 368 \f
 369 static Lisp_Object
 370 string_match_1 (regexp, string, start, posix)
 371      Lisp_Object regexp, string, start;
 372      int posix;
 373 {
 374   int val;
 375   struct re_pattern_buffer *bufp;
 376   int pos, pos_byte;
 377   int i;
 378
 379   if (running_asynch_code)
 380     save_search_regs ();
 381
 382   CHECK_STRING (regexp);
 383   CHECK_STRING (string);
 384
 385   if (NILP (start))
 386     pos = 0, pos_byte = 0;
 387   else
 388     {
 389       int len = SCHARS (string);
 390
 391       CHECK_NUMBER (start);
 392       pos = XINT (start);
 393       if (pos < 0 && -pos <= len)
 394         pos = len + pos;
 395       else if (0 > pos || pos > len)
 396         args_out_of_range (string, start);
 397       pos_byte = string_char_to_byte (string, pos);
 398     }
 399
 400   bufp = compile_pattern (regexp, &search_regs,
 401                           (!NILP (current_buffer->case_fold_search)
 402                            ? current_buffer->case_canon_table : Qnil),
 403                           posix,
 404                           STRING_MULTIBYTE (string));
 405   immediate_quit = 1;
 406   re_match_object = string;
 407
 408   val = re_search (bufp, (char *) SDATA (string),
 409                    SBYTES (string), pos_byte,
 410                    SBYTES (string) - pos_byte,
 411                    &search_regs);
 412   immediate_quit = 0;
 413   last_thing_searched = Qt;
 414   if (val == -2)
 415     matcher_overflow ();
 416   if (val < 0) return Qnil;
 417
 418   for (i = 0; i < search_regs.num_regs; i++)
 419     if (search_regs.start[i] >= 0)
 420       {
 421         search_regs.start[i]
 422           = string_byte_to_char (string, search_regs.start[i]);
 423         search_regs.end[i]
 424           = string_byte_to_char (string, search_regs.end[i]);
 425       }
 426
 427   return make_number (string_byte_to_char (string, val));
 428 }
 429
 430 DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
 431        doc: /* Return index of start of first match for REGEXP in STRING, or nil.
 432 Matching ignores case if `case-fold-search' is non-nil.
 433 If third arg START is non-nil, start search at that index in STRING.
 434 For index of first char beyond the match, do (match-end 0).
 435 `match-end' and `match-beginning' also give indices of substrings
 436 matched by parenthesis constructs in the pattern.
 437
 438 You can use the function `match-string' to extract the substrings
 439 matched by the parenthesis constructions in REGEXP. */)
 440      (regexp, string, start)
 441      Lisp_Object regexp, string, start;
 442 {
 443   return string_match_1 (regexp, string, start, 0);
 444 }
 445
 446 DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
 447        doc: /* Return index of start of first match for REGEXP in STRING, or nil.
 448 Find the longest match, in accord with Posix regular expression rules.
 449 Case is ignored if `case-fold-search' is non-nil in the current buffer.
 450 If third arg START is non-nil, start search at that index in STRING.
 451 For index of first char beyond the match, do (match-end 0).
 452 `match-end' and `match-beginning' also give indices of substrings
 453 matched by parenthesis constructs in the pattern.  */)
 454      (regexp, string, start)
 455      Lisp_Object regexp, string, start;
 456 {
 457   return string_match_1 (regexp, string, start, 1);
 458 }
 459
 460 /* Match REGEXP against STRING, searching all of STRING,
 461    and return the index of the match, or negative on failure.
 462    This does not clobber the match data.  */
 463
 464 int
 465 fast_string_match (regexp, string)
 466      Lisp_Object regexp, string;
 467 {
 468   int val;
 469   struct re_pattern_buffer *bufp;
 470
 471   bufp = compile_pattern (regexp, 0, Qnil,
 472                           0, STRING_MULTIBYTE (string));
 473   immediate_quit = 1;
 474   re_match_object = string;
 475
 476   val = re_search (bufp, (char *) SDATA (string),
 477                    SBYTES (string), 0,
 478                    SBYTES (string), 0);
 479   immediate_quit = 0;
 480   return val;
 481 }
 482
 483 /* Match REGEXP against STRING, searching all of STRING ignoring case,
 484    and return the index of the match, or negative on failure.
 485    This does not clobber the match data.
 486    We assume that STRING contains single-byte characters.  */
 487
 488 extern Lisp_Object Vascii_downcase_table;
 489
 490 int
 491 fast_c_string_match_ignore_case (regexp, string)
 492      Lisp_Object regexp;
 493      const char *string;
 494 {
 495   int val;
 496   struct re_pattern_buffer *bufp;
 497   int len = strlen (string);
 498
 499   regexp = string_make_unibyte (regexp);
 500   re_match_object = Qt;
 501   bufp = compile_pattern (regexp, 0,
 502                           Vascii_canon_table, 0,
 503                           0);
 504   immediate_quit = 1;
 505   val = re_search (bufp, string, len, 0, len, 0);
 506   immediate_quit = 0;
 507   return val;
 508 }
 509
 510 /* Like fast_string_match but ignore case.  */
 511
 512 int
 513 fast_string_match_ignore_case (regexp, string)
 514      Lisp_Object regexp, string;
 515 {
 516   int val;
 517   struct re_pattern_buffer *bufp;
 518
 519   bufp = compile_pattern (regexp, 0, Vascii_canon_table,
 520                           0, STRING_MULTIBYTE (string));
 521   immediate_quit = 1;
 522   re_match_object = string;
 523
 524   val = re_search (bufp, (char *) SDATA (string),
 525                    SBYTES (string), 0,
 526                    SBYTES (string), 0);
 527   immediate_quit = 0;
 528   return val;
 529 }
 530 \f
 531 /* The newline cache: remembering which sections of text have no newlines.  */
 532
 533 /* If the user has requested newline caching, make sure it's on.
 534    Otherwise, make sure it's off.
 535    This is our cheezy way of associating an action with the change of
 536    state of a buffer-local variable.  */
 537 static void
 538 newline_cache_on_off (buf)
 539      struct buffer *buf;
 540 {
 541   if (NILP (buf->cache_long_line_scans))
 542     {
 543       /* It should be off.  */
 544       if (buf->newline_cache)
 545         {
 546           free_region_cache (buf->newline_cache);
 547           buf->newline_cache = 0;
 548         }
 549     }
 550   else
 551     {
 552       /* It should be on.  */
 553       if (buf->newline_cache == 0)
 554         buf->newline_cache = new_region_cache ();
 555     }
 556 }
 557
 558 \f
 559 /* Search for COUNT instances of the character TARGET between START and END.
 560
 561    If COUNT is positive, search forwards; END must be >= START.
 562    If COUNT is negative, search backwards for the -COUNTth instance;
 563       END must be <= START.
 564    If COUNT is zero, do anything you please; run rogue, for all I care.
 565
 566    If END is zero, use BEGV or ZV instead, as appropriate for the
 567    direction indicated by COUNT.
 568
 569    If we find COUNT instances, set *SHORTAGE to zero, and return the
 570    position past the COUNTth match.  Note that for reverse motion
 571    this is not the same as the usual convention for Emacs motion commands.
 572
 573    If we don't find COUNT instances before reaching END, set *SHORTAGE
 574    to the number of TARGETs left unfound, and return END.
 575
 576    If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
 577    except when inside redisplay.  */
 578
 579 int
 580 scan_buffer (target, start, end, count, shortage, allow_quit)
 581      register int target;
 582      int start, end;
 583      int count;
 584      int *shortage;
 585      int allow_quit;
 586 {
 587   struct region_cache *newline_cache;
 588   int direction;
 589
 590   if (count > 0)
 591     {
 592       direction = 1;
 593       if (! end) end = ZV;
 594     }
 595   else
 596     {
 597       direction = -1;
 598       if (! end) end = BEGV;
 599     }
 600
 601   newline_cache_on_off (current_buffer);
 602   newline_cache = current_buffer->newline_cache;
 603
 604   if (shortage != 0)
 605     *shortage = 0;
 606
 607   immediate_quit = allow_quit;
 608
 609   if (count > 0)
 610     while (start != end)
 611       {
 612         /* Our innermost scanning loop is very simple; it doesn't know
 613            about gaps, buffer ends, or the newline cache.  ceiling is
 614            the position of the last character before the next such
 615            obstacle --- the last character the dumb search loop should
 616            examine.  */
 617         int ceiling_byte = CHAR_TO_BYTE (end) - 1;
 618         int start_byte = CHAR_TO_BYTE (start);
 619         int tem;
 620
 621         /* If we're looking for a newline, consult the newline cache
 622            to see where we can avoid some scanning.  */
 623         if (target == '\n' && newline_cache)
 624           {
 625             int next_change;
 626             immediate_quit = 0;
 627             while (region_cache_forward
 628                    (current_buffer, newline_cache, start_byte, &next_change))
 629               start_byte = next_change;
 630             immediate_quit = allow_quit;
 631
 632             /* START should never be after END.  */
 633             if (start_byte > ceiling_byte)
 634               start_byte = ceiling_byte;
 635
 636             /* Now the text after start is an unknown region, and
 637                next_change is the position of the next known region. */
 638             ceiling_byte = min (next_change - 1, ceiling_byte);
 639           }
 640
 641         /* The dumb loop can only scan text stored in contiguous
 642            bytes. BUFFER_CEILING_OF returns the last character
 643            position that is contiguous, so the ceiling is the
 644            position after that.  */
 645         tem = BUFFER_CEILING_OF (start_byte);
 646         ceiling_byte = min (tem, ceiling_byte);
 647
 648         {
 649           /* The termination address of the dumb loop.  */
 650           register unsigned char *ceiling_addr
 651             = BYTE_POS_ADDR (ceiling_byte) + 1;
 652           register unsigned char *cursor
 653             = BYTE_POS_ADDR (start_byte);
 654           unsigned char *base = cursor;
 655
 656           while (cursor < ceiling_addr)
 657             {
 658               unsigned char *scan_start = cursor;
 659
 660               /* The dumb loop.  */
 661               while (*cursor != target && ++cursor < ceiling_addr)
 662                 ;
 663
 664               /* If we're looking for newlines, cache the fact that
 665                  the region from start to cursor is free of them. */
 666               if (target == '\n' && newline_cache)
 667                 know_region_cache (current_buffer, newline_cache,
 668                                    start_byte + scan_start - base,
 669                                    start_byte + cursor - base);
 670
 671               /* Did we find the target character?  */
 672               if (cursor < ceiling_addr)
 673                 {
 674                   if (--count == 0)
 675                     {
 676                       immediate_quit = 0;
 677                       return BYTE_TO_CHAR (start_byte + cursor - base + 1);
 678                     }
 679                   cursor++;
 680                 }
 681             }
 682
 683           start = BYTE_TO_CHAR (start_byte + cursor - base);
 684         }
 685       }
 686   else
 687     while (start > end)
 688       {
 689         /* The last character to check before the next obstacle.  */
 690         int ceiling_byte = CHAR_TO_BYTE (end);
 691         int start_byte = CHAR_TO_BYTE (start);
 692         int tem;
 693
 694         /* Consult the newline cache, if appropriate.  */
 695         if (target == '\n' && newline_cache)
 696           {
 697             int next_change;
 698             immediate_quit = 0;
 699             while (region_cache_backward
 700                    (current_buffer, newline_cache, start_byte, &next_change))
 701               start_byte = next_change;
 702             immediate_quit = allow_quit;
 703
 704             /* Start should never be at or before end.  */
 705             if (start_byte <= ceiling_byte)
 706               start_byte = ceiling_byte + 1;
 707
 708             /* Now the text before start is an unknown region, and
 709                next_change is the position of the next known region. */
 710             ceiling_byte = max (next_change, ceiling_byte);
 711           }
 712
 713         /* Stop scanning before the gap.  */
 714         tem = BUFFER_FLOOR_OF (start_byte - 1);
 715         ceiling_byte = max (tem, ceiling_byte);
 716
 717         {
 718           /* The termination address of the dumb loop.  */
 719           register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
 720           register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
 721           unsigned char *base = cursor;
 722
 723           while (cursor >= ceiling_addr)
 724             {
 725               unsigned char *scan_start = cursor;
 726
 727               while (*cursor != target && --cursor >= ceiling_addr)
 728                 ;
 729
 730               /* If we're looking for newlines, cache the fact that
 731                  the region from after the cursor to start is free of them.  */
 732               if (target == '\n' && newline_cache)
 733                 know_region_cache (current_buffer, newline_cache,
 734                                    start_byte + cursor - base,
 735                                    start_byte + scan_start - base);
 736
 737               /* Did we find the target character?  */
 738               if (cursor >= ceiling_addr)
 739                 {
 740                   if (++count >= 0)
 741                     {
 742                       immediate_quit = 0;
 743                       return BYTE_TO_CHAR (start_byte + cursor - base);
 744                     }
 745                   cursor--;
 746                 }
 747             }
 748
 749           start = BYTE_TO_CHAR (start_byte + cursor - base);
 750         }
 751       }
 752
 753   immediate_quit = 0;
 754   if (shortage != 0)
 755     *shortage = count * direction;
 756   return start;
 757 }
 758 \f
 759 /* Search for COUNT instances of a line boundary, which means either a
 760    newline or (if selective display enabled) a carriage return.
 761    Start at START.  If COUNT is negative, search backwards.
 762
 763    We report the resulting position by calling TEMP_SET_PT_BOTH.
 764
 765    If we find COUNT instances. we position after (always after,
 766    even if scanning backwards) the COUNTth match, and return 0.
 767
 768    If we don't find COUNT instances before reaching the end of the
 769    buffer (or the beginning, if scanning backwards), we return
 770    the number of line boundaries left unfound, and position at
 771    the limit we bumped up against.
 772
 773    If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
 774    except in special cases.  */
 775
 776 int
 777 scan_newline (start, start_byte, limit, limit_byte, count, allow_quit)
 778      int start, start_byte;
 779      int limit, limit_byte;
 780      register int count;
 781      int allow_quit;
 782 {
 783   int direction = ((count > 0) ? 1 : -1);
 784
 785   register unsigned char *cursor;
 786   unsigned char *base;
 787
 788   register int ceiling;
 789   register unsigned char *ceiling_addr;
 790
 791   int old_immediate_quit = immediate_quit;
 792
 793   /* The code that follows is like scan_buffer
 794      but checks for either newline or carriage return.  */
 795
 796   if (allow_quit)
 797     immediate_quit++;
 798
 799   start_byte = CHAR_TO_BYTE (start);
 800
 801   if (count > 0)
 802     {
 803       while (start_byte < limit_byte)
 804         {
 805           ceiling =  BUFFER_CEILING_OF (start_byte);
 806           ceiling = min (limit_byte - 1, ceiling);
 807           ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
 808           base = (cursor = BYTE_POS_ADDR (start_byte));
 809           while (1)
 810             {
 811               while (*cursor != '\n' && ++cursor != ceiling_addr)
 812                 ;
 813
 814               if (cursor != ceiling_addr)
 815                 {
 816                   if (--count == 0)
 817                     {
 818                       immediate_quit = old_immediate_quit;
 819                       start_byte = start_byte + cursor - base + 1;
 820                       start = BYTE_TO_CHAR (start_byte);
 821                       TEMP_SET_PT_BOTH (start, start_byte);
 822                       return 0;
 823                     }
 824                   else
 825                     if (++cursor == ceiling_addr)
 826                       break;
 827                 }
 828               else
 829                 break;
 830             }
 831           start_byte += cursor - base;
 832         }
 833     }
 834   else
 835     {
 836       while (start_byte > limit_byte)
 837         {
 838           ceiling = BUFFER_FLOOR_OF (start_byte - 1);
 839           ceiling = max (limit_byte, ceiling);
 840           ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
 841           base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
 842           while (1)
 843             {
 844               while (--cursor != ceiling_addr && *cursor != '\n')
 845                 ;
 846
 847               if (cursor != ceiling_addr)
 848                 {
 849                   if (++count == 0)
 850                     {
 851                       immediate_quit = old_immediate_quit;
 852                       /* Return the position AFTER the match we found.  */
 853                       start_byte = start_byte + cursor - base + 1;
 854                       start = BYTE_TO_CHAR (start_byte);
 855                       TEMP_SET_PT_BOTH (start, start_byte);
 856                       return 0;
 857                     }
 858                 }
 859               else
 860                 break;
 861             }
 862           /* Here we add 1 to compensate for the last decrement
 863              of CURSOR, which took it past the valid range.  */
 864           start_byte += cursor - base + 1;
 865         }
 866     }
 867
 868   TEMP_SET_PT_BOTH (limit, limit_byte);
 869   immediate_quit = old_immediate_quit;
 870
 871   return count * direction;
 872 }
 873
 874 int
 875 find_next_newline_no_quit (from, cnt)
 876      register int from, cnt;
 877 {
 878   return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0);
 879 }
 880
 881 /* Like find_next_newline, but returns position before the newline,
 882    not after, and only search up to TO.  This isn't just
 883    find_next_newline (...)-1, because you might hit TO.  */
 884
 885 int
 886 find_before_next_newline (from, to, cnt)
 887      int from, to, cnt;
 888 {
 889   int shortage;
 890   int pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);
 891
 892   if (shortage == 0)
 893     pos--;
 894
 895   return pos;
 896 }
 897 \f
 898 /* Subroutines of Lisp buffer search functions. */
 899
 900 static Lisp_Object
 901 search_command (string, bound, noerror, count, direction, RE, posix)
 902      Lisp_Object string, bound, noerror, count;
 903      int direction;
 904      int RE;
 905      int posix;
 906 {
 907   register int np;
 908   int lim, lim_byte;
 909   int n = direction;
 910
 911   if (!NILP (count))
 912     {
 913       CHECK_NUMBER (count);
 914       n *= XINT (count);
 915     }
 916
 917   CHECK_STRING (string);
 918   if (NILP (bound))
 919     {
 920       if (n > 0)
 921         lim = ZV, lim_byte = ZV_BYTE;
 922       else
 923         lim = BEGV, lim_byte = BEGV_BYTE;
 924     }
 925   else
 926     {
 927       CHECK_NUMBER_COERCE_MARKER (bound);
 928       lim = XINT (bound);
 929       if (n > 0 ? lim < PT : lim > PT)
 930         error ("Invalid search bound (wrong side of point)");
 931       if (lim > ZV)
 932         lim = ZV, lim_byte = ZV_BYTE;
 933       else if (lim < BEGV)
 934         lim = BEGV, lim_byte = BEGV_BYTE;
 935       else
 936         lim_byte = CHAR_TO_BYTE (lim);
 937     }
 938
 939   np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
 940                       (!NILP (current_buffer->case_fold_search)
 941                        ? current_buffer->case_canon_table
 942                        : Qnil),
 943                       (!NILP (current_buffer->case_fold_search)
 944                        ? current_buffer->case_eqv_table
 945                        : Qnil),
 946                       posix);
 947   if (np <= 0)
 948     {
 949       if (NILP (noerror))
 950         return signal_failure (string);
 951       if (!EQ (noerror, Qt))
 952         {
 953           if (lim < BEGV || lim > ZV)
 954             abort ();
 955           SET_PT_BOTH (lim, lim_byte);
 956           return Qnil;
 957 #if 0 /* This would be clean, but maybe programs depend on
 958          a value of nil here.  */
 959           np = lim;
 960 #endif
 961         }
 962       else
 963         return Qnil;
 964     }
 965
 966   if (np < BEGV || np > ZV)
 967     abort ();
 968
 969   SET_PT (np);
 970
 971   return make_number (np);
 972 }
 973 \f
 974 /* Return 1 if REGEXP it matches just one constant string.  */
 975
 976 static int
 977 trivial_regexp_p (regexp)
 978      Lisp_Object regexp;
 979 {
 980   int len = SBYTES (regexp);
 981   unsigned char *s = SDATA (regexp);
 982   while (--len >= 0)
 983     {
 984       switch (*s++)
 985         {
 986         case '.': case '*': case '+': case '?': case '[': case '^': case '$':
 987           return 0;
 988         case '\\':
 989           if (--len < 0)
 990             return 0;
 991           switch (*s++)
 992             {
 993             case '|': case '(': case ')': case '`': case '\'': case 'b':
 994             case 'B': case '<': case '>': case 'w': case 'W': case 's':
 995             case 'S': case '=': case '{': case '}': case '_':
 996             case 'c': case 'C': /* for categoryspec and notcategoryspec */
 997             case '1': case '2': case '3': case '4': case '5':
 998             case '6': case '7': case '8': case '9':
 999               return 0;
1000             }
1001         }
1002     }
1003   return 1;
1004 }
1005
1006 /* Search for the n'th occurrence of STRING in the current buffer,
1007    starting at position POS and stopping at position LIM,
1008    treating STRING as a literal string if RE is false or as
1009    a regular expression if RE is true.
1010
1011    If N is positive, searching is forward and LIM must be greater than POS.
1012    If N is negative, searching is backward and LIM must be less than POS.
1013
1014    Returns -x if x occurrences remain to be found (x > 0),
1015    or else the position at the beginning of the Nth occurrence
1016    (if searching backward) or the end (if searching forward).
1017
1018    POSIX is nonzero if we want full backtracking (POSIX style)
1019    for this pattern.  0 means backtrack only enough to get a valid match.  */
1020
1021 #define TRANSLATE(out, trt, d)                  \
1022 do                                              \
1023   {                                             \
1024     if (! NILP (trt))                           \
1025       {                                         \
1026         Lisp_Object temp;                       \
1027         temp = Faref (trt, make_number (d));    \
1028         if (INTEGERP (temp))                    \
1029           out = XINT (temp);                    \
1030         else                                    \
1031           out = d;                              \
1032       }                                         \
1033     else                                        \
1034       out = d;                                  \
1035   }                                             \
1036 while (0)
1037
1038 static int
1039 search_buffer (string, pos, pos_byte, lim, lim_byte, n,
1040                RE, trt, inverse_trt, posix)
1041      Lisp_Object string;
1042      int pos;
1043      int pos_byte;
1044      int lim;
1045      int lim_byte;
1046      int n;
1047      int RE;
1048      Lisp_Object trt;
1049      Lisp_Object inverse_trt;
1050      int posix;
1051 {
1052   int len = SCHARS (string);
1053   int len_byte = SBYTES (string);
1054   register int i;
1055
1056   if (running_asynch_code)
1057     save_search_regs ();
1058
1059   /* Searching 0 times means don't move.  */
1060   /* Null string is found at starting position.  */
1061   if (len == 0 || n == 0)
1062     {
1063       set_search_regs (pos_byte, 0);
1064       return pos;
1065     }
1066
1067   if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp)))
1068     {
1069       unsigned char *p1, *p2;
1070       int s1, s2;
1071       struct re_pattern_buffer *bufp;
1072
1073       bufp = compile_pattern (string, &search_regs, trt, posix,
1074                               !NILP (current_buffer->enable_multibyte_characters));
1075
1076       immediate_quit = 1;       /* Quit immediately if user types ^G,
1077                                    because letting this function finish
1078                                    can take too long. */
1079       QUIT;                     /* Do a pending quit right away,
1080                                    to avoid paradoxical behavior */
1081       /* Get pointers and sizes of the two strings
1082          that make up the visible portion of the buffer. */
1083
1084       p1 = BEGV_ADDR;
1085       s1 = GPT_BYTE - BEGV_BYTE;
1086       p2 = GAP_END_ADDR;
1087       s2 = ZV_BYTE - GPT_BYTE;
1088       if (s1 < 0)
1089         {
1090           p2 = p1;
1091           s2 = ZV_BYTE - BEGV_BYTE;
1092           s1 = 0;
1093         }
1094       if (s2 < 0)
1095         {
1096           s1 = ZV_BYTE - BEGV_BYTE;
1097           s2 = 0;
1098         }
1099       re_match_object = Qnil;
1100
1101       while (n < 0)
1102         {
1103           int val;
1104           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1105                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1106                              &search_regs,
1107                              /* Don't allow match past current point */
1108                              pos_byte - BEGV_BYTE);
1109           if (val == -2)
1110             {
1111               matcher_overflow ();
1112             }
1113           if (val >= 0)
1114             {
1115               pos_byte = search_regs.start[0] + BEGV_BYTE;
1116               for (i = 0; i < search_regs.num_regs; i++)
1117                 if (search_regs.start[i] >= 0)
1118                   {
1119                     search_regs.start[i]
1120                       = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1121                     search_regs.end[i]
1122                       = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1123                   }
1124               XSETBUFFER (last_thing_searched, current_buffer);
1125               /* Set pos to the new position. */
1126               pos = search_regs.start[0];
1127             }
1128           else
1129             {
1130               immediate_quit = 0;
1131               return (n);
1132             }
1133           n++;
1134         }
1135       while (n > 0)
1136         {
1137           int val;
1138           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1139                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1140                              &search_regs,
1141                              lim_byte - BEGV_BYTE);
1142           if (val == -2)
1143             {
1144               matcher_overflow ();
1145             }
1146           if (val >= 0)
1147             {
1148               pos_byte = search_regs.end[0] + BEGV_BYTE;
1149               for (i = 0; i < search_regs.num_regs; i++)
1150                 if (search_regs.start[i] >= 0)
1151                   {
1152                     search_regs.start[i]
1153                       = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1154                     search_regs.end[i]
1155                       = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1156                   }
1157               XSETBUFFER (last_thing_searched, current_buffer);
1158               pos = search_regs.end[0];
1159             }
1160           else
1161             {
1162               immediate_quit = 0;
1163               return (0 - n);
1164             }
1165           n--;
1166         }
1167       immediate_quit = 0;
1168       return (pos);
1169     }
1170   else                          /* non-RE case */
1171     {
1172       unsigned char *raw_pattern, *pat;
1173       int raw_pattern_size;
1174       int raw_pattern_size_byte;
1175       unsigned char *patbuf;
1176       int multibyte = !NILP (current_buffer->enable_multibyte_characters);
1177       unsigned char *base_pat;
1178       /* Set to positive if we find a non-ASCII char that need
1179          translation.  Otherwise set to zero later.  */
1180       int charset_base = -1;
1181       int boyer_moore_ok = 1;
1182
1183       /* MULTIBYTE says whether the text to be searched is multibyte.
1184          We must convert PATTERN to match that, or we will not really
1185          find things right.  */
1186
1187       if (multibyte == STRING_MULTIBYTE (string))
1188         {
1189           raw_pattern = (unsigned char *) SDATA (string);
1190           raw_pattern_size = SCHARS (string);
1191           raw_pattern_size_byte = SBYTES (string);
1192         }
1193       else if (multibyte)
1194         {
1195           raw_pattern_size = SCHARS (string);
1196           raw_pattern_size_byte
1197             = count_size_as_multibyte (SDATA (string),
1198                                        raw_pattern_size);
1199           raw_pattern = (unsigned char *) alloca (raw_pattern_size_byte + 1);
1200           copy_text (SDATA (string), raw_pattern,
1201                      SCHARS (string), 0, 1);
1202         }
1203       else
1204         {
1205           /* Converting multibyte to single-byte.
1206
1207              ??? Perhaps this conversion should be done in a special way
1208              by subtracting nonascii-insert-offset from each non-ASCII char,
1209              so that only the multibyte chars which really correspond to
1210              the chosen single-byte character set can possibly match.  */
1211           raw_pattern_size = SCHARS (string);
1212           raw_pattern_size_byte = SCHARS (string);
1213           raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
1214           copy_text (SDATA (string), raw_pattern,
1215                      SBYTES (string), 1, 0);
1216         }
1217
1218       /* Copy and optionally translate the pattern.  */
1219       len = raw_pattern_size;
1220       len_byte = raw_pattern_size_byte;
1221       patbuf = (unsigned char *) alloca (len_byte);
1222       pat = patbuf;
1223       base_pat = raw_pattern;
1224       if (multibyte)
1225         {
1226           /* Fill patbuf by translated characters in STRING while
1227              checking if we can use boyer-moore search.  If TRT is
1228              non-nil, we can use boyer-moore search only if TRT can be
1229              represented by the byte array of 256 elements.  For that,
1230              all non-ASCII case-equivalents of all case-senstive
1231              characters in STRING must belong to the same charset and
1232              row.  */
1233
1234           while (--len >= 0)
1235             {
1236               unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str;
1237               int c, translated, inverse;
1238               int in_charlen, charlen;
1239
1240               /* If we got here and the RE flag is set, it's because we're
1241                  dealing with a regexp known to be trivial, so the backslash
1242                  just quotes the next character.  */
1243               if (RE && *base_pat == '\\')
1244                 {
1245                   len--;
1246                   raw_pattern_size--;
1247                   len_byte--;
1248                   base_pat++;
1249                 }
1250
1251               c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen);
1252
1253               if (NILP (trt))
1254                 {
1255                   str = base_pat;
1256                   charlen = in_charlen;
1257                 }
1258               else
1259                 {
1260                   /* Translate the character.  */
1261                   TRANSLATE (translated, trt, c);
1262                   charlen = CHAR_STRING (translated, str_base);
1263                   str = str_base;
1264
1265                   /* Check if C has any other case-equivalents.  */
1266                   TRANSLATE (inverse, inverse_trt, c);
1267                   /* If so, check if we can use boyer-moore.  */
1268                   if (c != inverse && boyer_moore_ok)
1269                     {
1270                       /* Check if all equivalents belong to the same
1271                          charset & row.  Note that the check of C
1272                          itself is done by the last iteration.  Note
1273                          also that we don't have to check ASCII
1274                          characters because boyer-moore search can
1275                          always handle their translation.  */
1276                       while (1)
1277                         {
1278                           if (ASCII_BYTE_P (inverse))
1279                             {
1280                               if (charset_base > 0)
1281                                 {
1282                                   boyer_moore_ok = 0;
1283                                   break;
1284                                 }
1285                               charset_base = 0;
1286                             }
1287                           else if (SINGLE_BYTE_CHAR_P (inverse))
1288                             {
1289                               /* Boyer-moore search can't handle a
1290                                  translation of an eight-bit
1291                                  character.  */
1292                               boyer_moore_ok = 0;
1293                               break;
1294                             }
1295                           else if (charset_base < 0)
1296                             charset_base = inverse & ~CHAR_FIELD3_MASK;
1297                           else if ((inverse & ~CHAR_FIELD3_MASK)
1298                                    != charset_base)
1299                             {
1300                               boyer_moore_ok = 0;
1301                               break;
1302                             }
1303                           if (c == inverse)
1304                             break;
1305                           TRANSLATE (inverse, inverse_trt, inverse);
1306                         }
1307                     }
1308                 }
1309               if (charset_base < 0)
1310                 charset_base = 0;
1311
1312               /* Store this character into the translated pattern.  */
1313               bcopy (str, pat, charlen);
1314               pat += charlen;
1315               base_pat += in_charlen;
1316               len_byte -= in_charlen;
1317             }
1318         }
1319       else
1320         {
1321           /* Unibyte buffer.  */
1322           charset_base = 0;
1323           while (--len >= 0)
1324             {
1325               int c, translated;
1326
1327               /* If we got here and the RE flag is set, it's because we're
1328                  dealing with a regexp known to be trivial, so the backslash
1329                  just quotes the next character.  */
1330               if (RE && *base_pat == '\\')
1331                 {
1332                   len--;
1333                   raw_pattern_size--;
1334                   base_pat++;
1335                 }
1336               c = *base_pat++;
1337               TRANSLATE (translated, trt, c);
1338               *pat++ = translated;
1339             }
1340         }
1341
1342       len_byte = pat - patbuf;
1343       len = raw_pattern_size;
1344       pat = base_pat = patbuf;
1345
1346       if (boyer_moore_ok)
1347         return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
1348                             pos, pos_byte, lim, lim_byte,
1349                             charset_base);
1350       else
1351         return simple_search (n, pat, len, len_byte, trt,
1352                               pos, pos_byte, lim, lim_byte);
1353     }
1354 }
1355 \f
1356 /* Do a simple string search N times for the string PAT,
1357    whose length is LEN/LEN_BYTE,
1358    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1359    TRT is the translation table.
1360
1361    Return the character position where the match is found.
1362    Otherwise, if M matches remained to be found, return -M.
1363
1364    This kind of search works regardless of what is in PAT and
1365    regardless of what is in TRT.  It is used in cases where
1366    boyer_moore cannot work.  */
1367
1368 static int
1369 simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
1370      int n;
1371      unsigned char *pat;
1372      int len, len_byte;
1373      Lisp_Object trt;
1374      int pos, pos_byte;
1375      int lim, lim_byte;
1376 {
1377   int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1378   int forward = n > 0;
1379
1380   if (lim > pos && multibyte)
1381     while (n > 0)
1382       {
1383         while (1)
1384           {
1385             /* Try matching at position POS.  */
1386             int this_pos = pos;
1387             int this_pos_byte = pos_byte;
1388             int this_len = len;
1389             int this_len_byte = len_byte;
1390             unsigned char *p = pat;
1391             if (pos + len > lim)
1392               goto stop;
1393
1394             while (this_len > 0)
1395               {
1396                 int charlen, buf_charlen;
1397                 int pat_ch, buf_ch;
1398
1399                 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1400                 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1401                                                  ZV_BYTE - this_pos_byte,
1402                                                  buf_charlen);
1403                 TRANSLATE (buf_ch, trt, buf_ch);
1404
1405                 if (buf_ch != pat_ch)
1406                   break;
1407
1408                 this_len_byte -= charlen;
1409                 this_len--;
1410                 p += charlen;
1411
1412                 this_pos_byte += buf_charlen;
1413                 this_pos++;
1414               }
1415
1416             if (this_len == 0)
1417               {
1418                 pos += len;
1419                 pos_byte += len_byte;
1420                 break;
1421               }
1422
1423             INC_BOTH (pos, pos_byte);
1424           }
1425
1426         n--;
1427       }
1428   else if (lim > pos)
1429     while (n > 0)
1430       {
1431         while (1)
1432           {
1433             /* Try matching at position POS.  */
1434             int this_pos = pos;
1435             int this_len = len;
1436             unsigned char *p = pat;
1437
1438             if (pos + len > lim)
1439               goto stop;
1440
1441             while (this_len > 0)
1442               {
1443                 int pat_ch = *p++;
1444                 int buf_ch = FETCH_BYTE (this_pos);
1445                 TRANSLATE (buf_ch, trt, buf_ch);
1446
1447                 if (buf_ch != pat_ch)
1448                   break;
1449
1450                 this_len--;
1451                 this_pos++;
1452               }
1453
1454             if (this_len == 0)
1455               {
1456                 pos += len;
1457                 break;
1458               }
1459
1460             pos++;
1461           }
1462
1463         n--;
1464       }
1465   /* Backwards search.  */
1466   else if (lim < pos && multibyte)
1467     while (n < 0)
1468       {
1469         while (1)
1470           {
1471             /* Try matching at position POS.  */
1472             int this_pos = pos - len;
1473             int this_pos_byte = pos_byte - len_byte;
1474             int this_len = len;
1475             int this_len_byte = len_byte;
1476             unsigned char *p = pat;
1477
1478             if (pos - len < lim)
1479               goto stop;
1480
1481             while (this_len > 0)
1482               {
1483                 int charlen, buf_charlen;
1484                 int pat_ch, buf_ch;
1485
1486                 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1487                 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1488                                                  ZV_BYTE - this_pos_byte,
1489                                                  buf_charlen);
1490                 TRANSLATE (buf_ch, trt, buf_ch);
1491
1492                 if (buf_ch != pat_ch)
1493                   break;
1494
1495                 this_len_byte -= charlen;
1496                 this_len--;
1497                 p += charlen;
1498                 this_pos_byte += buf_charlen;
1499                 this_pos++;
1500               }
1501
1502             if (this_len == 0)
1503               {
1504                 pos -= len;
1505                 pos_byte -= len_byte;
1506                 break;
1507               }
1508
1509             DEC_BOTH (pos, pos_byte);
1510           }
1511
1512         n++;
1513       }
1514   else if (lim < pos)
1515     while (n < 0)
1516       {
1517         while (1)
1518           {
1519             /* Try matching at position POS.  */
1520             int this_pos = pos - len;
1521             int this_len = len;
1522             unsigned char *p = pat;
1523
1524             if (pos - len < lim)
1525               goto stop;
1526
1527             while (this_len > 0)
1528               {
1529                 int pat_ch = *p++;
1530                 int buf_ch = FETCH_BYTE (this_pos);
1531                 TRANSLATE (buf_ch, trt, buf_ch);
1532
1533                 if (buf_ch != pat_ch)
1534                   break;
1535                 this_len--;
1536                 this_pos++;
1537               }
1538
1539             if (this_len == 0)
1540               {
1541                 pos -= len;
1542                 break;
1543               }
1544
1545             pos--;
1546           }
1547
1548         n++;
1549       }
1550
1551  stop:
1552   if (n == 0)
1553     {
1554       if (forward)
1555         set_search_regs ((multibyte ? pos_byte : pos) - len_byte, len_byte);
1556       else
1557         set_search_regs (multibyte ? pos_byte : pos, len_byte);
1558
1559       return pos;
1560     }
1561   else if (n > 0)
1562     return -n;
1563   else
1564     return n;
1565 }
1566 \f
1567 /* Do Boyer-Moore search N times for the string BASE_PAT,
1568    whose length is LEN/LEN_BYTE,
1569    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1570    DIRECTION says which direction we search in.
1571    TRT and INVERSE_TRT are translation tables.
1572    Characters in PAT are already translated by TRT.
1573
1574    This kind of search works if all the characters in BASE_PAT that
1575    have nontrivial translation are the same aside from the last byte.
1576    This makes it possible to translate just the last byte of a
1577    character, and do so after just a simple test of the context.
1578    CHARSET_BASE is nonzero iff there is such a non-ASCII character.
1579
1580    If that criterion is not satisfied, do not call this function.  */
1581
1582 static int
1583 boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
1584              pos, pos_byte, lim, lim_byte, charset_base)
1585      int n;
1586      unsigned char *base_pat;
1587      int len, len_byte;
1588      Lisp_Object trt;
1589      Lisp_Object inverse_trt;
1590      int pos, pos_byte;
1591      int lim, lim_byte;
1592      int charset_base;
1593 {
1594   int direction = ((n > 0) ? 1 : -1);
1595   register int dirlen;
1596   int infinity, limit, stride_for_teases = 0;
1597   register int *BM_tab;
1598   int *BM_tab_base;
1599   register unsigned char *cursor, *p_limit;
1600   register int i, j;
1601   unsigned char *pat, *pat_end;
1602   int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1603
1604   unsigned char simple_translate[0400];
1605   /* These are set to the preceding bytes of a byte to be translated
1606      if charset_base is nonzero.  As the maximum byte length of a
1607      multibyte character is 4, we have to check at most three previous
1608      bytes.  */
1609   int translate_prev_byte1 = 0;
1610   int translate_prev_byte2 = 0;
1611   int translate_prev_byte3 = 0;
1612
1613 #ifdef C_ALLOCA
1614   int BM_tab_space[0400];
1615   BM_tab = &BM_tab_space[0];
1616 #else
1617   BM_tab = (int *) alloca (0400 * sizeof (int));
1618 #endif
1619   /* The general approach is that we are going to maintain that we know */
1620   /* the first (closest to the present position, in whatever direction */
1621   /* we're searching) character that could possibly be the last */
1622   /* (furthest from present position) character of a valid match.  We */
1623   /* advance the state of our knowledge by looking at that character */
1624   /* and seeing whether it indeed matches the last character of the */
1625   /* pattern.  If it does, we take a closer look.  If it does not, we */
1626   /* move our pointer (to putative last characters) as far as is */
1627   /* logically possible.  This amount of movement, which I call a */
1628   /* stride, will be the length of the pattern if the actual character */
1629   /* appears nowhere in the pattern, otherwise it will be the distance */
1630   /* from the last occurrence of that character to the end of the */
1631   /* pattern. */
1632   /* As a coding trick, an enormous stride is coded into the table for */
1633   /* characters that match the last character.  This allows use of only */
1634   /* a single test, a test for having gone past the end of the */
1635   /* permissible match region, to test for both possible matches (when */
1636   /* the stride goes past the end immediately) and failure to */
1637   /* match (where you get nudged past the end one stride at a time). */
1638
1639   /* Here we make a "mickey mouse" BM table.  The stride of the search */
1640   /* is determined only by the last character of the putative match. */
1641   /* If that character does not match, we will stride the proper */
1642   /* distance to propose a match that superimposes it on the last */
1643   /* instance of a character that matches it (per trt), or misses */
1644   /* it entirely if there is none. */
1645
1646   dirlen = len_byte * direction;
1647   infinity = dirlen - (lim_byte + pos_byte + len_byte + len_byte) * direction;
1648
1649   /* Record position after the end of the pattern.  */
1650   pat_end = base_pat + len_byte;
1651   /* BASE_PAT points to a character that we start scanning from.
1652      It is the first character in a forward search,
1653      the last character in a backward search.  */
1654   if (direction < 0)
1655     base_pat = pat_end - 1;
1656
1657   BM_tab_base = BM_tab;
1658   BM_tab += 0400;
1659   j = dirlen;           /* to get it in a register */
1660   /* A character that does not appear in the pattern induces a */
1661   /* stride equal to the pattern length. */
1662   while (BM_tab_base != BM_tab)
1663     {
1664       *--BM_tab = j;
1665       *--BM_tab = j;
1666       *--BM_tab = j;
1667       *--BM_tab = j;
1668     }
1669
1670   /* We use this for translation, instead of TRT itself.
1671      We fill this in to handle the characters that actually
1672      occur in the pattern.  Others don't matter anyway!  */
1673   bzero (simple_translate, sizeof simple_translate);
1674   for (i = 0; i < 0400; i++)
1675     simple_translate[i] = i;
1676
1677   if (charset_base)
1678     {
1679       /* Setup translate_prev_byte1/2/3 from CHARSET_BASE.  Only a
1680          byte following them are the target of translation.  */
1681       int sample_char = charset_base | 0x20;
1682       unsigned char str[MAX_MULTIBYTE_LENGTH];
1683       int len = CHAR_STRING (sample_char, str);
1684
1685       translate_prev_byte1 = str[len - 2];
1686       if (len > 2)
1687         {
1688           translate_prev_byte2 = str[len - 3];
1689           if (len > 3)
1690             translate_prev_byte3 = str[len - 4];
1691         }
1692     }
1693
1694   i = 0;
1695   while (i != infinity)
1696     {
1697       unsigned char *ptr = base_pat + i;
1698       i += direction;
1699       if (i == dirlen)
1700         i = infinity;
1701       if (! NILP (trt))
1702         {
1703           /* If the byte currently looking at is the last of a
1704              character to check case-equivalents, set CH to that
1705              character.  An ASCII character and a non-ASCII character
1706              matching with CHARSET_BASE are to be checked.  */
1707           int ch = -1;
1708
1709           if (ASCII_BYTE_P (*ptr) || ! multibyte)
1710             ch = *ptr;
1711           else if (charset_base
1712                    && ((pat_end - ptr) == 1 || CHAR_HEAD_P (ptr[1])))
1713             {
1714               unsigned char *charstart = ptr - 1;
1715
1716               while (! (CHAR_HEAD_P (*charstart)))
1717                 charstart--;
1718               ch = STRING_CHAR (charstart, ptr - charstart + 1);
1719               if (charset_base != (ch & ~CHAR_FIELD3_MASK))
1720                 ch = -1;
1721             }
1722
1723           if (ch > 0400)
1724             j = ((unsigned char) ch) | 0200;
1725           else
1726             j = *ptr;
1727
1728           if (i == infinity)
1729             stride_for_teases = BM_tab[j];
1730
1731           BM_tab[j] = dirlen - i;
1732           /* A translation table is accompanied by its inverse -- see */
1733           /* comment following downcase_table for details */
1734           if (ch >= 0)
1735             {
1736               int starting_ch = ch;
1737               int starting_j = j;
1738
1739               while (1)
1740                 {
1741                   TRANSLATE (ch, inverse_trt, ch);
1742                   if (ch > 0400)
1743                     j = ((unsigned char) ch) | 0200;
1744                   else
1745                     j = (unsigned char) ch;
1746
1747                   /* For all the characters that map into CH,
1748                      set up simple_translate to map the last byte
1749                      into STARTING_J.  */
1750                   simple_translate[j] = starting_j;
1751                   if (ch == starting_ch)
1752                     break;
1753                   BM_tab[j] = dirlen - i;
1754                 }
1755             }
1756         }
1757       else
1758         {
1759           j = *ptr;
1760
1761           if (i == infinity)
1762             stride_for_teases = BM_tab[j];
1763           BM_tab[j] = dirlen - i;
1764         }
1765       /* stride_for_teases tells how much to stride if we get a */
1766       /* match on the far character but are subsequently */
1767       /* disappointed, by recording what the stride would have been */
1768       /* for that character if the last character had been */
1769       /* different. */
1770     }
1771   infinity = dirlen - infinity;
1772   pos_byte += dirlen - ((direction > 0) ? direction : 0);
1773   /* loop invariant - POS_BYTE points at where last char (first
1774      char if reverse) of pattern would align in a possible match.  */
1775   while (n != 0)
1776     {
1777       int tail_end;
1778       unsigned char *tail_end_ptr;
1779
1780       /* It's been reported that some (broken) compiler thinks that
1781          Boolean expressions in an arithmetic context are unsigned.
1782          Using an explicit ?1:0 prevents this.  */
1783       if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1784           < 0)
1785         return (n * (0 - direction));
1786       /* First we do the part we can by pointers (maybe nothing) */
1787       QUIT;
1788       pat = base_pat;
1789       limit = pos_byte - dirlen + direction;
1790       if (direction > 0)
1791         {
1792           limit = BUFFER_CEILING_OF (limit);
1793           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1794              can take on without hitting edge of buffer or the gap.  */
1795           limit = min (limit, pos_byte + 20000);
1796           limit = min (limit, lim_byte - 1);
1797         }
1798       else
1799         {
1800           limit = BUFFER_FLOOR_OF (limit);
1801           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1802              can take on without hitting edge of buffer or the gap.  */
1803           limit = max (limit, pos_byte - 20000);
1804           limit = max (limit, lim_byte);
1805         }
1806       tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1807       tail_end_ptr = BYTE_POS_ADDR (tail_end);
1808
1809       if ((limit - pos_byte) * direction > 20)
1810         {
1811           unsigned char *p2;
1812
1813           p_limit = BYTE_POS_ADDR (limit);
1814           p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1815           /* In this loop, pos + cursor - p2 is the surrogate for pos */
1816           while (1)             /* use one cursor setting as long as i can */
1817             {
1818               if (direction > 0) /* worth duplicating */
1819                 {
1820                   /* Use signed comparison if appropriate
1821                      to make cursor+infinity sure to be > p_limit.
1822                      Assuming that the buffer lies in a range of addresses
1823                      that are all "positive" (as ints) or all "negative",
1824                      either kind of comparison will work as long
1825                      as we don't step by infinity.  So pick the kind
1826                      that works when we do step by infinity.  */
1827                   if ((EMACS_INT) (p_limit + infinity) > (EMACS_INT) p_limit)
1828                     while ((EMACS_INT) cursor <= (EMACS_INT) p_limit)
1829                       cursor += BM_tab[*cursor];
1830                   else
1831                     while ((EMACS_UINT) cursor <= (EMACS_UINT) p_limit)
1832                       cursor += BM_tab[*cursor];
1833                 }
1834               else
1835                 {
1836                   if ((EMACS_INT) (p_limit + infinity) < (EMACS_INT) p_limit)
1837                     while ((EMACS_INT) cursor >= (EMACS_INT) p_limit)
1838                       cursor += BM_tab[*cursor];
1839                   else
1840                     while ((EMACS_UINT) cursor >= (EMACS_UINT) p_limit)
1841                       cursor += BM_tab[*cursor];
1842                 }
1843 /* If you are here, cursor is beyond the end of the searched region. */
1844 /* This can happen if you match on the far character of the pattern, */
1845 /* because the "stride" of that character is infinity, a number able */
1846 /* to throw you well beyond the end of the search.  It can also */
1847 /* happen if you fail to match within the permitted region and would */
1848 /* otherwise try a character beyond that region */
1849               if ((cursor - p_limit) * direction <= len_byte)
1850                 break;  /* a small overrun is genuine */
1851               cursor -= infinity; /* large overrun = hit */
1852               i = dirlen - direction;
1853               if (! NILP (trt))
1854                 {
1855                   while ((i -= direction) + direction != 0)
1856                     {
1857                       int ch;
1858                       cursor -= direction;
1859                       /* Translate only the last byte of a character.  */
1860                       if (! multibyte
1861                           || ((cursor == tail_end_ptr
1862                                || CHAR_HEAD_P (cursor[1]))
1863                               && (CHAR_HEAD_P (cursor[0])
1864                                   /* Check if this is the last byte of
1865                                      a translable character.  */
1866                                   || (translate_prev_byte1 == cursor[-1]
1867                                       && (CHAR_HEAD_P (translate_prev_byte1)
1868                                           || (translate_prev_byte2 == cursor[-2]
1869                                               && (CHAR_HEAD_P (translate_prev_byte2)
1870                                                   || (translate_prev_byte3 == cursor[-3]))))))))
1871                         ch = simple_translate[*cursor];
1872                       else
1873                         ch = *cursor;
1874                       if (pat[i] != ch)
1875                         break;
1876                     }
1877                 }
1878               else
1879                 {
1880                   while ((i -= direction) + direction != 0)
1881                     {
1882                       cursor -= direction;
1883                       if (pat[i] != *cursor)
1884                         break;
1885                     }
1886                 }
1887               cursor += dirlen - i - direction; /* fix cursor */
1888               if (i + direction == 0)
1889                 {
1890                   int position;
1891
1892                   cursor -= direction;
1893
1894                   position = pos_byte + cursor - p2 + ((direction > 0)
1895                                                        ? 1 - len_byte : 0);
1896                   set_search_regs (position, len_byte);
1897
1898                   if ((n -= direction) != 0)
1899                     cursor += dirlen; /* to resume search */
1900                   else
1901                     return ((direction > 0)
1902                             ? search_regs.end[0] : search_regs.start[0]);
1903                 }
1904               else
1905                 cursor += stride_for_teases; /* <sigh> we lose -  */
1906             }
1907           pos_byte += cursor - p2;
1908         }
1909       else
1910         /* Now we'll pick up a clump that has to be done the hard */
1911         /* way because it covers a discontinuity */
1912         {
1913           limit = ((direction > 0)
1914                    ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
1915                    : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
1916           limit = ((direction > 0)
1917                    ? min (limit + len_byte, lim_byte - 1)
1918                    : max (limit - len_byte, lim_byte));
1919           /* LIMIT is now the last value POS_BYTE can have
1920              and still be valid for a possible match.  */
1921           while (1)
1922             {
1923               /* This loop can be coded for space rather than */
1924               /* speed because it will usually run only once. */
1925               /* (the reach is at most len + 21, and typically */
1926               /* does not exceed len) */
1927               while ((limit - pos_byte) * direction >= 0)
1928                 pos_byte += BM_tab[FETCH_BYTE (pos_byte)];
1929               /* now run the same tests to distinguish going off the */
1930               /* end, a match or a phony match. */
1931               if ((pos_byte - limit) * direction <= len_byte)
1932                 break;  /* ran off the end */
1933               /* Found what might be a match.
1934                  Set POS_BYTE back to last (first if reverse) pos.  */
1935               pos_byte -= infinity;
1936               i = dirlen - direction;
1937               while ((i -= direction) + direction != 0)
1938                 {
1939                   int ch;
1940                   unsigned char *ptr;
1941                   pos_byte -= direction;
1942                   ptr = BYTE_POS_ADDR (pos_byte);
1943                   /* Translate only the last byte of a character.  */
1944                   if (! multibyte
1945                       || ((ptr == tail_end_ptr
1946                            || CHAR_HEAD_P (ptr[1]))
1947                           && (CHAR_HEAD_P (ptr[0])
1948                               /* Check if this is the last byte of a
1949                                  translable character.  */
1950                               || (translate_prev_byte1 == ptr[-1]
1951                                   && (CHAR_HEAD_P (translate_prev_byte1)
1952                                       || (translate_prev_byte2 == ptr[-2]
1953                                           && (CHAR_HEAD_P (translate_prev_byte2)
1954                                               || translate_prev_byte3 == ptr[-3])))))))
1955                     ch = simple_translate[*ptr];
1956                   else
1957                     ch = *ptr;
1958                   if (pat[i] != ch)
1959                     break;
1960                 }
1961               /* Above loop has moved POS_BYTE part or all the way
1962                  back to the first pos (last pos if reverse).
1963                  Set it once again at the last (first if reverse) char.  */
1964               pos_byte += dirlen - i- direction;
1965               if (i + direction == 0)
1966                 {
1967                   int position;
1968                   pos_byte -= direction;
1969
1970                   position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
1971
1972                   set_search_regs (position, len_byte);
1973
1974                   if ((n -= direction) != 0)
1975                     pos_byte += dirlen; /* to resume search */
1976                   else
1977                     return ((direction > 0)
1978                             ? search_regs.end[0] : search_regs.start[0]);
1979                 }
1980               else
1981                 pos_byte += stride_for_teases;
1982             }
1983           }
1984       /* We have done one clump.  Can we continue? */
1985       if ((lim_byte - pos_byte) * direction < 0)
1986         return ((0 - n) * direction);
1987     }
1988   return BYTE_TO_CHAR (pos_byte);
1989 }
1990
1991 /* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
1992    for the overall match just found in the current buffer.
1993    Also clear out the match data for registers 1 and up.  */
1994
1995 static void
1996 set_search_regs (beg_byte, nbytes)
1997      int beg_byte, nbytes;
1998 {
1999   int i;
2000
2001   /* Make sure we have registers in which to store
2002      the match position.  */
2003   if (search_regs.num_regs == 0)
2004     {
2005       search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
2006       search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
2007       search_regs.num_regs = 2;
2008     }
2009
2010   /* Clear out the other registers.  */
2011   for (i = 1; i < search_regs.num_regs; i++)
2012     {
2013       search_regs.start[i] = -1;
2014       search_regs.end[i] = -1;
2015     }
2016
2017   search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
2018   search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
2019   XSETBUFFER (last_thing_searched, current_buffer);
2020 }
2021 \f
2022 /* Given a string of words separated by word delimiters,
2023   compute a regexp that matches those exact words
2024   separated by arbitrary punctuation.  */
2025
2026 static Lisp_Object
2027 wordify (string)
2028      Lisp_Object string;
2029 {
2030   register unsigned char *p, *o;
2031   register int i, i_byte, len, punct_count = 0, word_count = 0;
2032   Lisp_Object val;
2033   int prev_c = 0;
2034   int adjust;
2035
2036   CHECK_STRING (string);
2037   p = SDATA (string);
2038   len = SCHARS (string);
2039
2040   for (i = 0, i_byte = 0; i < len; )
2041     {
2042       int c;
2043
2044       FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
2045
2046       if (SYNTAX (c) != Sword)
2047         {
2048           punct_count++;
2049           if (i > 0 && SYNTAX (prev_c) == Sword)
2050             word_count++;
2051         }
2052
2053       prev_c = c;
2054     }
2055
2056   if (SYNTAX (prev_c) == Sword)
2057     word_count++;
2058   if (!word_count)
2059     return empty_string;
2060
2061   adjust = - punct_count + 5 * (word_count - 1) + 4;
2062   if (STRING_MULTIBYTE (string))
2063     val = make_uninit_multibyte_string (len + adjust,
2064                                         SBYTES (string)
2065                                         + adjust);
2066   else
2067     val = make_uninit_string (len + adjust);
2068
2069   o = SDATA (val);
2070   *o++ = '\\';
2071   *o++ = 'b';
2072   prev_c = 0;
2073
2074   for (i = 0, i_byte = 0; i < len; )
2075     {
2076       int c;
2077       int i_byte_orig = i_byte;
2078
2079       FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
2080
2081       if (SYNTAX (c) == Sword)
2082         {
2083           bcopy (SDATA (string) + i_byte_orig, o,
2084                  i_byte - i_byte_orig);
2085           o += i_byte - i_byte_orig;
2086         }
2087       else if (i > 0 && SYNTAX (prev_c) == Sword && --word_count)
2088         {
2089           *o++ = '\\';
2090           *o++ = 'W';
2091           *o++ = '\\';
2092           *o++ = 'W';
2093           *o++ = '*';
2094         }
2095
2096       prev_c = c;
2097     }
2098
2099   *o++ = '\\';
2100   *o++ = 'b';
2101
2102   return val;
2103 }
2104 \f
2105 DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
2106        "MSearch backward: ",
2107        doc: /* Search backward from point for STRING.
2108 Set point to the beginning of the occurrence found, and return point.
2109 An optional second argument bounds the search; it is a buffer position.
2110 The match found must not extend before that position.
2111 Optional third argument, if t, means if fail just return nil (no error).
2112  If not nil and not t, position at limit of search and return nil.
2113 Optional fourth argument is repeat count--search for successive occurrences.
2114
2115 Search case-sensitivity is determined by the value of the variable
2116 `case-fold-search', which see.
2117
2118 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2119      (string, bound, noerror, count)
2120      Lisp_Object string, bound, noerror, count;
2121 {
2122   return search_command (string, bound, noerror, count, -1, 0, 0);
2123 }
2124
2125 DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
2126        doc: /* Search forward from point for STRING.
2127 Set point to the end of the occurrence found, and return point.
2128 An optional second argument bounds the search; it is a buffer position.
2129 The match found must not extend after that position.  nil is equivalent
2130   to (point-max).
2131 Optional third argument, if t, means if fail just return nil (no error).
2132   If not nil and not t, move to limit of search and return nil.
2133 Optional fourth argument is repeat count--search for successive occurrences.
2134
2135 Search case-sensitivity is determined by the value of the variable
2136 `case-fold-search', which see.
2137
2138 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2139      (string, bound, noerror, count)
2140      Lisp_Object string, bound, noerror, count;
2141 {
2142   return search_command (string, bound, noerror, count, 1, 0, 0);
2143 }
2144
2145 DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
2146        "sWord search backward: ",
2147        doc: /* Search backward from point for STRING, ignoring differences in punctuation.
2148 Set point to the beginning of the occurrence found, and return point.
2149 An optional second argument bounds the search; it is a buffer position.
2150 The match found must not extend before that position.
2151 Optional third argument, if t, means if fail just return nil (no error).
2152   If not nil and not t, move to limit of search and return nil.
2153 Optional fourth argument is repeat count--search for successive occurrences.  */)
2154      (string, bound, noerror, count)
2155      Lisp_Object string, bound, noerror, count;
2156 {
2157   return search_command (wordify (string), bound, noerror, count, -1, 1, 0);
2158 }
2159
2160 DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
2161        "sWord search: ",
2162        doc: /* Search forward from point for STRING, ignoring differences in punctuation.
2163 Set point to the end of the occurrence found, and return point.
2164 An optional second argument bounds the search; it is a buffer position.
2165 The match found must not extend after that position.
2166 Optional third argument, if t, means if fail just return nil (no error).
2167   If not nil and not t, move to limit of search and return nil.
2168 Optional fourth argument is repeat count--search for successive occurrences.  */)
2169      (string, bound, noerror, count)
2170      Lisp_Object string, bound, noerror, count;
2171 {
2172   return search_command (wordify (string), bound, noerror, count, 1, 1, 0);
2173 }
2174
2175 DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
2176        "sRE search backward: ",
2177        doc: /* Search backward from point for match for regular expression REGEXP.
2178 Set point to the beginning of the match, and return point.
2179 The match found is the one starting last in the buffer
2180 and yet ending before the origin of the search.
2181 An optional second argument bounds the search; it is a buffer position.
2182 The match found must start at or after that position.
2183 Optional third argument, if t, means if fail just return nil (no error).
2184   If not nil and not t, move to limit of search and return nil.
2185 Optional fourth argument is repeat count--search for successive occurrences.
2186 See also the functions `match-beginning', `match-end', `match-string',
2187 and `replace-match'.  */)
2188      (regexp, bound, noerror, count)
2189      Lisp_Object regexp, bound, noerror, count;
2190 {
2191   return search_command (regexp, bound, noerror, count, -1, 1, 0);
2192 }
2193
2194 DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
2195        "sRE search: ",
2196        doc: /* Search forward from point for regular expression REGEXP.
2197 Set point to the end of the occurrence found, and return point.
2198 An optional second argument bounds the search; it is a buffer position.
2199 The match found must not extend after that position.
2200 Optional third argument, if t, means if fail just return nil (no error).
2201   If not nil and not t, move to limit of search and return nil.
2202 Optional fourth argument is repeat count--search for successive occurrences.
2203 See also the functions `match-beginning', `match-end', `match-string',
2204 and `replace-match'.  */)
2205      (regexp, bound, noerror, count)
2206      Lisp_Object regexp, bound, noerror, count;
2207 {
2208   return search_command (regexp, bound, noerror, count, 1, 1, 0);
2209 }
2210
2211 DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
2212        "sPosix search backward: ",
2213        doc: /* Search backward from point for match for regular expression REGEXP.
2214 Find the longest match in accord with Posix regular expression rules.
2215 Set point to the beginning of the match, and return point.
2216 The match found is the one starting last in the buffer
2217 and yet ending before the origin of the search.
2218 An optional second argument bounds the search; it is a buffer position.
2219 The match found must start at or after that position.
2220 Optional third argument, if t, means if fail just return nil (no error).
2221   If not nil and not t, move to limit of search and return nil.
2222 Optional fourth argument is repeat count--search for successive occurrences.
2223 See also the functions `match-beginning', `match-end', `match-string',
2224 and `replace-match'.  */)
2225      (regexp, bound, noerror, count)
2226      Lisp_Object regexp, bound, noerror, count;
2227 {
2228   return search_command (regexp, bound, noerror, count, -1, 1, 1);
2229 }
2230
2231 DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
2232        "sPosix search: ",
2233        doc: /* Search forward from point for regular expression REGEXP.
2234 Find the longest match in accord with Posix regular expression rules.
2235 Set point to the end of the occurrence found, and return point.
2236 An optional second argument bounds the search; it is a buffer position.
2237 The match found must not extend after that position.
2238 Optional third argument, if t, means if fail just return nil (no error).
2239   If not nil and not t, move to limit of search and return nil.
2240 Optional fourth argument is repeat count--search for successive occurrences.
2241 See also the functions `match-beginning', `match-end', `match-string',
2242 and `replace-match'.  */)
2243      (regexp, bound, noerror, count)
2244      Lisp_Object regexp, bound, noerror, count;
2245 {
2246   return search_command (regexp, bound, noerror, count, 1, 1, 1);
2247 }
2248 \f
2249 DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
2250        doc: /* Replace text matched by last search with NEWTEXT.
2251 Leave point at the end of the replacement text.
2252
2253 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
2254 Otherwise maybe capitalize the whole text, or maybe just word initials,
2255 based on the replaced text.
2256 If the replaced text has only capital letters
2257 and has at least one multiletter word, convert NEWTEXT to all caps.
2258 Otherwise if all words are capitalized in the replaced text,
2259 capitalize each word in NEWTEXT.
2260
2261 If third arg LITERAL is non-nil, insert NEWTEXT literally.
2262 Otherwise treat `\\' as special:
2263   `\\&' in NEWTEXT means substitute original matched text.
2264   `\\N' means substitute what matched the Nth `\\(...\\)'.
2265        If Nth parens didn't match, substitute nothing.
2266   `\\\\' means insert one `\\'.
2267 Case conversion does not apply to these substitutions.
2268
2269 FIXEDCASE and LITERAL are optional arguments.
2270
2271 The optional fourth argument STRING can be a string to modify.
2272 This is meaningful when the previous match was done against STRING,
2273 using `string-match'.  When used this way, `replace-match'
2274 creates and returns a new string made by copying STRING and replacing
2275 the part of STRING that was matched.
2276
2277 The optional fifth argument SUBEXP specifies a subexpression;
2278 it says to replace just that subexpression with NEWTEXT,
2279 rather than replacing the entire matched text.
2280 This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
2281 `\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
2282 NEWTEXT in place of subexp N.
2283 This is useful only after a regular expression search or match,
2284 since only regular expressions have distinguished subexpressions.  */)
2285      (newtext, fixedcase, literal, string, subexp)
2286      Lisp_Object newtext, fixedcase, literal, string, subexp;
2287 {
2288   enum { nochange, all_caps, cap_initial } case_action;
2289   register int pos, pos_byte;
2290   int some_multiletter_word;
2291   int some_lowercase;
2292   int some_uppercase;
2293   int some_nonuppercase_initial;
2294   register int c, prevc;
2295   int sub;
2296   int opoint, newpoint;
2297
2298   CHECK_STRING (newtext);
2299
2300   if (! NILP (string))
2301     CHECK_STRING (string);
2302
2303   case_action = nochange;       /* We tried an initialization */
2304                                 /* but some C compilers blew it */
2305
2306   if (search_regs.num_regs <= 0)
2307     error ("`replace-match' called before any match found");
2308
2309   if (NILP (subexp))
2310     sub = 0;
2311   else
2312     {
2313       CHECK_NUMBER (subexp);
2314       sub = XINT (subexp);
2315       if (sub < 0 || sub >= search_regs.num_regs)
2316         args_out_of_range (subexp, make_number (search_regs.num_regs));
2317     }
2318
2319   if (NILP (string))
2320     {
2321       if (search_regs.start[sub] < BEGV
2322           || search_regs.start[sub] > search_regs.end[sub]
2323           || search_regs.end[sub] > ZV)
2324         args_out_of_range (make_number (search_regs.start[sub]),
2325                            make_number (search_regs.end[sub]));
2326     }
2327   else
2328     {
2329       if (search_regs.start[sub] < 0
2330           || search_regs.start[sub] > search_regs.end[sub]
2331           || search_regs.end[sub] > SCHARS (string))
2332         args_out_of_range (make_number (search_regs.start[sub]),
2333                            make_number (search_regs.end[sub]));
2334     }
2335
2336   if (NILP (fixedcase))
2337     {
2338       /* Decide how to casify by examining the matched text. */
2339       int last;
2340
2341       pos = search_regs.start[sub];
2342       last = search_regs.end[sub];
2343
2344       if (NILP (string))
2345         pos_byte = CHAR_TO_BYTE (pos);
2346       else
2347         pos_byte = string_char_to_byte (string, pos);
2348
2349       prevc = '\n';
2350       case_action = all_caps;
2351
2352       /* some_multiletter_word is set nonzero if any original word
2353          is more than one letter long. */
2354       some_multiletter_word = 0;
2355       some_lowercase = 0;
2356       some_nonuppercase_initial = 0;
2357       some_uppercase = 0;
2358
2359       while (pos < last)
2360         {
2361           if (NILP (string))
2362             {
2363               c = FETCH_CHAR (pos_byte);
2364               INC_BOTH (pos, pos_byte);
2365             }
2366           else
2367             FETCH_STRING_CHAR_ADVANCE (c, string, pos, pos_byte);
2368
2369           if (LOWERCASEP (c))
2370             {
2371               /* Cannot be all caps if any original char is lower case */
2372
2373               some_lowercase = 1;
2374               if (SYNTAX (prevc) != Sword)
2375                 some_nonuppercase_initial = 1;
2376               else
2377                 some_multiletter_word = 1;
2378             }
2379           else if (UPPERCASEP (c))
2380             {
2381               some_uppercase = 1;
2382               if (SYNTAX (prevc) != Sword)
2383                 ;
2384               else
2385                 some_multiletter_word = 1;
2386             }
2387           else
2388             {
2389               /* If the initial is a caseless word constituent,
2390                  treat that like a lowercase initial.  */
2391               if (SYNTAX (prevc) != Sword)
2392                 some_nonuppercase_initial = 1;
2393             }
2394
2395           prevc = c;
2396         }
2397
2398       /* Convert to all caps if the old text is all caps
2399          and has at least one multiletter word.  */
2400       if (! some_lowercase && some_multiletter_word)
2401         case_action = all_caps;
2402       /* Capitalize each word, if the old text has all capitalized words.  */
2403       else if (!some_nonuppercase_initial && some_multiletter_word)
2404         case_action = cap_initial;
2405       else if (!some_nonuppercase_initial && some_uppercase)
2406         /* Should x -> yz, operating on X, give Yz or YZ?
2407            We'll assume the latter.  */
2408         case_action = all_caps;
2409       else
2410         case_action = nochange;
2411     }
2412
2413   /* Do replacement in a string.  */
2414   if (!NILP (string))
2415     {
2416       Lisp_Object before, after;
2417
2418       before = Fsubstring (string, make_number (0),
2419                            make_number (search_regs.start[sub]));
2420       after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
2421
2422       /* Substitute parts of the match into NEWTEXT
2423          if desired.  */
2424       if (NILP (literal))
2425         {
2426           int lastpos = 0;
2427           int lastpos_byte = 0;
2428           /* We build up the substituted string in ACCUM.  */
2429           Lisp_Object accum;
2430           Lisp_Object middle;
2431           int length = SBYTES (newtext);
2432
2433           accum = Qnil;
2434
2435           for (pos_byte = 0, pos = 0; pos_byte < length;)
2436             {
2437               int substart = -1;
2438               int subend = 0;
2439               int delbackslash = 0;
2440
2441               FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2442
2443               if (c == '\\')
2444                 {
2445                   FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2446
2447                   if (c == '&')
2448                     {
2449                       substart = search_regs.start[sub];
2450                       subend = search_regs.end[sub];
2451                     }
2452                   else if (c >= '1' && c <= '9')
2453                     {
2454                       if (search_regs.start[c - '0'] >= 0
2455                           && c <= search_regs.num_regs + '0')
2456                         {
2457                           substart = search_regs.start[c - '0'];
2458                           subend = search_regs.end[c - '0'];
2459                         }
2460                       else
2461                         {
2462                           /* If that subexp did not match,
2463                              replace \\N with nothing.  */
2464                           substart = 0;
2465                           subend = 0;
2466                         }
2467                     }
2468                   else if (c == '\\')
2469                     delbackslash = 1;
2470                   else
2471                     error ("Invalid use of `\\' in replacement text");
2472                 }
2473               if (substart >= 0)
2474                 {
2475                   if (pos - 2 != lastpos)
2476                     middle = substring_both (newtext, lastpos,
2477                                              lastpos_byte,
2478                                              pos - 2, pos_byte - 2);
2479                   else
2480                     middle = Qnil;
2481                   accum = concat3 (accum, middle,
2482                                    Fsubstring (string,
2483                                                make_number (substart),
2484                                                make_number (subend)));
2485                   lastpos = pos;
2486                   lastpos_byte = pos_byte;
2487                 }
2488               else if (delbackslash)
2489                 {
2490                   middle = substring_both (newtext, lastpos,
2491                                            lastpos_byte,
2492                                            pos - 1, pos_byte - 1);
2493
2494                   accum = concat2 (accum, middle);
2495                   lastpos = pos;
2496                   lastpos_byte = pos_byte;
2497                 }
2498             }
2499
2500           if (pos != lastpos)
2501             middle = substring_both (newtext, lastpos,
2502                                      lastpos_byte,
2503                                      pos, pos_byte);
2504           else
2505             middle = Qnil;
2506
2507           newtext = concat2 (accum, middle);
2508         }
2509
2510       /* Do case substitution in NEWTEXT if desired.  */
2511       if (case_action == all_caps)
2512         newtext = Fupcase (newtext);
2513       else if (case_action == cap_initial)
2514         newtext = Fupcase_initials (newtext);
2515
2516       return concat3 (before, newtext, after);
2517     }
2518
2519   /* Record point, then move (quietly) to the start of the match.  */
2520   if (PT >= search_regs.end[sub])
2521     opoint = PT - ZV;
2522   else if (PT > search_regs.start[sub])
2523     opoint = search_regs.end[sub] - ZV;
2524   else
2525     opoint = PT;
2526
2527   /* If we want non-literal replacement,
2528      perform substitution on the replacement string.  */
2529   if (NILP (literal))
2530     {
2531       int length = SBYTES (newtext);
2532       unsigned char *substed;
2533       int substed_alloc_size, substed_len;
2534       int buf_multibyte = !NILP (current_buffer->enable_multibyte_characters);
2535       int str_multibyte = STRING_MULTIBYTE (newtext);
2536       Lisp_Object rev_tbl;
2537       int really_changed = 0;
2538
2539       rev_tbl= (!buf_multibyte && CHAR_TABLE_P (Vnonascii_translation_table)
2540                 ? Fchar_table_extra_slot (Vnonascii_translation_table,
2541                                           make_number (0))
2542                 : Qnil);
2543
2544       substed_alloc_size = length * 2 + 100;
2545       substed = (unsigned char *) xmalloc (substed_alloc_size + 1);
2546       substed_len = 0;
2547
2548       /* Go thru NEWTEXT, producing the actual text to insert in
2549          SUBSTED while adjusting multibyteness to that of the current
2550          buffer.  */
2551
2552       for (pos_byte = 0, pos = 0; pos_byte < length;)
2553         {
2554           unsigned char str[MAX_MULTIBYTE_LENGTH];
2555           unsigned char *add_stuff = NULL;
2556           int add_len = 0;
2557           int idx = -1;
2558
2559           if (str_multibyte)
2560             {
2561               FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte);
2562               if (!buf_multibyte)
2563                 c = multibyte_char_to_unibyte (c, rev_tbl);
2564             }
2565           else
2566             {
2567               /* Note that we don't have to increment POS.  */
2568               c = SREF (newtext, pos_byte++);
2569               if (buf_multibyte)
2570                 c = unibyte_char_to_multibyte (c);
2571             }
2572
2573           /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2574              or set IDX to a match index, which means put that part
2575              of the buffer text into SUBSTED.  */
2576
2577           if (c == '\\')
2578             {
2579               really_changed = 1;
2580
2581               if (str_multibyte)
2582                 {
2583                   FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext,
2584                                                       pos, pos_byte);
2585                   if (!buf_multibyte && !SINGLE_BYTE_CHAR_P (c))
2586                     c = multibyte_char_to_unibyte (c, rev_tbl);
2587                 }
2588               else
2589                 {
2590                   c = SREF (newtext, pos_byte++);
2591                   if (buf_multibyte)
2592                     c = unibyte_char_to_multibyte (c);
2593                 }
2594
2595               if (c == '&')
2596                 idx = sub;
2597               else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
2598                 {
2599                   if (search_regs.start[c - '0'] >= 1)
2600                     idx = c - '0';
2601                 }
2602               else if (c == '\\')
2603                 add_len = 1, add_stuff = "\\";
2604               else
2605                 {
2606                   xfree (substed);
2607                   error ("Invalid use of `\\' in replacement text");
2608                 }
2609             }
2610           else
2611             {
2612               add_len = CHAR_STRING (c, str);
2613               add_stuff = str;
2614             }
2615
2616           /* If we want to copy part of a previous match,
2617              set up ADD_STUFF and ADD_LEN to point to it.  */
2618           if (idx >= 0)
2619             {
2620               int begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
2621               add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
2622               if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
2623                 move_gap (search_regs.start[idx]);
2624               add_stuff = BYTE_POS_ADDR (begbyte);
2625             }
2626
2627           /* Now the stuff we want to add to SUBSTED
2628              is invariably ADD_LEN bytes starting at ADD_STUFF.  */
2629
2630           /* Make sure SUBSTED is big enough.  */
2631           if (substed_len + add_len >= substed_alloc_size)
2632             {
2633               substed_alloc_size = substed_len + add_len + 500;
2634               substed = (unsigned char *) xrealloc (substed,
2635                                                     substed_alloc_size + 1);
2636             }
2637
2638           /* Now add to the end of SUBSTED.  */
2639           if (add_stuff)
2640             {
2641               bcopy (add_stuff, substed + substed_len, add_len);
2642               substed_len += add_len;
2643             }
2644         }
2645
2646       if (really_changed)
2647         {
2648           if (buf_multibyte)
2649             {
2650               int nchars = multibyte_chars_in_text (substed, substed_len);
2651
2652               newtext = make_multibyte_string (substed, nchars, substed_len);
2653             }
2654           else
2655             newtext = make_unibyte_string (substed, substed_len);
2656         }
2657       xfree (substed);
2658     }
2659
2660   /* Replace the old text with the new in the cleanest possible way.  */
2661   replace_range (search_regs.start[sub], search_regs.end[sub],
2662                  newtext, 1, 0, 1);
2663   newpoint = search_regs.start[sub] + SCHARS (newtext);
2664
2665   if (case_action == all_caps)
2666     Fupcase_region (make_number (search_regs.start[sub]),
2667                     make_number (newpoint));
2668   else if (case_action == cap_initial)
2669     Fupcase_initials_region (make_number (search_regs.start[sub]),
2670                              make_number (newpoint));
2671
2672   /* Adjust search data for this change.  */
2673   {
2674     int oldend = search_regs.end[sub];
2675     int oldstart = search_regs.start[sub];
2676     int change = newpoint - search_regs.end[sub];
2677     int i;
2678
2679     for (i = 0; i < search_regs.num_regs; i++)
2680       {
2681         if (search_regs.start[i] >= oldend)
2682           search_regs.start[i] += change;
2683         else if (search_regs.start[i] > oldstart)
2684           search_regs.start[i] = oldstart;
2685         if (search_regs.end[i] >= oldend)
2686           search_regs.end[i] += change;
2687         else if (search_regs.end[i] > oldstart)
2688           search_regs.end[i] = oldstart;
2689       }
2690   }
2691
2692   /* Put point back where it was in the text.  */
2693   if (opoint <= 0)
2694     TEMP_SET_PT (opoint + ZV);
2695   else
2696     TEMP_SET_PT (opoint);
2697
2698   /* Now move point "officially" to the start of the inserted replacement.  */
2699   move_if_not_intangible (newpoint);
2700
2701   return Qnil;
2702 }
2703 \f
2704 static Lisp_Object
2705 match_limit (num, beginningp)
2706      Lisp_Object num;
2707      int beginningp;
2708 {
2709   register int n;
2710
2711   CHECK_NUMBER (num);
2712   n = XINT (num);
2713   if (n < 0)
2714     args_out_of_range (num, make_number (0));
2715   if (search_regs.num_regs <= 0)
2716     error ("No match data, because no search succeeded");
2717   if (n >= search_regs.num_regs
2718       || search_regs.start[n] < 0)
2719     return Qnil;
2720   return (make_number ((beginningp) ? search_regs.start[n]
2721                                     : search_regs.end[n]));
2722 }
2723
2724 DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
2725        doc: /* Return position of start of text matched by last search.
2726 SUBEXP, a number, specifies which parenthesized expression in the last
2727   regexp.
2728 Value is nil if SUBEXPth pair didn't match, or there were less than
2729   SUBEXP pairs.
2730 Zero means the entire text matched by the whole regexp or whole string.  */)
2731      (subexp)
2732      Lisp_Object subexp;
2733 {
2734   return match_limit (subexp, 1);
2735 }
2736
2737 DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
2738        doc: /* Return position of end of text matched by last search.
2739 SUBEXP, a number, specifies which parenthesized expression in the last
2740   regexp.
2741 Value is nil if SUBEXPth pair didn't match, or there were less than
2742   SUBEXP pairs.
2743 Zero means the entire text matched by the whole regexp or whole string.  */)
2744      (subexp)
2745      Lisp_Object subexp;
2746 {
2747   return match_limit (subexp, 0);
2748 }
2749
2750 DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 3, 0,
2751        doc: /* Return a list containing all info on what the last search matched.
2752 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2753 All the elements are markers or nil (nil if the Nth pair didn't match)
2754 if the last match was on a buffer; integers or nil if a string was matched.
2755 Use `store-match-data' to reinstate the data in this list.
2756
2757 If INTEGERS (the optional first argument) is non-nil, always use
2758 integers \(rather than markers) to represent buffer positions.  In
2759 this case, and if the last match was in a buffer, the buffer will get
2760 stored as one additional element at the end of the list.
2761
2762 If REUSE is a list, reuse it as part of the value.  If REUSE is long
2763 enough to hold all the values, and if INTEGERS is non-nil, no consing
2764 is done.
2765
2766 If optional third arg RESEAT is non-nil, any previous markers on the
2767 REUSE list will be modified to point to nowhere.
2768
2769 Return value is undefined if the last search failed.  */)
2770   (integers, reuse, reseat)
2771      Lisp_Object integers, reuse, reseat;
2772 {
2773   Lisp_Object tail, prev;
2774   Lisp_Object *data;
2775   int i, len;
2776
2777   if (!NILP (reseat))
2778     for (tail = reuse; CONSP (tail); tail = XCDR (tail))
2779       if (MARKERP (XCAR (tail)))
2780         {
2781           unchain_marker (XMARKER (XCAR (tail)));
2782           XSETCAR (tail, Qnil);
2783         }
2784
2785   if (NILP (last_thing_searched))
2786     return Qnil;
2787
2788   prev = Qnil;
2789
2790   data = (Lisp_Object *) alloca ((2 * search_regs.num_regs + 1)
2791                                  * sizeof (Lisp_Object));
2792
2793   len = 0;
2794   for (i = 0; i < search_regs.num_regs; i++)
2795     {
2796       int start = search_regs.start[i];
2797       if (start >= 0)
2798         {
2799           if (EQ (last_thing_searched, Qt)
2800               || ! NILP (integers))
2801             {
2802               XSETFASTINT (data[2 * i], start);
2803               XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
2804             }
2805           else if (BUFFERP (last_thing_searched))
2806             {
2807               data[2 * i] = Fmake_marker ();
2808               Fset_marker (data[2 * i],
2809                            make_number (start),
2810                            last_thing_searched);
2811               data[2 * i + 1] = Fmake_marker ();
2812               Fset_marker (data[2 * i + 1],
2813                            make_number (search_regs.end[i]),
2814                            last_thing_searched);
2815             }
2816           else
2817             /* last_thing_searched must always be Qt, a buffer, or Qnil.  */
2818             abort ();
2819
2820           len = 2 * i + 2;
2821         }
2822       else
2823         data[2 * i] = data[2 * i + 1] = Qnil;
2824     }
2825
2826   if (BUFFERP (last_thing_searched) && !NILP (integers))
2827     {
2828       data[len] = last_thing_searched;
2829       len++;
2830     }
2831
2832   /* If REUSE is not usable, cons up the values and return them.  */
2833   if (! CONSP (reuse))
2834     return Flist (len, data);
2835
2836   /* If REUSE is a list, store as many value elements as will fit
2837      into the elements of REUSE.  */
2838   for (i = 0, tail = reuse; CONSP (tail);
2839        i++, tail = XCDR (tail))
2840     {
2841       if (i < len)
2842         XSETCAR (tail, data[i]);
2843       else
2844         XSETCAR (tail, Qnil);
2845       prev = tail;
2846     }
2847
2848   /* If we couldn't fit all value elements into REUSE,
2849      cons up the rest of them and add them to the end of REUSE.  */
2850   if (i < len)
2851     XSETCDR (prev, Flist (len - i, data + i));
2852
2853   return reuse;
2854 }
2855
2856 /* Internal usage only:
2857    If RESEAT is `evaporate', put the markers back on the free list
2858    immediately.  No other references to the markers must exist in this case,
2859    so it is used only internally on the unwind stack and save-match-data from
2860    Lisp.  */
2861
2862 DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 2, 0,
2863        doc: /* Set internal data on last search match from elements of LIST.
2864 LIST should have been created by calling `match-data' previously.
2865
2866 If optional arg RESEAT is non-nil, make markers on LIST point nowhere.  */)
2867     (list, reseat)
2868      register Lisp_Object list, reseat;
2869 {
2870   register int i;
2871   register Lisp_Object marker;
2872
2873   if (running_asynch_code)
2874     save_search_regs ();
2875
2876   if (!CONSP (list) && !NILP (list))
2877     list = wrong_type_argument (Qconsp, list);
2878
2879   /* Unless we find a marker with a buffer or an explicit buffer
2880      in LIST, assume that this match data came from a string.  */
2881   last_thing_searched = Qt;
2882
2883   /* Allocate registers if they don't already exist.  */
2884   {
2885     int length = XFASTINT (Flength (list)) / 2;
2886
2887     if (length > search_regs.num_regs)
2888       {
2889         if (search_regs.num_regs == 0)
2890           {
2891             search_regs.start
2892               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2893             search_regs.end
2894               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2895           }
2896         else
2897           {
2898             search_regs.start
2899               = (regoff_t *) xrealloc (search_regs.start,
2900                                        length * sizeof (regoff_t));
2901             search_regs.end
2902               = (regoff_t *) xrealloc (search_regs.end,
2903                                        length * sizeof (regoff_t));
2904           }
2905
2906         for (i = search_regs.num_regs; i < length; i++)
2907           search_regs.start[i] = -1;
2908
2909         search_regs.num_regs = length;
2910       }
2911
2912     for (i = 0; CONSP (list); i++)
2913       {
2914         marker = XCAR (list);
2915         if (BUFFERP (marker))
2916           {
2917             last_thing_searched = marker;
2918             break;
2919           }
2920         if (i >= length)
2921           break;
2922         if (NILP (marker))
2923           {
2924             search_regs.start[i] = -1;
2925             list = XCDR (list);
2926           }
2927         else
2928           {
2929             int from;
2930             Lisp_Object m;
2931
2932             m = marker;
2933             if (MARKERP (marker))
2934               {
2935                 if (XMARKER (marker)->buffer == 0)
2936                   XSETFASTINT (marker, 0);
2937                 else
2938                   XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
2939               }
2940
2941             CHECK_NUMBER_COERCE_MARKER (marker);
2942             from = XINT (marker);
2943
2944             if (!NILP (reseat) && MARKERP (m))
2945               {
2946                 if (EQ (reseat, Qevaporate))
2947                   free_marker (m);
2948                 else
2949                   unchain_marker (XMARKER (m));
2950                 XSETCAR (list, Qnil);
2951               }
2952
2953             if ((list = XCDR (list), !CONSP (list)))
2954               break;
2955
2956             m = marker = XCAR (list);
2957
2958             if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
2959               XSETFASTINT (marker, 0);
2960
2961             CHECK_NUMBER_COERCE_MARKER (marker);
2962             search_regs.start[i] = from;
2963             search_regs.end[i] = XINT (marker);
2964
2965             if (!NILP (reseat) && MARKERP (m))
2966               {
2967                 if (EQ (reseat, Qevaporate))
2968                   free_marker (m);
2969                 else
2970                   unchain_marker (XMARKER (m));
2971                 XSETCAR (list, Qnil);
2972               }
2973           }
2974         list = XCDR (list);
2975       }
2976
2977     for (; i < search_regs.num_regs; i++)
2978       search_regs.start[i] = -1;
2979   }
2980
2981   return Qnil;
2982 }
2983
2984 /* If non-zero the match data have been saved in saved_search_regs
2985    during the execution of a sentinel or filter. */
2986 static int search_regs_saved;
2987 static struct re_registers saved_search_regs;
2988 static Lisp_Object saved_last_thing_searched;
2989
2990 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
2991    if asynchronous code (filter or sentinel) is running. */
2992 static void
2993 save_search_regs ()
2994 {
2995   if (!search_regs_saved)
2996     {
2997       saved_search_regs.num_regs = search_regs.num_regs;
2998       saved_search_regs.start = search_regs.start;
2999       saved_search_regs.end = search_regs.end;
3000       saved_last_thing_searched = last_thing_searched;
3001       last_thing_searched = Qnil;
3002       search_regs.num_regs = 0;
3003       search_regs.start = 0;
3004       search_regs.end = 0;
3005
3006       search_regs_saved = 1;
3007     }
3008 }
3009
3010 /* Called upon exit from filters and sentinels. */
3011 void
3012 restore_search_regs ()
3013 {
3014   if (search_regs_saved)
3015     {
3016       if (search_regs.num_regs > 0)
3017         {
3018           xfree (search_regs.start);
3019           xfree (search_regs.end);
3020         }
3021       search_regs.num_regs = saved_search_regs.num_regs;
3022       search_regs.start = saved_search_regs.start;
3023       search_regs.end = saved_search_regs.end;
3024       last_thing_searched = saved_last_thing_searched;
3025       saved_last_thing_searched = Qnil;
3026       search_regs_saved = 0;
3027     }
3028 }
3029
3030 static Lisp_Object
3031 unwind_set_match_data (list)
3032      Lisp_Object list;
3033 {
3034   /* It is safe to free (evaporate) the markers immediately.  */
3035   return Fset_match_data (list, Qevaporate);
3036 }
3037
3038 /* Called to unwind protect the match data.  */
3039 void
3040 record_unwind_save_match_data ()
3041 {
3042   record_unwind_protect (unwind_set_match_data,
3043                          Fmatch_data (Qnil, Qnil, Qnil));
3044 }
3045
3046 /* Quote a string to inactivate reg-expr chars */
3047
3048 DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
3049        doc: /* Return a regexp string which matches exactly STRING and nothing else.  */)
3050      (string)
3051      Lisp_Object string;
3052 {
3053   register unsigned char *in, *out, *end;
3054   register unsigned char *temp;
3055   int backslashes_added = 0;
3056
3057   CHECK_STRING (string);
3058
3059   temp = (unsigned char *) alloca (SBYTES (string) * 2);
3060
3061   /* Now copy the data into the new string, inserting escapes. */
3062
3063   in = SDATA (string);
3064   end = in + SBYTES (string);
3065   out = temp;
3066
3067   for (; in != end; in++)
3068     {
3069       if (*in == '[' || *in == ']'
3070           || *in == '*' || *in == '.' || *in == '\\'
3071           || *in == '?' || *in == '+'
3072           || *in == '^' || *in == '$')
3073         *out++ = '\\', backslashes_added++;
3074       *out++ = *in;
3075     }
3076
3077   return make_specified_string (temp,
3078                                 SCHARS (string) + backslashes_added,
3079                                 out - temp,
3080                                 STRING_MULTIBYTE (string));
3081 }
3082 \f
3083 void
3084 syms_of_search ()
3085 {
3086   register int i;
3087
3088   for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
3089     {
3090       searchbufs[i].buf.allocated = 100;
3091       searchbufs[i].buf.buffer = (unsigned char *) xmalloc (100);
3092       searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
3093       searchbufs[i].regexp = Qnil;
3094       searchbufs[i].whitespace_regexp = Qnil;
3095       staticpro (&searchbufs[i].regexp);
3096       staticpro (&searchbufs[i].whitespace_regexp);
3097       searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
3098     }
3099   searchbuf_head = &searchbufs[0];
3100
3101   Qsearch_failed = intern ("search-failed");
3102   staticpro (&Qsearch_failed);
3103   Qinvalid_regexp = intern ("invalid-regexp");
3104   staticpro (&Qinvalid_regexp);
3105
3106   Fput (Qsearch_failed, Qerror_conditions,
3107         Fcons (Qsearch_failed, Fcons (Qerror, Qnil)));
3108   Fput (Qsearch_failed, Qerror_message,
3109         build_string ("Search failed"));
3110
3111   Fput (Qinvalid_regexp, Qerror_conditions,
3112         Fcons (Qinvalid_regexp, Fcons (Qerror, Qnil)));
3113   Fput (Qinvalid_regexp, Qerror_message,
3114         build_string ("Invalid regexp"));
3115
3116   last_thing_searched = Qnil;
3117   staticpro (&last_thing_searched);
3118
3119   saved_last_thing_searched = Qnil;
3120   staticpro (&saved_last_thing_searched);
3121
3122   DEFVAR_LISP ("search-spaces-regexp", &Vsearch_spaces_regexp,
3123       doc: /* Regexp to substitute for bunches of spaces in regexp search.
3124 Some commands use this for user-specified regexps.
3125 Spaces that occur inside character classes or repetition operators
3126 or other such regexp constructs are not replaced with this.
3127 A value of nil (which is the normal value) means treat spaces literally.  */);
3128   Vsearch_spaces_regexp = Qnil;
3129
3130   defsubr (&Slooking_at);
3131   defsubr (&Sposix_looking_at);
3132   defsubr (&Sstring_match);
3133   defsubr (&Sposix_string_match);
3134   defsubr (&Ssearch_forward);
3135   defsubr (&Ssearch_backward);
3136   defsubr (&Sword_search_forward);
3137   defsubr (&Sword_search_backward);
3138   defsubr (&Sre_search_forward);
3139   defsubr (&Sre_search_backward);
3140   defsubr (&Sposix_search_forward);
3141   defsubr (&Sposix_search_backward);
3142   defsubr (&Sreplace_match);
3143   defsubr (&Smatch_beginning);
3144   defsubr (&Smatch_end);
3145   defsubr (&Smatch_data);
3146   defsubr (&Sset_match_data);
3147   defsubr (&Sregexp_quote);
3148 }
3149
3150 /* arch-tag: a6059d79-0552-4f14-a2cb-d379a4e3c78f
3151    (do not change this comment) */