Merge from trunk.
[emacs.git] / src / syntax.c
blob9b707c6c3b771ec34a05d7ba00ca4c62e0caf2bb
1 /* GNU Emacs routines to deal with syntax tables; also word and list parsing.
2 Copyright (C) 1985, 1987, 1993, 1994, 1995, 1997, 1998, 1999, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
4 Free Software Foundation, Inc.
6 This file is part of GNU Emacs.
8 GNU Emacs is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or
11 (at your option) any later version.
13 GNU Emacs is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
22 #include <config.h>
23 #include <ctype.h>
24 #include <setjmp.h>
25 #include "lisp.h"
26 #include "commands.h"
27 #include "buffer.h"
28 #include "character.h"
29 #include "keymap.h"
30 #include "regex.h"
32 /* Make syntax table lookup grant data in gl_state. */
33 #define SYNTAX_ENTRY_VIA_PROPERTY
35 #include "syntax.h"
36 #include "intervals.h"
38 /* We use these constants in place for comment-style and
39 string-ender-char to distinguish comments/strings started by
40 comment_fence and string_fence codes. */
42 #define ST_COMMENT_STYLE (256 + 1)
43 #define ST_STRING_STYLE (256 + 2)
44 #include "category.h"
46 Lisp_Object Qsyntax_table_p, Qsyntax_table, Qscan_error;
48 int words_include_escapes;
49 int parse_sexp_lookup_properties;
51 /* Nonzero means `scan-sexps' treat all multibyte characters as symbol. */
52 int multibyte_syntax_as_symbol;
54 /* Used as a temporary in SYNTAX_ENTRY and other macros in syntax.h,
55 if not compiled with GCC. No need to mark it, since it is used
56 only very temporarily. */
57 Lisp_Object syntax_temp;
59 /* Non-zero means an open parenthesis in column 0 is always considered
60 to be the start of a defun. Zero means an open parenthesis in
61 column 0 has no special meaning. */
63 int open_paren_in_column_0_is_defun_start;
65 /* This is the internal form of the parse state used in parse-partial-sexp. */
67 struct lisp_parse_state
69 int depth; /* Depth at end of parsing. */
70 int instring; /* -1 if not within string, else desired terminator. */
71 int incomment; /* -1 if in unnestable comment else comment nesting */
72 int comstyle; /* comment style a=0, or b=1, or ST_COMMENT_STYLE. */
73 int quoted; /* Nonzero if just after an escape char at end of parsing */
74 int mindepth; /* Minimum depth seen while scanning. */
75 /* Char number of most recent start-of-expression at current level */
76 EMACS_INT thislevelstart;
77 /* Char number of start of containing expression */
78 EMACS_INT prevlevelstart;
79 EMACS_INT location; /* Char number at which parsing stopped. */
80 EMACS_INT comstr_start; /* Position of last comment/string starter. */
81 Lisp_Object levelstarts; /* Char numbers of starts-of-expression
82 of levels (starting from outermost). */
85 /* These variables are a cache for finding the start of a defun.
86 find_start_pos is the place for which the defun start was found.
87 find_start_value is the defun start position found for it.
88 find_start_value_byte is the corresponding byte position.
89 find_start_buffer is the buffer it was found in.
90 find_start_begv is the BEGV value when it was found.
91 find_start_modiff is the value of MODIFF when it was found. */
93 static EMACS_INT find_start_pos;
94 static EMACS_INT find_start_value;
95 static EMACS_INT find_start_value_byte;
96 static struct buffer *find_start_buffer;
97 static EMACS_INT find_start_begv;
98 static int find_start_modiff;
101 static Lisp_Object skip_chars (int, Lisp_Object, Lisp_Object, int);
102 static Lisp_Object skip_syntaxes (int, Lisp_Object, Lisp_Object);
103 static Lisp_Object scan_lists (EMACS_INT, EMACS_INT, EMACS_INT, int);
104 static void scan_sexps_forward (struct lisp_parse_state *,
105 EMACS_INT, EMACS_INT, EMACS_INT, int,
106 int, Lisp_Object, int);
107 static int in_classes (int, Lisp_Object);
110 struct gl_state_s gl_state; /* Global state of syntax parser. */
112 INTERVAL interval_of (int, Lisp_Object);
113 #define INTERVALS_AT_ONCE 10 /* 1 + max-number of intervals
114 to scan to property-change. */
116 /* Update gl_state to an appropriate interval which contains CHARPOS. The
117 sign of COUNT give the relative position of CHARPOS wrt the previously
118 valid interval. If INIT, only [be]_property fields of gl_state are
119 valid at start, the rest is filled basing on OBJECT.
121 `gl_state.*_i' are the intervals, and CHARPOS is further in the search
122 direction than the intervals - or in an interval. We update the
123 current syntax-table basing on the property of this interval, and
124 update the interval to start further than CHARPOS - or be
125 NULL_INTERVAL. We also update lim_property to be the next value of
126 charpos to call this subroutine again - or be before/after the
127 start/end of OBJECT. */
129 void
130 update_syntax_table (int charpos, int count, int init, Lisp_Object object)
132 Lisp_Object tmp_table;
133 int cnt = 0, invalidate = 1;
134 INTERVAL i;
136 if (init)
138 gl_state.old_prop = Qnil;
139 gl_state.start = gl_state.b_property;
140 gl_state.stop = gl_state.e_property;
141 i = interval_of (charpos, object);
142 gl_state.backward_i = gl_state.forward_i = i;
143 invalidate = 0;
144 if (NULL_INTERVAL_P (i))
145 return;
146 /* interval_of updates only ->position of the return value, so
147 update the parents manually to speed up update_interval. */
148 while (!NULL_PARENT (i))
150 if (AM_RIGHT_CHILD (i))
151 INTERVAL_PARENT (i)->position = i->position
152 - LEFT_TOTAL_LENGTH (i) + TOTAL_LENGTH (i) /* right end */
153 - TOTAL_LENGTH (INTERVAL_PARENT (i))
154 + LEFT_TOTAL_LENGTH (INTERVAL_PARENT (i));
155 else
156 INTERVAL_PARENT (i)->position = i->position - LEFT_TOTAL_LENGTH (i)
157 + TOTAL_LENGTH (i);
158 i = INTERVAL_PARENT (i);
160 i = gl_state.forward_i;
161 gl_state.b_property = i->position - gl_state.offset;
162 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
163 goto update;
165 i = count > 0 ? gl_state.forward_i : gl_state.backward_i;
167 /* We are guaranteed to be called with CHARPOS either in i,
168 or further off. */
169 if (NULL_INTERVAL_P (i))
170 error ("Error in syntax_table logic for to-the-end intervals");
171 else if (charpos < i->position) /* Move left. */
173 if (count > 0)
174 error ("Error in syntax_table logic for intervals <-");
175 /* Update the interval. */
176 i = update_interval (i, charpos);
177 if (INTERVAL_LAST_POS (i) != gl_state.b_property)
179 invalidate = 0;
180 gl_state.forward_i = i;
181 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
184 else if (charpos >= INTERVAL_LAST_POS (i)) /* Move right. */
186 if (count < 0)
187 error ("Error in syntax_table logic for intervals ->");
188 /* Update the interval. */
189 i = update_interval (i, charpos);
190 if (i->position != gl_state.e_property)
192 invalidate = 0;
193 gl_state.backward_i = i;
194 gl_state.b_property = i->position - gl_state.offset;
198 update:
199 tmp_table = textget (i->plist, Qsyntax_table);
201 if (invalidate)
202 invalidate = !EQ (tmp_table, gl_state.old_prop); /* Need to invalidate? */
204 if (invalidate) /* Did not get to adjacent interval. */
205 { /* with the same table => */
206 /* invalidate the old range. */
207 if (count > 0)
209 gl_state.backward_i = i;
210 gl_state.b_property = i->position - gl_state.offset;
212 else
214 gl_state.forward_i = i;
215 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
219 if (!EQ (tmp_table, gl_state.old_prop))
221 gl_state.current_syntax_table = tmp_table;
222 gl_state.old_prop = tmp_table;
223 if (EQ (Fsyntax_table_p (tmp_table), Qt))
225 gl_state.use_global = 0;
227 else if (CONSP (tmp_table))
229 gl_state.use_global = 1;
230 gl_state.global_code = tmp_table;
232 else
234 gl_state.use_global = 0;
235 gl_state.current_syntax_table = current_buffer->syntax_table;
239 while (!NULL_INTERVAL_P (i))
241 if (cnt && !EQ (tmp_table, textget (i->plist, Qsyntax_table)))
243 if (count > 0)
245 gl_state.e_property = i->position - gl_state.offset;
246 gl_state.forward_i = i;
248 else
250 gl_state.b_property
251 = i->position + LENGTH (i) - gl_state.offset;
252 gl_state.backward_i = i;
254 return;
256 else if (cnt == INTERVALS_AT_ONCE)
258 if (count > 0)
260 gl_state.e_property
261 = i->position + LENGTH (i) - gl_state.offset
262 /* e_property at EOB is not set to ZV but to ZV+1, so that
263 we can do INC(from);UPDATE_SYNTAX_TABLE_FORWARD without
264 having to check eob between the two. */
265 + (NULL_INTERVAL_P (next_interval (i)) ? 1 : 0);
266 gl_state.forward_i = i;
268 else
270 gl_state.b_property = i->position - gl_state.offset;
271 gl_state.backward_i = i;
273 return;
275 cnt++;
276 i = count > 0 ? next_interval (i) : previous_interval (i);
278 eassert (NULL_INTERVAL_P (i)); /* This property goes to the end. */
279 if (count > 0)
280 gl_state.e_property = gl_state.stop;
281 else
282 gl_state.b_property = gl_state.start;
285 /* Returns TRUE if char at CHARPOS is quoted.
286 Global syntax-table data should be set up already to be good at CHARPOS
287 or after. On return global syntax data is good for lookup at CHARPOS. */
289 static int
290 char_quoted (EMACS_INT charpos, EMACS_INT bytepos)
292 register enum syntaxcode code;
293 register EMACS_INT beg = BEGV;
294 register int quoted = 0;
295 EMACS_INT orig = charpos;
297 while (charpos > beg)
299 int c;
300 DEC_BOTH (charpos, bytepos);
302 UPDATE_SYNTAX_TABLE_BACKWARD (charpos);
303 c = FETCH_CHAR_AS_MULTIBYTE (bytepos);
304 code = SYNTAX (c);
305 if (! (code == Scharquote || code == Sescape))
306 break;
308 quoted = !quoted;
311 UPDATE_SYNTAX_TABLE (orig);
312 return quoted;
315 /* Return the bytepos one character after BYTEPOS.
316 We assume that BYTEPOS is not at the end of the buffer. */
318 INLINE EMACS_INT
319 inc_bytepos (EMACS_INT bytepos)
321 if (NILP (current_buffer->enable_multibyte_characters))
322 return bytepos + 1;
324 INC_POS (bytepos);
325 return bytepos;
328 /* Return the bytepos one character before BYTEPOS.
329 We assume that BYTEPOS is not at the start of the buffer. */
331 INLINE EMACS_INT
332 dec_bytepos (EMACS_INT bytepos)
334 if (NILP (current_buffer->enable_multibyte_characters))
335 return bytepos - 1;
337 DEC_POS (bytepos);
338 return bytepos;
341 /* Return a defun-start position before POS and not too far before.
342 It should be the last one before POS, or nearly the last.
344 When open_paren_in_column_0_is_defun_start is nonzero,
345 only the beginning of the buffer is treated as a defun-start.
347 We record the information about where the scan started
348 and what its result was, so that another call in the same area
349 can return the same value very quickly.
351 There is no promise at which position the global syntax data is
352 valid on return from the subroutine, so the caller should explicitly
353 update the global data. */
355 static EMACS_INT
356 find_defun_start (EMACS_INT pos, EMACS_INT pos_byte)
358 EMACS_INT opoint = PT, opoint_byte = PT_BYTE;
360 if (!open_paren_in_column_0_is_defun_start)
362 find_start_value_byte = BEGV_BYTE;
363 return BEGV;
366 /* Use previous finding, if it's valid and applies to this inquiry. */
367 if (current_buffer == find_start_buffer
368 /* Reuse the defun-start even if POS is a little farther on.
369 POS might be in the next defun, but that's ok.
370 Our value may not be the best possible, but will still be usable. */
371 && pos <= find_start_pos + 1000
372 && pos >= find_start_value
373 && BEGV == find_start_begv
374 && MODIFF == find_start_modiff)
375 return find_start_value;
377 /* Back up to start of line. */
378 scan_newline (pos, pos_byte, BEGV, BEGV_BYTE, -1, 1);
380 /* We optimize syntax-table lookup for rare updates. Thus we accept
381 only those `^\s(' which are good in global _and_ text-property
382 syntax-tables. */
383 SETUP_BUFFER_SYNTAX_TABLE ();
384 while (PT > BEGV)
386 int c;
388 /* Open-paren at start of line means we may have found our
389 defun-start. */
390 c = FETCH_CHAR_AS_MULTIBYTE (PT_BYTE);
391 if (SYNTAX (c) == Sopen)
393 SETUP_SYNTAX_TABLE (PT + 1, -1); /* Try again... */
394 c = FETCH_CHAR_AS_MULTIBYTE (PT_BYTE);
395 if (SYNTAX (c) == Sopen)
396 break;
397 /* Now fallback to the default value. */
398 SETUP_BUFFER_SYNTAX_TABLE ();
400 /* Move to beg of previous line. */
401 scan_newline (PT, PT_BYTE, BEGV, BEGV_BYTE, -2, 1);
404 /* Record what we found, for the next try. */
405 find_start_value = PT;
406 find_start_value_byte = PT_BYTE;
407 find_start_buffer = current_buffer;
408 find_start_modiff = MODIFF;
409 find_start_begv = BEGV;
410 find_start_pos = pos;
412 TEMP_SET_PT_BOTH (opoint, opoint_byte);
414 return find_start_value;
417 /* Return the SYNTAX_COMEND_FIRST of the character before POS, POS_BYTE. */
419 static int
420 prev_char_comend_first (int pos, int pos_byte)
422 int c, val;
424 DEC_BOTH (pos, pos_byte);
425 UPDATE_SYNTAX_TABLE_BACKWARD (pos);
426 c = FETCH_CHAR (pos_byte);
427 val = SYNTAX_COMEND_FIRST (c);
428 UPDATE_SYNTAX_TABLE_FORWARD (pos + 1);
429 return val;
432 /* Return the SYNTAX_COMSTART_FIRST of the character before POS, POS_BYTE. */
434 /* static int
435 * prev_char_comstart_first (pos, pos_byte)
436 * int pos, pos_byte;
438 * int c, val;
440 * DEC_BOTH (pos, pos_byte);
441 * UPDATE_SYNTAX_TABLE_BACKWARD (pos);
442 * c = FETCH_CHAR (pos_byte);
443 * val = SYNTAX_COMSTART_FIRST (c);
444 * UPDATE_SYNTAX_TABLE_FORWARD (pos + 1);
445 * return val;
446 * } */
448 /* Checks whether charpos FROM is at the end of a comment.
449 FROM_BYTE is the bytepos corresponding to FROM.
450 Do not move back before STOP.
452 Return a positive value if we find a comment ending at FROM/FROM_BYTE;
453 return -1 otherwise.
455 If successful, store the charpos of the comment's beginning
456 into *CHARPOS_PTR, and the bytepos into *BYTEPOS_PTR.
458 Global syntax data remains valid for backward search starting at
459 the returned value (or at FROM, if the search was not successful). */
461 static int
462 back_comment (EMACS_INT from, EMACS_INT from_byte, EMACS_INT stop, int comnested, int comstyle, EMACS_INT *charpos_ptr, EMACS_INT *bytepos_ptr)
464 /* Look back, counting the parity of string-quotes,
465 and recording the comment-starters seen.
466 When we reach a safe place, assume that's not in a string;
467 then step the main scan to the earliest comment-starter seen
468 an even number of string quotes away from the safe place.
470 OFROM[I] is position of the earliest comment-starter seen
471 which is I+2X quotes from the comment-end.
472 PARITY is current parity of quotes from the comment end. */
473 int string_style = -1; /* Presumed outside of any string. */
474 int string_lossage = 0;
475 /* Not a real lossage: indicates that we have passed a matching comment
476 starter plus a non-matching comment-ender, meaning that any matching
477 comment-starter we might see later could be a false positive (hidden
478 inside another comment).
479 Test case: { a (* b } c (* d *) */
480 int comment_lossage = 0;
481 EMACS_INT comment_end = from;
482 EMACS_INT comment_end_byte = from_byte;
483 EMACS_INT comstart_pos = 0;
484 EMACS_INT comstart_byte;
485 /* Place where the containing defun starts,
486 or 0 if we didn't come across it yet. */
487 EMACS_INT defun_start = 0;
488 EMACS_INT defun_start_byte = 0;
489 register enum syntaxcode code;
490 int nesting = 1; /* current comment nesting */
491 int c;
492 int syntax = 0;
494 /* FIXME: A }} comment-ender style leads to incorrect behavior
495 in the case of {{ c }}} because we ignore the last two chars which are
496 assumed to be comment-enders although they aren't. */
498 /* At beginning of range to scan, we're outside of strings;
499 that determines quote parity to the comment-end. */
500 while (from != stop)
502 int temp_byte, prev_syntax;
503 int com2start, com2end;
505 /* Move back and examine a character. */
506 DEC_BOTH (from, from_byte);
507 UPDATE_SYNTAX_TABLE_BACKWARD (from);
509 prev_syntax = syntax;
510 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
511 syntax = SYNTAX_WITH_FLAGS (c);
512 code = SYNTAX (c);
514 /* Check for 2-char comment markers. */
515 com2start = (SYNTAX_FLAGS_COMSTART_FIRST (syntax)
516 && SYNTAX_FLAGS_COMSTART_SECOND (prev_syntax)
517 && comstyle == SYNTAX_FLAGS_COMMENT_STYLE (prev_syntax)
518 && (SYNTAX_FLAGS_COMMENT_NESTED (prev_syntax)
519 || SYNTAX_FLAGS_COMMENT_NESTED (syntax)) == comnested);
520 com2end = (SYNTAX_FLAGS_COMEND_FIRST (syntax)
521 && SYNTAX_FLAGS_COMEND_SECOND (prev_syntax));
523 /* Nasty cases with overlapping 2-char comment markers:
524 - snmp-mode: -- c -- foo -- c --
525 --- c --
526 ------ c --
527 - c-mode: *||*
528 |* *|* *|
529 |*| |* |*|
530 /// */
532 /* If a 2-char comment sequence partly overlaps with another,
533 we don't try to be clever. */
534 if (from > stop && (com2end || com2start))
536 int next = from, next_byte = from_byte, next_c, next_syntax;
537 DEC_BOTH (next, next_byte);
538 UPDATE_SYNTAX_TABLE_BACKWARD (next);
539 next_c = FETCH_CHAR_AS_MULTIBYTE (next_byte);
540 next_syntax = SYNTAX_WITH_FLAGS (next_c);
541 if (((com2start || comnested)
542 && SYNTAX_FLAGS_COMEND_SECOND (syntax)
543 && SYNTAX_FLAGS_COMEND_FIRST (next_syntax))
544 || ((com2end || comnested)
545 && SYNTAX_FLAGS_COMSTART_SECOND (syntax)
546 && comstyle == SYNTAX_FLAGS_COMMENT_STYLE (syntax)
547 && SYNTAX_FLAGS_COMSTART_FIRST (next_syntax)))
548 goto lossage;
549 /* UPDATE_SYNTAX_TABLE_FORWARD (next + 1); */
552 if (com2start && comstart_pos == 0)
553 /* We're looking at a comment starter. But it might be a comment
554 ender as well (see snmp-mode). The first time we see one, we
555 need to consider it as a comment starter,
556 and the subsequent times as a comment ender. */
557 com2end = 0;
559 /* Turn a 2-char comment sequences into the appropriate syntax. */
560 if (com2end)
561 code = Sendcomment;
562 else if (com2start)
563 code = Scomment;
564 /* Ignore comment starters of a different style. */
565 else if (code == Scomment
566 && (comstyle != SYNTAX_FLAGS_COMMENT_STYLE (syntax)
567 || SYNTAX_FLAGS_COMMENT_NESTED (syntax) != comnested))
568 continue;
570 /* Ignore escaped characters, except comment-enders. */
571 if (code != Sendcomment && char_quoted (from, from_byte))
572 continue;
574 switch (code)
576 case Sstring_fence:
577 case Scomment_fence:
578 c = (code == Sstring_fence ? ST_STRING_STYLE : ST_COMMENT_STYLE);
579 case Sstring:
580 /* Track parity of quotes. */
581 if (string_style == -1)
582 /* Entering a string. */
583 string_style = c;
584 else if (string_style == c)
585 /* Leaving the string. */
586 string_style = -1;
587 else
588 /* If we have two kinds of string delimiters.
589 There's no way to grok this scanning backwards. */
590 string_lossage = 1;
591 break;
593 case Scomment:
594 /* We've already checked that it is the relevant comstyle. */
595 if (string_style != -1 || comment_lossage || string_lossage)
596 /* There are odd string quotes involved, so let's be careful.
597 Test case in Pascal: " { " a { " } */
598 goto lossage;
600 if (!comnested)
602 /* Record best comment-starter so far. */
603 comstart_pos = from;
604 comstart_byte = from_byte;
606 else if (--nesting <= 0)
607 /* nested comments have to be balanced, so we don't need to
608 keep looking for earlier ones. We use here the same (slightly
609 incorrect) reasoning as below: since it is followed by uniform
610 paired string quotes, this comment-start has to be outside of
611 strings, else the comment-end itself would be inside a string. */
612 goto done;
613 break;
615 case Sendcomment:
616 if (SYNTAX_FLAGS_COMMENT_STYLE (syntax) == comstyle
617 && ((com2end && SYNTAX_FLAGS_COMMENT_NESTED (prev_syntax))
618 || SYNTAX_FLAGS_COMMENT_NESTED (syntax)) == comnested)
619 /* This is the same style of comment ender as ours. */
621 if (comnested)
622 nesting++;
623 else
624 /* Anything before that can't count because it would match
625 this comment-ender rather than ours. */
626 from = stop; /* Break out of the loop. */
628 else if (comstart_pos != 0 || c != '\n')
629 /* We're mixing comment styles here, so we'd better be careful.
630 The (comstart_pos != 0 || c != '\n') check is not quite correct
631 (we should just always set comment_lossage), but removing it
632 would imply that any multiline comment in C would go through
633 lossage, which seems overkill.
634 The failure should only happen in the rare cases such as
635 { (* } *) */
636 comment_lossage = 1;
637 break;
639 case Sopen:
640 /* Assume a defun-start point is outside of strings. */
641 if (open_paren_in_column_0_is_defun_start
642 && (from == stop
643 || (temp_byte = dec_bytepos (from_byte),
644 FETCH_CHAR (temp_byte) == '\n')))
646 defun_start = from;
647 defun_start_byte = from_byte;
648 from = stop; /* Break out of the loop. */
650 break;
652 default:
653 break;
657 if (comstart_pos == 0)
659 from = comment_end;
660 from_byte = comment_end_byte;
661 UPDATE_SYNTAX_TABLE_FORWARD (comment_end - 1);
663 /* If comstart_pos is set and we get here (ie. didn't jump to `lossage'
664 or `done'), then we've found the beginning of the non-nested comment. */
665 else if (1) /* !comnested */
667 from = comstart_pos;
668 from_byte = comstart_byte;
669 UPDATE_SYNTAX_TABLE_FORWARD (from - 1);
671 else
673 struct lisp_parse_state state;
674 lossage:
675 /* We had two kinds of string delimiters mixed up
676 together. Decode this going forwards.
677 Scan fwd from a known safe place (beginning-of-defun)
678 to the one in question; this records where we
679 last passed a comment starter. */
680 /* If we did not already find the defun start, find it now. */
681 if (defun_start == 0)
683 defun_start = find_defun_start (comment_end, comment_end_byte);
684 defun_start_byte = find_start_value_byte;
688 scan_sexps_forward (&state,
689 defun_start, defun_start_byte,
690 comment_end, -10000, 0, Qnil, 0);
691 defun_start = comment_end;
692 if (state.incomment == (comnested ? 1 : -1)
693 && state.comstyle == comstyle)
694 from = state.comstr_start;
695 else
697 from = comment_end;
698 if (state.incomment)
699 /* If comment_end is inside some other comment, maybe ours
700 is nested, so we need to try again from within the
701 surrounding comment. Example: { a (* " *) */
703 /* FIXME: We should advance by one or two chars. */
704 defun_start = state.comstr_start + 2;
705 defun_start_byte = CHAR_TO_BYTE (defun_start);
708 } while (defun_start < comment_end);
710 from_byte = CHAR_TO_BYTE (from);
711 UPDATE_SYNTAX_TABLE_FORWARD (from - 1);
714 done:
715 *charpos_ptr = from;
716 *bytepos_ptr = from_byte;
718 return (from == comment_end) ? -1 : from;
721 DEFUN ("syntax-table-p", Fsyntax_table_p, Ssyntax_table_p, 1, 1, 0,
722 doc: /* Return t if OBJECT is a syntax table.
723 Currently, any char-table counts as a syntax table. */)
724 (Lisp_Object object)
726 if (CHAR_TABLE_P (object)
727 && EQ (XCHAR_TABLE (object)->purpose, Qsyntax_table))
728 return Qt;
729 return Qnil;
732 static void
733 check_syntax_table (Lisp_Object obj)
735 CHECK_TYPE (CHAR_TABLE_P (obj) && EQ (XCHAR_TABLE (obj)->purpose, Qsyntax_table),
736 Qsyntax_table_p, obj);
739 DEFUN ("syntax-table", Fsyntax_table, Ssyntax_table, 0, 0, 0,
740 doc: /* Return the current syntax table.
741 This is the one specified by the current buffer. */)
742 (void)
744 return current_buffer->syntax_table;
747 DEFUN ("standard-syntax-table", Fstandard_syntax_table,
748 Sstandard_syntax_table, 0, 0, 0,
749 doc: /* Return the standard syntax table.
750 This is the one used for new buffers. */)
751 (void)
753 return Vstandard_syntax_table;
756 DEFUN ("copy-syntax-table", Fcopy_syntax_table, Scopy_syntax_table, 0, 1, 0,
757 doc: /* Construct a new syntax table and return it.
758 It is a copy of the TABLE, which defaults to the standard syntax table. */)
759 (Lisp_Object table)
761 Lisp_Object copy;
763 if (!NILP (table))
764 check_syntax_table (table);
765 else
766 table = Vstandard_syntax_table;
768 copy = Fcopy_sequence (table);
770 /* Only the standard syntax table should have a default element.
771 Other syntax tables should inherit from parents instead. */
772 XCHAR_TABLE (copy)->defalt = Qnil;
774 /* Copied syntax tables should all have parents.
775 If we copied one with no parent, such as the standard syntax table,
776 use the standard syntax table as the copy's parent. */
777 if (NILP (XCHAR_TABLE (copy)->parent))
778 Fset_char_table_parent (copy, Vstandard_syntax_table);
779 return copy;
782 DEFUN ("set-syntax-table", Fset_syntax_table, Sset_syntax_table, 1, 1, 0,
783 doc: /* Select a new syntax table for the current buffer.
784 One argument, a syntax table. */)
785 (Lisp_Object table)
787 int idx;
788 check_syntax_table (table);
789 current_buffer->syntax_table = table;
790 /* Indicate that this buffer now has a specified syntax table. */
791 idx = PER_BUFFER_VAR_IDX (syntax_table);
792 SET_PER_BUFFER_VALUE_P (current_buffer, idx, 1);
793 return table;
796 /* Convert a letter which signifies a syntax code
797 into the code it signifies.
798 This is used by modify-syntax-entry, and other things. */
800 unsigned char syntax_spec_code[0400] =
801 { 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
802 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
803 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
804 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
805 (char) Swhitespace, (char) Scomment_fence, (char) Sstring, 0377,
806 (char) Smath, 0377, 0377, (char) Squote,
807 (char) Sopen, (char) Sclose, 0377, 0377,
808 0377, (char) Swhitespace, (char) Spunct, (char) Scharquote,
809 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
810 0377, 0377, 0377, 0377,
811 (char) Scomment, 0377, (char) Sendcomment, 0377,
812 (char) Sinherit, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* @, A ... */
813 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
814 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
815 0377, 0377, 0377, 0377, (char) Sescape, 0377, 0377, (char) Ssymbol,
816 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* `, a, ... */
817 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
818 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
819 0377, 0377, 0377, 0377, (char) Sstring_fence, 0377, 0377, 0377
822 /* Indexed by syntax code, give the letter that describes it. */
824 char syntax_code_spec[16] =
826 ' ', '.', 'w', '_', '(', ')', '\'', '\"', '$', '\\', '/', '<', '>', '@',
827 '!', '|'
830 /* Indexed by syntax code, give the object (cons of syntax code and
831 nil) to be stored in syntax table. Since these objects can be
832 shared among syntax tables, we generate them in advance. By
833 sharing objects, the function `describe-syntax' can give a more
834 compact listing. */
835 static Lisp_Object Vsyntax_code_object;
838 DEFUN ("char-syntax", Fchar_syntax, Schar_syntax, 1, 1, 0,
839 doc: /* Return the syntax code of CHARACTER, described by a character.
840 For example, if CHARACTER is a word constituent, the
841 character `w' (119) is returned.
842 The characters that correspond to various syntax codes
843 are listed in the documentation of `modify-syntax-entry'. */)
844 (Lisp_Object character)
846 int char_int;
847 CHECK_CHARACTER (character);
848 char_int = XINT (character);
849 SETUP_BUFFER_SYNTAX_TABLE ();
850 return make_number (syntax_code_spec[(int) SYNTAX (char_int)]);
853 DEFUN ("matching-paren", Fmatching_paren, Smatching_paren, 1, 1, 0,
854 doc: /* Return the matching parenthesis of CHARACTER, or nil if none. */)
855 (Lisp_Object character)
857 int char_int, code;
858 CHECK_NUMBER (character);
859 char_int = XINT (character);
860 SETUP_BUFFER_SYNTAX_TABLE ();
861 code = SYNTAX (char_int);
862 if (code == Sopen || code == Sclose)
863 return SYNTAX_MATCH (char_int);
864 return Qnil;
867 DEFUN ("string-to-syntax", Fstring_to_syntax, Sstring_to_syntax, 1, 1, 0,
868 doc: /* Convert a syntax specification STRING into syntax cell form.
869 STRING should be a string as it is allowed as argument of
870 `modify-syntax-entry'. Value is the equivalent cons cell
871 \(CODE . MATCHING-CHAR) that can be used as value of a `syntax-table'
872 text property. */)
873 (Lisp_Object string)
875 register const unsigned char *p;
876 register enum syntaxcode code;
877 int val;
878 Lisp_Object match;
880 CHECK_STRING (string);
882 p = SDATA (string);
883 code = (enum syntaxcode) syntax_spec_code[*p++];
884 if (((int) code & 0377) == 0377)
885 error ("Invalid syntax description letter: %c", p[-1]);
887 if (code == Sinherit)
888 return Qnil;
890 if (*p)
892 int len;
893 int character = STRING_CHAR_AND_LENGTH (p, len);
894 XSETINT (match, character);
895 if (XFASTINT (match) == ' ')
896 match = Qnil;
897 p += len;
899 else
900 match = Qnil;
902 val = (int) code;
903 while (*p)
904 switch (*p++)
906 case '1':
907 val |= 1 << 16;
908 break;
910 case '2':
911 val |= 1 << 17;
912 break;
914 case '3':
915 val |= 1 << 18;
916 break;
918 case '4':
919 val |= 1 << 19;
920 break;
922 case 'p':
923 val |= 1 << 20;
924 break;
926 case 'b':
927 val |= 1 << 21;
928 break;
930 case 'n':
931 val |= 1 << 22;
932 break;
935 if (val < XVECTOR (Vsyntax_code_object)->size && NILP (match))
936 return XVECTOR (Vsyntax_code_object)->contents[val];
937 else
938 /* Since we can't use a shared object, let's make a new one. */
939 return Fcons (make_number (val), match);
942 /* I really don't know why this is interactive
943 help-form should at least be made useful whilst reading the second arg. */
944 DEFUN ("modify-syntax-entry", Fmodify_syntax_entry, Smodify_syntax_entry, 2, 3,
945 "cSet syntax for character: \nsSet syntax for %s to: ",
946 doc: /* Set syntax for character CHAR according to string NEWENTRY.
947 The syntax is changed only for table SYNTAX-TABLE, which defaults to
948 the current buffer's syntax table.
949 CHAR may be a cons (MIN . MAX), in which case, syntaxes of all characters
950 in the range MIN to MAX are changed.
951 The first character of NEWENTRY should be one of the following:
952 Space or - whitespace syntax. w word constituent.
953 _ symbol constituent. . punctuation.
954 ( open-parenthesis. ) close-parenthesis.
955 " string quote. \\ escape.
956 $ paired delimiter. ' expression quote or prefix operator.
957 < comment starter. > comment ender.
958 / character-quote. @ inherit from `standard-syntax-table'.
959 | generic string fence. ! generic comment fence.
961 Only single-character comment start and end sequences are represented thus.
962 Two-character sequences are represented as described below.
963 The second character of NEWENTRY is the matching parenthesis,
964 used only if the first character is `(' or `)'.
965 Any additional characters are flags.
966 Defined flags are the characters 1, 2, 3, 4, b, p, and n.
967 1 means CHAR is the start of a two-char comment start sequence.
968 2 means CHAR is the second character of such a sequence.
969 3 means CHAR is the start of a two-char comment end sequence.
970 4 means CHAR is the second character of such a sequence.
972 There can be up to two orthogonal comment sequences. This is to support
973 language modes such as C++. By default, all comment sequences are of style
974 a, but you can set the comment sequence style to b (on the second character
975 of a comment-start, or the first character of a comment-end sequence) using
976 this flag:
977 b means CHAR is part of comment sequence b.
978 n means CHAR is part of a nestable comment sequence.
980 p means CHAR is a prefix character for `backward-prefix-chars';
981 such characters are treated as whitespace when they occur
982 between expressions.
983 usage: (modify-syntax-entry CHAR NEWENTRY &optional SYNTAX-TABLE) */)
984 (Lisp_Object c, Lisp_Object newentry, Lisp_Object syntax_table)
986 if (CONSP (c))
988 CHECK_CHARACTER_CAR (c);
989 CHECK_CHARACTER_CDR (c);
991 else
992 CHECK_CHARACTER (c);
994 if (NILP (syntax_table))
995 syntax_table = current_buffer->syntax_table;
996 else
997 check_syntax_table (syntax_table);
999 newentry = Fstring_to_syntax (newentry);
1000 if (CONSP (c))
1001 SET_RAW_SYNTAX_ENTRY_RANGE (syntax_table, c, newentry);
1002 else
1003 SET_RAW_SYNTAX_ENTRY (syntax_table, XINT (c), newentry);
1005 /* We clear the regexp cache, since character classes can now have
1006 different values from those in the compiled regexps.*/
1007 clear_regexp_cache ();
1009 return Qnil;
1012 /* Dump syntax table to buffer in human-readable format */
1014 DEFUN ("internal-describe-syntax-value", Finternal_describe_syntax_value,
1015 Sinternal_describe_syntax_value, 1, 1, 0,
1016 doc: /* Insert a description of the internal syntax description SYNTAX at point. */)
1017 (Lisp_Object syntax)
1019 register enum syntaxcode code;
1020 char desc, start1, start2, end1, end2, prefix, comstyle, comnested;
1021 char str[2];
1022 Lisp_Object first, match_lisp, value = syntax;
1024 if (NILP (value))
1026 insert_string ("default");
1027 return syntax;
1030 if (CHAR_TABLE_P (value))
1032 insert_string ("deeper char-table ...");
1033 return syntax;
1036 if (!CONSP (value))
1038 insert_string ("invalid");
1039 return syntax;
1042 first = XCAR (value);
1043 match_lisp = XCDR (value);
1045 if (!INTEGERP (first) || !(NILP (match_lisp) || INTEGERP (match_lisp)))
1047 insert_string ("invalid");
1048 return syntax;
1051 code = (enum syntaxcode) (XINT (first) & 0377);
1052 start1 = (XINT (first) >> 16) & 1;
1053 start2 = (XINT (first) >> 17) & 1;
1054 end1 = (XINT (first) >> 18) & 1;
1055 end2 = (XINT (first) >> 19) & 1;
1056 prefix = (XINT (first) >> 20) & 1;
1057 comstyle = (XINT (first) >> 21) & 1;
1058 comnested = (XINT (first) >> 22) & 1;
1060 if ((int) code < 0 || (int) code >= (int) Smax)
1062 insert_string ("invalid");
1063 return syntax;
1065 desc = syntax_code_spec[(int) code];
1067 str[0] = desc, str[1] = 0;
1068 insert (str, 1);
1070 if (NILP (match_lisp))
1071 insert (" ", 1);
1072 else
1073 insert_char (XINT (match_lisp));
1075 if (start1)
1076 insert ("1", 1);
1077 if (start2)
1078 insert ("2", 1);
1080 if (end1)
1081 insert ("3", 1);
1082 if (end2)
1083 insert ("4", 1);
1085 if (prefix)
1086 insert ("p", 1);
1087 if (comstyle)
1088 insert ("b", 1);
1089 if (comnested)
1090 insert ("n", 1);
1092 insert_string ("\twhich means: ");
1094 switch (SWITCH_ENUM_CAST (code))
1096 case Swhitespace:
1097 insert_string ("whitespace"); break;
1098 case Spunct:
1099 insert_string ("punctuation"); break;
1100 case Sword:
1101 insert_string ("word"); break;
1102 case Ssymbol:
1103 insert_string ("symbol"); break;
1104 case Sopen:
1105 insert_string ("open"); break;
1106 case Sclose:
1107 insert_string ("close"); break;
1108 case Squote:
1109 insert_string ("prefix"); break;
1110 case Sstring:
1111 insert_string ("string"); break;
1112 case Smath:
1113 insert_string ("math"); break;
1114 case Sescape:
1115 insert_string ("escape"); break;
1116 case Scharquote:
1117 insert_string ("charquote"); break;
1118 case Scomment:
1119 insert_string ("comment"); break;
1120 case Sendcomment:
1121 insert_string ("endcomment"); break;
1122 case Sinherit:
1123 insert_string ("inherit"); break;
1124 case Scomment_fence:
1125 insert_string ("comment fence"); break;
1126 case Sstring_fence:
1127 insert_string ("string fence"); break;
1128 default:
1129 insert_string ("invalid");
1130 return syntax;
1133 if (!NILP (match_lisp))
1135 insert_string (", matches ");
1136 insert_char (XINT (match_lisp));
1139 if (start1)
1140 insert_string (",\n\t is the first character of a comment-start sequence");
1141 if (start2)
1142 insert_string (",\n\t is the second character of a comment-start sequence");
1144 if (end1)
1145 insert_string (",\n\t is the first character of a comment-end sequence");
1146 if (end2)
1147 insert_string (",\n\t is the second character of a comment-end sequence");
1148 if (comstyle)
1149 insert_string (" (comment style b)");
1150 if (comnested)
1151 insert_string (" (nestable)");
1153 if (prefix)
1154 insert_string (",\n\t is a prefix character for `backward-prefix-chars'");
1156 return syntax;
1159 int parse_sexp_ignore_comments;
1161 /* Char-table of functions that find the next or previous word
1162 boundary. */
1163 Lisp_Object Vfind_word_boundary_function_table;
1165 /* Return the position across COUNT words from FROM.
1166 If that many words cannot be found before the end of the buffer, return 0.
1167 COUNT negative means scan backward and stop at word beginning. */
1170 scan_words (register int from, register int count)
1172 register int beg = BEGV;
1173 register int end = ZV;
1174 register int from_byte = CHAR_TO_BYTE (from);
1175 register enum syntaxcode code;
1176 int ch0, ch1;
1177 Lisp_Object func, script, pos;
1179 immediate_quit = 1;
1180 QUIT;
1182 SETUP_SYNTAX_TABLE (from, count);
1184 while (count > 0)
1186 while (1)
1188 if (from == end)
1190 immediate_quit = 0;
1191 return 0;
1193 UPDATE_SYNTAX_TABLE_FORWARD (from);
1194 ch0 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
1195 code = SYNTAX (ch0);
1196 INC_BOTH (from, from_byte);
1197 if (words_include_escapes
1198 && (code == Sescape || code == Scharquote))
1199 break;
1200 if (code == Sword)
1201 break;
1203 /* Now CH0 is a character which begins a word and FROM is the
1204 position of the next character. */
1205 func = CHAR_TABLE_REF (Vfind_word_boundary_function_table, ch0);
1206 if (! NILP (Ffboundp (func)))
1208 pos = call2 (func, make_number (from - 1), make_number (end));
1209 if (INTEGERP (pos) && XINT (pos) > from)
1211 from = XINT (pos);
1212 from_byte = CHAR_TO_BYTE (from);
1215 else
1217 script = CHAR_TABLE_REF (Vchar_script_table, ch0);
1218 while (1)
1220 if (from == end) break;
1221 UPDATE_SYNTAX_TABLE_FORWARD (from);
1222 ch1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
1223 code = SYNTAX (ch1);
1224 if ((code != Sword
1225 && (! words_include_escapes
1226 || (code != Sescape && code != Scharquote)))
1227 || word_boundary_p (ch0, ch1))
1228 break;
1229 INC_BOTH (from, from_byte);
1230 ch0 = ch1;
1233 count--;
1235 while (count < 0)
1237 while (1)
1239 if (from == beg)
1241 immediate_quit = 0;
1242 return 0;
1244 DEC_BOTH (from, from_byte);
1245 UPDATE_SYNTAX_TABLE_BACKWARD (from);
1246 ch1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
1247 code = SYNTAX (ch1);
1248 if (words_include_escapes
1249 && (code == Sescape || code == Scharquote))
1250 break;
1251 if (code == Sword)
1252 break;
1254 /* Now CH1 is a character which ends a word and FROM is the
1255 position of it. */
1256 func = CHAR_TABLE_REF (Vfind_word_boundary_function_table, ch1);
1257 if (! NILP (Ffboundp (func)))
1259 pos = call2 (func, make_number (from), make_number (beg));
1260 if (INTEGERP (pos) && XINT (pos) < from)
1262 from = XINT (pos);
1263 from_byte = CHAR_TO_BYTE (from);
1266 else
1268 script = CHAR_TABLE_REF (Vchar_script_table, ch1);
1269 while (1)
1271 if (from == beg)
1272 break;
1273 DEC_BOTH (from, from_byte);
1274 UPDATE_SYNTAX_TABLE_BACKWARD (from);
1275 ch0 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
1276 code = SYNTAX (ch0);
1277 if ((code != Sword
1278 && (! words_include_escapes
1279 || (code != Sescape && code != Scharquote)))
1280 || word_boundary_p (ch0, ch1))
1282 INC_BOTH (from, from_byte);
1283 break;
1285 ch1 = ch0;
1288 count++;
1291 immediate_quit = 0;
1293 return from;
1296 DEFUN ("forward-word", Fforward_word, Sforward_word, 0, 1, "^p",
1297 doc: /* Move point forward ARG words (backward if ARG is negative).
1298 Normally returns t.
1299 If an edge of the buffer or a field boundary is reached, point is left there
1300 and the function returns nil. Field boundaries are not noticed if
1301 `inhibit-field-text-motion' is non-nil. */)
1302 (Lisp_Object arg)
1304 Lisp_Object tmp;
1305 int orig_val, val;
1307 if (NILP (arg))
1308 XSETFASTINT (arg, 1);
1309 else
1310 CHECK_NUMBER (arg);
1312 val = orig_val = scan_words (PT, XINT (arg));
1313 if (! orig_val)
1314 val = XINT (arg) > 0 ? ZV : BEGV;
1316 /* Avoid jumping out of an input field. */
1317 tmp = Fconstrain_to_field (make_number (val), make_number (PT),
1318 Qt, Qnil, Qnil);
1319 val = XFASTINT (tmp);
1321 SET_PT (val);
1322 return val == orig_val ? Qt : Qnil;
1325 Lisp_Object skip_chars (int, Lisp_Object, Lisp_Object, int);
1327 DEFUN ("skip-chars-forward", Fskip_chars_forward, Sskip_chars_forward, 1, 2, 0,
1328 doc: /* Move point forward, stopping before a char not in STRING, or at pos LIM.
1329 STRING is like the inside of a `[...]' in a regular expression
1330 except that `]' is never special and `\\' quotes `^', `-' or `\\'
1331 (but not at the end of a range; quoting is never needed there).
1332 Thus, with arg "a-zA-Z", this skips letters stopping before first nonletter.
1333 With arg "^a-zA-Z", skips nonletters stopping before first letter.
1334 Char classes, e.g. `[:alpha:]', are supported.
1336 Returns the distance traveled, either zero or positive. */)
1337 (Lisp_Object string, Lisp_Object lim)
1339 return skip_chars (1, string, lim, 1);
1342 DEFUN ("skip-chars-backward", Fskip_chars_backward, Sskip_chars_backward, 1, 2, 0,
1343 doc: /* Move point backward, stopping after a char not in STRING, or at pos LIM.
1344 See `skip-chars-forward' for details.
1345 Returns the distance traveled, either zero or negative. */)
1346 (Lisp_Object string, Lisp_Object lim)
1348 return skip_chars (0, string, lim, 1);
1351 DEFUN ("skip-syntax-forward", Fskip_syntax_forward, Sskip_syntax_forward, 1, 2, 0,
1352 doc: /* Move point forward across chars in specified syntax classes.
1353 SYNTAX is a string of syntax code characters.
1354 Stop before a char whose syntax is not in SYNTAX, or at position LIM.
1355 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1356 This function returns the distance traveled, either zero or positive. */)
1357 (Lisp_Object syntax, Lisp_Object lim)
1359 return skip_syntaxes (1, syntax, lim);
1362 DEFUN ("skip-syntax-backward", Fskip_syntax_backward, Sskip_syntax_backward, 1, 2, 0,
1363 doc: /* Move point backward across chars in specified syntax classes.
1364 SYNTAX is a string of syntax code characters.
1365 Stop on reaching a char whose syntax is not in SYNTAX, or at position LIM.
1366 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1367 This function returns the distance traveled, either zero or negative. */)
1368 (Lisp_Object syntax, Lisp_Object lim)
1370 return skip_syntaxes (0, syntax, lim);
1373 static Lisp_Object
1374 skip_chars (int forwardp, Lisp_Object string, Lisp_Object lim, int handle_iso_classes)
1376 register unsigned int c;
1377 unsigned char fastmap[0400];
1378 /* Store the ranges of non-ASCII characters. */
1379 int *char_ranges;
1380 int n_char_ranges = 0;
1381 int negate = 0;
1382 register int i, i_byte;
1383 /* Set to 1 if the current buffer is multibyte and the region
1384 contains non-ASCII chars. */
1385 int multibyte;
1386 /* Set to 1 if STRING is multibyte and it contains non-ASCII
1387 chars. */
1388 int string_multibyte;
1389 int size_byte;
1390 const unsigned char *str;
1391 int len;
1392 Lisp_Object iso_classes;
1394 CHECK_STRING (string);
1395 iso_classes = Qnil;
1397 if (NILP (lim))
1398 XSETINT (lim, forwardp ? ZV : BEGV);
1399 else
1400 CHECK_NUMBER_COERCE_MARKER (lim);
1402 /* In any case, don't allow scan outside bounds of buffer. */
1403 if (XINT (lim) > ZV)
1404 XSETFASTINT (lim, ZV);
1405 if (XINT (lim) < BEGV)
1406 XSETFASTINT (lim, BEGV);
1408 multibyte = (!NILP (current_buffer->enable_multibyte_characters)
1409 && (XINT (lim) - PT != CHAR_TO_BYTE (XINT (lim)) - PT_BYTE));
1410 string_multibyte = SBYTES (string) > SCHARS (string);
1412 memset (fastmap, 0, sizeof fastmap);
1414 str = SDATA (string);
1415 size_byte = SBYTES (string);
1417 i_byte = 0;
1418 if (i_byte < size_byte
1419 && SREF (string, 0) == '^')
1421 negate = 1; i_byte++;
1424 /* Find the characters specified and set their elements of fastmap.
1425 Handle backslashes and ranges specially.
1427 If STRING contains non-ASCII characters, setup char_ranges for
1428 them and use fastmap only for their leading codes. */
1430 if (! string_multibyte)
1432 int string_has_eight_bit = 0;
1434 /* At first setup fastmap. */
1435 while (i_byte < size_byte)
1437 c = str[i_byte++];
1439 if (handle_iso_classes && c == '['
1440 && i_byte < size_byte
1441 && str[i_byte] == ':')
1443 const unsigned char *class_beg = str + i_byte + 1;
1444 const unsigned char *class_end = class_beg;
1445 const unsigned char *class_limit = str + size_byte - 2;
1446 /* Leave room for the null. */
1447 unsigned char class_name[CHAR_CLASS_MAX_LENGTH + 1];
1448 re_wctype_t cc;
1450 if (class_limit - class_beg > CHAR_CLASS_MAX_LENGTH)
1451 class_limit = class_beg + CHAR_CLASS_MAX_LENGTH;
1453 while (class_end < class_limit
1454 && *class_end >= 'a' && *class_end <= 'z')
1455 class_end++;
1457 if (class_end == class_beg
1458 || *class_end != ':' || class_end[1] != ']')
1459 goto not_a_class_name;
1461 memcpy (class_name, class_beg, class_end - class_beg);
1462 class_name[class_end - class_beg] = 0;
1464 cc = re_wctype (class_name);
1465 if (cc == 0)
1466 error ("Invalid ISO C character class");
1468 iso_classes = Fcons (make_number (cc), iso_classes);
1470 i_byte = class_end + 2 - str;
1471 continue;
1474 not_a_class_name:
1475 if (c == '\\')
1477 if (i_byte == size_byte)
1478 break;
1480 c = str[i_byte++];
1482 /* Treat `-' as range character only if another character
1483 follows. */
1484 if (i_byte + 1 < size_byte
1485 && str[i_byte] == '-')
1487 unsigned int c2;
1489 /* Skip over the dash. */
1490 i_byte++;
1492 /* Get the end of the range. */
1493 c2 = str[i_byte++];
1494 if (c2 == '\\'
1495 && i_byte < size_byte)
1496 c2 = str[i_byte++];
1498 if (c <= c2)
1500 while (c <= c2)
1501 fastmap[c++] = 1;
1502 if (! ASCII_CHAR_P (c2))
1503 string_has_eight_bit = 1;
1506 else
1508 fastmap[c] = 1;
1509 if (! ASCII_CHAR_P (c))
1510 string_has_eight_bit = 1;
1514 /* If the current range is multibyte and STRING contains
1515 eight-bit chars, arrange fastmap and setup char_ranges for
1516 the corresponding multibyte chars. */
1517 if (multibyte && string_has_eight_bit)
1519 unsigned char fastmap2[0400];
1520 int range_start_byte, range_start_char;
1522 memcpy (fastmap + 0200, fastmap2 + 0200, 0200);
1523 memset (fastmap + 0200, 0, 0200);
1524 /* We are sure that this loop stops. */
1525 for (i = 0200; ! fastmap2[i]; i++);
1526 c = BYTE8_TO_CHAR (i);
1527 fastmap[CHAR_LEADING_CODE (c)] = 1;
1528 range_start_byte = i;
1529 range_start_char = c;
1530 char_ranges = (int *) alloca (sizeof (int) * 128 * 2);
1531 for (i = 129; i < 0400; i++)
1533 c = BYTE8_TO_CHAR (i);
1534 fastmap[CHAR_LEADING_CODE (c)] = 1;
1535 if (i - range_start_byte != c - range_start_char)
1537 char_ranges[n_char_ranges++] = range_start_char;
1538 char_ranges[n_char_ranges++] = ((i - 1 - range_start_byte)
1539 + range_start_char);
1540 range_start_byte = i;
1541 range_start_char = c;
1544 char_ranges[n_char_ranges++] = range_start_char;
1545 char_ranges[n_char_ranges++] = ((i - 1 - range_start_byte)
1546 + range_start_char);
1549 else /* STRING is multibyte */
1551 char_ranges = (int *) alloca (sizeof (int) * SCHARS (string) * 2);
1553 while (i_byte < size_byte)
1555 unsigned char leading_code;
1557 leading_code = str[i_byte];
1558 c = STRING_CHAR_AND_LENGTH (str + i_byte, len);
1559 i_byte += len;
1561 if (handle_iso_classes && c == '['
1562 && i_byte < size_byte
1563 && STRING_CHAR (str + i_byte) == ':')
1565 const unsigned char *class_beg = str + i_byte + 1;
1566 const unsigned char *class_end = class_beg;
1567 const unsigned char *class_limit = str + size_byte - 2;
1568 /* Leave room for the null. */
1569 unsigned char class_name[CHAR_CLASS_MAX_LENGTH + 1];
1570 re_wctype_t cc;
1572 if (class_limit - class_beg > CHAR_CLASS_MAX_LENGTH)
1573 class_limit = class_beg + CHAR_CLASS_MAX_LENGTH;
1575 while (class_end < class_limit
1576 && *class_end >= 'a' && *class_end <= 'z')
1577 class_end++;
1579 if (class_end == class_beg
1580 || *class_end != ':' || class_end[1] != ']')
1581 goto not_a_class_name_multibyte;
1583 memcpy (class_name, class_beg, class_end - class_beg);
1584 class_name[class_end - class_beg] = 0;
1586 cc = re_wctype (class_name);
1587 if (cc == 0)
1588 error ("Invalid ISO C character class");
1590 iso_classes = Fcons (make_number (cc), iso_classes);
1592 i_byte = class_end + 2 - str;
1593 continue;
1596 not_a_class_name_multibyte:
1597 if (c == '\\')
1599 if (i_byte == size_byte)
1600 break;
1602 leading_code = str[i_byte];
1603 c = STRING_CHAR_AND_LENGTH (str + i_byte, len);
1604 i_byte += len;
1606 /* Treat `-' as range character only if another character
1607 follows. */
1608 if (i_byte + 1 < size_byte
1609 && str[i_byte] == '-')
1611 unsigned int c2;
1612 unsigned char leading_code2;
1614 /* Skip over the dash. */
1615 i_byte++;
1617 /* Get the end of the range. */
1618 leading_code2 = str[i_byte];
1619 c2 = STRING_CHAR_AND_LENGTH (str + i_byte, len);
1620 i_byte += len;
1622 if (c2 == '\\'
1623 && i_byte < size_byte)
1625 leading_code2 = str[i_byte];
1626 c2 =STRING_CHAR_AND_LENGTH (str + i_byte, len);
1627 i_byte += len;
1630 if (c > c2)
1631 continue;
1632 if (ASCII_CHAR_P (c))
1634 while (c <= c2 && c < 0x80)
1635 fastmap[c++] = 1;
1636 leading_code = CHAR_LEADING_CODE (c);
1638 if (! ASCII_CHAR_P (c))
1640 while (leading_code <= leading_code2)
1641 fastmap[leading_code++] = 1;
1642 if (c <= c2)
1644 char_ranges[n_char_ranges++] = c;
1645 char_ranges[n_char_ranges++] = c2;
1649 else
1651 if (ASCII_CHAR_P (c))
1652 fastmap[c] = 1;
1653 else
1655 fastmap[leading_code] = 1;
1656 char_ranges[n_char_ranges++] = c;
1657 char_ranges[n_char_ranges++] = c;
1662 /* If the current range is unibyte and STRING contains non-ASCII
1663 chars, arrange fastmap for the corresponding unibyte
1664 chars. */
1666 if (! multibyte && n_char_ranges > 0)
1668 memset (fastmap + 0200, 0, 0200);
1669 for (i = 0; i < n_char_ranges; i += 2)
1671 int c1 = char_ranges[i];
1672 int c2 = char_ranges[i + 1];
1674 for (; c1 <= c2; c1++)
1676 int b = CHAR_TO_BYTE_SAFE (c1);
1677 if (b >= 0)
1678 fastmap[b] = 1;
1684 /* If ^ was the first character, complement the fastmap. */
1685 if (negate)
1687 if (! multibyte)
1688 for (i = 0; i < sizeof fastmap; i++)
1689 fastmap[i] ^= 1;
1690 else
1692 for (i = 0; i < 0200; i++)
1693 fastmap[i] ^= 1;
1694 /* All non-ASCII chars possibly match. */
1695 for (; i < sizeof fastmap; i++)
1696 fastmap[i] = 1;
1701 int start_point = PT;
1702 int pos = PT;
1703 int pos_byte = PT_BYTE;
1704 unsigned char *p = PT_ADDR, *endp, *stop;
1706 if (forwardp)
1708 endp = (XINT (lim) == GPT) ? GPT_ADDR : CHAR_POS_ADDR (XINT (lim));
1709 stop = (pos < GPT && GPT < XINT (lim)) ? GPT_ADDR : endp;
1711 else
1713 endp = CHAR_POS_ADDR (XINT (lim));
1714 stop = (pos >= GPT && GPT > XINT (lim)) ? GAP_END_ADDR : endp;
1717 immediate_quit = 1;
1718 /* This code may look up syntax tables using macros that rely on the
1719 gl_state object. To make sure this object is not out of date,
1720 let's initialize it manually.
1721 We ignore syntax-table text-properties for now, since that's
1722 what we've done in the past. */
1723 SETUP_BUFFER_SYNTAX_TABLE ();
1724 if (forwardp)
1726 if (multibyte)
1727 while (1)
1729 int nbytes;
1731 if (p >= stop)
1733 if (p >= endp)
1734 break;
1735 p = GAP_END_ADDR;
1736 stop = endp;
1738 c = STRING_CHAR_AND_LENGTH (p, nbytes);
1739 if (! NILP (iso_classes) && in_classes (c, iso_classes))
1741 if (negate)
1742 break;
1743 else
1744 goto fwd_ok;
1747 if (! fastmap[*p])
1748 break;
1749 if (! ASCII_CHAR_P (c))
1751 /* As we are looking at a multibyte character, we
1752 must look up the character in the table
1753 CHAR_RANGES. If there's no data in the table,
1754 that character is not what we want to skip. */
1756 /* The following code do the right thing even if
1757 n_char_ranges is zero (i.e. no data in
1758 CHAR_RANGES). */
1759 for (i = 0; i < n_char_ranges; i += 2)
1760 if (c >= char_ranges[i] && c <= char_ranges[i + 1])
1761 break;
1762 if (!(negate ^ (i < n_char_ranges)))
1763 break;
1765 fwd_ok:
1766 p += nbytes, pos++, pos_byte += nbytes;
1768 else
1769 while (1)
1771 if (p >= stop)
1773 if (p >= endp)
1774 break;
1775 p = GAP_END_ADDR;
1776 stop = endp;
1779 if (!NILP (iso_classes) && in_classes (*p, iso_classes))
1781 if (negate)
1782 break;
1783 else
1784 goto fwd_unibyte_ok;
1787 if (!fastmap[*p])
1788 break;
1789 fwd_unibyte_ok:
1790 p++, pos++, pos_byte++;
1793 else
1795 if (multibyte)
1796 while (1)
1798 unsigned char *prev_p;
1800 if (p <= stop)
1802 if (p <= endp)
1803 break;
1804 p = GPT_ADDR;
1805 stop = endp;
1807 prev_p = p;
1808 while (--p >= stop && ! CHAR_HEAD_P (*p));
1809 c = STRING_CHAR (p);
1811 if (! NILP (iso_classes) && in_classes (c, iso_classes))
1813 if (negate)
1814 break;
1815 else
1816 goto back_ok;
1819 if (! fastmap[*p])
1820 break;
1821 if (! ASCII_CHAR_P (c))
1823 /* See the comment in the previous similar code. */
1824 for (i = 0; i < n_char_ranges; i += 2)
1825 if (c >= char_ranges[i] && c <= char_ranges[i + 1])
1826 break;
1827 if (!(negate ^ (i < n_char_ranges)))
1828 break;
1830 back_ok:
1831 pos--, pos_byte -= prev_p - p;
1833 else
1834 while (1)
1836 if (p <= stop)
1838 if (p <= endp)
1839 break;
1840 p = GPT_ADDR;
1841 stop = endp;
1844 if (! NILP (iso_classes) && in_classes (p[-1], iso_classes))
1846 if (negate)
1847 break;
1848 else
1849 goto back_unibyte_ok;
1852 if (!fastmap[p[-1]])
1853 break;
1854 back_unibyte_ok:
1855 p--, pos--, pos_byte--;
1859 SET_PT_BOTH (pos, pos_byte);
1860 immediate_quit = 0;
1862 return make_number (PT - start_point);
1867 static Lisp_Object
1868 skip_syntaxes (int forwardp, Lisp_Object string, Lisp_Object lim)
1870 register unsigned int c;
1871 unsigned char fastmap[0400];
1872 int negate = 0;
1873 register int i, i_byte;
1874 int multibyte;
1875 int size_byte;
1876 unsigned char *str;
1878 CHECK_STRING (string);
1880 if (NILP (lim))
1881 XSETINT (lim, forwardp ? ZV : BEGV);
1882 else
1883 CHECK_NUMBER_COERCE_MARKER (lim);
1885 /* In any case, don't allow scan outside bounds of buffer. */
1886 if (XINT (lim) > ZV)
1887 XSETFASTINT (lim, ZV);
1888 if (XINT (lim) < BEGV)
1889 XSETFASTINT (lim, BEGV);
1891 if (forwardp ? (PT >= XFASTINT (lim)) : (PT <= XFASTINT (lim)))
1892 return make_number (0);
1894 multibyte = (!NILP (current_buffer->enable_multibyte_characters)
1895 && (XINT (lim) - PT != CHAR_TO_BYTE (XINT (lim)) - PT_BYTE));
1897 memset (fastmap, 0, sizeof fastmap);
1899 if (SBYTES (string) > SCHARS (string))
1900 /* As this is very rare case (syntax spec is ASCII only), don't
1901 consider efficiency. */
1902 string = string_make_unibyte (string);
1904 str = SDATA (string);
1905 size_byte = SBYTES (string);
1907 i_byte = 0;
1908 if (i_byte < size_byte
1909 && SREF (string, 0) == '^')
1911 negate = 1; i_byte++;
1914 /* Find the syntaxes specified and set their elements of fastmap. */
1916 while (i_byte < size_byte)
1918 c = str[i_byte++];
1919 fastmap[syntax_spec_code[c]] = 1;
1922 /* If ^ was the first character, complement the fastmap. */
1923 if (negate)
1924 for (i = 0; i < sizeof fastmap; i++)
1925 fastmap[i] ^= 1;
1928 int start_point = PT;
1929 int pos = PT;
1930 int pos_byte = PT_BYTE;
1931 unsigned char *p = PT_ADDR, *endp, *stop;
1933 if (forwardp)
1935 endp = (XINT (lim) == GPT) ? GPT_ADDR : CHAR_POS_ADDR (XINT (lim));
1936 stop = (pos < GPT && GPT < XINT (lim)) ? GPT_ADDR : endp;
1938 else
1940 endp = CHAR_POS_ADDR (XINT (lim));
1941 stop = (pos >= GPT && GPT > XINT (lim)) ? GAP_END_ADDR : endp;
1944 immediate_quit = 1;
1945 SETUP_SYNTAX_TABLE (pos, forwardp ? 1 : -1);
1946 if (forwardp)
1948 if (multibyte)
1950 while (1)
1952 int nbytes;
1954 if (p >= stop)
1956 if (p >= endp)
1957 break;
1958 p = GAP_END_ADDR;
1959 stop = endp;
1961 c = STRING_CHAR_AND_LENGTH (p, nbytes);
1962 if (! fastmap[(int) SYNTAX (c)])
1963 break;
1964 p += nbytes, pos++, pos_byte += nbytes;
1965 UPDATE_SYNTAX_TABLE_FORWARD (pos);
1968 else
1970 while (1)
1972 if (p >= stop)
1974 if (p >= endp)
1975 break;
1976 p = GAP_END_ADDR;
1977 stop = endp;
1979 if (! fastmap[(int) SYNTAX (*p)])
1980 break;
1981 p++, pos++, pos_byte++;
1982 UPDATE_SYNTAX_TABLE_FORWARD (pos);
1986 else
1988 if (multibyte)
1990 while (1)
1992 unsigned char *prev_p;
1994 if (p <= stop)
1996 if (p <= endp)
1997 break;
1998 p = GPT_ADDR;
1999 stop = endp;
2001 UPDATE_SYNTAX_TABLE_BACKWARD (pos - 1);
2002 prev_p = p;
2003 while (--p >= stop && ! CHAR_HEAD_P (*p));
2004 c = STRING_CHAR (p);
2005 if (! fastmap[(int) SYNTAX (c)])
2006 break;
2007 pos--, pos_byte -= prev_p - p;
2010 else
2012 while (1)
2014 if (p <= stop)
2016 if (p <= endp)
2017 break;
2018 p = GPT_ADDR;
2019 stop = endp;
2021 UPDATE_SYNTAX_TABLE_BACKWARD (pos - 1);
2022 if (! fastmap[(int) SYNTAX (p[-1])])
2023 break;
2024 p--, pos--, pos_byte--;
2029 SET_PT_BOTH (pos, pos_byte);
2030 immediate_quit = 0;
2032 return make_number (PT - start_point);
2036 /* Return 1 if character C belongs to one of the ISO classes
2037 in the list ISO_CLASSES. Each class is represented by an
2038 integer which is its type according to re_wctype. */
2040 static int
2041 in_classes (int c, Lisp_Object iso_classes)
2043 int fits_class = 0;
2045 while (CONSP (iso_classes))
2047 Lisp_Object elt;
2048 elt = XCAR (iso_classes);
2049 iso_classes = XCDR (iso_classes);
2051 if (re_iswctype (c, XFASTINT (elt)))
2052 fits_class = 1;
2055 return fits_class;
2058 /* Jump over a comment, assuming we are at the beginning of one.
2059 FROM is the current position.
2060 FROM_BYTE is the bytepos corresponding to FROM.
2061 Do not move past STOP (a charpos).
2062 The comment over which we have to jump is of style STYLE
2063 (either SYNTAX_COMMENT_STYLE(foo) or ST_COMMENT_STYLE).
2064 NESTING should be positive to indicate the nesting at the beginning
2065 for nested comments and should be zero or negative else.
2066 ST_COMMENT_STYLE cannot be nested.
2067 PREV_SYNTAX is the SYNTAX_WITH_FLAGS of the previous character
2068 (or 0 If the search cannot start in the middle of a two-character).
2070 If successful, return 1 and store the charpos of the comment's end
2071 into *CHARPOS_PTR and the corresponding bytepos into *BYTEPOS_PTR.
2072 Else, return 0 and store the charpos STOP into *CHARPOS_PTR, the
2073 corresponding bytepos into *BYTEPOS_PTR and the current nesting
2074 (as defined for state.incomment) in *INCOMMENT_PTR.
2076 The comment end is the last character of the comment rather than the
2077 character just after the comment.
2079 Global syntax data is assumed to initially be valid for FROM and
2080 remains valid for forward search starting at the returned position. */
2082 static int
2083 forw_comment (EMACS_INT from, EMACS_INT from_byte, EMACS_INT stop,
2084 int nesting, int style, int prev_syntax,
2085 EMACS_INT *charpos_ptr, EMACS_INT *bytepos_ptr,
2086 int *incomment_ptr)
2088 register int c, c1;
2089 register enum syntaxcode code;
2090 register int syntax;
2092 if (nesting <= 0) nesting = -1;
2094 /* Enter the loop in the middle so that we find
2095 a 2-char comment ender if we start in the middle of it. */
2096 syntax = prev_syntax;
2097 if (syntax != 0) goto forw_incomment;
2099 while (1)
2101 if (from == stop)
2103 *incomment_ptr = nesting;
2104 *charpos_ptr = from;
2105 *bytepos_ptr = from_byte;
2106 return 0;
2108 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2109 syntax = SYNTAX_WITH_FLAGS (c);
2110 code = syntax & 0xff;
2111 if (code == Sendcomment
2112 && SYNTAX_FLAGS_COMMENT_STYLE (syntax) == style
2113 && (SYNTAX_FLAGS_COMMENT_NESTED (syntax) ?
2114 (nesting > 0 && --nesting == 0) : nesting < 0))
2115 /* we have encountered a comment end of the same style
2116 as the comment sequence which began this comment
2117 section */
2118 break;
2119 if (code == Scomment_fence
2120 && style == ST_COMMENT_STYLE)
2121 /* we have encountered a comment end of the same style
2122 as the comment sequence which began this comment
2123 section. */
2124 break;
2125 if (nesting > 0
2126 && code == Scomment
2127 && SYNTAX_FLAGS_COMMENT_NESTED (syntax)
2128 && SYNTAX_FLAGS_COMMENT_STYLE (syntax) == style)
2129 /* we have encountered a nested comment of the same style
2130 as the comment sequence which began this comment section */
2131 nesting++;
2132 INC_BOTH (from, from_byte);
2133 UPDATE_SYNTAX_TABLE_FORWARD (from);
2135 forw_incomment:
2136 if (from < stop && SYNTAX_FLAGS_COMEND_FIRST (syntax)
2137 && SYNTAX_FLAGS_COMMENT_STYLE (syntax) == style
2138 && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
2139 SYNTAX_COMEND_SECOND (c1))
2140 && ((SYNTAX_FLAGS_COMMENT_NESTED (syntax) ||
2141 SYNTAX_COMMENT_NESTED (c1)) ? nesting > 0 : nesting < 0))
2143 if (--nesting <= 0)
2144 /* we have encountered a comment end of the same style
2145 as the comment sequence which began this comment
2146 section */
2147 break;
2148 else
2150 INC_BOTH (from, from_byte);
2151 UPDATE_SYNTAX_TABLE_FORWARD (from);
2154 if (nesting > 0
2155 && from < stop
2156 && SYNTAX_FLAGS_COMSTART_FIRST (syntax)
2157 && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
2158 SYNTAX_COMMENT_STYLE (c1) == style
2159 && SYNTAX_COMSTART_SECOND (c1))
2160 && (SYNTAX_FLAGS_COMMENT_NESTED (syntax) ||
2161 SYNTAX_COMMENT_NESTED (c1)))
2162 /* we have encountered a nested comment of the same style
2163 as the comment sequence which began this comment
2164 section */
2166 INC_BOTH (from, from_byte);
2167 UPDATE_SYNTAX_TABLE_FORWARD (from);
2168 nesting++;
2171 *charpos_ptr = from;
2172 *bytepos_ptr = from_byte;
2173 return 1;
2176 DEFUN ("forward-comment", Fforward_comment, Sforward_comment, 1, 1, 0,
2177 doc: /*
2178 Move forward across up to COUNT comments. If COUNT is negative, move backward.
2179 Stop scanning if we find something other than a comment or whitespace.
2180 Set point to where scanning stops.
2181 If COUNT comments are found as expected, with nothing except whitespace
2182 between them, return t; otherwise return nil. */)
2183 (Lisp_Object count)
2185 register EMACS_INT from;
2186 EMACS_INT from_byte;
2187 register EMACS_INT stop;
2188 register int c, c1;
2189 register enum syntaxcode code;
2190 int comstyle = 0; /* style of comment encountered */
2191 int comnested = 0; /* whether the comment is nestable or not */
2192 int found;
2193 EMACS_INT count1;
2194 EMACS_INT out_charpos, out_bytepos;
2195 int dummy;
2197 CHECK_NUMBER (count);
2198 count1 = XINT (count);
2199 stop = count1 > 0 ? ZV : BEGV;
2201 immediate_quit = 1;
2202 QUIT;
2204 from = PT;
2205 from_byte = PT_BYTE;
2207 SETUP_SYNTAX_TABLE (from, count1);
2208 while (count1 > 0)
2212 int comstart_first;
2214 if (from == stop)
2216 SET_PT_BOTH (from, from_byte);
2217 immediate_quit = 0;
2218 return Qnil;
2220 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2221 code = SYNTAX (c);
2222 comstart_first = SYNTAX_COMSTART_FIRST (c);
2223 comnested = SYNTAX_COMMENT_NESTED (c);
2224 comstyle = SYNTAX_COMMENT_STYLE (c);
2225 INC_BOTH (from, from_byte);
2226 UPDATE_SYNTAX_TABLE_FORWARD (from);
2227 if (from < stop && comstart_first
2228 && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
2229 SYNTAX_COMSTART_SECOND (c1)))
2231 /* We have encountered a comment start sequence and we
2232 are ignoring all text inside comments. We must record
2233 the comment style this sequence begins so that later,
2234 only a comment end of the same style actually ends
2235 the comment section. */
2236 code = Scomment;
2237 comstyle = SYNTAX_COMMENT_STYLE (c1);
2238 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
2239 INC_BOTH (from, from_byte);
2240 UPDATE_SYNTAX_TABLE_FORWARD (from);
2243 while (code == Swhitespace || (code == Sendcomment && c == '\n'));
2245 if (code == Scomment_fence)
2246 comstyle = ST_COMMENT_STYLE;
2247 else if (code != Scomment)
2249 immediate_quit = 0;
2250 DEC_BOTH (from, from_byte);
2251 SET_PT_BOTH (from, from_byte);
2252 return Qnil;
2254 /* We're at the start of a comment. */
2255 found = forw_comment (from, from_byte, stop, comnested, comstyle, 0,
2256 &out_charpos, &out_bytepos, &dummy);
2257 from = out_charpos; from_byte = out_bytepos;
2258 if (!found)
2260 immediate_quit = 0;
2261 SET_PT_BOTH (from, from_byte);
2262 return Qnil;
2264 INC_BOTH (from, from_byte);
2265 UPDATE_SYNTAX_TABLE_FORWARD (from);
2266 /* We have skipped one comment. */
2267 count1--;
2270 while (count1 < 0)
2272 while (1)
2274 int quoted;
2276 if (from <= stop)
2278 SET_PT_BOTH (BEGV, BEGV_BYTE);
2279 immediate_quit = 0;
2280 return Qnil;
2283 DEC_BOTH (from, from_byte);
2284 /* char_quoted does UPDATE_SYNTAX_TABLE_BACKWARD (from). */
2285 quoted = char_quoted (from, from_byte);
2286 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2287 code = SYNTAX (c);
2288 comstyle = 0;
2289 comnested = SYNTAX_COMMENT_NESTED (c);
2290 if (code == Sendcomment)
2291 comstyle = SYNTAX_COMMENT_STYLE (c);
2292 if (from > stop && SYNTAX_COMEND_SECOND (c)
2293 && prev_char_comend_first (from, from_byte)
2294 && !char_quoted (from - 1, dec_bytepos (from_byte)))
2296 /* We must record the comment style encountered so that
2297 later, we can match only the proper comment begin
2298 sequence of the same style. */
2299 DEC_BOTH (from, from_byte);
2300 code = Sendcomment;
2301 /* Calling char_quoted, above, set up global syntax position
2302 at the new value of FROM. */
2303 c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2304 comstyle = SYNTAX_COMMENT_STYLE (c1);
2305 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
2308 if (code == Scomment_fence)
2310 /* Skip until first preceding unquoted comment_fence. */
2311 int found = 0, ini = from, ini_byte = from_byte;
2313 while (1)
2315 DEC_BOTH (from, from_byte);
2316 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2317 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2318 if (SYNTAX (c) == Scomment_fence
2319 && !char_quoted (from, from_byte))
2321 found = 1;
2322 break;
2324 else if (from == stop)
2325 break;
2327 if (found == 0)
2329 from = ini; /* Set point to ini + 1. */
2330 from_byte = ini_byte;
2331 goto leave;
2333 else
2334 /* We have skipped one comment. */
2335 break;
2337 else if (code == Sendcomment)
2339 found = back_comment (from, from_byte, stop, comnested, comstyle,
2340 &out_charpos, &out_bytepos);
2341 if (found == -1)
2343 if (c == '\n')
2344 /* This end-of-line is not an end-of-comment.
2345 Treat it like a whitespace.
2346 CC-mode (and maybe others) relies on this behavior. */
2348 else
2350 /* Failure: we should go back to the end of this
2351 not-quite-endcomment. */
2352 if (SYNTAX(c) != code)
2353 /* It was a two-char Sendcomment. */
2354 INC_BOTH (from, from_byte);
2355 goto leave;
2358 else
2360 /* We have skipped one comment. */
2361 from = out_charpos, from_byte = out_bytepos;
2362 break;
2365 else if (code != Swhitespace || quoted)
2367 leave:
2368 immediate_quit = 0;
2369 INC_BOTH (from, from_byte);
2370 SET_PT_BOTH (from, from_byte);
2371 return Qnil;
2375 count1++;
2378 SET_PT_BOTH (from, from_byte);
2379 immediate_quit = 0;
2380 return Qt;
2383 /* Return syntax code of character C if C is an ASCII character
2384 or `multibyte_symbol_p' is zero. Otherwise, return Ssymbol. */
2386 #define SYNTAX_WITH_MULTIBYTE_CHECK(c) \
2387 ((ASCII_CHAR_P (c) || !multibyte_symbol_p) \
2388 ? SYNTAX (c) : Ssymbol)
2390 static Lisp_Object
2391 scan_lists (register EMACS_INT from, EMACS_INT count, EMACS_INT depth, int sexpflag)
2393 Lisp_Object val;
2394 register EMACS_INT stop = count > 0 ? ZV : BEGV;
2395 register int c, c1;
2396 int stringterm;
2397 int quoted;
2398 int mathexit = 0;
2399 register enum syntaxcode code, temp_code;
2400 int min_depth = depth; /* Err out if depth gets less than this. */
2401 int comstyle = 0; /* style of comment encountered */
2402 int comnested = 0; /* whether the comment is nestable or not */
2403 EMACS_INT temp_pos;
2404 EMACS_INT last_good = from;
2405 int found;
2406 EMACS_INT from_byte;
2407 EMACS_INT out_bytepos, out_charpos;
2408 int temp, dummy;
2409 int multibyte_symbol_p = sexpflag && multibyte_syntax_as_symbol;
2411 if (depth > 0) min_depth = 0;
2413 if (from > ZV) from = ZV;
2414 if (from < BEGV) from = BEGV;
2416 from_byte = CHAR_TO_BYTE (from);
2418 immediate_quit = 1;
2419 QUIT;
2421 SETUP_SYNTAX_TABLE (from, count);
2422 while (count > 0)
2424 while (from < stop)
2426 int comstart_first, prefix;
2427 UPDATE_SYNTAX_TABLE_FORWARD (from);
2428 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2429 code = SYNTAX_WITH_MULTIBYTE_CHECK (c);
2430 comstart_first = SYNTAX_COMSTART_FIRST (c);
2431 comnested = SYNTAX_COMMENT_NESTED (c);
2432 comstyle = SYNTAX_COMMENT_STYLE (c);
2433 prefix = SYNTAX_PREFIX (c);
2434 if (depth == min_depth)
2435 last_good = from;
2436 INC_BOTH (from, from_byte);
2437 UPDATE_SYNTAX_TABLE_FORWARD (from);
2438 if (from < stop && comstart_first
2439 && (c = FETCH_CHAR_AS_MULTIBYTE (from_byte),
2440 SYNTAX_COMSTART_SECOND (c))
2441 && parse_sexp_ignore_comments)
2443 /* we have encountered a comment start sequence and we
2444 are ignoring all text inside comments. We must record
2445 the comment style this sequence begins so that later,
2446 only a comment end of the same style actually ends
2447 the comment section */
2448 code = Scomment;
2449 c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2450 comstyle = SYNTAX_COMMENT_STYLE (c1);
2451 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
2452 INC_BOTH (from, from_byte);
2453 UPDATE_SYNTAX_TABLE_FORWARD (from);
2456 if (prefix)
2457 continue;
2459 switch (SWITCH_ENUM_CAST (code))
2461 case Sescape:
2462 case Scharquote:
2463 if (from == stop)
2464 goto lose;
2465 INC_BOTH (from, from_byte);
2466 /* treat following character as a word constituent */
2467 case Sword:
2468 case Ssymbol:
2469 if (depth || !sexpflag) break;
2470 /* This word counts as a sexp; return at end of it. */
2471 while (from < stop)
2473 UPDATE_SYNTAX_TABLE_FORWARD (from);
2475 /* Some compilers can't handle this inside the switch. */
2476 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2477 temp = SYNTAX_WITH_MULTIBYTE_CHECK (c);
2478 switch (temp)
2480 case Scharquote:
2481 case Sescape:
2482 INC_BOTH (from, from_byte);
2483 if (from == stop)
2484 goto lose;
2485 break;
2486 case Sword:
2487 case Ssymbol:
2488 case Squote:
2489 break;
2490 default:
2491 goto done;
2493 INC_BOTH (from, from_byte);
2495 goto done;
2497 case Scomment_fence:
2498 comstyle = ST_COMMENT_STYLE;
2499 /* FALLTHROUGH */
2500 case Scomment:
2501 if (!parse_sexp_ignore_comments) break;
2502 UPDATE_SYNTAX_TABLE_FORWARD (from);
2503 found = forw_comment (from, from_byte, stop,
2504 comnested, comstyle, 0,
2505 &out_charpos, &out_bytepos, &dummy);
2506 from = out_charpos, from_byte = out_bytepos;
2507 if (!found)
2509 if (depth == 0)
2510 goto done;
2511 goto lose;
2513 INC_BOTH (from, from_byte);
2514 UPDATE_SYNTAX_TABLE_FORWARD (from);
2515 break;
2517 case Smath:
2518 if (!sexpflag)
2519 break;
2520 if (from != stop && c == FETCH_CHAR_AS_MULTIBYTE (from_byte))
2522 INC_BOTH (from, from_byte);
2524 if (mathexit)
2526 mathexit = 0;
2527 goto close1;
2529 mathexit = 1;
2531 case Sopen:
2532 if (!++depth) goto done;
2533 break;
2535 case Sclose:
2536 close1:
2537 if (!--depth) goto done;
2538 if (depth < min_depth)
2539 xsignal3 (Qscan_error,
2540 build_string ("Containing expression ends prematurely"),
2541 make_number (last_good), make_number (from));
2542 break;
2544 case Sstring:
2545 case Sstring_fence:
2546 temp_pos = dec_bytepos (from_byte);
2547 stringterm = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
2548 while (1)
2550 if (from >= stop)
2551 goto lose;
2552 UPDATE_SYNTAX_TABLE_FORWARD (from);
2553 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2554 if (code == Sstring
2555 ? (c == stringterm
2556 && SYNTAX_WITH_MULTIBYTE_CHECK (c) == Sstring)
2557 : SYNTAX_WITH_MULTIBYTE_CHECK (c) == Sstring_fence)
2558 break;
2560 /* Some compilers can't handle this inside the switch. */
2561 temp = SYNTAX_WITH_MULTIBYTE_CHECK (c);
2562 switch (temp)
2564 case Scharquote:
2565 case Sescape:
2566 INC_BOTH (from, from_byte);
2568 INC_BOTH (from, from_byte);
2570 INC_BOTH (from, from_byte);
2571 if (!depth && sexpflag) goto done;
2572 break;
2573 default:
2574 /* Ignore whitespace, punctuation, quote, endcomment. */
2575 break;
2579 /* Reached end of buffer. Error if within object, return nil if between */
2580 if (depth)
2581 goto lose;
2583 immediate_quit = 0;
2584 return Qnil;
2586 /* End of object reached */
2587 done:
2588 count--;
2591 while (count < 0)
2593 while (from > stop)
2595 DEC_BOTH (from, from_byte);
2596 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2597 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2598 code = SYNTAX_WITH_MULTIBYTE_CHECK (c);
2599 if (depth == min_depth)
2600 last_good = from;
2601 comstyle = 0;
2602 comnested = SYNTAX_COMMENT_NESTED (c);
2603 if (code == Sendcomment)
2604 comstyle = SYNTAX_COMMENT_STYLE (c);
2605 if (from > stop && SYNTAX_COMEND_SECOND (c)
2606 && prev_char_comend_first (from, from_byte)
2607 && parse_sexp_ignore_comments)
2609 /* We must record the comment style encountered so that
2610 later, we can match only the proper comment begin
2611 sequence of the same style. */
2612 DEC_BOTH (from, from_byte);
2613 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2614 code = Sendcomment;
2615 c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2616 comstyle = SYNTAX_COMMENT_STYLE (c1);
2617 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
2620 /* Quoting turns anything except a comment-ender
2621 into a word character. Note that this cannot be true
2622 if we decremented FROM in the if-statement above. */
2623 if (code != Sendcomment && char_quoted (from, from_byte))
2625 DEC_BOTH (from, from_byte);
2626 code = Sword;
2628 else if (SYNTAX_PREFIX (c))
2629 continue;
2631 switch (SWITCH_ENUM_CAST (code))
2633 case Sword:
2634 case Ssymbol:
2635 case Sescape:
2636 case Scharquote:
2637 if (depth || !sexpflag) break;
2638 /* This word counts as a sexp; count object finished
2639 after passing it. */
2640 while (from > stop)
2642 temp_pos = from_byte;
2643 if (! NILP (current_buffer->enable_multibyte_characters))
2644 DEC_POS (temp_pos);
2645 else
2646 temp_pos--;
2647 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
2648 c1 = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
2649 temp_code = SYNTAX_WITH_MULTIBYTE_CHECK (c1);
2650 /* Don't allow comment-end to be quoted. */
2651 if (temp_code == Sendcomment)
2652 goto done2;
2653 quoted = char_quoted (from - 1, temp_pos);
2654 if (quoted)
2656 DEC_BOTH (from, from_byte);
2657 temp_pos = dec_bytepos (temp_pos);
2658 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
2660 c1 = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
2661 temp_code = SYNTAX_WITH_MULTIBYTE_CHECK (c1);
2662 if (! (quoted || temp_code == Sword
2663 || temp_code == Ssymbol
2664 || temp_code == Squote))
2665 goto done2;
2666 DEC_BOTH (from, from_byte);
2668 goto done2;
2670 case Smath:
2671 if (!sexpflag)
2672 break;
2673 temp_pos = dec_bytepos (from_byte);
2674 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
2675 if (from != stop && c == FETCH_CHAR_AS_MULTIBYTE (temp_pos))
2676 DEC_BOTH (from, from_byte);
2677 if (mathexit)
2679 mathexit = 0;
2680 goto open2;
2682 mathexit = 1;
2684 case Sclose:
2685 if (!++depth) goto done2;
2686 break;
2688 case Sopen:
2689 open2:
2690 if (!--depth) goto done2;
2691 if (depth < min_depth)
2692 xsignal3 (Qscan_error,
2693 build_string ("Containing expression ends prematurely"),
2694 make_number (last_good), make_number (from));
2695 break;
2697 case Sendcomment:
2698 if (!parse_sexp_ignore_comments)
2699 break;
2700 found = back_comment (from, from_byte, stop, comnested, comstyle,
2701 &out_charpos, &out_bytepos);
2702 /* FIXME: if found == -1, then it really wasn't a comment-end.
2703 For single-char Sendcomment, we can't do much about it apart
2704 from skipping the char.
2705 For 2-char endcomments, we could try again, taking both
2706 chars as separate entities, but it's a lot of trouble
2707 for very little gain, so we don't bother either. -sm */
2708 if (found != -1)
2709 from = out_charpos, from_byte = out_bytepos;
2710 break;
2712 case Scomment_fence:
2713 case Sstring_fence:
2714 while (1)
2716 if (from == stop)
2717 goto lose;
2718 DEC_BOTH (from, from_byte);
2719 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2720 if (!char_quoted (from, from_byte)
2721 && (c = FETCH_CHAR_AS_MULTIBYTE (from_byte),
2722 SYNTAX_WITH_MULTIBYTE_CHECK (c) == code))
2723 break;
2725 if (code == Sstring_fence && !depth && sexpflag) goto done2;
2726 break;
2728 case Sstring:
2729 stringterm = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2730 while (1)
2732 if (from == stop)
2733 goto lose;
2734 DEC_BOTH (from, from_byte);
2735 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2736 if (!char_quoted (from, from_byte)
2737 && (stringterm
2738 == (c = FETCH_CHAR_AS_MULTIBYTE (from_byte)))
2739 && SYNTAX_WITH_MULTIBYTE_CHECK (c) == Sstring)
2740 break;
2742 if (!depth && sexpflag) goto done2;
2743 break;
2744 default:
2745 /* Ignore whitespace, punctuation, quote, endcomment. */
2746 break;
2750 /* Reached start of buffer. Error if within object, return nil if between */
2751 if (depth)
2752 goto lose;
2754 immediate_quit = 0;
2755 return Qnil;
2757 done2:
2758 count++;
2762 immediate_quit = 0;
2763 XSETFASTINT (val, from);
2764 return val;
2766 lose:
2767 xsignal3 (Qscan_error,
2768 build_string ("Unbalanced parentheses"),
2769 make_number (last_good), make_number (from));
2772 DEFUN ("scan-lists", Fscan_lists, Sscan_lists, 3, 3, 0,
2773 doc: /* Scan from character number FROM by COUNT lists.
2774 Returns the character number of the position thus found.
2776 If DEPTH is nonzero, paren depth begins counting from that value,
2777 only places where the depth in parentheses becomes zero
2778 are candidates for stopping; COUNT such places are counted.
2779 Thus, a positive value for DEPTH means go out levels.
2781 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
2783 If the beginning or end of (the accessible part of) the buffer is reached
2784 and the depth is wrong, an error is signaled.
2785 If the depth is right but the count is not used up, nil is returned. */)
2786 (Lisp_Object from, Lisp_Object count, Lisp_Object depth)
2788 CHECK_NUMBER (from);
2789 CHECK_NUMBER (count);
2790 CHECK_NUMBER (depth);
2792 return scan_lists (XINT (from), XINT (count), XINT (depth), 0);
2795 DEFUN ("scan-sexps", Fscan_sexps, Sscan_sexps, 2, 2, 0,
2796 doc: /* Scan from character number FROM by COUNT balanced expressions.
2797 If COUNT is negative, scan backwards.
2798 Returns the character number of the position thus found.
2800 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
2802 If the beginning or end of (the accessible part of) the buffer is reached
2803 in the middle of a parenthetical grouping, an error is signaled.
2804 If the beginning or end is reached between groupings
2805 but before count is used up, nil is returned. */)
2806 (Lisp_Object from, Lisp_Object count)
2808 CHECK_NUMBER (from);
2809 CHECK_NUMBER (count);
2811 return scan_lists (XINT (from), XINT (count), 0, 1);
2814 DEFUN ("backward-prefix-chars", Fbackward_prefix_chars, Sbackward_prefix_chars,
2815 0, 0, 0,
2816 doc: /* Move point backward over any number of chars with prefix syntax.
2817 This includes chars with "quote" or "prefix" syntax (' or p). */)
2818 (void)
2820 int beg = BEGV;
2821 int opoint = PT;
2822 int opoint_byte = PT_BYTE;
2823 int pos = PT;
2824 int pos_byte = PT_BYTE;
2825 int c;
2827 if (pos <= beg)
2829 SET_PT_BOTH (opoint, opoint_byte);
2831 return Qnil;
2834 SETUP_SYNTAX_TABLE (pos, -1);
2836 DEC_BOTH (pos, pos_byte);
2838 while (!char_quoted (pos, pos_byte)
2839 /* Previous statement updates syntax table. */
2840 && ((c = FETCH_CHAR_AS_MULTIBYTE (pos_byte), SYNTAX (c) == Squote)
2841 || SYNTAX_PREFIX (c)))
2843 opoint = pos;
2844 opoint_byte = pos_byte;
2846 if (pos + 1 > beg)
2847 DEC_BOTH (pos, pos_byte);
2850 SET_PT_BOTH (opoint, opoint_byte);
2852 return Qnil;
2855 /* Parse forward from FROM / FROM_BYTE to END,
2856 assuming that FROM has state OLDSTATE (nil means FROM is start of function),
2857 and return a description of the state of the parse at END.
2858 If STOPBEFORE is nonzero, stop at the start of an atom.
2859 If COMMENTSTOP is 1, stop at the start of a comment.
2860 If COMMENTSTOP is -1, stop at the start or end of a comment,
2861 after the beginning of a string, or after the end of a string. */
2863 static void
2864 scan_sexps_forward (struct lisp_parse_state *stateptr,
2865 EMACS_INT from, EMACS_INT from_byte, EMACS_INT end,
2866 int targetdepth, int stopbefore,
2867 Lisp_Object oldstate, int commentstop)
2869 struct lisp_parse_state state;
2871 register enum syntaxcode code;
2872 int c1;
2873 int comnested;
2874 struct level { int last, prev; };
2875 struct level levelstart[100];
2876 register struct level *curlevel = levelstart;
2877 struct level *endlevel = levelstart + 100;
2878 register int depth; /* Paren depth of current scanning location.
2879 level - levelstart equals this except
2880 when the depth becomes negative. */
2881 int mindepth; /* Lowest DEPTH value seen. */
2882 int start_quoted = 0; /* Nonzero means starting after a char quote */
2883 Lisp_Object tem;
2884 EMACS_INT prev_from; /* Keep one character before FROM. */
2885 EMACS_INT prev_from_byte;
2886 int prev_from_syntax;
2887 int boundary_stop = commentstop == -1;
2888 int nofence;
2889 int found;
2890 EMACS_INT out_bytepos, out_charpos;
2891 int temp;
2893 prev_from = from;
2894 prev_from_byte = from_byte;
2895 if (from != BEGV)
2896 DEC_BOTH (prev_from, prev_from_byte);
2898 /* Use this macro instead of `from++'. */
2899 #define INC_FROM \
2900 do { prev_from = from; \
2901 prev_from_byte = from_byte; \
2902 temp = FETCH_CHAR_AS_MULTIBYTE (prev_from_byte); \
2903 prev_from_syntax = SYNTAX_WITH_FLAGS (temp); \
2904 INC_BOTH (from, from_byte); \
2905 if (from < end) \
2906 UPDATE_SYNTAX_TABLE_FORWARD (from); \
2907 } while (0)
2909 immediate_quit = 1;
2910 QUIT;
2912 if (NILP (oldstate))
2914 depth = 0;
2915 state.instring = -1;
2916 state.incomment = 0;
2917 state.comstyle = 0; /* comment style a by default. */
2918 state.comstr_start = -1; /* no comment/string seen. */
2920 else
2922 tem = Fcar (oldstate);
2923 if (!NILP (tem))
2924 depth = XINT (tem);
2925 else
2926 depth = 0;
2928 oldstate = Fcdr (oldstate);
2929 oldstate = Fcdr (oldstate);
2930 oldstate = Fcdr (oldstate);
2931 tem = Fcar (oldstate);
2932 /* Check whether we are inside string_fence-style string: */
2933 state.instring = (!NILP (tem)
2934 ? (INTEGERP (tem) ? XINT (tem) : ST_STRING_STYLE)
2935 : -1);
2937 oldstate = Fcdr (oldstate);
2938 tem = Fcar (oldstate);
2939 state.incomment = (!NILP (tem)
2940 ? (INTEGERP (tem) ? XINT (tem) : -1)
2941 : 0);
2943 oldstate = Fcdr (oldstate);
2944 tem = Fcar (oldstate);
2945 start_quoted = !NILP (tem);
2947 /* if the eighth element of the list is nil, we are in comment
2948 style a. If it is non-nil, we are in comment style b */
2949 oldstate = Fcdr (oldstate);
2950 oldstate = Fcdr (oldstate);
2951 tem = Fcar (oldstate);
2952 state.comstyle = NILP (tem) ? 0 : (EQ (tem, Qsyntax_table)
2953 ? ST_COMMENT_STYLE : 1);
2955 oldstate = Fcdr (oldstate);
2956 tem = Fcar (oldstate);
2957 state.comstr_start = NILP (tem) ? -1 : XINT (tem) ;
2958 oldstate = Fcdr (oldstate);
2959 tem = Fcar (oldstate);
2960 while (!NILP (tem)) /* >= second enclosing sexps. */
2962 /* curlevel++->last ran into compiler bug on Apollo */
2963 curlevel->last = XINT (Fcar (tem));
2964 if (++curlevel == endlevel)
2965 curlevel--; /* error ("Nesting too deep for parser"); */
2966 curlevel->prev = -1;
2967 curlevel->last = -1;
2968 tem = Fcdr (tem);
2971 state.quoted = 0;
2972 mindepth = depth;
2974 curlevel->prev = -1;
2975 curlevel->last = -1;
2977 SETUP_SYNTAX_TABLE (prev_from, 1);
2978 temp = FETCH_CHAR (prev_from_byte);
2979 prev_from_syntax = SYNTAX_WITH_FLAGS (temp);
2980 UPDATE_SYNTAX_TABLE_FORWARD (from);
2982 /* Enter the loop at a place appropriate for initial state. */
2984 if (state.incomment)
2985 goto startincomment;
2986 if (state.instring >= 0)
2988 nofence = state.instring != ST_STRING_STYLE;
2989 if (start_quoted)
2990 goto startquotedinstring;
2991 goto startinstring;
2993 else if (start_quoted)
2994 goto startquoted;
2996 while (from < end)
2998 INC_FROM;
2999 code = prev_from_syntax & 0xff;
3001 if (from < end
3002 && SYNTAX_FLAGS_COMSTART_FIRST (prev_from_syntax)
3003 && (c1 = FETCH_CHAR (from_byte),
3004 SYNTAX_COMSTART_SECOND (c1)))
3005 /* Duplicate code to avoid a complex if-expression
3006 which causes trouble for the SGI compiler. */
3008 /* Record the comment style we have entered so that only
3009 the comment-end sequence of the same style actually
3010 terminates the comment section. */
3011 state.comstyle = SYNTAX_COMMENT_STYLE (c1);
3012 comnested = SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax);
3013 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
3014 state.incomment = comnested ? 1 : -1;
3015 state.comstr_start = prev_from;
3016 INC_FROM;
3017 code = Scomment;
3019 else if (code == Scomment_fence)
3021 /* Record the comment style we have entered so that only
3022 the comment-end sequence of the same style actually
3023 terminates the comment section. */
3024 state.comstyle = ST_COMMENT_STYLE;
3025 state.incomment = -1;
3026 state.comstr_start = prev_from;
3027 code = Scomment;
3029 else if (code == Scomment)
3031 state.comstyle = SYNTAX_FLAGS_COMMENT_STYLE (prev_from_syntax);
3032 state.incomment = (SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax) ?
3033 1 : -1);
3034 state.comstr_start = prev_from;
3037 if (SYNTAX_FLAGS_PREFIX (prev_from_syntax))
3038 continue;
3039 switch (SWITCH_ENUM_CAST (code))
3041 case Sescape:
3042 case Scharquote:
3043 if (stopbefore) goto stop; /* this arg means stop at sexp start */
3044 curlevel->last = prev_from;
3045 startquoted:
3046 if (from == end) goto endquoted;
3047 INC_FROM;
3048 goto symstarted;
3049 /* treat following character as a word constituent */
3050 case Sword:
3051 case Ssymbol:
3052 if (stopbefore) goto stop; /* this arg means stop at sexp start */
3053 curlevel->last = prev_from;
3054 symstarted:
3055 while (from < end)
3057 /* Some compilers can't handle this inside the switch. */
3058 temp = FETCH_CHAR_AS_MULTIBYTE (from_byte);
3059 temp = SYNTAX (temp);
3060 switch (temp)
3062 case Scharquote:
3063 case Sescape:
3064 INC_FROM;
3065 if (from == end) goto endquoted;
3066 break;
3067 case Sword:
3068 case Ssymbol:
3069 case Squote:
3070 break;
3071 default:
3072 goto symdone;
3074 INC_FROM;
3076 symdone:
3077 curlevel->prev = curlevel->last;
3078 break;
3080 case Scomment_fence: /* Can't happen because it's handled above. */
3081 case Scomment:
3082 if (commentstop || boundary_stop) goto done;
3083 startincomment:
3084 /* The (from == BEGV) test was to enter the loop in the middle so
3085 that we find a 2-char comment ender even if we start in the
3086 middle of it. We don't want to do that if we're just at the
3087 beginning of the comment (think of (*) ... (*)). */
3088 found = forw_comment (from, from_byte, end,
3089 state.incomment, state.comstyle,
3090 (from == BEGV || from < state.comstr_start + 3)
3091 ? 0 : prev_from_syntax,
3092 &out_charpos, &out_bytepos, &state.incomment);
3093 from = out_charpos; from_byte = out_bytepos;
3094 /* Beware! prev_from and friends are invalid now.
3095 Luckily, the `done' doesn't use them and the INC_FROM
3096 sets them to a sane value without looking at them. */
3097 if (!found) goto done;
3098 INC_FROM;
3099 state.incomment = 0;
3100 state.comstyle = 0; /* reset the comment style */
3101 if (boundary_stop) goto done;
3102 break;
3104 case Sopen:
3105 if (stopbefore) goto stop; /* this arg means stop at sexp start */
3106 depth++;
3107 /* curlevel++->last ran into compiler bug on Apollo */
3108 curlevel->last = prev_from;
3109 if (++curlevel == endlevel)
3110 curlevel--; /* error ("Nesting too deep for parser"); */
3111 curlevel->prev = -1;
3112 curlevel->last = -1;
3113 if (targetdepth == depth) goto done;
3114 break;
3116 case Sclose:
3117 depth--;
3118 if (depth < mindepth)
3119 mindepth = depth;
3120 if (curlevel != levelstart)
3121 curlevel--;
3122 curlevel->prev = curlevel->last;
3123 if (targetdepth == depth) goto done;
3124 break;
3126 case Sstring:
3127 case Sstring_fence:
3128 state.comstr_start = from - 1;
3129 if (stopbefore) goto stop; /* this arg means stop at sexp start */
3130 curlevel->last = prev_from;
3131 state.instring = (code == Sstring
3132 ? (FETCH_CHAR_AS_MULTIBYTE (prev_from_byte))
3133 : ST_STRING_STYLE);
3134 if (boundary_stop) goto done;
3135 startinstring:
3137 nofence = state.instring != ST_STRING_STYLE;
3139 while (1)
3141 int c;
3143 if (from >= end) goto done;
3144 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
3145 /* Some compilers can't handle this inside the switch. */
3146 temp = SYNTAX (c);
3148 /* Check TEMP here so that if the char has
3149 a syntax-table property which says it is NOT
3150 a string character, it does not end the string. */
3151 if (nofence && c == state.instring && temp == Sstring)
3152 break;
3154 switch (temp)
3156 case Sstring_fence:
3157 if (!nofence) goto string_end;
3158 break;
3159 case Scharquote:
3160 case Sescape:
3161 INC_FROM;
3162 startquotedinstring:
3163 if (from >= end) goto endquoted;
3165 INC_FROM;
3168 string_end:
3169 state.instring = -1;
3170 curlevel->prev = curlevel->last;
3171 INC_FROM;
3172 if (boundary_stop) goto done;
3173 break;
3175 case Smath:
3176 /* FIXME: We should do something with it. */
3177 break;
3178 default:
3179 /* Ignore whitespace, punctuation, quote, endcomment. */
3180 break;
3183 goto done;
3185 stop: /* Here if stopping before start of sexp. */
3186 from = prev_from; /* We have just fetched the char that starts it; */
3187 goto done; /* but return the position before it. */
3189 endquoted:
3190 state.quoted = 1;
3191 done:
3192 state.depth = depth;
3193 state.mindepth = mindepth;
3194 state.thislevelstart = curlevel->prev;
3195 state.prevlevelstart
3196 = (curlevel == levelstart) ? -1 : (curlevel - 1)->last;
3197 state.location = from;
3198 state.levelstarts = Qnil;
3199 while (--curlevel >= levelstart)
3200 state.levelstarts = Fcons (make_number (curlevel->last),
3201 state.levelstarts);
3202 immediate_quit = 0;
3204 *stateptr = state;
3207 DEFUN ("parse-partial-sexp", Fparse_partial_sexp, Sparse_partial_sexp, 2, 6, 0,
3208 doc: /* Parse Lisp syntax starting at FROM until TO; return status of parse at TO.
3209 Parsing stops at TO or when certain criteria are met;
3210 point is set to where parsing stops.
3211 If fifth arg OLDSTATE is omitted or nil,
3212 parsing assumes that FROM is the beginning of a function.
3213 Value is a list of elements describing final state of parsing:
3214 0. depth in parens.
3215 1. character address of start of innermost containing list; nil if none.
3216 2. character address of start of last complete sexp terminated.
3217 3. non-nil if inside a string.
3218 (it is the character that will terminate the string,
3219 or t if the string should be terminated by a generic string delimiter.)
3220 4. nil if outside a comment, t if inside a non-nestable comment,
3221 else an integer (the current comment nesting).
3222 5. t if following a quote character.
3223 6. the minimum paren-depth encountered during this scan.
3224 7. t if in a comment of style b; symbol `syntax-table' if the comment
3225 should be terminated by a generic comment delimiter.
3226 8. character address of start of comment or string; nil if not in one.
3227 9. Intermediate data for continuation of parsing (subject to change).
3228 If third arg TARGETDEPTH is non-nil, parsing stops if the depth
3229 in parentheses becomes equal to TARGETDEPTH.
3230 Fourth arg STOPBEFORE non-nil means stop when come to
3231 any character that starts a sexp.
3232 Fifth arg OLDSTATE is a list like what this function returns.
3233 It is used to initialize the state of the parse. Elements number 1, 2, 6
3234 and 8 are ignored.
3235 Sixth arg COMMENTSTOP non-nil means stop at the start of a comment.
3236 If it is symbol `syntax-table', stop after the start of a comment or a
3237 string, or after end of a comment or a string. */)
3238 (Lisp_Object from, Lisp_Object to, Lisp_Object targetdepth, Lisp_Object stopbefore, Lisp_Object oldstate, Lisp_Object commentstop)
3240 struct lisp_parse_state state;
3241 int target;
3243 if (!NILP (targetdepth))
3245 CHECK_NUMBER (targetdepth);
3246 target = XINT (targetdepth);
3248 else
3249 target = -100000; /* We won't reach this depth */
3251 validate_region (&from, &to);
3252 scan_sexps_forward (&state, XINT (from), CHAR_TO_BYTE (XINT (from)),
3253 XINT (to),
3254 target, !NILP (stopbefore), oldstate,
3255 (NILP (commentstop)
3256 ? 0 : (EQ (commentstop, Qsyntax_table) ? -1 : 1)));
3258 SET_PT (state.location);
3260 return Fcons (make_number (state.depth),
3261 Fcons (state.prevlevelstart < 0 ? Qnil : make_number (state.prevlevelstart),
3262 Fcons (state.thislevelstart < 0 ? Qnil : make_number (state.thislevelstart),
3263 Fcons (state.instring >= 0
3264 ? (state.instring == ST_STRING_STYLE
3265 ? Qt : make_number (state.instring)) : Qnil,
3266 Fcons (state.incomment < 0 ? Qt :
3267 (state.incomment == 0 ? Qnil :
3268 make_number (state.incomment)),
3269 Fcons (state.quoted ? Qt : Qnil,
3270 Fcons (make_number (state.mindepth),
3271 Fcons ((state.comstyle
3272 ? (state.comstyle == ST_COMMENT_STYLE
3273 ? Qsyntax_table : Qt) :
3274 Qnil),
3275 Fcons (((state.incomment
3276 || (state.instring >= 0))
3277 ? make_number (state.comstr_start)
3278 : Qnil),
3279 Fcons (state.levelstarts, Qnil))))))))));
3282 void
3283 init_syntax_once (void)
3285 register int i, c;
3286 Lisp_Object temp;
3288 /* This has to be done here, before we call Fmake_char_table. */
3289 Qsyntax_table = intern_c_string ("syntax-table");
3290 staticpro (&Qsyntax_table);
3292 /* Intern_C_String this now in case it isn't already done.
3293 Setting this variable twice is harmless.
3294 But don't staticpro it here--that is done in alloc.c. */
3295 Qchar_table_extra_slots = intern_c_string ("char-table-extra-slots");
3297 /* Create objects which can be shared among syntax tables. */
3298 Vsyntax_code_object = Fmake_vector (make_number (Smax), Qnil);
3299 for (i = 0; i < XVECTOR (Vsyntax_code_object)->size; i++)
3300 XVECTOR (Vsyntax_code_object)->contents[i]
3301 = Fcons (make_number (i), Qnil);
3303 /* Now we are ready to set up this property, so we can
3304 create syntax tables. */
3305 Fput (Qsyntax_table, Qchar_table_extra_slots, make_number (0));
3307 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Swhitespace];
3309 Vstandard_syntax_table = Fmake_char_table (Qsyntax_table, temp);
3311 /* Control characters should not be whitespace. */
3312 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Spunct];
3313 for (i = 0; i <= ' ' - 1; i++)
3314 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3315 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, 0177, temp);
3317 /* Except that a few really are whitespace. */
3318 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Swhitespace];
3319 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ' ', temp);
3320 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\t', temp);
3321 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\n', temp);
3322 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, 015, temp);
3323 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, 014, temp);
3325 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Sword];
3326 for (i = 'a'; i <= 'z'; i++)
3327 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3328 for (i = 'A'; i <= 'Z'; i++)
3329 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3330 for (i = '0'; i <= '9'; i++)
3331 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3333 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '$', temp);
3334 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '%', temp);
3336 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '(',
3337 Fcons (make_number (Sopen), make_number (')')));
3338 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ')',
3339 Fcons (make_number (Sclose), make_number ('(')));
3340 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '[',
3341 Fcons (make_number (Sopen), make_number (']')));
3342 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ']',
3343 Fcons (make_number (Sclose), make_number ('[')));
3344 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '{',
3345 Fcons (make_number (Sopen), make_number ('}')));
3346 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '}',
3347 Fcons (make_number (Sclose), make_number ('{')));
3348 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '"',
3349 Fcons (make_number ((int) Sstring), Qnil));
3350 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\\',
3351 Fcons (make_number ((int) Sescape), Qnil));
3353 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Ssymbol];
3354 for (i = 0; i < 10; i++)
3356 c = "_-+*/&|<>="[i];
3357 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
3360 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Spunct];
3361 for (i = 0; i < 12; i++)
3363 c = ".,;:?!#@~^'`"[i];
3364 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
3367 /* All multibyte characters have syntax `word' by default. */
3368 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Sword];
3369 char_table_set_range (Vstandard_syntax_table, 0x80, MAX_CHAR, temp);
3372 void
3373 syms_of_syntax (void)
3375 Qsyntax_table_p = intern_c_string ("syntax-table-p");
3376 staticpro (&Qsyntax_table_p);
3378 staticpro (&Vsyntax_code_object);
3380 staticpro (&gl_state.object);
3381 staticpro (&gl_state.global_code);
3382 staticpro (&gl_state.current_syntax_table);
3383 staticpro (&gl_state.old_prop);
3385 /* Defined in regex.c */
3386 staticpro (&re_match_object);
3388 Qscan_error = intern_c_string ("scan-error");
3389 staticpro (&Qscan_error);
3390 Fput (Qscan_error, Qerror_conditions,
3391 pure_cons (Qscan_error, pure_cons (Qerror, Qnil)));
3392 Fput (Qscan_error, Qerror_message,
3393 make_pure_c_string ("Scan error"));
3395 DEFVAR_BOOL ("parse-sexp-ignore-comments", &parse_sexp_ignore_comments,
3396 doc: /* Non-nil means `forward-sexp', etc., should treat comments as whitespace. */);
3398 DEFVAR_BOOL ("parse-sexp-lookup-properties", &parse_sexp_lookup_properties,
3399 doc: /* Non-nil means `forward-sexp', etc., obey `syntax-table' property.
3400 Otherwise, that text property is simply ignored.
3401 See the info node `(elisp)Syntax Properties' for a description of the
3402 `syntax-table' property. */);
3404 words_include_escapes = 0;
3405 DEFVAR_BOOL ("words-include-escapes", &words_include_escapes,
3406 doc: /* Non-nil means `forward-word', etc., should treat escape chars part of words. */);
3408 DEFVAR_BOOL ("multibyte-syntax-as-symbol", &multibyte_syntax_as_symbol,
3409 doc: /* Non-nil means `scan-sexps' treats all multibyte characters as symbol. */);
3410 multibyte_syntax_as_symbol = 0;
3412 DEFVAR_BOOL ("open-paren-in-column-0-is-defun-start",
3413 &open_paren_in_column_0_is_defun_start,
3414 doc: /* *Non-nil means an open paren in column 0 denotes the start of a defun. */);
3415 open_paren_in_column_0_is_defun_start = 1;
3418 DEFVAR_LISP ("find-word-boundary-function-table",
3419 &Vfind_word_boundary_function_table,
3420 doc: /*
3421 Char table of functions to search for the word boundary.
3422 Each function is called with two arguments; POS and LIMIT.
3423 POS and LIMIT are character positions in the current buffer.
3425 If POS is less than LIMIT, POS is at the first character of a word,
3426 and the return value of a function is a position after the last
3427 character of that word.
3429 If POS is not less than LIMIT, POS is at the last character of a word,
3430 and the return value of a function is a position at the first
3431 character of that word.
3433 In both cases, LIMIT bounds the search. */);
3434 Vfind_word_boundary_function_table = Fmake_char_table (Qnil, Qnil);
3436 defsubr (&Ssyntax_table_p);
3437 defsubr (&Ssyntax_table);
3438 defsubr (&Sstandard_syntax_table);
3439 defsubr (&Scopy_syntax_table);
3440 defsubr (&Sset_syntax_table);
3441 defsubr (&Schar_syntax);
3442 defsubr (&Smatching_paren);
3443 defsubr (&Sstring_to_syntax);
3444 defsubr (&Smodify_syntax_entry);
3445 defsubr (&Sinternal_describe_syntax_value);
3447 defsubr (&Sforward_word);
3449 defsubr (&Sskip_chars_forward);
3450 defsubr (&Sskip_chars_backward);
3451 defsubr (&Sskip_syntax_forward);
3452 defsubr (&Sskip_syntax_backward);
3454 defsubr (&Sforward_comment);
3455 defsubr (&Sscan_lists);
3456 defsubr (&Sscan_sexps);
3457 defsubr (&Sbackward_prefix_chars);
3458 defsubr (&Sparse_partial_sexp);
3461 /* arch-tag: 3e297b9f-088e-4b64-8f4c-fb0b3443e412
3462 (do not change this comment) */