tarballs, not pretests.
[emacs.git] / src / syntax.c
blobb2799924a5bab08782ed9430223e5860b57f363a
1 /* GNU Emacs routines to deal with syntax tables; also word and list parsing.
2 Copyright (C) 1985, 87, 93, 94, 95, 97, 1998, 1999 Free Software Foundation, Inc.
4 This file is part of GNU Emacs.
6 GNU Emacs is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
11 GNU Emacs is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GNU Emacs; see the file COPYING. If not, write to
18 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
22 #include <config.h>
23 #include <ctype.h>
24 #include "lisp.h"
25 #include "commands.h"
26 #include "buffer.h"
27 #include "charset.h"
29 /* Make syntax table lookup grant data in gl_state. */
30 #define SYNTAX_ENTRY_VIA_PROPERTY
32 #include "syntax.h"
33 #include "intervals.h"
35 /* We use these constants in place for comment-style and
36 string-ender-char to distinguish comments/strings started by
37 comment_fence and string_fence codes. */
39 #define ST_COMMENT_STYLE (256 + 1)
40 #define ST_STRING_STYLE (256 + 2)
41 #include "category.h"
43 Lisp_Object Qsyntax_table_p, Qsyntax_table, Qscan_error;
45 int words_include_escapes;
46 int parse_sexp_lookup_properties;
48 /* Nonzero means `scan-sexps' treat all multibyte characters as symbol. */
49 int multibyte_syntax_as_symbol;
51 /* Used as a temporary in SYNTAX_ENTRY and other macros in syntax.h,
52 if not compiled with GCC. No need to mark it, since it is used
53 only very temporarily. */
54 Lisp_Object syntax_temp;
56 /* Non-zero means an open parenthesis in column 0 is always considered
57 to be the start of a defun. Zero means an open parenthesis in
58 column 0 has no special meaning. */
60 int open_paren_in_column_0_is_defun_start;
62 /* This is the internal form of the parse state used in parse-partial-sexp. */
64 struct lisp_parse_state
66 int depth; /* Depth at end of parsing. */
67 int instring; /* -1 if not within string, else desired terminator. */
68 int incomment; /* -1 if in unnestable comment else comment nesting */
69 int comstyle; /* comment style a=0, or b=1, or ST_COMMENT_STYLE. */
70 int quoted; /* Nonzero if just after an escape char at end of parsing */
71 int thislevelstart; /* Char number of most recent start-of-expression at current level */
72 int prevlevelstart; /* Char number of start of containing expression */
73 int location; /* Char number at which parsing stopped. */
74 int mindepth; /* Minimum depth seen while scanning. */
75 int comstr_start; /* Position just after last comment/string starter. */
76 Lisp_Object levelstarts; /* Char numbers of starts-of-expression
77 of levels (starting from outermost). */
80 /* These variables are a cache for finding the start of a defun.
81 find_start_pos is the place for which the defun start was found.
82 find_start_value is the defun start position found for it.
83 find_start_value_byte is the corresponding byte position.
84 find_start_buffer is the buffer it was found in.
85 find_start_begv is the BEGV value when it was found.
86 find_start_modiff is the value of MODIFF when it was found. */
88 static int find_start_pos;
89 static int find_start_value;
90 static int find_start_value_byte;
91 static struct buffer *find_start_buffer;
92 static int find_start_begv;
93 static int find_start_modiff;
96 static int find_defun_start P_ ((int, int));
97 static int back_comment P_ ((int, int, int, int, int, int *, int *));
98 static int char_quoted P_ ((int, int));
99 static Lisp_Object skip_chars P_ ((int, int, Lisp_Object, Lisp_Object));
100 static Lisp_Object scan_lists P_ ((int, int, int, int));
101 static void scan_sexps_forward P_ ((struct lisp_parse_state *,
102 int, int, int, int,
103 int, Lisp_Object, int));
106 struct gl_state_s gl_state; /* Global state of syntax parser. */
108 INTERVAL interval_of ();
109 #define INTERVALS_AT_ONCE 10 /* 1 + max-number of intervals
110 to scan to property-change. */
112 /* Update gl_state to an appropriate interval which contains CHARPOS. The
113 sign of COUNT give the relative position of CHARPOS wrt the previously
114 valid interval. If INIT, only [be]_property fields of gl_state are
115 valid at start, the rest is filled basing on OBJECT.
117 `gl_state.*_i' are the intervals, and CHARPOS is further in the search
118 direction than the intervals - or in an interval. We update the
119 current syntax-table basing on the property of this interval, and
120 update the interval to start further than CHARPOS - or be
121 NULL_INTERVAL. We also update lim_property to be the next value of
122 charpos to call this subroutine again - or be before/after the
123 start/end of OBJECT. */
125 void
126 update_syntax_table (charpos, count, init, object)
127 int charpos, count, init;
128 Lisp_Object object;
130 Lisp_Object tmp_table;
131 int cnt = 0, invalidate = 1;
132 INTERVAL i, oldi;
134 if (init)
136 gl_state.start = gl_state.b_property;
137 gl_state.stop = gl_state.e_property;
138 gl_state.forward_i = interval_of (charpos, object);
139 i = gl_state.backward_i = gl_state.forward_i;
140 gl_state.left_ok = gl_state.right_ok = 1;
141 invalidate = 0;
142 if (NULL_INTERVAL_P (i))
143 return;
144 /* interval_of updates only ->position of the return value, so
145 update the parents manually to speed up update_interval. */
146 while (!NULL_PARENT (i))
148 if (AM_RIGHT_CHILD (i))
149 INTERVAL_PARENT (i)->position = i->position
150 - LEFT_TOTAL_LENGTH (i) + TOTAL_LENGTH (i) /* right end */
151 - TOTAL_LENGTH (INTERVAL_PARENT (i))
152 + LEFT_TOTAL_LENGTH (INTERVAL_PARENT (i));
153 else
154 INTERVAL_PARENT (i)->position = i->position - LEFT_TOTAL_LENGTH (i)
155 + TOTAL_LENGTH (i);
156 i = INTERVAL_PARENT (i);
158 i = gl_state.forward_i;
159 gl_state.b_property = i->position - 1 - gl_state.offset;
160 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
161 goto update;
163 oldi = i = count > 0 ? gl_state.forward_i : gl_state.backward_i;
165 /* We are guaranteed to be called with CHARPOS either in i,
166 or further off. */
167 if (NULL_INTERVAL_P (i))
168 error ("Error in syntax_table logic for to-the-end intervals");
169 else if (charpos < i->position) /* Move left. */
171 if (count > 0)
172 error ("Error in syntax_table logic for intervals <-");
173 /* Update the interval. */
174 i = update_interval (i, charpos);
175 if (!gl_state.left_ok || oldi->position != INTERVAL_LAST_POS (i))
177 invalidate = 0;
178 gl_state.right_ok = 1; /* Invalidate the other end. */
179 gl_state.forward_i = i;
180 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
183 else if (charpos >= INTERVAL_LAST_POS (i)) /* Move right. */
185 if (count < 0)
186 error ("Error in syntax_table logic for intervals ->");
187 /* Update the interval. */
188 i = update_interval (i, charpos);
189 if (!gl_state.right_ok || i->position != INTERVAL_LAST_POS (oldi))
191 invalidate = 0;
192 gl_state.left_ok = 1; /* Invalidate the other end. */
193 gl_state.backward_i = i;
194 gl_state.b_property = i->position - 1 - gl_state.offset;
197 else if (count > 0 ? gl_state.right_ok : gl_state.left_ok)
199 /* We do not need to recalculate tmp_table. */
200 tmp_table = gl_state.old_prop;
203 update:
204 tmp_table = textget (i->plist, Qsyntax_table);
206 if (invalidate)
207 invalidate = !EQ (tmp_table, gl_state.old_prop); /* Need to invalidate? */
209 if (invalidate) /* Did not get to adjacent interval. */
210 { /* with the same table => */
211 /* invalidate the old range. */
212 if (count > 0)
214 gl_state.backward_i = i;
215 gl_state.left_ok = 1; /* Invalidate the other end. */
216 gl_state.b_property = i->position - 1 - gl_state.offset;
218 else
220 gl_state.forward_i = i;
221 gl_state.right_ok = 1; /* Invalidate the other end. */
222 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
226 gl_state.current_syntax_table = tmp_table;
227 gl_state.old_prop = tmp_table;
228 if (EQ (Fsyntax_table_p (tmp_table), Qt))
230 gl_state.use_global = 0;
232 else if (CONSP (tmp_table))
234 gl_state.use_global = 1;
235 gl_state.global_code = tmp_table;
237 else
239 gl_state.use_global = 0;
240 gl_state.current_syntax_table = current_buffer->syntax_table;
243 while (!NULL_INTERVAL_P (i))
245 if (cnt && !EQ (tmp_table, textget (i->plist, Qsyntax_table)))
247 if (count > 0)
248 gl_state.right_ok = 0;
249 else
250 gl_state.left_ok = 0;
251 break;
253 else if (cnt == INTERVALS_AT_ONCE)
255 if (count > 0)
256 gl_state.right_ok = 1;
257 else
258 gl_state.left_ok = 1;
259 break;
261 cnt++;
262 i = count > 0 ? next_interval (i) : previous_interval (i);
264 if (NULL_INTERVAL_P (i))
265 { /* This property goes to the end. */
266 if (count > 0)
267 gl_state.e_property = gl_state.stop;
268 else
269 gl_state.b_property = gl_state.start;
271 else
273 if (count > 0)
275 gl_state.e_property = i->position - gl_state.offset;
276 gl_state.forward_i = i;
278 else
280 gl_state.b_property = i->position + LENGTH (i) - 1 - gl_state.offset;
281 gl_state.backward_i = i;
286 /* Returns TRUE if char at CHARPOS is quoted.
287 Global syntax-table data should be set up already to be good at CHARPOS
288 or after. On return global syntax data is good for lookup at CHARPOS. */
290 static int
291 char_quoted (charpos, bytepos)
292 register int charpos, bytepos;
294 register enum syntaxcode code;
295 register int beg = BEGV;
296 register int quoted = 0;
297 int orig = charpos;
299 DEC_BOTH (charpos, bytepos);
301 while (bytepos >= beg)
303 UPDATE_SYNTAX_TABLE_BACKWARD (charpos);
304 code = SYNTAX (FETCH_CHAR (bytepos));
305 if (! (code == Scharquote || code == Sescape))
306 break;
308 DEC_BOTH (charpos, bytepos);
309 quoted = !quoted;
312 UPDATE_SYNTAX_TABLE (orig);
313 return quoted;
316 /* Return the bytepos one character after BYTEPOS.
317 We assume that BYTEPOS is not at the end of the buffer. */
319 INLINE int
320 inc_bytepos (bytepos)
321 int bytepos;
323 if (NILP (current_buffer->enable_multibyte_characters))
324 return bytepos + 1;
326 INC_POS (bytepos);
327 return bytepos;
330 /* Return the bytepos one character before BYTEPOS.
331 We assume that BYTEPOS is not at the start of the buffer. */
333 INLINE int
334 dec_bytepos (bytepos)
335 int bytepos;
337 if (NILP (current_buffer->enable_multibyte_characters))
338 return bytepos - 1;
340 DEC_POS (bytepos);
341 return bytepos;
344 /* Return a defun-start position before before POS and not too far before.
345 It should be the last one before POS, or nearly the last.
347 When open_paren_in_column_0_is_defun_start is nonzero,
348 the beginning of every line is treated as a defun-start.
350 We record the information about where the scan started
351 and what its result was, so that another call in the same area
352 can return the same value very quickly.
354 There is no promise at which position the global syntax data is
355 valid on return from the subroutine, so the caller should explicitly
356 update the global data. */
358 static int
359 find_defun_start (pos, pos_byte)
360 int pos, pos_byte;
362 int opoint = PT, opoint_byte = PT_BYTE;
364 /* Use previous finding, if it's valid and applies to this inquiry. */
365 if (current_buffer == find_start_buffer
366 /* Reuse the defun-start even if POS is a little farther on.
367 POS might be in the next defun, but that's ok.
368 Our value may not be the best possible, but will still be usable. */
369 && pos <= find_start_pos + 1000
370 && pos >= find_start_value
371 && BEGV == find_start_begv
372 && MODIFF == find_start_modiff)
373 return find_start_value;
375 /* Back up to start of line. */
376 scan_newline (pos, pos_byte, BEGV, BEGV_BYTE, -1, 1);
378 /* We optimize syntax-table lookup for rare updates. Thus we accept
379 only those `^\s(' which are good in global _and_ text-property
380 syntax-tables. */
381 gl_state.current_syntax_table = current_buffer->syntax_table;
382 gl_state.use_global = 0;
383 if (open_paren_in_column_0_is_defun_start)
385 while (PT > BEGV)
387 /* Open-paren at start of line means we may have found our
388 defun-start. */
389 if (SYNTAX (FETCH_CHAR (PT_BYTE)) == Sopen)
391 SETUP_SYNTAX_TABLE (PT + 1, -1); /* Try again... */
392 if (SYNTAX (FETCH_CHAR (PT_BYTE)) == Sopen)
393 break;
394 /* Now fallback to the default value. */
395 gl_state.current_syntax_table = current_buffer->syntax_table;
396 gl_state.use_global = 0;
398 /* Move to beg of previous line. */
399 scan_newline (PT, PT_BYTE, BEGV, BEGV_BYTE, -2, 1);
403 /* Record what we found, for the next try. */
404 find_start_value = PT;
405 find_start_value_byte = PT_BYTE;
406 find_start_buffer = current_buffer;
407 find_start_modiff = MODIFF;
408 find_start_begv = BEGV;
409 find_start_pos = pos;
411 TEMP_SET_PT_BOTH (opoint, opoint_byte);
413 return find_start_value;
416 /* Return the SYNTAX_COMEND_FIRST of the character before POS, POS_BYTE. */
418 static int
419 prev_char_comend_first (pos, pos_byte)
420 int pos, pos_byte;
422 int c, val;
424 DEC_BOTH (pos, pos_byte);
425 UPDATE_SYNTAX_TABLE_BACKWARD (pos);
426 c = FETCH_CHAR (pos_byte);
427 val = SYNTAX_COMEND_FIRST (c);
428 UPDATE_SYNTAX_TABLE_FORWARD (pos + 1);
429 return val;
432 /* Return the SYNTAX_COMSTART_FIRST of the character before POS, POS_BYTE. */
434 /* static int
435 * prev_char_comstart_first (pos, pos_byte)
436 * int pos, pos_byte;
438 * int c, val;
440 * DEC_BOTH (pos, pos_byte);
441 * UPDATE_SYNTAX_TABLE_BACKWARD (pos);
442 * c = FETCH_CHAR (pos_byte);
443 * val = SYNTAX_COMSTART_FIRST (c);
444 * UPDATE_SYNTAX_TABLE_FORWARD (pos + 1);
445 * return val;
446 * } */
448 /* Checks whether charpos FROM is at the end of a comment.
449 FROM_BYTE is the bytepos corresponding to FROM.
450 Do not move back before STOP.
452 Return a positive value if we find a comment ending at FROM/FROM_BYTE;
453 return -1 otherwise.
455 If successful, store the charpos of the comment's beginning
456 into *CHARPOS_PTR, and the bytepos into *BYTEPOS_PTR.
458 Global syntax data remains valid for backward search starting at
459 the returned value (or at FROM, if the search was not successful). */
461 static int
462 back_comment (from, from_byte, stop, comnested, comstyle, charpos_ptr, bytepos_ptr)
463 int from, from_byte, stop;
464 int comnested, comstyle;
465 int *charpos_ptr, *bytepos_ptr;
467 /* Look back, counting the parity of string-quotes,
468 and recording the comment-starters seen.
469 When we reach a safe place, assume that's not in a string;
470 then step the main scan to the earliest comment-starter seen
471 an even number of string quotes away from the safe place.
473 OFROM[I] is position of the earliest comment-starter seen
474 which is I+2X quotes from the comment-end.
475 PARITY is current parity of quotes from the comment end. */
476 int string_style = -1; /* Presumed outside of any string. */
477 int string_lossage = 0;
478 /* Not a real lossage: indicates that we have passed a matching comment
479 starter plus an non-matching comment-ender, meaning that any matching
480 comment-starter we might see later could be a false positive (hidden
481 inside another comment).
482 Test case: { a (* b } c (* d *) */
483 int comment_lossage = 0;
484 int comment_end = from;
485 int comment_end_byte = from_byte;
486 int comstart_pos = 0;
487 int comstart_byte;
488 /* Place where the containing defun starts,
489 or 0 if we didn't come across it yet. */
490 int defun_start = 0;
491 int defun_start_byte = 0;
492 register enum syntaxcode code;
493 int nesting = 1; /* current comment nesting */
494 int c;
495 int syntax = 0;
497 /* FIXME: A }} comment-ender style leads to incorrect behavior
498 in the case of {{ c }}} because we ignore the last two chars which are
499 assumed to be comment-enders although they aren't. */
501 /* At beginning of range to scan, we're outside of strings;
502 that determines quote parity to the comment-end. */
503 while (from != stop)
505 int temp_byte, prev_syntax;
506 int com2start, com2end;
508 /* Move back and examine a character. */
509 DEC_BOTH (from, from_byte);
510 UPDATE_SYNTAX_TABLE_BACKWARD (from);
512 prev_syntax = syntax;
513 c = FETCH_CHAR (from_byte);
514 syntax = SYNTAX_WITH_FLAGS (c);
515 code = SYNTAX (c);
517 /* Check for 2-char comment markers. */
518 com2start = (SYNTAX_FLAGS_COMSTART_FIRST (syntax)
519 && SYNTAX_FLAGS_COMSTART_SECOND (prev_syntax)
520 && comstyle == SYNTAX_FLAGS_COMMENT_STYLE (prev_syntax)
521 && (SYNTAX_FLAGS_COMMENT_NESTED (prev_syntax)
522 || SYNTAX_FLAGS_COMMENT_NESTED (syntax)) == comnested);
523 com2end = (SYNTAX_FLAGS_COMEND_FIRST (syntax)
524 && SYNTAX_FLAGS_COMEND_SECOND (prev_syntax));
526 /* Nasty cases with overlapping 2-char comment markers:
527 - snmp-mode: -- c -- foo -- c --
528 --- c --
529 ------ c --
530 - c-mode: *||*
531 |* *|* *|
532 |*| |* |*|
533 /// */
535 /* If a 2-char comment sequence partly overlaps with another,
536 we don't try to be clever. */
537 if (from > stop && (com2end || com2start))
539 int next = from, next_byte = from_byte, next_c, next_syntax;
540 DEC_BOTH (next, next_byte);
541 UPDATE_SYNTAX_TABLE_BACKWARD (next);
542 next_c = FETCH_CHAR (next_byte);
543 next_syntax = SYNTAX_WITH_FLAGS (next_c);
544 if (((com2start || comnested)
545 && SYNTAX_FLAGS_COMEND_SECOND (syntax)
546 && SYNTAX_FLAGS_COMEND_FIRST (next_syntax))
547 || ((com2end || comnested)
548 && SYNTAX_FLAGS_COMSTART_SECOND (syntax)
549 && comstyle == SYNTAX_FLAGS_COMMENT_STYLE (syntax)
550 && SYNTAX_FLAGS_COMSTART_FIRST (next_syntax)))
551 goto lossage;
552 /* UPDATE_SYNTAX_TABLE_FORWARD (next + 1); */
555 if (com2start && comstart_pos == 0)
556 /* We're looking at a comment starter. But it might be a comment
557 ender as well (see snmp-mode). The first time we see one, we
558 need to consider it as a comment starter,
559 and the subsequent times as a comment ender. */
560 com2end = 0;
562 /* Turn a 2-char comment sequences into the appropriate syntax. */
563 if (com2end)
564 code = Sendcomment;
565 else if (com2start)
566 code = Scomment;
567 /* Ignore comment starters of a different style. */
568 else if (code == Scomment
569 && (comstyle != SYNTAX_FLAGS_COMMENT_STYLE (syntax)
570 || SYNTAX_FLAGS_COMMENT_NESTED (syntax) != comnested))
571 continue;
573 /* Ignore escaped characters, except comment-enders. */
574 if (code != Sendcomment && char_quoted (from, from_byte))
575 continue;
577 switch (code)
579 case Sstring_fence:
580 case Scomment_fence:
581 c = (code == Sstring_fence ? ST_STRING_STYLE : ST_COMMENT_STYLE);
582 case Sstring:
583 /* Track parity of quotes. */
584 if (string_style == -1)
585 /* Entering a string. */
586 string_style = c;
587 else if (string_style == c)
588 /* Leaving the string. */
589 string_style = -1;
590 else
591 /* If we have two kinds of string delimiters.
592 There's no way to grok this scanning backwards. */
593 string_lossage = 1;
594 break;
596 case Scomment:
597 /* We've already checked that it is the relevant comstyle. */
598 if (string_style != -1 || comment_lossage || string_lossage)
599 /* There are odd string quotes involved, so let's be careful.
600 Test case in Pascal: " { " a { " } */
601 goto lossage;
603 if (!comnested)
605 /* Record best comment-starter so far. */
606 comstart_pos = from;
607 comstart_byte = from_byte;
609 else if (--nesting <= 0)
610 /* nested comments have to be balanced, so we don't need to
611 keep looking for earlier ones. We use here the same (slightly
612 incorrect) reasoning as below: since it is followed by uniform
613 paired string quotes, this comment-start has to be outside of
614 strings, else the comment-end itself would be inside a string. */
615 goto done;
616 break;
618 case Sendcomment:
619 if (SYNTAX_FLAGS_COMMENT_STYLE (syntax) == comstyle
620 && (SYNTAX_FLAGS_COMMENT_NESTED (prev_syntax)
621 || SYNTAX_FLAGS_COMMENT_NESTED (syntax)) == comnested)
622 /* This is the same style of comment ender as ours. */
624 if (comnested)
625 nesting++;
626 else
627 /* Anything before that can't count because it would match
628 this comment-ender rather than ours. */
629 from = stop; /* Break out of the loop. */
631 else if (comstart_pos != 0 || c != '\n')
632 /* We're mixing comment styles here, so we'd better be careful.
633 The (comstart_pos != 0 || c != '\n') check is not quite correct
634 (we should just always set comment_lossage), but removing it
635 would imply that any multiline comment in C would go through
636 lossage, which seems overkill.
637 The failure should only happen in the rare cases such as
638 { (* } *) */
639 comment_lossage = 1;
640 break;
642 case Sopen:
643 /* Assume a defun-start point is outside of strings. */
644 if (open_paren_in_column_0_is_defun_start
645 && (from == stop
646 || (temp_byte = dec_bytepos (from_byte),
647 FETCH_CHAR (temp_byte) == '\n')))
649 defun_start = from;
650 defun_start_byte = from_byte;
651 from = stop; /* Break out of the loop. */
653 break;
655 default:
656 break;
660 if (comstart_pos == 0)
662 from = comment_end;
663 from_byte = comment_end_byte;
664 UPDATE_SYNTAX_TABLE_FORWARD (comment_end - 1);
666 /* If comstart_pos is set and we get here (ie. didn't jump to `lossage'
667 or `done'), then we've found the beginning of the non-nested comment. */
668 else if (1) /* !comnested */
670 from = comstart_pos;
671 from_byte = comstart_byte;
672 /* Globals are correct now. */
674 else
676 struct lisp_parse_state state;
677 lossage:
678 /* We had two kinds of string delimiters mixed up
679 together. Decode this going forwards.
680 Scan fwd from a known safe place (beginning-of-defun)
681 to the one in question; this records where we
682 last passed a comment starter. */
683 /* If we did not already find the defun start, find it now. */
684 if (defun_start == 0)
686 defun_start = find_defun_start (comment_end, comment_end_byte);
687 defun_start_byte = find_start_value_byte;
691 scan_sexps_forward (&state,
692 defun_start, defun_start_byte,
693 comment_end, -10000, 0, Qnil, 0);
694 defun_start = comment_end;
695 if (state.incomment == (comnested ? 1 : -1)
696 && state.comstyle == comstyle)
697 from = state.comstr_start;
698 else
700 from = comment_end;
701 if (state.incomment)
702 /* If comment_end is inside some other comment, maybe ours
703 is nested, so we need to try again from within the
704 surrounding comment. Example: { a (* " *) */
706 /* FIXME: We should advance by one or two chars. */
707 defun_start = state.comstr_start + 2;
708 defun_start_byte = CHAR_TO_BYTE (defun_start);
711 } while (defun_start < comment_end);
713 from_byte = CHAR_TO_BYTE (from);
714 UPDATE_SYNTAX_TABLE_FORWARD (from - 1);
717 done:
718 *charpos_ptr = from;
719 *bytepos_ptr = from_byte;
721 return (from == comment_end) ? -1 : from;
724 DEFUN ("syntax-table-p", Fsyntax_table_p, Ssyntax_table_p, 1, 1, 0,
725 "Return t if OBJECT is a syntax table.\n\
726 Currently, any char-table counts as a syntax table.")
727 (object)
728 Lisp_Object object;
730 if (CHAR_TABLE_P (object)
731 && EQ (XCHAR_TABLE (object)->purpose, Qsyntax_table))
732 return Qt;
733 return Qnil;
736 static void
737 check_syntax_table (obj)
738 Lisp_Object obj;
740 if (!(CHAR_TABLE_P (obj)
741 && EQ (XCHAR_TABLE (obj)->purpose, Qsyntax_table)))
742 wrong_type_argument (Qsyntax_table_p, obj);
745 DEFUN ("syntax-table", Fsyntax_table, Ssyntax_table, 0, 0, 0,
746 "Return the current syntax table.\n\
747 This is the one specified by the current buffer.")
750 return current_buffer->syntax_table;
753 DEFUN ("standard-syntax-table", Fstandard_syntax_table,
754 Sstandard_syntax_table, 0, 0, 0,
755 "Return the standard syntax table.\n\
756 This is the one used for new buffers.")
759 return Vstandard_syntax_table;
762 DEFUN ("copy-syntax-table", Fcopy_syntax_table, Scopy_syntax_table, 0, 1, 0,
763 "Construct a new syntax table and return it.\n\
764 It is a copy of the TABLE, which defaults to the standard syntax table.")
765 (table)
766 Lisp_Object table;
768 Lisp_Object copy;
770 if (!NILP (table))
771 check_syntax_table (table);
772 else
773 table = Vstandard_syntax_table;
775 copy = Fcopy_sequence (table);
777 /* Only the standard syntax table should have a default element.
778 Other syntax tables should inherit from parents instead. */
779 XCHAR_TABLE (copy)->defalt = Qnil;
781 /* Copied syntax tables should all have parents.
782 If we copied one with no parent, such as the standard syntax table,
783 use the standard syntax table as the copy's parent. */
784 if (NILP (XCHAR_TABLE (copy)->parent))
785 Fset_char_table_parent (copy, Vstandard_syntax_table);
786 return copy;
789 DEFUN ("set-syntax-table", Fset_syntax_table, Sset_syntax_table, 1, 1, 0,
790 "Select a new syntax table for the current buffer.\n\
791 One argument, a syntax table.")
792 (table)
793 Lisp_Object table;
795 int idx;
796 check_syntax_table (table);
797 current_buffer->syntax_table = table;
798 /* Indicate that this buffer now has a specified syntax table. */
799 idx = PER_BUFFER_VAR_IDX (syntax_table);
800 SET_PER_BUFFER_VALUE_P (current_buffer, idx, 1);
801 return table;
804 /* Convert a letter which signifies a syntax code
805 into the code it signifies.
806 This is used by modify-syntax-entry, and other things. */
808 unsigned char syntax_spec_code[0400] =
809 { 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
810 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
811 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
812 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
813 (char) Swhitespace, (char) Scomment_fence, (char) Sstring, 0377,
814 (char) Smath, 0377, 0377, (char) Squote,
815 (char) Sopen, (char) Sclose, 0377, 0377,
816 0377, (char) Swhitespace, (char) Spunct, (char) Scharquote,
817 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
818 0377, 0377, 0377, 0377,
819 (char) Scomment, 0377, (char) Sendcomment, 0377,
820 (char) Sinherit, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* @, A ... */
821 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
822 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
823 0377, 0377, 0377, 0377, (char) Sescape, 0377, 0377, (char) Ssymbol,
824 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* `, a, ... */
825 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
826 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
827 0377, 0377, 0377, 0377, (char) Sstring_fence, 0377, 0377, 0377
830 /* Indexed by syntax code, give the letter that describes it. */
832 char syntax_code_spec[16] =
834 ' ', '.', 'w', '_', '(', ')', '\'', '\"', '$', '\\', '/', '<', '>', '@',
835 '!', '|'
838 /* Indexed by syntax code, give the object (cons of syntax code and
839 nil) to be stored in syntax table. Since these objects can be
840 shared among syntax tables, we generate them in advance. By
841 sharing objects, the function `describe-syntax' can give a more
842 compact listing. */
843 static Lisp_Object Vsyntax_code_object;
846 /* Look up the value for CHARACTER in syntax table TABLE's parent
847 and its parents. SYNTAX_ENTRY calls this, when TABLE itself has nil
848 for CHARACTER. It's actually used only when not compiled with GCC. */
850 Lisp_Object
851 syntax_parent_lookup (table, character)
852 Lisp_Object table;
853 int character;
855 Lisp_Object value;
857 while (1)
859 table = XCHAR_TABLE (table)->parent;
860 if (NILP (table))
861 return Qnil;
863 value = XCHAR_TABLE (table)->contents[character];
864 if (!NILP (value))
865 return value;
869 DEFUN ("char-syntax", Fchar_syntax, Schar_syntax, 1, 1, 0,
870 "Return the syntax code of CHARACTER, described by a character.\n\
871 For example, if CHARACTER is a word constituent,\n\
872 the character `w' is returned.\n\
873 The characters that correspond to various syntax codes\n\
874 are listed in the documentation of `modify-syntax-entry'.")
875 (character)
876 Lisp_Object character;
878 int char_int;
879 gl_state.current_syntax_table = current_buffer->syntax_table;
881 gl_state.use_global = 0;
882 CHECK_NUMBER (character, 0);
883 char_int = XINT (character);
884 return make_number (syntax_code_spec[(int) SYNTAX (char_int)]);
887 DEFUN ("matching-paren", Fmatching_paren, Smatching_paren, 1, 1, 0,
888 "Return the matching parenthesis of CHARACTER, or nil if none.")
889 (character)
890 Lisp_Object character;
892 int char_int, code;
893 gl_state.current_syntax_table = current_buffer->syntax_table;
894 gl_state.use_global = 0;
895 CHECK_NUMBER (character, 0);
896 char_int = XINT (character);
897 code = SYNTAX (char_int);
898 if (code == Sopen || code == Sclose)
899 return SYNTAX_MATCH (char_int);
900 return Qnil;
903 DEFUN ("string-to-syntax", Fstring_to_syntax, Sstring_to_syntax, 1, 1, 0,
904 "Convert a syntax specification STRING into syntax cell form.\n\
905 STRING should be a string as it is allowed as argument of\n\
906 `modify-syntax-entry'. Value is the equivalent cons cell\n\
907 \(CODE . MATCHING-CHAR) that can be used as value of a `syntax-table'\n\
908 text property.")
909 (string)
910 Lisp_Object string;
912 register unsigned char *p;
913 register enum syntaxcode code;
914 int val;
915 Lisp_Object match;
917 CHECK_STRING (string, 0);
919 p = XSTRING (string)->data;
920 code = (enum syntaxcode) syntax_spec_code[*p++];
921 if (((int) code & 0377) == 0377)
922 error ("invalid syntax description letter: %c", p[-1]);
924 if (code == Sinherit)
925 return Qnil;
927 if (*p)
929 int len;
930 int character = (STRING_CHAR_AND_LENGTH
931 (p, STRING_BYTES (XSTRING (string)) - 1, len));
932 XSETINT (match, character);
933 if (XFASTINT (match) == ' ')
934 match = Qnil;
935 p += len;
937 else
938 match = Qnil;
940 val = (int) code;
941 while (*p)
942 switch (*p++)
944 case '1':
945 val |= 1 << 16;
946 break;
948 case '2':
949 val |= 1 << 17;
950 break;
952 case '3':
953 val |= 1 << 18;
954 break;
956 case '4':
957 val |= 1 << 19;
958 break;
960 case 'p':
961 val |= 1 << 20;
962 break;
964 case 'b':
965 val |= 1 << 21;
966 break;
968 case 'n':
969 val |= 1 << 22;
970 break;
973 if (val < XVECTOR (Vsyntax_code_object)->size && NILP (match))
974 return XVECTOR (Vsyntax_code_object)->contents[val];
975 else
976 /* Since we can't use a shared object, let's make a new one. */
977 return Fcons (make_number (val), match);
980 /* This comment supplies the doc string for modify-syntax-entry,
981 for make-docfile to see. We cannot put this in the real DEFUN
982 due to limits in the Unix cpp.
984 DEFUN ("modify-syntax-entry", foo, bar, 2, 3, 0,
985 "Set syntax for character CHAR according to string S.\n\
986 The syntax is changed only for table TABLE, which defaults to\n\
987 the current buffer's syntax table.\n\
988 The first character of S should be one of the following:\n\
989 Space or - whitespace syntax. w word constituent.\n\
990 _ symbol constituent. . punctuation.\n\
991 ( open-parenthesis. ) close-parenthesis.\n\
992 \" string quote. \\ escape.\n\
993 $ paired delimiter. ' expression quote or prefix operator.\n\
994 < comment starter. > comment ender.\n\
995 / character-quote. @ inherit from `standard-syntax-table'.\n\
996 | generic string fence. ! generic comment fence.\n\
998 Only single-character comment start and end sequences are represented thus.\n\
999 Two-character sequences are represented as described below.\n\
1000 The second character of S is the matching parenthesis,\n\
1001 used only if the first character is `(' or `)'.\n\
1002 Any additional characters are flags.\n\
1003 Defined flags are the characters 1, 2, 3, 4, b, p, and n.\n\
1004 1 means CHAR is the start of a two-char comment start sequence.\n\
1005 2 means CHAR is the second character of such a sequence.\n\
1006 3 means CHAR is the start of a two-char comment end sequence.\n\
1007 4 means CHAR is the second character of such a sequence.\n\
1009 There can be up to two orthogonal comment sequences. This is to support\n\
1010 language modes such as C++. By default, all comment sequences are of style\n\
1011 a, but you can set the comment sequence style to b (on the second character\n\
1012 of a comment-start, or the first character of a comment-end sequence) using\n\
1013 this flag:\n\
1014 b means CHAR is part of comment sequence b.\n\
1015 n means CHAR is part of a nestable comment sequence.\n\
1017 p means CHAR is a prefix character for `backward-prefix-chars';\n\
1018 such characters are treated as whitespace when they occur\n\
1019 between expressions.")
1020 (char, s, table)
1023 DEFUN ("modify-syntax-entry", Fmodify_syntax_entry, Smodify_syntax_entry, 2, 3,
1024 /* I really don't know why this is interactive
1025 help-form should at least be made useful whilst reading the second arg
1027 "cSet syntax for character: \nsSet syntax for %s to: ",
1028 0 /* See immediately above */)
1029 (c, newentry, syntax_table)
1030 Lisp_Object c, newentry, syntax_table;
1032 CHECK_NUMBER (c, 0);
1034 if (NILP (syntax_table))
1035 syntax_table = current_buffer->syntax_table;
1036 else
1037 check_syntax_table (syntax_table);
1039 SET_RAW_SYNTAX_ENTRY (syntax_table, XINT (c), Fstring_to_syntax (newentry));
1040 return Qnil;
1043 /* Dump syntax table to buffer in human-readable format */
1045 static void
1046 describe_syntax (value)
1047 Lisp_Object value;
1049 register enum syntaxcode code;
1050 char desc, start1, start2, end1, end2, prefix, comstyle, comnested;
1051 char str[2];
1052 Lisp_Object first, match_lisp;
1054 Findent_to (make_number (16), make_number (1));
1056 if (NILP (value))
1058 insert_string ("default\n");
1059 return;
1062 if (CHAR_TABLE_P (value))
1064 insert_string ("deeper char-table ...\n");
1065 return;
1068 if (!CONSP (value))
1070 insert_string ("invalid\n");
1071 return;
1074 first = XCAR (value);
1075 match_lisp = XCDR (value);
1077 if (!INTEGERP (first) || !(NILP (match_lisp) || INTEGERP (match_lisp)))
1079 insert_string ("invalid\n");
1080 return;
1083 code = (enum syntaxcode) (XINT (first) & 0377);
1084 start1 = (XINT (first) >> 16) & 1;
1085 start2 = (XINT (first) >> 17) & 1;
1086 end1 = (XINT (first) >> 18) & 1;
1087 end2 = (XINT (first) >> 19) & 1;
1088 prefix = (XINT (first) >> 20) & 1;
1089 comstyle = (XINT (first) >> 21) & 1;
1090 comnested = (XINT (first) >> 22) & 1;
1092 if ((int) code < 0 || (int) code >= (int) Smax)
1094 insert_string ("invalid");
1095 return;
1097 desc = syntax_code_spec[(int) code];
1099 str[0] = desc, str[1] = 0;
1100 insert (str, 1);
1102 if (NILP (match_lisp))
1103 insert (" ", 1);
1104 else
1105 insert_char (XINT (match_lisp));
1107 if (start1)
1108 insert ("1", 1);
1109 if (start2)
1110 insert ("2", 1);
1112 if (end1)
1113 insert ("3", 1);
1114 if (end2)
1115 insert ("4", 1);
1117 if (prefix)
1118 insert ("p", 1);
1119 if (comstyle)
1120 insert ("b", 1);
1121 if (comnested)
1122 insert ("n", 1);
1124 insert_string ("\twhich means: ");
1126 switch (SWITCH_ENUM_CAST (code))
1128 case Swhitespace:
1129 insert_string ("whitespace"); break;
1130 case Spunct:
1131 insert_string ("punctuation"); break;
1132 case Sword:
1133 insert_string ("word"); break;
1134 case Ssymbol:
1135 insert_string ("symbol"); break;
1136 case Sopen:
1137 insert_string ("open"); break;
1138 case Sclose:
1139 insert_string ("close"); break;
1140 case Squote:
1141 insert_string ("prefix"); break;
1142 case Sstring:
1143 insert_string ("string"); break;
1144 case Smath:
1145 insert_string ("math"); break;
1146 case Sescape:
1147 insert_string ("escape"); break;
1148 case Scharquote:
1149 insert_string ("charquote"); break;
1150 case Scomment:
1151 insert_string ("comment"); break;
1152 case Sendcomment:
1153 insert_string ("endcomment"); break;
1154 case Sinherit:
1155 insert_string ("inherit"); break;
1156 case Scomment_fence:
1157 insert_string ("comment fence"); break;
1158 case Sstring_fence:
1159 insert_string ("string fence"); break;
1160 default:
1161 insert_string ("invalid");
1162 return;
1165 if (!NILP (match_lisp))
1167 insert_string (", matches ");
1168 insert_char (XINT (match_lisp));
1171 if (start1)
1172 insert_string (",\n\t is the first character of a comment-start sequence");
1173 if (start2)
1174 insert_string (",\n\t is the second character of a comment-start sequence");
1176 if (end1)
1177 insert_string (",\n\t is the first character of a comment-end sequence");
1178 if (end2)
1179 insert_string (",\n\t is the second character of a comment-end sequence");
1180 if (comstyle)
1181 insert_string (" (comment style b)");
1182 if (comnested)
1183 insert_string (" (nestable)");
1185 if (prefix)
1186 insert_string (",\n\t is a prefix character for `backward-prefix-chars'");
1188 insert_string ("\n");
1191 static Lisp_Object
1192 describe_syntax_1 (vector)
1193 Lisp_Object vector;
1195 struct buffer *old = current_buffer;
1196 set_buffer_internal (XBUFFER (Vstandard_output));
1197 describe_vector (vector, Qnil, describe_syntax, 0, Qnil, Qnil, (int *) 0, 0);
1198 while (! NILP (XCHAR_TABLE (vector)->parent))
1200 vector = XCHAR_TABLE (vector)->parent;
1201 insert_string ("\nThe parent syntax table is:");
1202 describe_vector (vector, Qnil, describe_syntax, 0, Qnil, Qnil,
1203 (int *) 0, 0);
1206 call0 (intern ("help-mode"));
1207 set_buffer_internal (old);
1208 return Qnil;
1211 DEFUN ("describe-syntax", Fdescribe_syntax, Sdescribe_syntax, 0, 0, "",
1212 "Describe the syntax specifications in the syntax table.\n\
1213 The descriptions are inserted in a buffer, which is then displayed.")
1216 internal_with_output_to_temp_buffer
1217 ("*Help*", describe_syntax_1, current_buffer->syntax_table);
1219 return Qnil;
1222 int parse_sexp_ignore_comments;
1224 /* Return the position across COUNT words from FROM.
1225 If that many words cannot be found before the end of the buffer, return 0.
1226 COUNT negative means scan backward and stop at word beginning. */
1229 scan_words (from, count)
1230 register int from, count;
1232 register int beg = BEGV;
1233 register int end = ZV;
1234 register int from_byte = CHAR_TO_BYTE (from);
1235 register enum syntaxcode code;
1236 int ch0, ch1;
1238 immediate_quit = 1;
1239 QUIT;
1241 SETUP_SYNTAX_TABLE (from, count);
1243 while (count > 0)
1245 while (1)
1247 if (from == end)
1249 immediate_quit = 0;
1250 return 0;
1252 UPDATE_SYNTAX_TABLE_FORWARD (from);
1253 ch0 = FETCH_CHAR (from_byte);
1254 code = SYNTAX (ch0);
1255 INC_BOTH (from, from_byte);
1256 if (words_include_escapes
1257 && (code == Sescape || code == Scharquote))
1258 break;
1259 if (code == Sword)
1260 break;
1262 /* Now CH0 is a character which begins a word and FROM is the
1263 position of the next character. */
1264 while (1)
1266 if (from == end) break;
1267 UPDATE_SYNTAX_TABLE_FORWARD (from);
1268 ch1 = FETCH_CHAR (from_byte);
1269 code = SYNTAX (ch1);
1270 if (!(words_include_escapes
1271 && (code == Sescape || code == Scharquote)))
1272 if (code != Sword || WORD_BOUNDARY_P (ch0, ch1))
1273 break;
1274 INC_BOTH (from, from_byte);
1275 ch0 = ch1;
1277 count--;
1279 while (count < 0)
1281 while (1)
1283 if (from == beg)
1285 immediate_quit = 0;
1286 return 0;
1288 DEC_BOTH (from, from_byte);
1289 UPDATE_SYNTAX_TABLE_BACKWARD (from);
1290 ch1 = FETCH_CHAR (from_byte);
1291 code = SYNTAX (ch1);
1292 if (words_include_escapes
1293 && (code == Sescape || code == Scharquote))
1294 break;
1295 if (code == Sword)
1296 break;
1298 /* Now CH1 is a character which ends a word and FROM is the
1299 position of it. */
1300 while (1)
1302 int temp_byte;
1304 if (from == beg)
1305 break;
1306 temp_byte = dec_bytepos (from_byte);
1307 UPDATE_SYNTAX_TABLE_BACKWARD (from);
1308 ch0 = FETCH_CHAR (temp_byte);
1309 code = SYNTAX (ch0);
1310 if (!(words_include_escapes
1311 && (code == Sescape || code == Scharquote)))
1312 if (code != Sword || WORD_BOUNDARY_P (ch0, ch1))
1313 break;
1314 DEC_BOTH (from, from_byte);
1315 ch1 = ch0;
1317 count++;
1320 immediate_quit = 0;
1322 return from;
1325 DEFUN ("forward-word", Fforward_word, Sforward_word, 1, 1, "p",
1326 "Move point forward ARG words (backward if ARG is negative).\n\
1327 Normally returns t.\n\
1328 If an edge of the buffer or a field boundary is reached, point is left there\n\
1329 and the function returns nil. Field boundaries are not noticed if\n\
1330 `inhibit-field-text-motion' is non-nil.")
1331 (count)
1332 Lisp_Object count;
1334 int orig_val, val;
1335 CHECK_NUMBER (count, 0);
1337 val = orig_val = scan_words (PT, XINT (count));
1338 if (! orig_val)
1339 val = XINT (count) > 0 ? ZV : BEGV;
1341 /* Avoid jumping out of an input field. */
1342 val = XFASTINT (Fconstrain_to_field (make_number (val), make_number (PT),
1343 Qt, Qnil, Qnil));
1345 SET_PT (val);
1346 return val == orig_val ? Qt : Qnil;
1349 Lisp_Object skip_chars ();
1351 DEFUN ("skip-chars-forward", Fskip_chars_forward, Sskip_chars_forward, 1, 2, 0,
1352 "Move point forward, stopping before a char not in STRING, or at pos LIM.\n\
1353 STRING is like the inside of a `[...]' in a regular expression\n\
1354 except that `]' is never special and `\\' quotes `^', `-' or `\\'\n\
1355 (but not as the end of a range; quoting is never needed there).\n\
1356 Thus, with arg \"a-zA-Z\", this skips letters stopping before first nonletter.\n\
1357 With arg \"^a-zA-Z\", skips nonletters stopping before first letter.\n\
1358 Returns the distance traveled, either zero or positive.")
1359 (string, lim)
1360 Lisp_Object string, lim;
1362 return skip_chars (1, 0, string, lim);
1365 DEFUN ("skip-chars-backward", Fskip_chars_backward, Sskip_chars_backward, 1, 2, 0,
1366 "Move point backward, stopping after a char not in STRING, or at pos LIM.\n\
1367 See `skip-chars-forward' for details.\n\
1368 Returns the distance traveled, either zero or negative.")
1369 (string, lim)
1370 Lisp_Object string, lim;
1372 return skip_chars (0, 0, string, lim);
1375 DEFUN ("skip-syntax-forward", Fskip_syntax_forward, Sskip_syntax_forward, 1, 2, 0,
1376 "Move point forward across chars in specified syntax classes.\n\
1377 SYNTAX is a string of syntax code characters.\n\
1378 Stop before a char whose syntax is not in SYNTAX, or at position LIM.\n\
1379 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.\n\
1380 This function returns the distance traveled, either zero or positive.")
1381 (syntax, lim)
1382 Lisp_Object syntax, lim;
1384 return skip_chars (1, 1, syntax, lim);
1387 DEFUN ("skip-syntax-backward", Fskip_syntax_backward, Sskip_syntax_backward, 1, 2, 0,
1388 "Move point backward across chars in specified syntax classes.\n\
1389 SYNTAX is a string of syntax code characters.\n\
1390 Stop on reaching a char whose syntax is not in SYNTAX, or at position LIM.\n\
1391 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.\n\
1392 This function returns the distance traveled, either zero or negative.")
1393 (syntax, lim)
1394 Lisp_Object syntax, lim;
1396 return skip_chars (0, 1, syntax, lim);
1399 static Lisp_Object
1400 skip_chars (forwardp, syntaxp, string, lim)
1401 int forwardp, syntaxp;
1402 Lisp_Object string, lim;
1404 register unsigned int c;
1405 unsigned char fastmap[0400];
1406 /* If SYNTAXP is 0, STRING may contain multi-byte form of characters
1407 of which codes don't fit in FASTMAP. In that case, set the
1408 ranges of characters in CHAR_RANGES. */
1409 int *char_ranges;
1410 int n_char_ranges = 0;
1411 int negate = 0;
1412 register int i, i_byte;
1413 int multibyte = !NILP (current_buffer->enable_multibyte_characters);
1414 int string_multibyte;
1415 int size_byte;
1416 unsigned char *str;
1417 int len;
1419 CHECK_STRING (string, 0);
1420 char_ranges = (int *) alloca (XSTRING (string)->size * (sizeof (int)) * 2);
1421 string_multibyte = STRING_MULTIBYTE (string);
1422 str = XSTRING (string)->data;
1423 size_byte = STRING_BYTES (XSTRING (string));
1425 /* Adjust the multibyteness of the string to that of the buffer. */
1426 if (multibyte != string_multibyte)
1428 int nbytes;
1430 if (multibyte)
1431 nbytes = count_size_as_multibyte (XSTRING (string)->data,
1432 XSTRING (string)->size);
1433 else
1434 nbytes = XSTRING (string)->size;
1435 if (nbytes != size_byte)
1437 str = (unsigned char *) alloca (nbytes);
1438 copy_text (XSTRING (string)->data, str, size_byte,
1439 string_multibyte, multibyte);
1440 size_byte = nbytes;
1444 if (NILP (lim))
1445 XSETINT (lim, forwardp ? ZV : BEGV);
1446 else
1447 CHECK_NUMBER_COERCE_MARKER (lim, 0);
1449 /* In any case, don't allow scan outside bounds of buffer. */
1450 if (XINT (lim) > ZV)
1451 XSETFASTINT (lim, ZV);
1452 if (XINT (lim) < BEGV)
1453 XSETFASTINT (lim, BEGV);
1455 bzero (fastmap, sizeof fastmap);
1457 i_byte = 0;
1459 if (i_byte < size_byte
1460 && XSTRING (string)->data[0] == '^')
1462 negate = 1; i_byte++;
1465 /* Find the characters specified and set their elements of fastmap.
1466 If syntaxp, each character counts as itself.
1467 Otherwise, handle backslashes and ranges specially. */
1469 while (i_byte < size_byte)
1471 c = STRING_CHAR_AND_LENGTH (str + i_byte, size_byte - i_byte, len);
1472 i_byte += len;
1474 if (syntaxp)
1475 fastmap[syntax_spec_code[c & 0377]] = 1;
1476 else
1478 if (c == '\\')
1480 if (i_byte == size_byte)
1481 break;
1483 c = STRING_CHAR_AND_LENGTH (str+i_byte, size_byte-i_byte, len);
1484 i_byte += len;
1486 if (i_byte < size_byte
1487 && str[i_byte] == '-')
1489 unsigned int c2;
1491 /* Skip over the dash. */
1492 i_byte++;
1494 if (i_byte == size_byte)
1495 break;
1497 /* Get the end of the range. */
1498 c2 =STRING_CHAR_AND_LENGTH (str+i_byte, size_byte-i_byte, len);
1499 i_byte += len;
1501 if (SINGLE_BYTE_CHAR_P (c))
1503 if (! SINGLE_BYTE_CHAR_P (c2))
1505 /* Handle a range starting with a character of
1506 less than 256, and ending with a character of
1507 not less than 256. Split that into two
1508 ranges, the low one ending at 0377, and the
1509 high one starting at the smallest character
1510 in the charset of C2 and ending at C2. */
1511 int charset = CHAR_CHARSET (c2);
1512 int c1 = MAKE_CHAR (charset, 0, 0);
1514 char_ranges[n_char_ranges++] = c1;
1515 char_ranges[n_char_ranges++] = c2;
1516 c2 = 0377;
1518 while (c <= c2)
1520 fastmap[c] = 1;
1521 c++;
1524 else if (c <= c2) /* Both C and C2 are multibyte char. */
1526 char_ranges[n_char_ranges++] = c;
1527 char_ranges[n_char_ranges++] = c2;
1530 else
1532 if (SINGLE_BYTE_CHAR_P (c))
1533 fastmap[c] = 1;
1534 else
1536 char_ranges[n_char_ranges++] = c;
1537 char_ranges[n_char_ranges++] = c;
1543 /* If ^ was the first character, complement the fastmap. */
1544 if (negate)
1545 for (i = 0; i < sizeof fastmap; i++)
1546 fastmap[i] ^= 1;
1549 int start_point = PT;
1550 int pos = PT;
1551 int pos_byte = PT_BYTE;
1553 immediate_quit = 1;
1554 if (syntaxp)
1556 SETUP_SYNTAX_TABLE (pos, forwardp ? 1 : -1);
1557 if (forwardp)
1559 if (multibyte)
1561 if (pos < XINT (lim))
1562 while (fastmap[(int) SYNTAX (FETCH_CHAR (pos_byte))])
1564 /* Since we already checked for multibyteness,
1565 avoid using INC_BOTH which checks again. */
1566 INC_POS (pos_byte);
1567 pos++;
1568 if (pos >= XINT (lim))
1569 break;
1570 UPDATE_SYNTAX_TABLE_FORWARD (pos);
1573 else
1575 while (pos < XINT (lim)
1576 && fastmap[(int) SYNTAX (FETCH_BYTE (pos))])
1578 pos++;
1579 UPDATE_SYNTAX_TABLE_FORWARD (pos);
1583 else
1585 if (multibyte)
1587 while (pos > XINT (lim))
1589 int savepos = pos_byte;
1590 /* Since we already checked for multibyteness,
1591 avoid using DEC_BOTH which checks again. */
1592 pos--;
1593 DEC_POS (pos_byte);
1594 UPDATE_SYNTAX_TABLE_BACKWARD (pos);
1595 if (!fastmap[(int) SYNTAX (FETCH_CHAR (pos_byte))])
1597 pos++;
1598 pos_byte = savepos;
1599 break;
1603 else
1605 if (pos > XINT (lim))
1606 while (fastmap[(int) SYNTAX (FETCH_BYTE (pos - 1))])
1608 pos--;
1609 if (pos <= XINT (lim))
1610 break;
1611 UPDATE_SYNTAX_TABLE_BACKWARD (pos - 1);
1616 else
1618 if (forwardp)
1620 if (multibyte)
1621 while (pos < XINT (lim))
1623 c = FETCH_MULTIBYTE_CHAR (pos_byte);
1624 if (SINGLE_BYTE_CHAR_P (c))
1626 if (!fastmap[c])
1627 break;
1629 else
1631 /* If we are looking at a multibyte character,
1632 we must look up the character in the table
1633 CHAR_RANGES. If there's no data in the
1634 table, that character is not what we want to
1635 skip. */
1637 /* The following code do the right thing even if
1638 n_char_ranges is zero (i.e. no data in
1639 CHAR_RANGES). */
1640 for (i = 0; i < n_char_ranges; i += 2)
1641 if (c >= char_ranges[i] && c <= char_ranges[i + 1])
1642 break;
1643 if (!(negate ^ (i < n_char_ranges)))
1644 break;
1646 INC_BOTH (pos, pos_byte);
1648 else
1649 while (pos < XINT (lim) && fastmap[FETCH_BYTE (pos)])
1650 pos++;
1652 else
1654 if (multibyte)
1655 while (pos > XINT (lim))
1657 int prev_pos_byte = pos_byte;
1659 DEC_POS (prev_pos_byte);
1660 c = FETCH_MULTIBYTE_CHAR (prev_pos_byte);
1661 if (SINGLE_BYTE_CHAR_P (c))
1663 if (!fastmap[c])
1664 break;
1666 else
1668 /* See the comment in the previous similar code. */
1669 for (i = 0; i < n_char_ranges; i += 2)
1670 if (c >= char_ranges[i] && c <= char_ranges[i + 1])
1671 break;
1672 if (!(negate ^ (i < n_char_ranges)))
1673 break;
1675 pos--;
1676 pos_byte = prev_pos_byte;
1678 else
1679 while (pos > XINT (lim) && fastmap[FETCH_BYTE (pos - 1)])
1680 pos--;
1684 #if 0 /* Not needed now that a position in mid-character
1685 cannot be specified in Lisp. */
1686 if (multibyte
1687 /* INC_POS or DEC_POS might have moved POS over LIM. */
1688 && (forwardp ? (pos > XINT (lim)) : (pos < XINT (lim))))
1689 pos = XINT (lim);
1690 #endif
1692 if (! multibyte)
1693 pos_byte = pos;
1695 SET_PT_BOTH (pos, pos_byte);
1696 immediate_quit = 0;
1698 return make_number (PT - start_point);
1702 /* Jump over a comment, assuming we are at the beginning of one.
1703 FROM is the current position.
1704 FROM_BYTE is the bytepos corresponding to FROM.
1705 Do not move past STOP (a charpos).
1706 The comment over which we have to jump is of style STYLE
1707 (either SYNTAX_COMMENT_STYLE(foo) or ST_COMMENT_STYLE).
1708 NESTING should be positive to indicate the nesting at the beginning
1709 for nested comments and should be zero or negative else.
1710 ST_COMMENT_STYLE cannot be nested.
1711 PREV_SYNTAX is the SYNTAX_WITH_FLAGS of the previous character
1712 (or 0 If the search cannot start in the middle of a two-character).
1714 If successful, return 1 and store the charpos of the comment's end
1715 into *CHARPOS_PTR and the corresponding bytepos into *BYTEPOS_PTR.
1716 Else, return 0 and store the charpos STOP into *CHARPOS_PTR, the
1717 corresponding bytepos into *BYTEPOS_PTR and the current nesting
1718 (as defined for state.incomment) in *INCOMMENT_PTR.
1720 The comment end is the last character of the comment rather than the
1721 character just after the comment.
1723 Global syntax data is assumed to initially be valid for FROM and
1724 remains valid for forward search starting at the returned position. */
1726 static int
1727 forw_comment (from, from_byte, stop, nesting, style, prev_syntax,
1728 charpos_ptr, bytepos_ptr, incomment_ptr)
1729 int from, from_byte, stop;
1730 int nesting, style, prev_syntax;
1731 int *charpos_ptr, *bytepos_ptr, *incomment_ptr;
1733 register int c, c1;
1734 register enum syntaxcode code;
1735 register int syntax;
1737 if (nesting <= 0) nesting = -1;
1739 /* Enter the loop in the middle so that we find
1740 a 2-char comment ender if we start in the middle of it. */
1741 syntax = prev_syntax;
1742 if (syntax != 0) goto forw_incomment;
1744 while (1)
1746 if (from == stop)
1748 *incomment_ptr = nesting;
1749 *charpos_ptr = from;
1750 *bytepos_ptr = from_byte;
1751 return 0;
1753 c = FETCH_CHAR (from_byte);
1754 syntax = SYNTAX_WITH_FLAGS (c);
1755 code = syntax & 0xff;
1756 if (code == Sendcomment
1757 && SYNTAX_FLAGS_COMMENT_STYLE (syntax) == style
1758 && (SYNTAX_FLAGS_COMMENT_NESTED (syntax) ?
1759 (nesting > 0 && --nesting == 0) : nesting < 0))
1760 /* we have encountered a comment end of the same style
1761 as the comment sequence which began this comment
1762 section */
1763 break;
1764 if (code == Scomment_fence
1765 && style == ST_COMMENT_STYLE)
1766 /* we have encountered a comment end of the same style
1767 as the comment sequence which began this comment
1768 section. */
1769 break;
1770 if (nesting > 0
1771 && code == Scomment
1772 && SYNTAX_FLAGS_COMMENT_NESTED (syntax)
1773 && SYNTAX_FLAGS_COMMENT_STYLE (syntax) == style)
1774 /* we have encountered a nested comment of the same style
1775 as the comment sequence which began this comment section */
1776 nesting++;
1777 INC_BOTH (from, from_byte);
1778 UPDATE_SYNTAX_TABLE_FORWARD (from);
1780 forw_incomment:
1781 if (from < stop && SYNTAX_FLAGS_COMEND_FIRST (syntax)
1782 && SYNTAX_FLAGS_COMMENT_STYLE (syntax) == style
1783 && (c1 = FETCH_CHAR (from_byte),
1784 SYNTAX_COMEND_SECOND (c1))
1785 && ((SYNTAX_FLAGS_COMMENT_NESTED (syntax) ||
1786 SYNTAX_COMMENT_NESTED (c1)) ? nesting > 0 : nesting < 0))
1788 if (--nesting <= 0)
1789 /* we have encountered a comment end of the same style
1790 as the comment sequence which began this comment
1791 section */
1792 break;
1793 else
1795 INC_BOTH (from, from_byte);
1796 UPDATE_SYNTAX_TABLE_FORWARD (from);
1799 if (nesting > 0
1800 && from < stop
1801 && SYNTAX_FLAGS_COMSTART_FIRST (syntax)
1802 && (c1 = FETCH_CHAR (from_byte),
1803 SYNTAX_COMMENT_STYLE (c1) == style
1804 && SYNTAX_COMSTART_SECOND (c1))
1805 && (SYNTAX_FLAGS_COMMENT_NESTED (syntax) ||
1806 SYNTAX_COMMENT_NESTED (c1)))
1807 /* we have encountered a nested comment of the same style
1808 as the comment sequence which began this comment
1809 section */
1811 INC_BOTH (from, from_byte);
1812 UPDATE_SYNTAX_TABLE_FORWARD (from);
1813 nesting++;
1816 *charpos_ptr = from;
1817 *bytepos_ptr = from_byte;
1818 return 1;
1821 DEFUN ("forward-comment", Fforward_comment, Sforward_comment, 1, 1, 0,
1822 "Move forward across up to N comments. If N is negative, move backward.\n\
1823 Stop scanning if we find something other than a comment or whitespace.\n\
1824 Set point to where scanning stops.\n\
1825 If N comments are found as expected, with nothing except whitespace\n\
1826 between them, return t; otherwise return nil.")
1827 (count)
1828 Lisp_Object count;
1830 register int from;
1831 int from_byte;
1832 register int stop;
1833 register int c, c1;
1834 register enum syntaxcode code;
1835 int comstyle = 0; /* style of comment encountered */
1836 int comnested = 0; /* whether the comment is nestable or not */
1837 int found;
1838 int count1;
1839 int out_charpos, out_bytepos;
1840 int dummy;
1842 CHECK_NUMBER (count, 0);
1843 count1 = XINT (count);
1844 stop = count1 > 0 ? ZV : BEGV;
1846 immediate_quit = 1;
1847 QUIT;
1849 from = PT;
1850 from_byte = PT_BYTE;
1852 SETUP_SYNTAX_TABLE (from, count1);
1853 while (count1 > 0)
1857 int comstart_first;
1859 if (from == stop)
1861 SET_PT_BOTH (from, from_byte);
1862 immediate_quit = 0;
1863 return Qnil;
1865 c = FETCH_CHAR (from_byte);
1866 code = SYNTAX (c);
1867 comstart_first = SYNTAX_COMSTART_FIRST (c);
1868 comnested = SYNTAX_COMMENT_NESTED (c);
1869 comstyle = SYNTAX_COMMENT_STYLE (c);
1870 INC_BOTH (from, from_byte);
1871 UPDATE_SYNTAX_TABLE_FORWARD (from);
1872 if (from < stop && comstart_first
1873 && (c1 = FETCH_CHAR (from_byte),
1874 SYNTAX_COMSTART_SECOND (c1)))
1876 /* We have encountered a comment start sequence and we
1877 are ignoring all text inside comments. We must record
1878 the comment style this sequence begins so that later,
1879 only a comment end of the same style actually ends
1880 the comment section. */
1881 code = Scomment;
1882 comstyle = SYNTAX_COMMENT_STYLE (c1);
1883 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
1884 INC_BOTH (from, from_byte);
1885 UPDATE_SYNTAX_TABLE_FORWARD (from);
1888 while (code == Swhitespace || (code == Sendcomment && c == '\n'));
1890 if (code == Scomment_fence)
1891 comstyle = ST_COMMENT_STYLE;
1892 else if (code != Scomment)
1894 immediate_quit = 0;
1895 DEC_BOTH (from, from_byte);
1896 SET_PT_BOTH (from, from_byte);
1897 return Qnil;
1899 /* We're at the start of a comment. */
1900 found = forw_comment (from, from_byte, stop, comnested, comstyle, 0,
1901 &out_charpos, &out_bytepos, &dummy);
1902 from = out_charpos; from_byte = out_bytepos;
1903 if (!found)
1905 immediate_quit = 0;
1906 SET_PT_BOTH (from, from_byte);
1907 return Qnil;
1909 INC_BOTH (from, from_byte);
1910 UPDATE_SYNTAX_TABLE_FORWARD (from);
1911 /* We have skipped one comment. */
1912 count1--;
1915 while (count1 < 0)
1917 while (1)
1919 int quoted;
1921 if (from <= stop)
1923 SET_PT_BOTH (BEGV, BEGV_BYTE);
1924 immediate_quit = 0;
1925 return Qnil;
1928 DEC_BOTH (from, from_byte);
1929 /* char_quoted does UPDATE_SYNTAX_TABLE_BACKWARD (from). */
1930 quoted = char_quoted (from, from_byte);
1931 if (quoted)
1933 DEC_BOTH (from, from_byte);
1934 goto leave;
1936 c = FETCH_CHAR (from_byte);
1937 code = SYNTAX (c);
1938 comstyle = 0;
1939 comnested = SYNTAX_COMMENT_NESTED (c);
1940 if (code == Sendcomment)
1941 comstyle = SYNTAX_COMMENT_STYLE (c);
1942 if (from > stop && SYNTAX_COMEND_SECOND (c)
1943 && prev_char_comend_first (from, from_byte)
1944 && !char_quoted (from - 1, dec_bytepos (from_byte)))
1946 /* We must record the comment style encountered so that
1947 later, we can match only the proper comment begin
1948 sequence of the same style. */
1949 DEC_BOTH (from, from_byte);
1950 code = Sendcomment;
1951 /* Calling char_quoted, above, set up global syntax position
1952 at the new value of FROM. */
1953 c1 = FETCH_CHAR (from_byte);
1954 comstyle = SYNTAX_COMMENT_STYLE (c1);
1955 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
1958 if (code == Scomment_fence)
1960 /* Skip until first preceding unquoted comment_fence. */
1961 int found = 0, ini = from, ini_byte = from_byte;
1963 while (1)
1965 DEC_BOTH (from, from_byte);
1966 if (from == stop)
1967 break;
1968 UPDATE_SYNTAX_TABLE_BACKWARD (from);
1969 c = FETCH_CHAR (from_byte);
1970 if (SYNTAX (c) == Scomment_fence
1971 && !char_quoted (from, from_byte))
1973 found = 1;
1974 break;
1977 if (found == 0)
1979 from = ini; /* Set point to ini + 1. */
1980 from_byte = ini_byte;
1981 goto leave;
1984 else if (code == Sendcomment)
1986 found = back_comment (from, from_byte, stop, comnested, comstyle,
1987 &out_charpos, &out_bytepos);
1988 if (found == -1)
1990 if (c == '\n')
1991 /* This end-of-line is not an end-of-comment.
1992 Treat it like a whitespace.
1993 CC-mode (and maybe others) relies on this behavior. */
1995 else
1997 /* Failure: we should go back to the end of this
1998 not-quite-endcomment. */
1999 if (SYNTAX(c) != code)
2000 /* It was a two-char Sendcomment. */
2001 INC_BOTH (from, from_byte);
2002 goto leave;
2005 else
2007 /* We have skipped one comment. */
2008 from = out_charpos, from_byte = out_bytepos;
2009 break;
2012 else if (code != Swhitespace)
2014 leave:
2015 immediate_quit = 0;
2016 INC_BOTH (from, from_byte);
2017 SET_PT_BOTH (from, from_byte);
2018 return Qnil;
2022 count1++;
2025 SET_PT_BOTH (from, from_byte);
2026 immediate_quit = 0;
2027 return Qt;
2030 /* Return syntax code of character C if C is a single byte character
2031 or `multibyte_symbol_p' is zero. Otherwise, return Ssymbol. */
2033 #define SYNTAX_WITH_MULTIBYTE_CHECK(c) \
2034 ((SINGLE_BYTE_CHAR_P (c) || !multibyte_symbol_p) \
2035 ? SYNTAX (c) : Ssymbol)
2037 static Lisp_Object
2038 scan_lists (from, count, depth, sexpflag)
2039 register int from;
2040 int count, depth, sexpflag;
2042 Lisp_Object val;
2043 register int stop = count > 0 ? ZV : BEGV;
2044 register int c, c1;
2045 int stringterm;
2046 int quoted;
2047 int mathexit = 0;
2048 register enum syntaxcode code, temp_code;
2049 int min_depth = depth; /* Err out if depth gets less than this. */
2050 int comstyle = 0; /* style of comment encountered */
2051 int comnested = 0; /* whether the comment is nestable or not */
2052 int temp_pos;
2053 int last_good = from;
2054 int found;
2055 int from_byte;
2056 int out_bytepos, out_charpos;
2057 int temp, dummy;
2058 int multibyte_symbol_p = sexpflag && multibyte_syntax_as_symbol;
2060 if (depth > 0) min_depth = 0;
2062 if (from > ZV) from = ZV;
2063 if (from < BEGV) from = BEGV;
2065 from_byte = CHAR_TO_BYTE (from);
2067 immediate_quit = 1;
2068 QUIT;
2070 SETUP_SYNTAX_TABLE (from, count);
2071 while (count > 0)
2073 while (from < stop)
2075 int comstart_first, prefix;
2076 UPDATE_SYNTAX_TABLE_FORWARD (from);
2077 c = FETCH_CHAR (from_byte);
2078 code = SYNTAX_WITH_MULTIBYTE_CHECK (c);
2079 comstart_first = SYNTAX_COMSTART_FIRST (c);
2080 comnested = SYNTAX_COMMENT_NESTED (c);
2081 comstyle = SYNTAX_COMMENT_STYLE (c);
2082 prefix = SYNTAX_PREFIX (c);
2083 if (depth == min_depth)
2084 last_good = from;
2085 INC_BOTH (from, from_byte);
2086 UPDATE_SYNTAX_TABLE_FORWARD (from);
2087 if (from < stop && comstart_first
2088 && SYNTAX_COMSTART_SECOND (FETCH_CHAR (from_byte))
2089 && parse_sexp_ignore_comments)
2091 /* we have encountered a comment start sequence and we
2092 are ignoring all text inside comments. We must record
2093 the comment style this sequence begins so that later,
2094 only a comment end of the same style actually ends
2095 the comment section */
2096 code = Scomment;
2097 c1 = FETCH_CHAR (from_byte);
2098 comstyle = SYNTAX_COMMENT_STYLE (c1);
2099 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
2100 INC_BOTH (from, from_byte);
2101 UPDATE_SYNTAX_TABLE_FORWARD (from);
2104 if (prefix)
2105 continue;
2107 switch (SWITCH_ENUM_CAST (code))
2109 case Sescape:
2110 case Scharquote:
2111 if (from == stop) goto lose;
2112 INC_BOTH (from, from_byte);
2113 /* treat following character as a word constituent */
2114 case Sword:
2115 case Ssymbol:
2116 if (depth || !sexpflag) break;
2117 /* This word counts as a sexp; return at end of it. */
2118 while (from < stop)
2120 UPDATE_SYNTAX_TABLE_FORWARD (from);
2122 /* Some compilers can't handle this inside the switch. */
2123 c = FETCH_CHAR (from_byte);
2124 temp = SYNTAX_WITH_MULTIBYTE_CHECK (c);
2125 switch (temp)
2127 case Scharquote:
2128 case Sescape:
2129 INC_BOTH (from, from_byte);
2130 if (from == stop) goto lose;
2131 break;
2132 case Sword:
2133 case Ssymbol:
2134 case Squote:
2135 break;
2136 default:
2137 goto done;
2139 INC_BOTH (from, from_byte);
2141 goto done;
2143 case Scomment_fence:
2144 comstyle = ST_COMMENT_STYLE;
2145 /* FALLTHROUGH */
2146 case Scomment:
2147 if (!parse_sexp_ignore_comments) break;
2148 UPDATE_SYNTAX_TABLE_FORWARD (from);
2149 found = forw_comment (from, from_byte, stop,
2150 comnested, comstyle, 0,
2151 &out_charpos, &out_bytepos, &dummy);
2152 from = out_charpos, from_byte = out_bytepos;
2153 if (!found)
2155 if (depth == 0)
2156 goto done;
2157 goto lose;
2159 INC_BOTH (from, from_byte);
2160 UPDATE_SYNTAX_TABLE_FORWARD (from);
2161 break;
2163 case Smath:
2164 if (!sexpflag)
2165 break;
2166 if (from != stop && c == FETCH_CHAR (from_byte))
2168 INC_BOTH (from, from_byte);
2170 if (mathexit)
2172 mathexit = 0;
2173 goto close1;
2175 mathexit = 1;
2177 case Sopen:
2178 if (!++depth) goto done;
2179 break;
2181 case Sclose:
2182 close1:
2183 if (!--depth) goto done;
2184 if (depth < min_depth)
2185 Fsignal (Qscan_error,
2186 Fcons (build_string ("Containing expression ends prematurely"),
2187 Fcons (make_number (last_good),
2188 Fcons (make_number (from), Qnil))));
2189 break;
2191 case Sstring:
2192 case Sstring_fence:
2193 temp_pos = dec_bytepos (from_byte);
2194 stringterm = FETCH_CHAR (temp_pos);
2195 while (1)
2197 if (from >= stop) goto lose;
2198 UPDATE_SYNTAX_TABLE_FORWARD (from);
2199 c = FETCH_CHAR (from_byte);
2200 if (code == Sstring
2201 ? (c == stringterm
2202 && SYNTAX_WITH_MULTIBYTE_CHECK (c) == Sstring)
2203 : SYNTAX_WITH_MULTIBYTE_CHECK (c) == Sstring_fence)
2204 break;
2206 /* Some compilers can't handle this inside the switch. */
2207 temp = SYNTAX_WITH_MULTIBYTE_CHECK (c);
2208 switch (temp)
2210 case Scharquote:
2211 case Sescape:
2212 INC_BOTH (from, from_byte);
2214 INC_BOTH (from, from_byte);
2216 INC_BOTH (from, from_byte);
2217 if (!depth && sexpflag) goto done;
2218 break;
2222 /* Reached end of buffer. Error if within object, return nil if between */
2223 if (depth) goto lose;
2225 immediate_quit = 0;
2226 return Qnil;
2228 /* End of object reached */
2229 done:
2230 count--;
2233 while (count < 0)
2235 while (from > stop)
2237 DEC_BOTH (from, from_byte);
2238 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2239 c = FETCH_CHAR (from_byte);
2240 code = SYNTAX_WITH_MULTIBYTE_CHECK (c);
2241 if (depth == min_depth)
2242 last_good = from;
2243 comstyle = 0;
2244 comnested = SYNTAX_COMMENT_NESTED (c);
2245 if (code == Sendcomment)
2246 comstyle = SYNTAX_COMMENT_STYLE (c);
2247 if (from > stop && SYNTAX_COMEND_SECOND (c)
2248 && prev_char_comend_first (from, from_byte)
2249 && parse_sexp_ignore_comments)
2251 /* We must record the comment style encountered so that
2252 later, we can match only the proper comment begin
2253 sequence of the same style. */
2254 DEC_BOTH (from, from_byte);
2255 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2256 code = Sendcomment;
2257 c1 = FETCH_CHAR (from_byte);
2258 comstyle = SYNTAX_COMMENT_STYLE (c1);
2259 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
2262 /* Quoting turns anything except a comment-ender
2263 into a word character. Note that this cannot be true
2264 if we decremented FROM in the if-statement above. */
2265 if (code != Sendcomment && char_quoted (from, from_byte))
2266 code = Sword;
2267 else if (SYNTAX_PREFIX (c))
2268 continue;
2270 switch (SWITCH_ENUM_CAST (code))
2272 case Sword:
2273 case Ssymbol:
2274 case Sescape:
2275 case Scharquote:
2276 if (depth || !sexpflag) break;
2277 /* This word counts as a sexp; count object finished
2278 after passing it. */
2279 while (from > stop)
2281 temp_pos = from_byte;
2282 if (! NILP (current_buffer->enable_multibyte_characters))
2283 DEC_POS (temp_pos);
2284 else
2285 temp_pos--;
2286 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
2287 c1 = FETCH_CHAR (temp_pos);
2288 temp_code = SYNTAX_WITH_MULTIBYTE_CHECK (c1);
2289 /* Don't allow comment-end to be quoted. */
2290 if (temp_code == Sendcomment)
2291 goto done2;
2292 quoted = char_quoted (from - 1, temp_pos);
2293 if (quoted)
2295 DEC_BOTH (from, from_byte);
2296 temp_pos = dec_bytepos (temp_pos);
2297 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
2299 c1 = FETCH_CHAR (temp_pos);
2300 temp_code = SYNTAX_WITH_MULTIBYTE_CHECK (c1);
2301 if (! (quoted || temp_code == Sword
2302 || temp_code == Ssymbol
2303 || temp_code == Squote))
2304 goto done2;
2305 DEC_BOTH (from, from_byte);
2307 goto done2;
2309 case Smath:
2310 if (!sexpflag)
2311 break;
2312 temp_pos = dec_bytepos (from_byte);
2313 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
2314 if (from != stop && c == FETCH_CHAR (temp_pos))
2315 DEC_BOTH (from, from_byte);
2316 if (mathexit)
2318 mathexit = 0;
2319 goto open2;
2321 mathexit = 1;
2323 case Sclose:
2324 if (!++depth) goto done2;
2325 break;
2327 case Sopen:
2328 open2:
2329 if (!--depth) goto done2;
2330 if (depth < min_depth)
2331 Fsignal (Qscan_error,
2332 Fcons (build_string ("Containing expression ends prematurely"),
2333 Fcons (make_number (last_good),
2334 Fcons (make_number (from), Qnil))));
2335 break;
2337 case Sendcomment:
2338 if (!parse_sexp_ignore_comments)
2339 break;
2340 found = back_comment (from, from_byte, stop, comnested, comstyle,
2341 &out_charpos, &out_bytepos);
2342 /* FIXME: if found == -1, then it really wasn't a comment-end.
2343 For single-char Sendcomment, we can't do much about it apart
2344 from skipping the char.
2345 For 2-char endcomments, we could try again, taking both
2346 chars as separate entities, but it's a lot of trouble
2347 for very little gain, so we don't bother either. -sm */
2348 if (found != -1)
2349 from = out_charpos, from_byte = out_bytepos;
2350 break;
2352 case Scomment_fence:
2353 case Sstring_fence:
2354 while (1)
2356 DEC_BOTH (from, from_byte);
2357 if (from == stop) goto lose;
2358 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2359 if (!char_quoted (from, from_byte)
2360 && (c = FETCH_CHAR (from_byte),
2361 SYNTAX_WITH_MULTIBYTE_CHECK (c) == code))
2362 break;
2364 if (code == Sstring_fence && !depth && sexpflag) goto done2;
2365 break;
2367 case Sstring:
2368 stringterm = FETCH_CHAR (from_byte);
2369 while (1)
2371 if (from == stop) goto lose;
2372 temp_pos = from_byte;
2373 if (! NILP (current_buffer->enable_multibyte_characters))
2374 DEC_POS (temp_pos);
2375 else
2376 temp_pos--;
2377 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
2378 if (!char_quoted (from - 1, temp_pos)
2379 && stringterm == (c = FETCH_CHAR (temp_pos))
2380 && SYNTAX_WITH_MULTIBYTE_CHECK (c) == Sstring)
2381 break;
2382 DEC_BOTH (from, from_byte);
2384 DEC_BOTH (from, from_byte);
2385 if (!depth && sexpflag) goto done2;
2386 break;
2390 /* Reached start of buffer. Error if within object, return nil if between */
2391 if (depth) goto lose;
2393 immediate_quit = 0;
2394 return Qnil;
2396 done2:
2397 count++;
2401 immediate_quit = 0;
2402 XSETFASTINT (val, from);
2403 return val;
2405 lose:
2406 Fsignal (Qscan_error,
2407 Fcons (build_string ("Unbalanced parentheses"),
2408 Fcons (make_number (last_good),
2409 Fcons (make_number (from), Qnil))));
2411 /* NOTREACHED */
2414 DEFUN ("scan-lists", Fscan_lists, Sscan_lists, 3, 3, 0,
2415 "Scan from character number FROM by COUNT lists.\n\
2416 Returns the character number of the position thus found.\n\
2418 If DEPTH is nonzero, paren depth begins counting from that value,\n\
2419 only places where the depth in parentheses becomes zero\n\
2420 are candidates for stopping; COUNT such places are counted.\n\
2421 Thus, a positive value for DEPTH means go out levels.\n\
2423 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.\n\
2425 If the beginning or end of (the accessible part of) the buffer is reached\n\
2426 and the depth is wrong, an error is signaled.\n\
2427 If the depth is right but the count is not used up, nil is returned.")
2428 (from, count, depth)
2429 Lisp_Object from, count, depth;
2431 CHECK_NUMBER (from, 0);
2432 CHECK_NUMBER (count, 1);
2433 CHECK_NUMBER (depth, 2);
2435 return scan_lists (XINT (from), XINT (count), XINT (depth), 0);
2438 DEFUN ("scan-sexps", Fscan_sexps, Sscan_sexps, 2, 2, 0,
2439 "Scan from character number FROM by COUNT balanced expressions.\n\
2440 If COUNT is negative, scan backwards.\n\
2441 Returns the character number of the position thus found.\n\
2443 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.\n\
2445 If the beginning or end of (the accessible part of) the buffer is reached\n\
2446 in the middle of a parenthetical grouping, an error is signaled.\n\
2447 If the beginning or end is reached between groupings\n\
2448 but before count is used up, nil is returned.")
2449 (from, count)
2450 Lisp_Object from, count;
2452 CHECK_NUMBER (from, 0);
2453 CHECK_NUMBER (count, 1);
2455 return scan_lists (XINT (from), XINT (count), 0, 1);
2458 DEFUN ("backward-prefix-chars", Fbackward_prefix_chars, Sbackward_prefix_chars,
2459 0, 0, 0,
2460 "Move point backward over any number of chars with prefix syntax.\n\
2461 This includes chars with \"quote\" or \"prefix\" syntax (' or p).")
2464 int beg = BEGV;
2465 int opoint = PT;
2466 int opoint_byte = PT_BYTE;
2467 int pos = PT;
2468 int pos_byte = PT_BYTE;
2469 int c;
2471 if (pos <= beg)
2473 SET_PT_BOTH (opoint, opoint_byte);
2475 return Qnil;
2478 SETUP_SYNTAX_TABLE (pos, -1);
2480 DEC_BOTH (pos, pos_byte);
2482 while (!char_quoted (pos, pos_byte)
2483 /* Previous statement updates syntax table. */
2484 && ((c = FETCH_CHAR (pos_byte), SYNTAX (c) == Squote)
2485 || SYNTAX_PREFIX (c)))
2487 opoint = pos;
2488 opoint_byte = pos_byte;
2490 if (pos + 1 > beg)
2491 DEC_BOTH (pos, pos_byte);
2494 SET_PT_BOTH (opoint, opoint_byte);
2496 return Qnil;
2499 /* Parse forward from FROM / FROM_BYTE to END,
2500 assuming that FROM has state OLDSTATE (nil means FROM is start of function),
2501 and return a description of the state of the parse at END.
2502 If STOPBEFORE is nonzero, stop at the start of an atom.
2503 If COMMENTSTOP is 1, stop at the start of a comment.
2504 If COMMENTSTOP is -1, stop at the start or end of a comment,
2505 after the beginning of a string, or after the end of a string. */
2507 static void
2508 scan_sexps_forward (stateptr, from, from_byte, end, targetdepth,
2509 stopbefore, oldstate, commentstop)
2510 struct lisp_parse_state *stateptr;
2511 register int from;
2512 int end, targetdepth, stopbefore;
2513 Lisp_Object oldstate;
2514 int commentstop;
2516 struct lisp_parse_state state;
2518 register enum syntaxcode code;
2519 int c1;
2520 int comnested;
2521 struct level { int last, prev; };
2522 struct level levelstart[100];
2523 register struct level *curlevel = levelstart;
2524 struct level *endlevel = levelstart + 100;
2525 register int depth; /* Paren depth of current scanning location.
2526 level - levelstart equals this except
2527 when the depth becomes negative. */
2528 int mindepth; /* Lowest DEPTH value seen. */
2529 int start_quoted = 0; /* Nonzero means starting after a char quote */
2530 Lisp_Object tem;
2531 int prev_from; /* Keep one character before FROM. */
2532 int prev_from_byte;
2533 int prev_from_syntax;
2534 int boundary_stop = commentstop == -1;
2535 int nofence;
2536 int found;
2537 int out_bytepos, out_charpos;
2538 int temp;
2540 prev_from = from;
2541 prev_from_byte = from_byte;
2542 if (from != BEGV)
2543 DEC_BOTH (prev_from, prev_from_byte);
2545 /* Use this macro instead of `from++'. */
2546 #define INC_FROM \
2547 do { prev_from = from; \
2548 prev_from_byte = from_byte; \
2549 prev_from_syntax \
2550 = SYNTAX_WITH_FLAGS (FETCH_CHAR (prev_from_byte)); \
2551 INC_BOTH (from, from_byte); \
2552 UPDATE_SYNTAX_TABLE_FORWARD (from); \
2553 } while (0)
2555 immediate_quit = 1;
2556 QUIT;
2558 if (NILP (oldstate))
2560 depth = 0;
2561 state.instring = -1;
2562 state.incomment = 0;
2563 state.comstyle = 0; /* comment style a by default. */
2564 state.comstr_start = -1; /* no comment/string seen. */
2566 else
2568 tem = Fcar (oldstate);
2569 if (!NILP (tem))
2570 depth = XINT (tem);
2571 else
2572 depth = 0;
2574 oldstate = Fcdr (oldstate);
2575 oldstate = Fcdr (oldstate);
2576 oldstate = Fcdr (oldstate);
2577 tem = Fcar (oldstate);
2578 /* Check whether we are inside string_fence-style string: */
2579 state.instring = (!NILP (tem)
2580 ? (INTEGERP (tem) ? XINT (tem) : ST_STRING_STYLE)
2581 : -1);
2583 oldstate = Fcdr (oldstate);
2584 tem = Fcar (oldstate);
2585 state.incomment = (!NILP (tem)
2586 ? (INTEGERP (tem) ? XINT (tem) : -1)
2587 : 0);
2589 oldstate = Fcdr (oldstate);
2590 tem = Fcar (oldstate);
2591 start_quoted = !NILP (tem);
2593 /* if the eighth element of the list is nil, we are in comment
2594 style a. If it is non-nil, we are in comment style b */
2595 oldstate = Fcdr (oldstate);
2596 oldstate = Fcdr (oldstate);
2597 tem = Fcar (oldstate);
2598 state.comstyle = NILP (tem) ? 0 : (EQ (tem, Qsyntax_table)
2599 ? ST_COMMENT_STYLE : 1);
2601 oldstate = Fcdr (oldstate);
2602 tem = Fcar (oldstate);
2603 state.comstr_start = NILP (tem) ? -1 : XINT (tem) ;
2604 oldstate = Fcdr (oldstate);
2605 tem = Fcar (oldstate);
2606 while (!NILP (tem)) /* >= second enclosing sexps. */
2608 /* curlevel++->last ran into compiler bug on Apollo */
2609 curlevel->last = XINT (Fcar (tem));
2610 if (++curlevel == endlevel)
2611 curlevel--; /* error ("Nesting too deep for parser"); */
2612 curlevel->prev = -1;
2613 curlevel->last = -1;
2614 tem = Fcdr (tem);
2617 state.quoted = 0;
2618 mindepth = depth;
2620 curlevel->prev = -1;
2621 curlevel->last = -1;
2623 SETUP_SYNTAX_TABLE (prev_from, 1);
2624 prev_from_syntax = SYNTAX_WITH_FLAGS (FETCH_CHAR (prev_from_byte));
2625 UPDATE_SYNTAX_TABLE_FORWARD (from);
2627 /* Enter the loop at a place appropriate for initial state. */
2629 if (state.incomment)
2630 goto startincomment;
2631 if (state.instring >= 0)
2633 nofence = state.instring != ST_STRING_STYLE;
2634 if (start_quoted)
2635 goto startquotedinstring;
2636 goto startinstring;
2638 else if (start_quoted)
2639 goto startquoted;
2641 #if 0 /* This seems to be redundant with the identical code above. */
2642 SETUP_SYNTAX_TABLE (prev_from, 1);
2643 prev_from_syntax = SYNTAX_WITH_FLAGS (FETCH_CHAR (prev_from_byte));
2644 UPDATE_SYNTAX_TABLE_FORWARD (from);
2645 #endif
2647 while (from < end)
2649 INC_FROM;
2650 code = prev_from_syntax & 0xff;
2652 if (code == Scomment)
2654 state.comstyle = SYNTAX_FLAGS_COMMENT_STYLE (prev_from_syntax);
2655 state.incomment = (SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax) ?
2656 1 : -1);
2657 state.comstr_start = prev_from;
2659 else if (code == Scomment_fence)
2661 /* Record the comment style we have entered so that only
2662 the comment-end sequence of the same style actually
2663 terminates the comment section. */
2664 state.comstyle = ST_COMMENT_STYLE;
2665 state.incomment = -1;
2666 state.comstr_start = prev_from;
2667 code = Scomment;
2669 else if (from < end)
2670 if (SYNTAX_FLAGS_COMSTART_FIRST (prev_from_syntax))
2671 if (c1 = FETCH_CHAR (from_byte),
2672 SYNTAX_COMSTART_SECOND (c1))
2673 /* Duplicate code to avoid a complex if-expression
2674 which causes trouble for the SGI compiler. */
2676 /* Record the comment style we have entered so that only
2677 the comment-end sequence of the same style actually
2678 terminates the comment section. */
2679 state.comstyle = SYNTAX_COMMENT_STYLE (FETCH_CHAR (from_byte));
2680 comnested = SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax);
2681 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
2682 state.incomment = comnested ? 1 : -1;
2683 state.comstr_start = prev_from;
2684 INC_FROM;
2685 code = Scomment;
2688 if (SYNTAX_FLAGS_PREFIX (prev_from_syntax))
2689 continue;
2690 switch (SWITCH_ENUM_CAST (code))
2692 case Sescape:
2693 case Scharquote:
2694 if (stopbefore) goto stop; /* this arg means stop at sexp start */
2695 curlevel->last = prev_from;
2696 startquoted:
2697 if (from == end) goto endquoted;
2698 INC_FROM;
2699 goto symstarted;
2700 /* treat following character as a word constituent */
2701 case Sword:
2702 case Ssymbol:
2703 if (stopbefore) goto stop; /* this arg means stop at sexp start */
2704 curlevel->last = prev_from;
2705 symstarted:
2706 while (from < end)
2708 /* Some compilers can't handle this inside the switch. */
2709 temp = SYNTAX (FETCH_CHAR (from_byte));
2710 switch (temp)
2712 case Scharquote:
2713 case Sescape:
2714 INC_FROM;
2715 if (from == end) goto endquoted;
2716 break;
2717 case Sword:
2718 case Ssymbol:
2719 case Squote:
2720 break;
2721 default:
2722 goto symdone;
2724 INC_FROM;
2726 symdone:
2727 curlevel->prev = curlevel->last;
2728 break;
2730 case Scomment:
2731 if (commentstop || boundary_stop) goto done;
2732 startincomment:
2733 /* The (from == BEGV) test was to enter the loop in the middle so
2734 that we find a 2-char comment ender even if we start in the
2735 middle of it. We don't want to do that if we're just at the
2736 beginning of the comment (think of (*) ... (*)). */
2737 found = forw_comment (from, from_byte, end,
2738 state.incomment, state.comstyle,
2739 (from == BEGV || from < state.comstr_start + 3)
2740 ? 0 : prev_from_syntax,
2741 &out_charpos, &out_bytepos, &state.incomment);
2742 from = out_charpos; from_byte = out_bytepos;
2743 /* Beware! prev_from and friends are invalid now.
2744 Luckily, the `done' doesn't use them and the INC_FROM
2745 sets them to a sane value without looking at them. */
2746 if (!found) goto done;
2747 INC_FROM;
2748 state.incomment = 0;
2749 state.comstyle = 0; /* reset the comment style */
2750 if (boundary_stop) goto done;
2751 break;
2753 case Sopen:
2754 if (stopbefore) goto stop; /* this arg means stop at sexp start */
2755 depth++;
2756 /* curlevel++->last ran into compiler bug on Apollo */
2757 curlevel->last = prev_from;
2758 if (++curlevel == endlevel)
2759 curlevel--; /* error ("Nesting too deep for parser"); */
2760 curlevel->prev = -1;
2761 curlevel->last = -1;
2762 if (targetdepth == depth) goto done;
2763 break;
2765 case Sclose:
2766 depth--;
2767 if (depth < mindepth)
2768 mindepth = depth;
2769 if (curlevel != levelstart)
2770 curlevel--;
2771 curlevel->prev = curlevel->last;
2772 if (targetdepth == depth) goto done;
2773 break;
2775 case Sstring:
2776 case Sstring_fence:
2777 state.comstr_start = from - 1;
2778 if (stopbefore) goto stop; /* this arg means stop at sexp start */
2779 curlevel->last = prev_from;
2780 state.instring = (code == Sstring
2781 ? (FETCH_CHAR (prev_from_byte))
2782 : ST_STRING_STYLE);
2783 if (boundary_stop) goto done;
2784 startinstring:
2786 nofence = state.instring != ST_STRING_STYLE;
2788 while (1)
2790 int c;
2792 if (from >= end) goto done;
2793 c = FETCH_CHAR (from_byte);
2794 /* Some compilers can't handle this inside the switch. */
2795 temp = SYNTAX (c);
2797 /* Check TEMP here so that if the char has
2798 a syntax-table property which says it is NOT
2799 a string character, it does not end the string. */
2800 if (nofence && c == state.instring && temp == Sstring)
2801 break;
2803 switch (temp)
2805 case Sstring_fence:
2806 if (!nofence) goto string_end;
2807 break;
2808 case Scharquote:
2809 case Sescape:
2810 INC_FROM;
2811 startquotedinstring:
2812 if (from >= end) goto endquoted;
2814 INC_FROM;
2817 string_end:
2818 state.instring = -1;
2819 curlevel->prev = curlevel->last;
2820 INC_FROM;
2821 if (boundary_stop) goto done;
2822 break;
2824 case Smath:
2825 break;
2828 goto done;
2830 stop: /* Here if stopping before start of sexp. */
2831 from = prev_from; /* We have just fetched the char that starts it; */
2832 goto done; /* but return the position before it. */
2834 endquoted:
2835 state.quoted = 1;
2836 done:
2837 state.depth = depth;
2838 state.mindepth = mindepth;
2839 state.thislevelstart = curlevel->prev;
2840 state.prevlevelstart
2841 = (curlevel == levelstart) ? -1 : (curlevel - 1)->last;
2842 state.location = from;
2843 state.levelstarts = Qnil;
2844 while (--curlevel >= levelstart)
2845 state.levelstarts = Fcons (make_number (curlevel->last),
2846 state.levelstarts);
2847 immediate_quit = 0;
2849 *stateptr = state;
2852 /* This comment supplies the doc string for parse-partial-sexp,
2853 for make-docfile to see. We cannot put this in the real DEFUN
2854 due to limits in the Unix cpp.
2856 DEFUN ("parse-partial-sexp", Ffoo, Sfoo, 2, 6, 0,
2857 "Parse Lisp syntax starting at FROM until TO; return status of parse at TO.\n\
2858 Parsing stops at TO or when certain criteria are met;\n\
2859 point is set to where parsing stops.\n\
2860 If fifth arg STATE is omitted or nil,\n\
2861 parsing assumes that FROM is the beginning of a function.\n\
2862 Value is a list of ten elements describing final state of parsing:\n\
2863 0. depth in parens.\n\
2864 1. character address of start of innermost containing list; nil if none.\n\
2865 2. character address of start of last complete sexp terminated.\n\
2866 3. non-nil if inside a string.\n\
2867 (it is the character that will terminate the string,\n\
2868 or t if the string should be terminated by a generic string delimiter.)\n\
2869 4. nil if outside a comment, t if inside a non-nestable comment, \n\
2870 else an integer (the current comment nesting).\n\
2871 5. t if following a quote character.\n\
2872 6. the minimum paren-depth encountered during this scan.\n\
2873 7. t if in a comment of style b; symbol `syntax-table' if the comment\n\
2874 should be terminated by a generic comment delimiter.\n\
2875 8. character address of start of comment or string; nil if not in one.\n\
2876 9. Intermediate data for continuation of parsing (subject to change).\n\
2877 If third arg TARGETDEPTH is non-nil, parsing stops if the depth\n\
2878 in parentheses becomes equal to TARGETDEPTH.\n\
2879 Fourth arg STOPBEFORE non-nil means stop when come to\n\
2880 any character that starts a sexp.\n\
2881 Fifth arg STATE is a nine-element list like what this function returns.\n\
2882 It is used to initialize the state of the parse. Elements number 1, 2, 6\n\
2883 and 8 are ignored; you can leave off element 8 (the last) entirely.\n\
2884 Sixth arg COMMENTSTOP non-nil means stop at the start of a comment.\n\
2885 If it is symbol `syntax-table', stop after the start of a comment or a\n\
2886 string, or after end of a comment or a string.")
2887 (from, to, targetdepth, stopbefore, state, commentstop)
2890 DEFUN ("parse-partial-sexp", Fparse_partial_sexp, Sparse_partial_sexp, 2, 6, 0,
2891 0 /* See immediately above */)
2892 (from, to, targetdepth, stopbefore, oldstate, commentstop)
2893 Lisp_Object from, to, targetdepth, stopbefore, oldstate, commentstop;
2895 struct lisp_parse_state state;
2896 int target;
2898 if (!NILP (targetdepth))
2900 CHECK_NUMBER (targetdepth, 3);
2901 target = XINT (targetdepth);
2903 else
2904 target = -100000; /* We won't reach this depth */
2906 validate_region (&from, &to);
2907 scan_sexps_forward (&state, XINT (from), CHAR_TO_BYTE (XINT (from)),
2908 XINT (to),
2909 target, !NILP (stopbefore), oldstate,
2910 (NILP (commentstop)
2911 ? 0 : (EQ (commentstop, Qsyntax_table) ? -1 : 1)));
2913 SET_PT (state.location);
2915 return Fcons (make_number (state.depth),
2916 Fcons (state.prevlevelstart < 0 ? Qnil : make_number (state.prevlevelstart),
2917 Fcons (state.thislevelstart < 0 ? Qnil : make_number (state.thislevelstart),
2918 Fcons (state.instring >= 0
2919 ? (state.instring == ST_STRING_STYLE
2920 ? Qt : make_number (state.instring)) : Qnil,
2921 Fcons (state.incomment < 0 ? Qt :
2922 (state.incomment == 0 ? Qnil :
2923 make_number (state.incomment)),
2924 Fcons (state.quoted ? Qt : Qnil,
2925 Fcons (make_number (state.mindepth),
2926 Fcons ((state.comstyle
2927 ? (state.comstyle == ST_COMMENT_STYLE
2928 ? Qsyntax_table : Qt) :
2929 Qnil),
2930 Fcons (((state.incomment
2931 || (state.instring >= 0))
2932 ? make_number (state.comstr_start)
2933 : Qnil),
2934 Fcons (state.levelstarts, Qnil))))))))));
2937 void
2938 init_syntax_once ()
2940 register int i, c;
2941 Lisp_Object temp;
2943 /* This has to be done here, before we call Fmake_char_table. */
2944 Qsyntax_table = intern ("syntax-table");
2945 staticpro (&Qsyntax_table);
2947 /* Intern this now in case it isn't already done.
2948 Setting this variable twice is harmless.
2949 But don't staticpro it here--that is done in alloc.c. */
2950 Qchar_table_extra_slots = intern ("char-table-extra-slots");
2952 /* Create objects which can be shared among syntax tables. */
2953 Vsyntax_code_object = Fmake_vector (make_number (Smax), Qnil);
2954 for (i = 0; i < XVECTOR (Vsyntax_code_object)->size; i++)
2955 XVECTOR (Vsyntax_code_object)->contents[i]
2956 = Fcons (make_number (i), Qnil);
2958 /* Now we are ready to set up this property, so we can
2959 create syntax tables. */
2960 Fput (Qsyntax_table, Qchar_table_extra_slots, make_number (0));
2962 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Swhitespace];
2964 Vstandard_syntax_table = Fmake_char_table (Qsyntax_table, temp);
2966 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Sword];
2967 for (i = 'a'; i <= 'z'; i++)
2968 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
2969 for (i = 'A'; i <= 'Z'; i++)
2970 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
2971 for (i = '0'; i <= '9'; i++)
2972 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
2974 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '$', temp);
2975 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '%', temp);
2977 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '(',
2978 Fcons (make_number (Sopen), make_number (')')));
2979 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ')',
2980 Fcons (make_number (Sclose), make_number ('(')));
2981 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '[',
2982 Fcons (make_number (Sopen), make_number (']')));
2983 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ']',
2984 Fcons (make_number (Sclose), make_number ('[')));
2985 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '{',
2986 Fcons (make_number (Sopen), make_number ('}')));
2987 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '}',
2988 Fcons (make_number (Sclose), make_number ('{')));
2989 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '"',
2990 Fcons (make_number ((int) Sstring), Qnil));
2991 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\\',
2992 Fcons (make_number ((int) Sescape), Qnil));
2994 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Ssymbol];
2995 for (i = 0; i < 10; i++)
2997 c = "_-+*/&|<>="[i];
2998 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
3001 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Spunct];
3002 for (i = 0; i < 12; i++)
3004 c = ".,;:?!#@~^'`"[i];
3005 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
3008 /* All multibyte characters have syntax `word' by default. */
3009 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Sword];
3010 for (i = CHAR_TABLE_SINGLE_BYTE_SLOTS; i < CHAR_TABLE_ORDINARY_SLOTS; i++)
3011 XCHAR_TABLE (Vstandard_syntax_table)->contents[i] = temp;
3014 void
3015 syms_of_syntax ()
3017 Qsyntax_table_p = intern ("syntax-table-p");
3018 staticpro (&Qsyntax_table_p);
3020 staticpro (&Vsyntax_code_object);
3022 Qscan_error = intern ("scan-error");
3023 staticpro (&Qscan_error);
3024 Fput (Qscan_error, Qerror_conditions,
3025 Fcons (Qscan_error, Fcons (Qerror, Qnil)));
3026 Fput (Qscan_error, Qerror_message,
3027 build_string ("Scan error"));
3029 DEFVAR_BOOL ("parse-sexp-ignore-comments", &parse_sexp_ignore_comments,
3030 "Non-nil means `forward-sexp', etc., should treat comments as whitespace.");
3032 DEFVAR_BOOL ("parse-sexp-lookup-properties", &parse_sexp_lookup_properties,
3033 "Non-nil means `forward-sexp', etc., grant `syntax-table' property.\n\
3034 The value of this property should be either a syntax table, or a cons\n\
3035 of the form (SYNTAXCODE . MATCHCHAR), SYNTAXCODE being the numeric\n\
3036 syntax code, MATCHCHAR being nil or the character to match (which is\n\
3037 relevant only for open/close type.");
3039 words_include_escapes = 0;
3040 DEFVAR_BOOL ("words-include-escapes", &words_include_escapes,
3041 "Non-nil means `forward-word', etc., should treat escape chars part of words.");
3043 DEFVAR_BOOL ("multibyte-syntax-as-symbol", &multibyte_syntax_as_symbol,
3044 "Non-nil means `scan-sexps' treats all multibyte characters as symbol.");
3045 multibyte_syntax_as_symbol = 0;
3047 DEFVAR_BOOL ("open-paren-in-column-0-is-defun-start",
3048 &open_paren_in_column_0_is_defun_start,
3049 "Non-nil means an open paren in column 0 denotes the start of a defun.");
3050 open_paren_in_column_0_is_defun_start = 1;
3052 defsubr (&Ssyntax_table_p);
3053 defsubr (&Ssyntax_table);
3054 defsubr (&Sstandard_syntax_table);
3055 defsubr (&Scopy_syntax_table);
3056 defsubr (&Sset_syntax_table);
3057 defsubr (&Schar_syntax);
3058 defsubr (&Smatching_paren);
3059 defsubr (&Sstring_to_syntax);
3060 defsubr (&Smodify_syntax_entry);
3061 defsubr (&Sdescribe_syntax);
3063 defsubr (&Sforward_word);
3065 defsubr (&Sskip_chars_forward);
3066 defsubr (&Sskip_chars_backward);
3067 defsubr (&Sskip_syntax_forward);
3068 defsubr (&Sskip_syntax_backward);
3070 defsubr (&Sforward_comment);
3071 defsubr (&Sscan_lists);
3072 defsubr (&Sscan_sexps);
3073 defsubr (&Sbackward_prefix_chars);
3074 defsubr (&Sparse_partial_sexp);