(back_comment): Use one switch rather than a few `if's.
[emacs.git] / src / syntax.c
blob43081e279456f674db9be5310953d484e31a09d5
1 /* GNU Emacs routines to deal with syntax tables; also word and list parsing.
2 Copyright (C) 1985, 87, 93, 94, 95, 97, 1998, 1999 Free Software Foundation, Inc.
4 This file is part of GNU Emacs.
6 GNU Emacs is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
11 GNU Emacs is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GNU Emacs; see the file COPYING. If not, write to
18 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
22 #include <config.h>
23 #include <ctype.h>
24 #include "lisp.h"
25 #include "commands.h"
26 #include "buffer.h"
27 #include "charset.h"
29 /* Make syntax table lookup grant data in gl_state. */
30 #define SYNTAX_ENTRY_VIA_PROPERTY
32 #include "syntax.h"
33 #include "intervals.h"
35 /* We use these constants in place for comment-style and
36 string-ender-char to distinguish comments/strings started by
37 comment_fence and string_fence codes. */
39 #define ST_COMMENT_STYLE (256 + 1)
40 #define ST_STRING_STYLE (256 + 2)
41 #include "category.h"
43 Lisp_Object Qsyntax_table_p, Qsyntax_table, Qscan_error;
45 int words_include_escapes;
46 int parse_sexp_lookup_properties;
48 /* Nonzero means `scan-sexps' treat all multibyte characters as symbol. */
49 int multibyte_syntax_as_symbol;
51 /* Used as a temporary in SYNTAX_ENTRY and other macros in syntax.h,
52 if not compiled with GCC. No need to mark it, since it is used
53 only very temporarily. */
54 Lisp_Object syntax_temp;
56 /* Non-zero means an open parenthesis in column 0 is always considered
57 to be the start of a defun. Zero means an open parenthesis in
58 column 0 has no special meaning. */
60 int open_paren_in_column_0_is_defun_start;
62 /* This is the internal form of the parse state used in parse-partial-sexp. */
64 struct lisp_parse_state
66 int depth; /* Depth at end of parsing. */
67 int instring; /* -1 if not within string, else desired terminator. */
68 int incomment; /* -1 if in unnestable comment else comment nesting */
69 int comstyle; /* comment style a=0, or b=1, or ST_COMMENT_STYLE. */
70 int quoted; /* Nonzero if just after an escape char at end of parsing */
71 int thislevelstart; /* Char number of most recent start-of-expression at current level */
72 int prevlevelstart; /* Char number of start of containing expression */
73 int location; /* Char number at which parsing stopped. */
74 int mindepth; /* Minimum depth seen while scanning. */
75 int comstr_start; /* Position just after last comment/string starter. */
76 Lisp_Object levelstarts; /* Char numbers of starts-of-expression
77 of levels (starting from outermost). */
80 /* These variables are a cache for finding the start of a defun.
81 find_start_pos is the place for which the defun start was found.
82 find_start_value is the defun start position found for it.
83 find_start_value_byte is the corresponding byte position.
84 find_start_buffer is the buffer it was found in.
85 find_start_begv is the BEGV value when it was found.
86 find_start_modiff is the value of MODIFF when it was found. */
88 static int find_start_pos;
89 static int find_start_value;
90 static int find_start_value_byte;
91 static struct buffer *find_start_buffer;
92 static int find_start_begv;
93 static int find_start_modiff;
96 static int find_defun_start P_ ((int, int));
97 static int back_comment P_ ((int, int, int, int, int, int *, int *));
98 static int char_quoted P_ ((int, int));
99 static Lisp_Object skip_chars P_ ((int, int, Lisp_Object, Lisp_Object));
100 static Lisp_Object scan_lists P_ ((int, int, int, int));
101 static void scan_sexps_forward P_ ((struct lisp_parse_state *,
102 int, int, int, int,
103 int, Lisp_Object, int));
106 struct gl_state_s gl_state; /* Global state of syntax parser. */
108 INTERVAL interval_of ();
109 #define INTERVALS_AT_ONCE 10 /* 1 + max-number of intervals
110 to scan to property-change. */
112 /* Update gl_state to an appropriate interval which contains CHARPOS. The
113 sign of COUNT give the relative position of CHARPOS wrt the previously
114 valid interval. If INIT, only [be]_property fields of gl_state are
115 valid at start, the rest is filled basing on OBJECT.
117 `gl_state.*_i' are the intervals, and CHARPOS is further in the search
118 direction than the intervals - or in an interval. We update the
119 current syntax-table basing on the property of this interval, and
120 update the interval to start further than CHARPOS - or be
121 NULL_INTERVAL. We also update lim_property to be the next value of
122 charpos to call this subroutine again - or be before/after the
123 start/end of OBJECT. */
125 void
126 update_syntax_table (charpos, count, init, object)
127 int charpos, count, init;
128 Lisp_Object object;
130 Lisp_Object tmp_table;
131 int cnt = 0, invalidate = 1;
132 INTERVAL i, oldi;
134 if (init)
136 gl_state.start = gl_state.b_property;
137 gl_state.stop = gl_state.e_property;
138 gl_state.forward_i = interval_of (charpos, object);
139 i = gl_state.backward_i = gl_state.forward_i;
140 gl_state.left_ok = gl_state.right_ok = 1;
141 invalidate = 0;
142 if (NULL_INTERVAL_P (i))
143 return;
144 /* interval_of updates only ->position of the return value, so
145 update the parents manually to speed up update_interval. */
146 while (!NULL_PARENT (i))
148 if (AM_RIGHT_CHILD (i))
149 INTERVAL_PARENT (i)->position = i->position
150 - LEFT_TOTAL_LENGTH (i) + TOTAL_LENGTH (i) /* right end */
151 - TOTAL_LENGTH (INTERVAL_PARENT (i))
152 + LEFT_TOTAL_LENGTH (INTERVAL_PARENT (i));
153 else
154 INTERVAL_PARENT (i)->position = i->position - LEFT_TOTAL_LENGTH (i)
155 + TOTAL_LENGTH (i);
156 i = INTERVAL_PARENT (i);
158 i = gl_state.forward_i;
159 gl_state.b_property = i->position - 1 - gl_state.offset;
160 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
161 goto update;
163 oldi = i = count > 0 ? gl_state.forward_i : gl_state.backward_i;
165 /* We are guarantied to be called with CHARPOS either in i,
166 or further off. */
167 if (NULL_INTERVAL_P (i))
168 error ("Error in syntax_table logic for to-the-end intervals");
169 else if (charpos < i->position) /* Move left. */
171 if (count > 0)
172 error ("Error in syntax_table logic for intervals <-");
173 /* Update the interval. */
174 i = update_interval (i, charpos);
175 if (oldi->position != INTERVAL_LAST_POS (i))
177 invalidate = 0;
178 gl_state.right_ok = 1; /* Invalidate the other end. */
179 gl_state.forward_i = i;
180 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
183 else if (charpos >= INTERVAL_LAST_POS (i)) /* Move right. */
185 if (count < 0)
186 error ("Error in syntax_table logic for intervals ->");
187 /* Update the interval. */
188 i = update_interval (i, charpos);
189 if (i->position != INTERVAL_LAST_POS (oldi))
191 invalidate = 0;
192 gl_state.left_ok = 1; /* Invalidate the other end. */
193 gl_state.backward_i = i;
194 gl_state.b_property = i->position - 1 - gl_state.offset;
197 else if (count > 0 ? gl_state.right_ok : gl_state.left_ok)
199 /* We do not need to recalculate tmp_table. */
200 tmp_table = gl_state.old_prop;
203 update:
204 tmp_table = textget (i->plist, Qsyntax_table);
206 if (invalidate)
207 invalidate = !EQ (tmp_table, gl_state.old_prop); /* Need to invalidate? */
209 if (invalidate) /* Did not get to adjacent interval. */
210 { /* with the same table => */
211 /* invalidate the old range. */
212 if (count > 0)
214 gl_state.backward_i = i;
215 gl_state.left_ok = 1; /* Invalidate the other end. */
216 gl_state.b_property = i->position - 1 - gl_state.offset;
218 else
220 gl_state.forward_i = i;
221 gl_state.right_ok = 1; /* Invalidate the other end. */
222 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
226 gl_state.current_syntax_table = tmp_table;
227 gl_state.old_prop = tmp_table;
228 if (EQ (Fsyntax_table_p (tmp_table), Qt))
230 gl_state.use_global = 0;
232 else if (CONSP (tmp_table))
234 gl_state.use_global = 1;
235 gl_state.global_code = tmp_table;
237 else
239 gl_state.use_global = 0;
240 gl_state.current_syntax_table = current_buffer->syntax_table;
243 while (!NULL_INTERVAL_P (i))
245 if (cnt && !EQ (tmp_table, textget (i->plist, Qsyntax_table)))
247 if (count > 0)
248 gl_state.right_ok = 0;
249 else
250 gl_state.left_ok = 0;
251 break;
253 else if (cnt == INTERVALS_AT_ONCE)
255 if (count > 0)
256 gl_state.right_ok = 1;
257 else
258 gl_state.left_ok = 1;
259 break;
261 cnt++;
262 i = count > 0 ? next_interval (i) : previous_interval (i);
264 if (NULL_INTERVAL_P (i))
265 { /* This property goes to the end. */
266 if (count > 0)
267 gl_state.e_property = gl_state.stop;
268 else
269 gl_state.b_property = gl_state.start;
271 else
273 if (count > 0)
275 gl_state.e_property = i->position - gl_state.offset;
276 gl_state.forward_i = i;
278 else
280 gl_state.b_property = i->position + LENGTH (i) - 1 - gl_state.offset;
281 gl_state.backward_i = i;
286 /* Returns TRUE if char at CHARPOS is quoted.
287 Global syntax-table data should be set up already to be good at CHARPOS
288 or after. On return global syntax data is good for lookup at CHARPOS. */
290 static int
291 char_quoted (charpos, bytepos)
292 register int charpos, bytepos;
294 register enum syntaxcode code;
295 register int beg = BEGV;
296 register int quoted = 0;
297 int orig = charpos;
299 DEC_BOTH (charpos, bytepos);
301 while (bytepos >= beg)
303 UPDATE_SYNTAX_TABLE_BACKWARD (charpos);
304 code = SYNTAX (FETCH_CHAR (bytepos));
305 if (! (code == Scharquote || code == Sescape))
306 break;
308 DEC_BOTH (charpos, bytepos);
309 quoted = !quoted;
312 UPDATE_SYNTAX_TABLE (orig);
313 return quoted;
316 /* Return the bytepos one character after BYTEPOS.
317 We assume that BYTEPOS is not at the end of the buffer. */
319 INLINE int
320 inc_bytepos (bytepos)
321 int bytepos;
323 if (NILP (current_buffer->enable_multibyte_characters))
324 return bytepos + 1;
326 INC_POS (bytepos);
327 return bytepos;
330 /* Return the bytepos one character before BYTEPOS.
331 We assume that BYTEPOS is not at the start of the buffer. */
333 INLINE int
334 dec_bytepos (bytepos)
335 int bytepos;
337 if (NILP (current_buffer->enable_multibyte_characters))
338 return bytepos - 1;
340 DEC_POS (bytepos);
341 return bytepos;
344 /* Find a defun-start that is the last one before POS (or nearly the last).
345 We record what we find, so that another call in the same area
346 can return the same value right away.
348 There is no promise at which position the global syntax data is
349 valid on return from the subroutine, so the caller should explicitly
350 update the global data. */
352 static int
353 find_defun_start (pos, pos_byte)
354 int pos, pos_byte;
356 int opoint = PT, opoint_byte = PT_BYTE;
358 /* Use previous finding, if it's valid and applies to this inquiry. */
359 if (current_buffer == find_start_buffer
360 /* Reuse the defun-start even if POS is a little farther on.
361 POS might be in the next defun, but that's ok.
362 Our value may not be the best possible, but will still be usable. */
363 && pos <= find_start_pos + 1000
364 && pos >= find_start_value
365 && BEGV == find_start_begv
366 && MODIFF == find_start_modiff)
367 return find_start_value;
369 /* Back up to start of line. */
370 scan_newline (pos, pos_byte, BEGV, BEGV_BYTE, -1, 1);
372 /* We optimize syntax-table lookup for rare updates. Thus we accept
373 only those `^\s(' which are good in global _and_ text-property
374 syntax-tables. */
375 gl_state.current_syntax_table = current_buffer->syntax_table;
376 gl_state.use_global = 0;
377 if (open_paren_in_column_0_is_defun_start)
379 while (PT > BEGV)
381 /* Open-paren at start of line means we may have found our
382 defun-start. */
383 if (SYNTAX (FETCH_CHAR (PT_BYTE)) == Sopen)
385 SETUP_SYNTAX_TABLE (PT + 1, -1); /* Try again... */
386 if (SYNTAX (FETCH_CHAR (PT_BYTE)) == Sopen)
387 break;
388 /* Now fallback to the default value. */
389 gl_state.current_syntax_table = current_buffer->syntax_table;
390 gl_state.use_global = 0;
392 /* Move to beg of previous line. */
393 scan_newline (PT, PT_BYTE, BEGV, BEGV_BYTE, -2, 1);
397 /* Record what we found, for the next try. */
398 find_start_value = PT;
399 find_start_value_byte = PT_BYTE;
400 find_start_buffer = current_buffer;
401 find_start_modiff = MODIFF;
402 find_start_begv = BEGV;
403 find_start_pos = pos;
405 TEMP_SET_PT_BOTH (opoint, opoint_byte);
407 return find_start_value;
410 /* Return the SYNTAX_COMEND_FIRST of the character before POS, POS_BYTE. */
412 static int
413 prev_char_comend_first (pos, pos_byte)
414 int pos, pos_byte;
416 int c, val;
418 DEC_BOTH (pos, pos_byte);
419 UPDATE_SYNTAX_TABLE_BACKWARD (pos);
420 c = FETCH_CHAR (pos_byte);
421 val = SYNTAX_COMEND_FIRST (c);
422 UPDATE_SYNTAX_TABLE_FORWARD (pos + 1);
423 return val;
426 /* Return the SYNTAX_COMSTART_FIRST of the character before POS, POS_BYTE. */
428 static int
429 prev_char_comstart_first (pos, pos_byte)
430 int pos, pos_byte;
432 int c, val;
434 DEC_BOTH (pos, pos_byte);
435 UPDATE_SYNTAX_TABLE_BACKWARD (pos);
436 c = FETCH_CHAR (pos_byte);
437 val = SYNTAX_COMSTART_FIRST (c);
438 UPDATE_SYNTAX_TABLE_FORWARD (pos + 1);
439 return val;
442 /* Checks whether charpos FROM is at the end of a comment.
443 FROM_BYTE is the bytepos corresponding to FROM.
444 Do not move back before STOP.
446 Return a positive value if we find a comment ending at FROM/FROM_BYTE;
447 return -1 otherwise.
449 If successful, store the charpos of the comment's beginning
450 into *CHARPOS_PTR, and the bytepos into *BYTEPOS_PTR.
452 Global syntax data remains valid for backward search starting at
453 the returned value (or at FROM, if the search was not successful). */
455 static int
456 back_comment (from, from_byte, stop, comnested, comstyle, charpos_ptr, bytepos_ptr)
457 int from, from_byte, stop;
458 int comnested, comstyle;
459 int *charpos_ptr, *bytepos_ptr;
461 /* Look back, counting the parity of string-quotes,
462 and recording the comment-starters seen.
463 When we reach a safe place, assume that's not in a string;
464 then step the main scan to the earliest comment-starter seen
465 an even number of string quotes away from the safe place.
467 OFROM[I] is position of the earliest comment-starter seen
468 which is I+2X quotes from the comment-end.
469 PARITY is current parity of quotes from the comment end. */
470 int string_style = -1; /* Presumed outside of any string. */
471 int string_lossage = 0;
472 int comment_end = from;
473 int comment_end_byte = from_byte;
474 int comstart_pos = 0;
475 int comstart_byte;
476 /* Value that PARITY had, when we reached the position
477 in COMSTART_POS. */
478 int scanstart = from - 1;
479 /* Place where the containing defun starts,
480 or 0 if we didn't come across it yet. */
481 int defun_start = 0;
482 int defun_start_byte = 0;
483 register enum syntaxcode code;
484 int nesting = 1; /* current comment nesting */
485 int c;
487 /* At beginning of range to scan, we're outside of strings;
488 that determines quote parity to the comment-end. */
489 while (from != stop)
491 int temp_byte;
493 /* Move back and examine a character. */
494 DEC_BOTH (from, from_byte);
495 UPDATE_SYNTAX_TABLE_BACKWARD (from);
497 c = FETCH_CHAR (from_byte);
498 code = SYNTAX (c);
500 /* If this char is the second of a 2-char comment end sequence,
501 back up and give the pair the appropriate syntax. */
502 if (from > stop && SYNTAX_COMEND_SECOND (c)
503 && prev_char_comend_first (from, from_byte))
505 code = Sendcomment;
506 DEC_BOTH (from, from_byte);
507 UPDATE_SYNTAX_TABLE_BACKWARD (from);
508 c = FETCH_CHAR (from_byte);
511 /* If this char starts a 2-char comment start sequence,
512 treat it like a 1-char comment starter. */
513 if (from < scanstart && SYNTAX_COMSTART_FIRST (c))
515 temp_byte = inc_bytepos (from_byte);
516 UPDATE_SYNTAX_TABLE_FORWARD (from + 1);
517 if (SYNTAX_COMSTART_SECOND (FETCH_CHAR (temp_byte))
518 && comstyle == SYNTAX_COMMENT_STYLE (FETCH_CHAR (temp_byte)))
519 code = Scomment;
520 UPDATE_SYNTAX_TABLE_BACKWARD (from);
522 else if (code == Scomment && comstyle != SYNTAX_COMMENT_STYLE (c))
523 /* Ignore comment starters of a different style. */
524 continue;
526 /* Ignore escaped characters, except comment-enders. */
527 if (code != Sendcomment && char_quoted (from, from_byte))
528 continue;
530 switch (code)
532 case Sstring_fence:
533 case Scomment_fence:
534 c = (code == Sstring_fence ? ST_STRING_STYLE : ST_COMMENT_STYLE);
535 case Sstring:
536 /* Track parity of quotes. */
537 if (string_style == -1)
538 /* Entering a string. */
539 string_style = c;
540 else if (string_style == c)
541 /* Leaving the string. */
542 string_style = -1;
543 else
544 /* If we have two kinds of string delimiters.
545 There's no way to grok this scanning backwards. */
546 string_lossage = 1;
547 break;
549 case Scomment:
550 /* We've already checked that it is the relevant comstyle. */
551 if (string_style != -1 || string_lossage)
552 /* There are odd string quotes involved, so let's be careful.
553 Test case in Pascal: " { " a { " } */
554 goto lossage;
556 if (comnested && --nesting <= 0)
557 /* nested comments have to be balanced, so we don't need to
558 keep looking for earlier ones. We use here the same (slightly
559 incorrect) reasoning as below: since it is followed by uniform
560 paired string quotes, this comment-start has to be outside of
561 strings, else the comment-end itself would be inside a string. */
562 goto done;
564 /* Record comment-starters according to that
565 quote-parity to the comment-end. */
566 comstart_pos = from;
567 comstart_byte = from_byte;
568 break;
570 case Sendcomment:
571 if (SYNTAX_COMMENT_STYLE (FETCH_CHAR (from_byte)) == comstyle)
572 /* This is the same style of comment ender as ours. */
574 if (comnested)
575 nesting++;
576 else
577 /* Anything before that can't count because it would match
578 this comment-ender rather than ours. */
579 from = stop; /* Break out of the loop. */
581 break;
583 case Sopen:
584 /* Assume a defun-start point is outside of strings. */
585 if (open_paren_in_column_0_is_defun_start
586 && (from == stop
587 || (temp_byte = dec_bytepos (from_byte),
588 FETCH_CHAR (temp_byte) == '\n')))
590 defun_start = from;
591 defun_start_byte = from_byte;
592 from = stop; /* Break out of the loop. */
594 break;
596 default:
600 if (comstart_pos == 0)
602 from = comment_end;
603 from_byte = comment_end_byte;
604 UPDATE_SYNTAX_TABLE_FORWARD (comment_end - 1);
606 /* If the earliest comment starter
607 is followed by uniform paired string quotes or none,
608 we know it can't be inside a string
609 since if it were then the comment ender would be inside one.
610 So it does start a comment. Skip back to it. */
611 else if (!comnested)
613 from = comstart_pos;
614 from_byte = comstart_byte;
615 /* Globals are correct now. */
617 else
619 struct lisp_parse_state state;
620 lossage:
621 /* We had two kinds of string delimiters mixed up
622 together. Decode this going forwards.
623 Scan fwd from a known safe place (beginning-of-defun)
624 to the one in question; this records where we
625 last passed a comment starter. */
626 /* If we did not already find the defun start, find it now. */
627 if (defun_start == 0)
629 defun_start = find_defun_start (comment_end, comment_end_byte);
630 defun_start_byte = find_start_value_byte;
634 scan_sexps_forward (&state,
635 defun_start, defun_start_byte,
636 comment_end, -10000, 0, Qnil, 0);
637 defun_start = comment_end;
638 if (state.incomment == (comnested ? 1 : -1)
639 && state.comstyle == comstyle)
640 from = state.comstr_start;
641 else
643 from = comment_end;
644 if (state.incomment)
645 /* If comment_end is inside some other comment, maybe ours
646 is nested, so we need to try again from within the
647 surrounding comment. Example: { a (* " *) */
649 /* FIXME: We should advance by one or two chars. */
650 defun_start = state.comstr_start + 2;
651 defun_start_byte = CHAR_TO_BYTE (defun_start);
654 } while (defun_start < comment_end);
656 from_byte = CHAR_TO_BYTE (from);
657 UPDATE_SYNTAX_TABLE_FORWARD (from - 1);
660 done:
661 *charpos_ptr = from;
662 *bytepos_ptr = from_byte;
664 return (from == comment_end) ? -1 : from;
667 DEFUN ("syntax-table-p", Fsyntax_table_p, Ssyntax_table_p, 1, 1, 0,
668 "Return t if OBJECT is a syntax table.\n\
669 Currently, any char-table counts as a syntax table.")
670 (object)
671 Lisp_Object object;
673 if (CHAR_TABLE_P (object)
674 && EQ (XCHAR_TABLE (object)->purpose, Qsyntax_table))
675 return Qt;
676 return Qnil;
679 static void
680 check_syntax_table (obj)
681 Lisp_Object obj;
683 if (!(CHAR_TABLE_P (obj)
684 && EQ (XCHAR_TABLE (obj)->purpose, Qsyntax_table)))
685 wrong_type_argument (Qsyntax_table_p, obj);
688 DEFUN ("syntax-table", Fsyntax_table, Ssyntax_table, 0, 0, 0,
689 "Return the current syntax table.\n\
690 This is the one specified by the current buffer.")
693 return current_buffer->syntax_table;
696 DEFUN ("standard-syntax-table", Fstandard_syntax_table,
697 Sstandard_syntax_table, 0, 0, 0,
698 "Return the standard syntax table.\n\
699 This is the one used for new buffers.")
702 return Vstandard_syntax_table;
705 DEFUN ("copy-syntax-table", Fcopy_syntax_table, Scopy_syntax_table, 0, 1, 0,
706 "Construct a new syntax table and return it.\n\
707 It is a copy of the TABLE, which defaults to the standard syntax table.")
708 (table)
709 Lisp_Object table;
711 Lisp_Object copy;
713 if (!NILP (table))
714 check_syntax_table (table);
715 else
716 table = Vstandard_syntax_table;
718 copy = Fcopy_sequence (table);
720 /* Only the standard syntax table should have a default element.
721 Other syntax tables should inherit from parents instead. */
722 XCHAR_TABLE (copy)->defalt = Qnil;
724 /* Copied syntax tables should all have parents.
725 If we copied one with no parent, such as the standard syntax table,
726 use the standard syntax table as the copy's parent. */
727 if (NILP (XCHAR_TABLE (copy)->parent))
728 Fset_char_table_parent (copy, Vstandard_syntax_table);
729 return copy;
732 DEFUN ("set-syntax-table", Fset_syntax_table, Sset_syntax_table, 1, 1, 0,
733 "Select a new syntax table for the current buffer.\n\
734 One argument, a syntax table.")
735 (table)
736 Lisp_Object table;
738 int idx;
739 check_syntax_table (table);
740 current_buffer->syntax_table = table;
741 /* Indicate that this buffer now has a specified syntax table. */
742 idx = PER_BUFFER_VAR_IDX (syntax_table);
743 SET_PER_BUFFER_VALUE_P (current_buffer, idx, 1);
744 return table;
747 /* Convert a letter which signifies a syntax code
748 into the code it signifies.
749 This is used by modify-syntax-entry, and other things. */
751 unsigned char syntax_spec_code[0400] =
752 { 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
753 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
754 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
755 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
756 (char) Swhitespace, (char) Scomment_fence, (char) Sstring, 0377,
757 (char) Smath, 0377, 0377, (char) Squote,
758 (char) Sopen, (char) Sclose, 0377, 0377,
759 0377, (char) Swhitespace, (char) Spunct, (char) Scharquote,
760 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
761 0377, 0377, 0377, 0377,
762 (char) Scomment, 0377, (char) Sendcomment, 0377,
763 (char) Sinherit, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* @, A ... */
764 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
765 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
766 0377, 0377, 0377, 0377, (char) Sescape, 0377, 0377, (char) Ssymbol,
767 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* `, a, ... */
768 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
769 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
770 0377, 0377, 0377, 0377, (char) Sstring_fence, 0377, 0377, 0377
773 /* Indexed by syntax code, give the letter that describes it. */
775 char syntax_code_spec[16] =
777 ' ', '.', 'w', '_', '(', ')', '\'', '\"', '$', '\\', '/', '<', '>', '@',
778 '!', '|'
781 /* Indexed by syntax code, give the object (cons of syntax code and
782 nil) to be stored in syntax table. Since these objects can be
783 shared among syntax tables, we generate them in advance. By
784 sharing objects, the function `describe-syntax' can give a more
785 compact listing. */
786 static Lisp_Object Vsyntax_code_object;
789 /* Look up the value for CHARACTER in syntax table TABLE's parent
790 and its parents. SYNTAX_ENTRY calls this, when TABLE itself has nil
791 for CHARACTER. It's actually used only when not compiled with GCC. */
793 Lisp_Object
794 syntax_parent_lookup (table, character)
795 Lisp_Object table;
796 int character;
798 Lisp_Object value;
800 while (1)
802 table = XCHAR_TABLE (table)->parent;
803 if (NILP (table))
804 return Qnil;
806 value = XCHAR_TABLE (table)->contents[character];
807 if (!NILP (value))
808 return value;
812 DEFUN ("char-syntax", Fchar_syntax, Schar_syntax, 1, 1, 0,
813 "Return the syntax code of CHARACTER, described by a character.\n\
814 For example, if CHARACTER is a word constituent,\n\
815 the character `w' is returned.\n\
816 The characters that correspond to various syntax codes\n\
817 are listed in the documentation of `modify-syntax-entry'.")
818 (character)
819 Lisp_Object character;
821 int char_int;
822 gl_state.current_syntax_table = current_buffer->syntax_table;
824 gl_state.use_global = 0;
825 CHECK_NUMBER (character, 0);
826 char_int = XINT (character);
827 return make_number (syntax_code_spec[(int) SYNTAX (char_int)]);
830 DEFUN ("matching-paren", Fmatching_paren, Smatching_paren, 1, 1, 0,
831 "Return the matching parenthesis of CHARACTER, or nil if none.")
832 (character)
833 Lisp_Object character;
835 int char_int, code;
836 gl_state.current_syntax_table = current_buffer->syntax_table;
837 gl_state.use_global = 0;
838 CHECK_NUMBER (character, 0);
839 char_int = XINT (character);
840 code = SYNTAX (char_int);
841 if (code == Sopen || code == Sclose)
842 return SYNTAX_MATCH (char_int);
843 return Qnil;
846 /* This comment supplies the doc string for modify-syntax-entry,
847 for make-docfile to see. We cannot put this in the real DEFUN
848 due to limits in the Unix cpp.
850 DEFUN ("modify-syntax-entry", foo, bar, 2, 3, 0,
851 "Set syntax for character CHAR according to string S.\n\
852 The syntax is changed only for table TABLE, which defaults to\n\
853 the current buffer's syntax table.\n\
854 The first character of S should be one of the following:\n\
855 Space or - whitespace syntax. w word constituent.\n\
856 _ symbol constituent. . punctuation.\n\
857 ( open-parenthesis. ) close-parenthesis.\n\
858 \" string quote. \\ escape.\n\
859 $ paired delimiter. ' expression quote or prefix operator.\n\
860 < comment starter. > comment ender.\n\
861 / character-quote. @ inherit from `standard-syntax-table'.\n\
863 Only single-character comment start and end sequences are represented thus.\n\
864 Two-character sequences are represented as described below.\n\
865 The second character of S is the matching parenthesis,\n\
866 used only if the first character is `(' or `)'.\n\
867 Any additional characters are flags.\n\
868 Defined flags are the characters 1, 2, 3, 4, b, p, and n.\n\
869 1 means CHAR is the start of a two-char comment start sequence.\n\
870 2 means CHAR is the second character of such a sequence.\n\
871 3 means CHAR is the start of a two-char comment end sequence.\n\
872 4 means CHAR is the second character of such a sequence.\n\
874 There can be up to two orthogonal comment sequences. This is to support\n\
875 language modes such as C++. By default, all comment sequences are of style\n\
876 a, but you can set the comment sequence style to b (on the second character\n\
877 of a comment-start, or the first character of a comment-end sequence) using\n\
878 this flag:\n\
879 b means CHAR is part of comment sequence b.\n\
880 n means CHAR is part of a nestable comment sequence.\n\
882 p means CHAR is a prefix character for `backward-prefix-chars';\n\
883 such characters are treated as whitespace when they occur\n\
884 between expressions.")
885 (char, s, table)
888 DEFUN ("modify-syntax-entry", Fmodify_syntax_entry, Smodify_syntax_entry, 2, 3,
889 /* I really don't know why this is interactive
890 help-form should at least be made useful whilst reading the second arg
892 "cSet syntax for character: \nsSet syntax for %s to: ",
893 0 /* See immediately above */)
894 (c, newentry, syntax_table)
895 Lisp_Object c, newentry, syntax_table;
897 register unsigned char *p;
898 register enum syntaxcode code;
899 int val;
900 Lisp_Object match;
902 CHECK_NUMBER (c, 0);
903 CHECK_STRING (newentry, 1);
905 if (NILP (syntax_table))
906 syntax_table = current_buffer->syntax_table;
907 else
908 check_syntax_table (syntax_table);
910 p = XSTRING (newentry)->data;
911 code = (enum syntaxcode) syntax_spec_code[*p++];
912 if (((int) code & 0377) == 0377)
913 error ("invalid syntax description letter: %c", p[-1]);
915 if (code == Sinherit)
917 SET_RAW_SYNTAX_ENTRY (syntax_table, XINT (c), Qnil);
918 return Qnil;
921 if (*p)
923 int len;
924 int character = (STRING_CHAR_AND_LENGTH
925 (p, STRING_BYTES (XSTRING (newentry)) - 1, len));
926 XSETINT (match, character);
927 if (XFASTINT (match) == ' ')
928 match = Qnil;
929 p += len;
931 else
932 match = Qnil;
934 val = (int) code;
935 while (*p)
936 switch (*p++)
938 case '1':
939 val |= 1 << 16;
940 break;
942 case '2':
943 val |= 1 << 17;
944 break;
946 case '3':
947 val |= 1 << 18;
948 break;
950 case '4':
951 val |= 1 << 19;
952 break;
954 case 'p':
955 val |= 1 << 20;
956 break;
958 case 'b':
959 val |= 1 << 21;
960 break;
962 case 'n':
963 val |= 1 << 22;
964 break;
967 if (val < XVECTOR (Vsyntax_code_object)->size && NILP (match))
968 newentry = XVECTOR (Vsyntax_code_object)->contents[val];
969 else
970 /* Since we can't use a shared object, let's make a new one. */
971 newentry = Fcons (make_number (val), match);
973 SET_RAW_SYNTAX_ENTRY (syntax_table, XINT (c), newentry);
975 return Qnil;
978 /* Dump syntax table to buffer in human-readable format */
980 static void
981 describe_syntax (value)
982 Lisp_Object value;
984 register enum syntaxcode code;
985 char desc, start1, start2, end1, end2, prefix, comstyle, comnested;
986 char str[2];
987 Lisp_Object first, match_lisp;
989 Findent_to (make_number (16), make_number (1));
991 if (NILP (value))
993 insert_string ("default\n");
994 return;
997 if (CHAR_TABLE_P (value))
999 insert_string ("deeper char-table ...\n");
1000 return;
1003 if (!CONSP (value))
1005 insert_string ("invalid\n");
1006 return;
1009 first = XCAR (value);
1010 match_lisp = XCDR (value);
1012 if (!INTEGERP (first) || !(NILP (match_lisp) || INTEGERP (match_lisp)))
1014 insert_string ("invalid\n");
1015 return;
1018 code = (enum syntaxcode) (XINT (first) & 0377);
1019 start1 = (XINT (first) >> 16) & 1;
1020 start2 = (XINT (first) >> 17) & 1;
1021 end1 = (XINT (first) >> 18) & 1;
1022 end2 = (XINT (first) >> 19) & 1;
1023 prefix = (XINT (first) >> 20) & 1;
1024 comstyle = (XINT (first) >> 21) & 1;
1025 comnested = (XINT (first) >> 22) & 1;
1027 if ((int) code < 0 || (int) code >= (int) Smax)
1029 insert_string ("invalid");
1030 return;
1032 desc = syntax_code_spec[(int) code];
1034 str[0] = desc, str[1] = 0;
1035 insert (str, 1);
1037 if (NILP (match_lisp))
1038 insert (" ", 1);
1039 else
1040 insert_char (XINT (match_lisp));
1042 if (start1)
1043 insert ("1", 1);
1044 if (start2)
1045 insert ("2", 1);
1047 if (end1)
1048 insert ("3", 1);
1049 if (end2)
1050 insert ("4", 1);
1052 if (prefix)
1053 insert ("p", 1);
1054 if (comstyle)
1055 insert ("b", 1);
1056 if (comnested)
1057 insert ("n", 1);
1059 insert_string ("\twhich means: ");
1061 switch (SWITCH_ENUM_CAST (code))
1063 case Swhitespace:
1064 insert_string ("whitespace"); break;
1065 case Spunct:
1066 insert_string ("punctuation"); break;
1067 case Sword:
1068 insert_string ("word"); break;
1069 case Ssymbol:
1070 insert_string ("symbol"); break;
1071 case Sopen:
1072 insert_string ("open"); break;
1073 case Sclose:
1074 insert_string ("close"); break;
1075 case Squote:
1076 insert_string ("quote"); break;
1077 case Sstring:
1078 insert_string ("string"); break;
1079 case Smath:
1080 insert_string ("math"); break;
1081 case Sescape:
1082 insert_string ("escape"); break;
1083 case Scharquote:
1084 insert_string ("charquote"); break;
1085 case Scomment:
1086 insert_string ("comment"); break;
1087 case Sendcomment:
1088 insert_string ("endcomment"); break;
1089 default:
1090 insert_string ("invalid");
1091 return;
1094 if (!NILP (match_lisp))
1096 insert_string (", matches ");
1097 insert_char (XINT (match_lisp));
1100 if (start1)
1101 insert_string (",\n\t is the first character of a comment-start sequence");
1102 if (start2)
1103 insert_string (",\n\t is the second character of a comment-start sequence");
1105 if (end1)
1106 insert_string (",\n\t is the first character of a comment-end sequence");
1107 if (end2)
1108 insert_string (",\n\t is the second character of a comment-end sequence");
1109 if (comstyle)
1110 insert_string (" (comment style b)");
1111 if (comnested)
1112 insert_string (" (nestable)");
1114 if (prefix)
1115 insert_string (",\n\t is a prefix character for `backward-prefix-chars'");
1117 insert_string ("\n");
1120 static Lisp_Object
1121 describe_syntax_1 (vector)
1122 Lisp_Object vector;
1124 struct buffer *old = current_buffer;
1125 set_buffer_internal (XBUFFER (Vstandard_output));
1126 describe_vector (vector, Qnil, describe_syntax, 0, Qnil, Qnil, (int *) 0, 0);
1127 while (! NILP (XCHAR_TABLE (vector)->parent))
1129 vector = XCHAR_TABLE (vector)->parent;
1130 insert_string ("\nThe parent syntax table is:");
1131 describe_vector (vector, Qnil, describe_syntax, 0, Qnil, Qnil,
1132 (int *) 0, 0);
1135 call0 (intern ("help-mode"));
1136 set_buffer_internal (old);
1137 return Qnil;
1140 DEFUN ("describe-syntax", Fdescribe_syntax, Sdescribe_syntax, 0, 0, "",
1141 "Describe the syntax specifications in the syntax table.\n\
1142 The descriptions are inserted in a buffer, which is then displayed.")
1145 internal_with_output_to_temp_buffer
1146 ("*Help*", describe_syntax_1, current_buffer->syntax_table);
1148 return Qnil;
1151 int parse_sexp_ignore_comments;
1153 /* Return the position across COUNT words from FROM.
1154 If that many words cannot be found before the end of the buffer, return 0.
1155 COUNT negative means scan backward and stop at word beginning. */
1158 scan_words (from, count)
1159 register int from, count;
1161 register int beg = BEGV;
1162 register int end = ZV;
1163 register int from_byte = CHAR_TO_BYTE (from);
1164 register enum syntaxcode code;
1165 int ch0, ch1;
1167 immediate_quit = 1;
1168 QUIT;
1170 SETUP_SYNTAX_TABLE (from, count);
1172 while (count > 0)
1174 while (1)
1176 if (from == end)
1178 immediate_quit = 0;
1179 return 0;
1181 UPDATE_SYNTAX_TABLE_FORWARD (from);
1182 ch0 = FETCH_CHAR (from_byte);
1183 code = SYNTAX (ch0);
1184 INC_BOTH (from, from_byte);
1185 if (words_include_escapes
1186 && (code == Sescape || code == Scharquote))
1187 break;
1188 if (code == Sword)
1189 break;
1191 /* Now CH0 is a character which begins a word and FROM is the
1192 position of the next character. */
1193 while (1)
1195 if (from == end) break;
1196 UPDATE_SYNTAX_TABLE_FORWARD (from);
1197 ch1 = FETCH_CHAR (from_byte);
1198 code = SYNTAX (ch1);
1199 if (!(words_include_escapes
1200 && (code == Sescape || code == Scharquote)))
1201 if (code != Sword || WORD_BOUNDARY_P (ch0, ch1))
1202 break;
1203 INC_BOTH (from, from_byte);
1204 ch0 = ch1;
1206 count--;
1208 while (count < 0)
1210 while (1)
1212 if (from == beg)
1214 immediate_quit = 0;
1215 return 0;
1217 DEC_BOTH (from, from_byte);
1218 UPDATE_SYNTAX_TABLE_BACKWARD (from);
1219 ch1 = FETCH_CHAR (from_byte);
1220 code = SYNTAX (ch1);
1221 if (words_include_escapes
1222 && (code == Sescape || code == Scharquote))
1223 break;
1224 if (code == Sword)
1225 break;
1227 /* Now CH1 is a character which ends a word and FROM is the
1228 position of it. */
1229 while (1)
1231 int temp_byte;
1233 if (from == beg)
1234 break;
1235 temp_byte = dec_bytepos (from_byte);
1236 UPDATE_SYNTAX_TABLE_BACKWARD (from);
1237 ch0 = FETCH_CHAR (temp_byte);
1238 code = SYNTAX (ch0);
1239 if (!(words_include_escapes
1240 && (code == Sescape || code == Scharquote)))
1241 if (code != Sword || WORD_BOUNDARY_P (ch0, ch1))
1242 break;
1243 DEC_BOTH (from, from_byte);
1244 ch1 = ch0;
1246 count++;
1249 immediate_quit = 0;
1251 return from;
1254 DEFUN ("forward-word", Fforward_word, Sforward_word, 1, 1, "p",
1255 "Move point forward ARG words (backward if ARG is negative).\n\
1256 Normally returns t.\n\
1257 If an edge of the buffer or a field boundary is reached, point is left there\n\
1258 and the function returns nil. Field boundaries are not noticed if\n\
1259 `inhibit-field-text-motion' is non-nil.")
1260 (count)
1261 Lisp_Object count;
1263 int orig_val, val;
1264 CHECK_NUMBER (count, 0);
1266 val = orig_val = scan_words (PT, XINT (count));
1267 if (! orig_val)
1268 val = XINT (count) > 0 ? ZV : BEGV;
1270 /* Avoid jumping out of an input field. */
1271 val = XFASTINT (Fconstrain_to_field (make_number (val), make_number (PT),
1272 Qt, Qnil));
1274 SET_PT (val);
1275 return val == orig_val ? Qt : Qnil;
1278 Lisp_Object skip_chars ();
1280 DEFUN ("skip-chars-forward", Fskip_chars_forward, Sskip_chars_forward, 1, 2, 0,
1281 "Move point forward, stopping before a char not in STRING, or at pos LIM.\n\
1282 STRING is like the inside of a `[...]' in a regular expression\n\
1283 except that `]' is never special and `\\' quotes `^', `-' or `\\'\n\
1284 (but not as the end of a range; quoting is never needed there).\n\
1285 Thus, with arg \"a-zA-Z\", this skips letters stopping before first nonletter.\n\
1286 With arg \"^a-zA-Z\", skips nonletters stopping before first letter.\n\
1287 Returns the distance traveled, either zero or positive.")
1288 (string, lim)
1289 Lisp_Object string, lim;
1291 return skip_chars (1, 0, string, lim);
1294 DEFUN ("skip-chars-backward", Fskip_chars_backward, Sskip_chars_backward, 1, 2, 0,
1295 "Move point backward, stopping after a char not in STRING, or at pos LIM.\n\
1296 See `skip-chars-forward' for details.\n\
1297 Returns the distance traveled, either zero or negative.")
1298 (string, lim)
1299 Lisp_Object string, lim;
1301 return skip_chars (0, 0, string, lim);
1304 DEFUN ("skip-syntax-forward", Fskip_syntax_forward, Sskip_syntax_forward, 1, 2, 0,
1305 "Move point forward across chars in specified syntax classes.\n\
1306 SYNTAX is a string of syntax code characters.\n\
1307 Stop before a char whose syntax is not in SYNTAX, or at position LIM.\n\
1308 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.\n\
1309 This function returns the distance traveled, either zero or positive.")
1310 (syntax, lim)
1311 Lisp_Object syntax, lim;
1313 return skip_chars (1, 1, syntax, lim);
1316 DEFUN ("skip-syntax-backward", Fskip_syntax_backward, Sskip_syntax_backward, 1, 2, 0,
1317 "Move point backward across chars in specified syntax classes.\n\
1318 SYNTAX is a string of syntax code characters.\n\
1319 Stop on reaching a char whose syntax is not in SYNTAX, or at position LIM.\n\
1320 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.\n\
1321 This function returns the distance traveled, either zero or negative.")
1322 (syntax, lim)
1323 Lisp_Object syntax, lim;
1325 return skip_chars (0, 1, syntax, lim);
1328 static Lisp_Object
1329 skip_chars (forwardp, syntaxp, string, lim)
1330 int forwardp, syntaxp;
1331 Lisp_Object string, lim;
1333 register unsigned int c;
1334 register int ch;
1335 unsigned char fastmap[0400];
1336 /* If SYNTAXP is 0, STRING may contain multi-byte form of characters
1337 of which codes don't fit in FASTMAP. In that case, we set the
1338 first byte of multibyte form (i.e. base leading-code) in FASTMAP
1339 and set the actual ranges of characters in CHAR_RANGES. In the
1340 form "X-Y" of STRING, both X and Y must belong to the same
1341 character set because a range striding across character sets is
1342 meaningless. */
1343 int *char_ranges;
1344 int n_char_ranges = 0;
1345 int negate = 0;
1346 register int i, i_byte;
1347 int multibyte = !NILP (current_buffer->enable_multibyte_characters);
1348 int string_multibyte;
1349 int size_byte;
1351 CHECK_STRING (string, 0);
1352 char_ranges = (int *) alloca (XSTRING (string)->size * (sizeof (int)) * 2);
1353 string_multibyte = STRING_MULTIBYTE (string);
1354 size_byte = STRING_BYTES (XSTRING (string));
1356 if (NILP (lim))
1357 XSETINT (lim, forwardp ? ZV : BEGV);
1358 else
1359 CHECK_NUMBER_COERCE_MARKER (lim, 0);
1361 /* In any case, don't allow scan outside bounds of buffer. */
1362 if (XINT (lim) > ZV)
1363 XSETFASTINT (lim, ZV);
1364 if (XINT (lim) < BEGV)
1365 XSETFASTINT (lim, BEGV);
1367 bzero (fastmap, sizeof fastmap);
1369 i = 0, i_byte = 0;
1371 if (i_byte < size_byte
1372 && XSTRING (string)->data[0] == '^')
1374 negate = 1; i++, i_byte++;
1377 /* Find the characters specified and set their elements of fastmap.
1378 If syntaxp, each character counts as itself.
1379 Otherwise, handle backslashes and ranges specially. */
1381 while (i_byte < size_byte)
1383 int c_leading_code = XSTRING (string)->data[i_byte];
1385 FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
1387 /* Convert multibyteness between what the string has
1388 and what the buffer has. */
1389 if (multibyte)
1390 c = unibyte_char_to_multibyte (c);
1391 else
1392 c &= 0377;
1394 if (syntaxp)
1395 fastmap[syntax_spec_code[c & 0377]] = 1;
1396 else
1398 if (c == '\\')
1400 if (i_byte == size_byte)
1401 break;
1403 c_leading_code = XSTRING (string)->data[i_byte];
1404 FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
1406 if (i_byte < size_byte
1407 && XSTRING (string)->data[i_byte] == '-')
1409 unsigned int c2, c2_leading_code;
1411 /* Skip over the dash. */
1412 i++, i_byte++;
1414 if (i_byte == size_byte)
1415 break;
1417 /* Get the end of the range. */
1418 c2_leading_code = XSTRING (string)->data[i_byte];
1419 FETCH_STRING_CHAR_ADVANCE (c2, string, i, i_byte);
1421 if (SINGLE_BYTE_CHAR_P (c))
1423 if (! SINGLE_BYTE_CHAR_P (c2))
1424 error ("Invalid charcter range: %s",
1425 XSTRING (string)->data);
1426 while (c <= c2)
1428 fastmap[c] = 1;
1429 c++;
1432 else
1434 if (c_leading_code != c2_leading_code)
1435 error ("Invalid charcter range: %s",
1436 XSTRING (string)->data);
1437 fastmap[c_leading_code] = 1;
1438 if (c <= c2)
1440 char_ranges[n_char_ranges++] = c;
1441 char_ranges[n_char_ranges++] = c2;
1445 else
1447 fastmap[c_leading_code] = 1;
1448 if (!SINGLE_BYTE_CHAR_P (c))
1450 char_ranges[n_char_ranges++] = c;
1451 char_ranges[n_char_ranges++] = c;
1457 /* If ^ was the first character, complement the fastmap. In
1458 addition, as all multibyte characters have possibility of
1459 matching, set all entries for base leading codes, which is
1460 harmless even if SYNTAXP is 1. */
1462 if (negate)
1463 for (i = 0; i < sizeof fastmap; i++)
1465 if (!multibyte || !BASE_LEADING_CODE_P (i))
1466 fastmap[i] ^= 1;
1467 else
1468 fastmap[i] = 1;
1472 int start_point = PT;
1473 int pos = PT;
1474 int pos_byte = PT_BYTE;
1476 immediate_quit = 1;
1477 if (syntaxp)
1479 SETUP_SYNTAX_TABLE (pos, forwardp ? 1 : -1);
1480 if (forwardp)
1482 if (multibyte)
1484 if (pos < XINT (lim))
1485 while (fastmap[(int) SYNTAX (FETCH_CHAR (pos_byte))])
1487 /* Since we already checked for multibyteness,
1488 avoid using INC_BOTH which checks again. */
1489 INC_POS (pos_byte);
1490 pos++;
1491 if (pos >= XINT (lim))
1492 break;
1493 UPDATE_SYNTAX_TABLE_FORWARD (pos);
1496 else
1498 while (pos < XINT (lim)
1499 && fastmap[(int) SYNTAX (FETCH_BYTE (pos))])
1501 pos++;
1502 UPDATE_SYNTAX_TABLE_FORWARD (pos);
1506 else
1508 if (multibyte)
1510 while (pos > XINT (lim))
1512 int savepos = pos_byte;
1513 /* Since we already checked for multibyteness,
1514 avoid using DEC_BOTH which checks again. */
1515 pos--;
1516 DEC_POS (pos_byte);
1517 UPDATE_SYNTAX_TABLE_BACKWARD (pos);
1518 if (!fastmap[(int) SYNTAX (FETCH_CHAR (pos_byte))])
1520 pos++;
1521 pos_byte = savepos;
1522 break;
1526 else
1528 if (pos > XINT (lim))
1529 while (fastmap[(int) SYNTAX (FETCH_BYTE (pos - 1))])
1531 pos--;
1532 if (pos <= XINT (lim))
1533 break;
1534 UPDATE_SYNTAX_TABLE_BACKWARD (pos - 1);
1539 else
1541 if (forwardp)
1543 if (multibyte)
1544 while (pos < XINT (lim) && fastmap[(c = FETCH_BYTE (pos_byte))])
1546 if (!BASE_LEADING_CODE_P (c))
1547 INC_BOTH (pos, pos_byte);
1548 else if (n_char_ranges)
1550 /* We much check CHAR_RANGES for a multibyte
1551 character. */
1552 ch = FETCH_MULTIBYTE_CHAR (pos_byte);
1553 for (i = 0; i < n_char_ranges; i += 2)
1554 if ((ch >= char_ranges[i] && ch <= char_ranges[i + 1]))
1555 break;
1556 if (!(negate ^ (i < n_char_ranges)))
1557 break;
1559 INC_BOTH (pos, pos_byte);
1561 else
1563 if (!negate) break;
1564 INC_BOTH (pos, pos_byte);
1567 else
1568 while (pos < XINT (lim) && fastmap[FETCH_BYTE (pos)])
1569 pos++;
1571 else
1573 if (multibyte)
1574 while (pos > XINT (lim))
1576 int savepos = pos_byte;
1577 DEC_BOTH (pos, pos_byte);
1578 if (fastmap[(c = FETCH_BYTE (pos_byte))])
1580 if (!BASE_LEADING_CODE_P (c))
1582 else if (n_char_ranges)
1584 /* We much check CHAR_RANGES for a multibyte
1585 character. */
1586 ch = FETCH_MULTIBYTE_CHAR (pos_byte);
1587 for (i = 0; i < n_char_ranges; i += 2)
1588 if (ch >= char_ranges[i] && ch <= char_ranges[i + 1])
1589 break;
1590 if (!(negate ^ (i < n_char_ranges)))
1592 pos++;
1593 pos_byte = savepos;
1594 break;
1597 else
1598 if (!negate)
1600 pos++;
1601 pos_byte = savepos;
1602 break;
1605 else
1607 pos++;
1608 pos_byte = savepos;
1609 break;
1612 else
1613 while (pos > XINT (lim) && fastmap[FETCH_BYTE (pos - 1)])
1614 pos--;
1618 #if 0 /* Not needed now that a position in mid-character
1619 cannot be specified in Lisp. */
1620 if (multibyte
1621 /* INC_POS or DEC_POS might have moved POS over LIM. */
1622 && (forwardp ? (pos > XINT (lim)) : (pos < XINT (lim))))
1623 pos = XINT (lim);
1624 #endif
1626 if (! multibyte)
1627 pos_byte = pos;
1629 SET_PT_BOTH (pos, pos_byte);
1630 immediate_quit = 0;
1632 return make_number (PT - start_point);
1636 /* Jump over a comment, assuming we are at the beginning of one.
1637 FROM is the current position.
1638 FROM_BYTE is the bytepos corresponding to FROM.
1639 Do not move past STOP (a charpos).
1640 The comment over which we have to jump is of style STYLE
1641 (either SYNTAX_COMMENT_STYLE(foo) or ST_COMMENT_STYLE).
1642 NESTING should be positive to indicate the nesting at the beginning
1643 for nested comments and should be zero or negative else.
1644 ST_COMMENT_STYLE cannot be nested.
1645 PREV_SYNTAX is the SYNTAX_WITH_FLAGS of the previous character
1646 (or 0 If the search cannot start in the middle of a two-character).
1648 If successful, return 1 and store the charpos of the comment's end
1649 into *CHARPOS_PTR and the corresponding bytepos into *BYTEPOS_PTR.
1650 Else, return 0 and store the charpos STOP into *CHARPOS_PTR, the
1651 corresponding bytepos into *BYTEPOS_PTR and the current nesting
1652 (as defined for state.incomment) in *INCOMMENT_PTR.
1654 The comment end is the last character of the comment rather than the
1655 character just after the comment.
1657 Global syntax data is assumed to initially be valid for FROM and
1658 remains valid for forward search starting at the returned position. */
1660 static int
1661 forw_comment (from, from_byte, stop, nesting, style, prev_syntax,
1662 charpos_ptr, bytepos_ptr, incomment_ptr)
1663 int from, from_byte, stop;
1664 int nesting, style, prev_syntax;
1665 int *charpos_ptr, *bytepos_ptr, *incomment_ptr;
1667 register int c, c1;
1668 register enum syntaxcode code;
1669 register int syntax;
1671 if (nesting <= 0) nesting = -1;
1673 /* Enter the loop in the middle so that we find
1674 a 2-char comment ender if we start in the middle of it. */
1675 syntax = prev_syntax;
1676 if (syntax != 0) goto forw_incomment;
1678 while (1)
1680 if (from == stop)
1682 *incomment_ptr = nesting;
1683 *charpos_ptr = from;
1684 *bytepos_ptr = from_byte;
1685 return 0;
1687 c = FETCH_CHAR (from_byte);
1688 syntax = SYNTAX_WITH_FLAGS (c);
1689 code = syntax & 0xff;
1690 if (code == Sendcomment
1691 && SYNTAX_FLAGS_COMMENT_STYLE (syntax) == style
1692 && --nesting <= 0)
1693 /* we have encountered a comment end of the same style
1694 as the comment sequence which began this comment
1695 section */
1696 break;
1697 if (code == Scomment_fence
1698 && style == ST_COMMENT_STYLE)
1699 /* we have encountered a comment end of the same style
1700 as the comment sequence which began this comment
1701 section. */
1702 break;
1703 if (nesting > 0
1704 && code == Scomment
1705 && SYNTAX_FLAGS_COMMENT_STYLE (syntax) == style)
1706 /* we have encountered a nested comment of the same style
1707 as the comment sequence which began this comment section */
1708 nesting++;
1709 INC_BOTH (from, from_byte);
1710 UPDATE_SYNTAX_TABLE_FORWARD (from);
1712 forw_incomment:
1713 if (from < stop && SYNTAX_FLAGS_COMEND_FIRST (syntax)
1714 && SYNTAX_FLAGS_COMMENT_STYLE (syntax) == style
1715 && (c1 = FETCH_CHAR (from_byte),
1716 SYNTAX_COMEND_SECOND (c1)))
1718 if (--nesting <= 0)
1719 /* we have encountered a comment end of the same style
1720 as the comment sequence which began this comment
1721 section */
1722 break;
1723 else
1725 INC_BOTH (from, from_byte);
1726 UPDATE_SYNTAX_TABLE_FORWARD (from);
1729 if (nesting > 0
1730 && from < stop
1731 && SYNTAX_FLAGS_COMSTART_FIRST (syntax)
1732 && (c1 = FETCH_CHAR (from_byte),
1733 SYNTAX_COMMENT_STYLE (c1) == style
1734 && SYNTAX_COMSTART_SECOND (c1)))
1735 /* we have encountered a nested comment of the same style
1736 as the comment sequence which began this comment
1737 section */
1739 INC_BOTH (from, from_byte);
1740 UPDATE_SYNTAX_TABLE_FORWARD (from);
1741 nesting++;
1744 *charpos_ptr = from;
1745 *bytepos_ptr = from_byte;
1746 return 1;
1749 DEFUN ("forward-comment", Fforward_comment, Sforward_comment, 1, 1, 0,
1750 "Move forward across up to N comments. If N is negative, move backward.\n\
1751 Stop scanning if we find something other than a comment or whitespace.\n\
1752 Set point to where scanning stops.\n\
1753 If N comments are found as expected, with nothing except whitespace\n\
1754 between them, return t; otherwise return nil.")
1755 (count)
1756 Lisp_Object count;
1758 register int from;
1759 int from_byte;
1760 register int stop;
1761 register int c, c1;
1762 register enum syntaxcode code;
1763 int comstyle = 0; /* style of comment encountered */
1764 int comnested = 0; /* whether the comment is nestable or not */
1765 int found;
1766 int count1;
1767 int out_charpos, out_bytepos;
1768 int dummy;
1770 CHECK_NUMBER (count, 0);
1771 count1 = XINT (count);
1772 stop = count1 > 0 ? ZV : BEGV;
1774 immediate_quit = 1;
1775 QUIT;
1777 from = PT;
1778 from_byte = PT_BYTE;
1780 SETUP_SYNTAX_TABLE (from, count1);
1781 while (count1 > 0)
1785 int comstart_first;
1787 if (from == stop)
1789 SET_PT_BOTH (from, from_byte);
1790 immediate_quit = 0;
1791 return Qnil;
1793 c = FETCH_CHAR (from_byte);
1794 code = SYNTAX (c);
1795 comstart_first = SYNTAX_COMSTART_FIRST (c);
1796 comnested = SYNTAX_COMMENT_NESTED (c);
1797 comstyle = SYNTAX_COMMENT_STYLE (c);
1798 INC_BOTH (from, from_byte);
1799 UPDATE_SYNTAX_TABLE_FORWARD (from);
1800 if (from < stop && comstart_first
1801 && (c1 = FETCH_CHAR (from_byte),
1802 SYNTAX_COMSTART_SECOND (c1)))
1804 /* We have encountered a comment start sequence and we
1805 are ignoring all text inside comments. We must record
1806 the comment style this sequence begins so that later,
1807 only a comment end of the same style actually ends
1808 the comment section. */
1809 code = Scomment;
1810 comstyle = SYNTAX_COMMENT_STYLE (c1);
1811 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
1812 INC_BOTH (from, from_byte);
1813 UPDATE_SYNTAX_TABLE_FORWARD (from);
1815 /* FIXME: here we ignore 2-char endcomments while we don't
1816 when going backwards. */
1818 while (code == Swhitespace || code == Sendcomment);
1820 if (code == Scomment_fence)
1821 comstyle = ST_COMMENT_STYLE;
1822 else if (code != Scomment)
1824 immediate_quit = 0;
1825 DEC_BOTH (from, from_byte);
1826 SET_PT_BOTH (from, from_byte);
1827 return Qnil;
1829 /* We're at the start of a comment. */
1830 found = forw_comment (from, from_byte, stop, comnested, comstyle, 0,
1831 &out_charpos, &out_bytepos, &dummy);
1832 from = out_charpos; from_byte = out_bytepos;
1833 if (!found)
1835 immediate_quit = 0;
1836 SET_PT_BOTH (from, from_byte);
1837 return Qnil;
1839 INC_BOTH (from, from_byte);
1840 UPDATE_SYNTAX_TABLE_FORWARD (from);
1841 /* We have skipped one comment. */
1842 count1--;
1845 while (count1 < 0)
1847 while (1)
1849 int quoted, comstart_second;
1851 if (from <= stop)
1853 SET_PT_BOTH (BEGV, BEGV_BYTE);
1854 immediate_quit = 0;
1855 return Qnil;
1858 DEC_BOTH (from, from_byte);
1859 /* char_quoted does UPDATE_SYNTAX_TABLE_BACKWARD (from). */
1860 quoted = char_quoted (from, from_byte);
1861 if (quoted)
1863 DEC_BOTH (from, from_byte);
1864 goto leave;
1866 c = FETCH_CHAR (from_byte);
1867 code = SYNTAX (c);
1868 comstyle = 0;
1869 comnested = SYNTAX_COMMENT_NESTED (c);
1870 if (code == Sendcomment)
1871 comstyle = SYNTAX_COMMENT_STYLE (c);
1872 comstart_second = SYNTAX_COMSTART_SECOND (c);
1873 if (from > stop && SYNTAX_COMEND_SECOND (c)
1874 && prev_char_comend_first (from, from_byte)
1875 && !char_quoted (from - 1, dec_bytepos (from_byte)))
1877 /* We must record the comment style encountered so that
1878 later, we can match only the proper comment begin
1879 sequence of the same style. */
1880 DEC_BOTH (from, from_byte);
1881 code = Sendcomment;
1882 /* Calling char_quoted, above, set up global syntax position
1883 at the new value of FROM. */
1884 c1 = FETCH_CHAR (from_byte);
1885 comstyle = SYNTAX_COMMENT_STYLE (c1);
1886 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
1888 if (from > stop && comstart_second
1889 && prev_char_comstart_first (from, from_byte)
1890 && !char_quoted (from - 1, dec_bytepos (from_byte)))
1892 code = Scomment;
1893 DEC_BOTH (from, from_byte);
1896 if (code == Scomment_fence)
1898 /* Skip until first preceding unquoted comment_fence. */
1899 int found = 0, ini = from, ini_byte = from_byte;
1901 while (1)
1903 DEC_BOTH (from, from_byte);
1904 if (from == stop)
1905 break;
1906 UPDATE_SYNTAX_TABLE_BACKWARD (from);
1907 c = FETCH_CHAR (from_byte);
1908 if (SYNTAX (c) == Scomment_fence
1909 && !char_quoted (from, from_byte))
1911 found = 1;
1912 break;
1915 if (found == 0)
1917 from = ini; /* Set point to ini + 1. */
1918 from_byte = ini_byte;
1919 goto leave;
1922 else if (code == Sendcomment)
1924 found = back_comment (from, from_byte, stop, comnested, comstyle,
1925 &out_charpos, &out_bytepos);
1926 if (found == -1)
1928 #if 0 /* cc-mode (and maybe others) relies on the bogus behavior. */
1929 /* Failure: we should go back to the end of this
1930 not-quite-endcomment. */
1931 if (SYNTAX(c) != code)
1932 /* It was a two-char Sendcomment. */
1933 INC_BOTH (from, from_byte);
1934 goto leave;
1935 #endif
1937 else
1938 /* We have skipped one comment. */
1939 from = out_charpos, from_byte = out_bytepos;
1940 break;
1942 else if (code != Swhitespace && code != Scomment)
1944 leave:
1945 immediate_quit = 0;
1946 INC_BOTH (from, from_byte);
1947 SET_PT_BOTH (from, from_byte);
1948 return Qnil;
1952 count1++;
1955 SET_PT_BOTH (from, from_byte);
1956 immediate_quit = 0;
1957 return Qt;
1960 /* Return syntax code of character C if C is a single byte character
1961 or `multibyte_symbol_p' is zero. Otherwise, retrun Ssymbol. */
1963 #define SYNTAX_WITH_MULTIBYTE_CHECK(c) \
1964 ((SINGLE_BYTE_CHAR_P (c) || !multibyte_symbol_p) \
1965 ? SYNTAX (c) : Ssymbol)
1967 static Lisp_Object
1968 scan_lists (from, count, depth, sexpflag)
1969 register int from;
1970 int count, depth, sexpflag;
1972 Lisp_Object val;
1973 register int stop = count > 0 ? ZV : BEGV;
1974 register int c, c1;
1975 int stringterm;
1976 int quoted;
1977 int mathexit = 0;
1978 register enum syntaxcode code, temp_code;
1979 int min_depth = depth; /* Err out if depth gets less than this. */
1980 int comstyle = 0; /* style of comment encountered */
1981 int comnested = 0; /* whether the comment is nestable or not */
1982 int temp_pos;
1983 int last_good = from;
1984 int found;
1985 int from_byte;
1986 int out_bytepos, out_charpos;
1987 int temp, dummy;
1988 int multibyte_symbol_p = sexpflag && multibyte_syntax_as_symbol;
1990 if (depth > 0) min_depth = 0;
1992 if (from > ZV) from = ZV;
1993 if (from < BEGV) from = BEGV;
1995 from_byte = CHAR_TO_BYTE (from);
1997 immediate_quit = 1;
1998 QUIT;
2000 SETUP_SYNTAX_TABLE (from, count);
2001 while (count > 0)
2003 while (from < stop)
2005 int comstart_first, prefix;
2006 UPDATE_SYNTAX_TABLE_FORWARD (from);
2007 c = FETCH_CHAR (from_byte);
2008 code = SYNTAX_WITH_MULTIBYTE_CHECK (c);
2009 comstart_first = SYNTAX_COMSTART_FIRST (c);
2010 comnested = SYNTAX_COMMENT_NESTED (c);
2011 comstyle = SYNTAX_COMMENT_STYLE (c);
2012 prefix = SYNTAX_PREFIX (c);
2013 if (depth == min_depth)
2014 last_good = from;
2015 INC_BOTH (from, from_byte);
2016 UPDATE_SYNTAX_TABLE_FORWARD (from);
2017 if (from < stop && comstart_first
2018 && SYNTAX_COMSTART_SECOND (FETCH_CHAR (from_byte))
2019 && parse_sexp_ignore_comments)
2021 /* we have encountered a comment start sequence and we
2022 are ignoring all text inside comments. We must record
2023 the comment style this sequence begins so that later,
2024 only a comment end of the same style actually ends
2025 the comment section */
2026 code = Scomment;
2027 c1 = FETCH_CHAR (from_byte);
2028 comstyle = SYNTAX_COMMENT_STYLE (c1);
2029 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
2030 INC_BOTH (from, from_byte);
2031 UPDATE_SYNTAX_TABLE_FORWARD (from);
2034 if (prefix)
2035 continue;
2037 switch (SWITCH_ENUM_CAST (code))
2039 case Sescape:
2040 case Scharquote:
2041 if (from == stop) goto lose;
2042 INC_BOTH (from, from_byte);
2043 /* treat following character as a word constituent */
2044 case Sword:
2045 case Ssymbol:
2046 if (depth || !sexpflag) break;
2047 /* This word counts as a sexp; return at end of it. */
2048 while (from < stop)
2050 UPDATE_SYNTAX_TABLE_FORWARD (from);
2052 /* Some compilers can't handle this inside the switch. */
2053 c = FETCH_CHAR (from_byte);
2054 temp = SYNTAX_WITH_MULTIBYTE_CHECK (c);
2055 switch (temp)
2057 case Scharquote:
2058 case Sescape:
2059 INC_BOTH (from, from_byte);
2060 if (from == stop) goto lose;
2061 break;
2062 case Sword:
2063 case Ssymbol:
2064 case Squote:
2065 break;
2066 default:
2067 goto done;
2069 INC_BOTH (from, from_byte);
2071 goto done;
2073 case Scomment_fence:
2074 comstyle = ST_COMMENT_STYLE;
2075 /* FALLTHROUGH */
2076 case Scomment:
2077 if (!parse_sexp_ignore_comments) break;
2078 UPDATE_SYNTAX_TABLE_FORWARD (from);
2079 found = forw_comment (from, from_byte, stop,
2080 comnested, comstyle, 0,
2081 &out_charpos, &out_bytepos, &dummy);
2082 from = out_charpos, from_byte = out_bytepos;
2083 if (!found)
2085 if (depth == 0)
2086 goto done;
2087 goto lose;
2089 INC_BOTH (from, from_byte);
2090 UPDATE_SYNTAX_TABLE_FORWARD (from);
2091 break;
2093 case Smath:
2094 if (!sexpflag)
2095 break;
2096 if (from != stop && c == FETCH_CHAR (from_byte))
2098 INC_BOTH (from, from_byte);
2100 if (mathexit)
2102 mathexit = 0;
2103 goto close1;
2105 mathexit = 1;
2107 case Sopen:
2108 if (!++depth) goto done;
2109 break;
2111 case Sclose:
2112 close1:
2113 if (!--depth) goto done;
2114 if (depth < min_depth)
2115 Fsignal (Qscan_error,
2116 Fcons (build_string ("Containing expression ends prematurely"),
2117 Fcons (make_number (last_good),
2118 Fcons (make_number (from), Qnil))));
2119 break;
2121 case Sstring:
2122 case Sstring_fence:
2123 temp_pos = dec_bytepos (from_byte);
2124 stringterm = FETCH_CHAR (temp_pos);
2125 while (1)
2127 if (from >= stop) goto lose;
2128 UPDATE_SYNTAX_TABLE_FORWARD (from);
2129 c = FETCH_CHAR (from_byte);
2130 if (code == Sstring
2131 ? c == stringterm
2132 : SYNTAX_WITH_MULTIBYTE_CHECK (c) == Sstring_fence)
2133 break;
2135 /* Some compilers can't handle this inside the switch. */
2136 temp = SYNTAX_WITH_MULTIBYTE_CHECK (c);
2137 switch (temp)
2139 case Scharquote:
2140 case Sescape:
2141 INC_BOTH (from, from_byte);
2143 INC_BOTH (from, from_byte);
2145 INC_BOTH (from, from_byte);
2146 if (!depth && sexpflag) goto done;
2147 break;
2151 /* Reached end of buffer. Error if within object, return nil if between */
2152 if (depth) goto lose;
2154 immediate_quit = 0;
2155 return Qnil;
2157 /* End of object reached */
2158 done:
2159 count--;
2162 while (count < 0)
2164 while (from > stop)
2166 DEC_BOTH (from, from_byte);
2167 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2168 c = FETCH_CHAR (from_byte);
2169 code = SYNTAX_WITH_MULTIBYTE_CHECK (c);
2170 if (depth == min_depth)
2171 last_good = from;
2172 comstyle = 0;
2173 comnested = SYNTAX_COMMENT_NESTED (c);
2174 if (code == Sendcomment)
2175 comstyle = SYNTAX_COMMENT_STYLE (c);
2176 if (from > stop && SYNTAX_COMEND_SECOND (c)
2177 && prev_char_comend_first (from, from_byte)
2178 && parse_sexp_ignore_comments)
2180 /* We must record the comment style encountered so that
2181 later, we can match only the proper comment begin
2182 sequence of the same style. */
2183 DEC_BOTH (from, from_byte);
2184 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2185 code = Sendcomment;
2186 c1 = FETCH_CHAR (from_byte);
2187 comstyle = SYNTAX_COMMENT_STYLE (c1);
2188 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
2191 /* Quoting turns anything except a comment-ender
2192 into a word character. Note that this cannot be true
2193 if we decremented FROM in the if-statement above. */
2194 if (code != Sendcomment && char_quoted (from, from_byte))
2195 code = Sword;
2196 else if (SYNTAX_PREFIX (c))
2197 continue;
2199 switch (SWITCH_ENUM_CAST (code))
2201 case Sword:
2202 case Ssymbol:
2203 case Sescape:
2204 case Scharquote:
2205 if (depth || !sexpflag) break;
2206 /* This word counts as a sexp; count object finished
2207 after passing it. */
2208 while (from > stop)
2210 temp_pos = from_byte;
2211 if (! NILP (current_buffer->enable_multibyte_characters))
2212 DEC_POS (temp_pos);
2213 else
2214 temp_pos--;
2215 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
2216 c1 = FETCH_CHAR (temp_pos);
2217 temp_code = SYNTAX_WITH_MULTIBYTE_CHECK (c1);
2218 /* Don't allow comment-end to be quoted. */
2219 if (temp_code == Sendcomment)
2220 goto done2;
2221 quoted = char_quoted (from - 1, temp_pos);
2222 if (quoted)
2224 DEC_BOTH (from, from_byte);
2225 temp_pos = dec_bytepos (temp_pos);
2226 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
2228 c1 = FETCH_CHAR (temp_pos);
2229 temp_code = SYNTAX_WITH_MULTIBYTE_CHECK (c1);
2230 if (! (quoted || temp_code == Sword
2231 || temp_code == Ssymbol
2232 || temp_code == Squote))
2233 goto done2;
2234 DEC_BOTH (from, from_byte);
2236 goto done2;
2238 case Smath:
2239 if (!sexpflag)
2240 break;
2241 temp_pos = dec_bytepos (from_byte);
2242 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
2243 if (from != stop && c == FETCH_CHAR (temp_pos))
2244 DEC_BOTH (from, from_byte);
2245 if (mathexit)
2247 mathexit = 0;
2248 goto open2;
2250 mathexit = 1;
2252 case Sclose:
2253 if (!++depth) goto done2;
2254 break;
2256 case Sopen:
2257 open2:
2258 if (!--depth) goto done2;
2259 if (depth < min_depth)
2260 Fsignal (Qscan_error,
2261 Fcons (build_string ("Containing expression ends prematurely"),
2262 Fcons (make_number (last_good),
2263 Fcons (make_number (from), Qnil))));
2264 break;
2266 case Sendcomment:
2267 if (!parse_sexp_ignore_comments)
2268 break;
2269 found = back_comment (from, from_byte, stop, comnested, comstyle,
2270 &out_charpos, &out_bytepos);
2271 /* FIXME: if found == -1, then it really wasn't a comment-end.
2272 For single-char Sendcomment, we can't do much about it apart
2273 from skipping the char.
2274 For 2-char endcomments, we could try again, taking both
2275 chars as separate entities, but it's a lot of trouble
2276 for very little gain, so we don't bother either. -sm */
2277 if (found != -1)
2278 from = out_charpos, from_byte = out_bytepos;
2279 break;
2281 case Scomment_fence:
2282 case Sstring_fence:
2283 while (1)
2285 DEC_BOTH (from, from_byte);
2286 if (from == stop) goto lose;
2287 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2288 if (!char_quoted (from, from_byte)
2289 && (c = FETCH_CHAR (from_byte),
2290 SYNTAX_WITH_MULTIBYTE_CHECK (c) == code))
2291 break;
2293 if (code == Sstring_fence && !depth && sexpflag) goto done2;
2294 break;
2296 case Sstring:
2297 stringterm = FETCH_CHAR (from_byte);
2298 while (1)
2300 if (from == stop) goto lose;
2301 temp_pos = from_byte;
2302 if (! NILP (current_buffer->enable_multibyte_characters))
2303 DEC_POS (temp_pos);
2304 else
2305 temp_pos--;
2306 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
2307 if (!char_quoted (from - 1, temp_pos)
2308 && stringterm == FETCH_CHAR (temp_pos))
2309 break;
2310 DEC_BOTH (from, from_byte);
2312 DEC_BOTH (from, from_byte);
2313 if (!depth && sexpflag) goto done2;
2314 break;
2318 /* Reached start of buffer. Error if within object, return nil if between */
2319 if (depth) goto lose;
2321 immediate_quit = 0;
2322 return Qnil;
2324 done2:
2325 count++;
2329 immediate_quit = 0;
2330 XSETFASTINT (val, from);
2331 return val;
2333 lose:
2334 Fsignal (Qscan_error,
2335 Fcons (build_string ("Unbalanced parentheses"),
2336 Fcons (make_number (last_good),
2337 Fcons (make_number (from), Qnil))));
2339 /* NOTREACHED */
2342 DEFUN ("scan-lists", Fscan_lists, Sscan_lists, 3, 3, 0,
2343 "Scan from character number FROM by COUNT lists.\n\
2344 Returns the character number of the position thus found.\n\
2346 If DEPTH is nonzero, paren depth begins counting from that value,\n\
2347 only places where the depth in parentheses becomes zero\n\
2348 are candidates for stopping; COUNT such places are counted.\n\
2349 Thus, a positive value for DEPTH means go out levels.\n\
2351 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.\n\
2353 If the beginning or end of (the accessible part of) the buffer is reached\n\
2354 and the depth is wrong, an error is signaled.\n\
2355 If the depth is right but the count is not used up, nil is returned.")
2356 (from, count, depth)
2357 Lisp_Object from, count, depth;
2359 CHECK_NUMBER (from, 0);
2360 CHECK_NUMBER (count, 1);
2361 CHECK_NUMBER (depth, 2);
2363 return scan_lists (XINT (from), XINT (count), XINT (depth), 0);
2366 DEFUN ("scan-sexps", Fscan_sexps, Sscan_sexps, 2, 2, 0,
2367 "Scan from character number FROM by COUNT balanced expressions.\n\
2368 If COUNT is negative, scan backwards.\n\
2369 Returns the character number of the position thus found.\n\
2371 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.\n\
2373 If the beginning or end of (the accessible part of) the buffer is reached\n\
2374 in the middle of a parenthetical grouping, an error is signaled.\n\
2375 If the beginning or end is reached between groupings\n\
2376 but before count is used up, nil is returned.")
2377 (from, count)
2378 Lisp_Object from, count;
2380 CHECK_NUMBER (from, 0);
2381 CHECK_NUMBER (count, 1);
2383 return scan_lists (XINT (from), XINT (count), 0, 1);
2386 DEFUN ("backward-prefix-chars", Fbackward_prefix_chars, Sbackward_prefix_chars,
2387 0, 0, 0,
2388 "Move point backward over any number of chars with prefix syntax.\n\
2389 This includes chars with \"quote\" or \"prefix\" syntax (' or p).")
2392 int beg = BEGV;
2393 int opoint = PT;
2394 int opoint_byte = PT_BYTE;
2395 int pos = PT;
2396 int pos_byte = PT_BYTE;
2397 int c;
2399 if (pos <= beg)
2401 SET_PT_BOTH (opoint, opoint_byte);
2403 return Qnil;
2406 SETUP_SYNTAX_TABLE (pos, -1);
2408 DEC_BOTH (pos, pos_byte);
2410 while (!char_quoted (pos, pos_byte)
2411 /* Previous statement updates syntax table. */
2412 && ((c = FETCH_CHAR (pos_byte), SYNTAX (c) == Squote)
2413 || SYNTAX_PREFIX (c)))
2415 opoint = pos;
2416 opoint_byte = pos_byte;
2418 if (pos + 1 > beg)
2419 DEC_BOTH (pos, pos_byte);
2422 SET_PT_BOTH (opoint, opoint_byte);
2424 return Qnil;
2427 /* Parse forward from FROM / FROM_BYTE to END,
2428 assuming that FROM has state OLDSTATE (nil means FROM is start of function),
2429 and return a description of the state of the parse at END.
2430 If STOPBEFORE is nonzero, stop at the start of an atom.
2431 If COMMENTSTOP is 1, stop at the start of a comment.
2432 If COMMENTSTOP is -1, stop at the start or end of a comment,
2433 after the beginning of a string, or after the end of a string. */
2435 static void
2436 scan_sexps_forward (stateptr, from, from_byte, end, targetdepth,
2437 stopbefore, oldstate, commentstop)
2438 struct lisp_parse_state *stateptr;
2439 register int from;
2440 int end, targetdepth, stopbefore;
2441 Lisp_Object oldstate;
2442 int commentstop;
2444 struct lisp_parse_state state;
2446 register enum syntaxcode code;
2447 int c1;
2448 int comnested;
2449 struct level { int last, prev; };
2450 struct level levelstart[100];
2451 register struct level *curlevel = levelstart;
2452 struct level *endlevel = levelstart + 100;
2453 register int depth; /* Paren depth of current scanning location.
2454 level - levelstart equals this except
2455 when the depth becomes negative. */
2456 int mindepth; /* Lowest DEPTH value seen. */
2457 int start_quoted = 0; /* Nonzero means starting after a char quote */
2458 Lisp_Object tem;
2459 int prev_from; /* Keep one character before FROM. */
2460 int prev_from_byte;
2461 int prev_from_syntax;
2462 int boundary_stop = commentstop == -1;
2463 int nofence;
2464 int found;
2465 int out_bytepos, out_charpos;
2466 int temp;
2468 prev_from = from;
2469 prev_from_byte = from_byte;
2470 if (from != BEGV)
2471 DEC_BOTH (prev_from, prev_from_byte);
2473 /* Use this macro instead of `from++'. */
2474 #define INC_FROM \
2475 do { prev_from = from; \
2476 prev_from_byte = from_byte; \
2477 prev_from_syntax \
2478 = SYNTAX_WITH_FLAGS (FETCH_CHAR (prev_from_byte)); \
2479 INC_BOTH (from, from_byte); \
2480 UPDATE_SYNTAX_TABLE_FORWARD (from); \
2481 } while (0)
2483 immediate_quit = 1;
2484 QUIT;
2486 if (NILP (oldstate))
2488 depth = 0;
2489 state.instring = -1;
2490 state.incomment = 0;
2491 state.comstyle = 0; /* comment style a by default. */
2492 state.comstr_start = -1; /* no comment/string seen. */
2494 else
2496 tem = Fcar (oldstate);
2497 if (!NILP (tem))
2498 depth = XINT (tem);
2499 else
2500 depth = 0;
2502 oldstate = Fcdr (oldstate);
2503 oldstate = Fcdr (oldstate);
2504 oldstate = Fcdr (oldstate);
2505 tem = Fcar (oldstate);
2506 /* Check whether we are inside string_fence-style string: */
2507 state.instring = (!NILP (tem)
2508 ? (INTEGERP (tem) ? XINT (tem) : ST_STRING_STYLE)
2509 : -1);
2511 oldstate = Fcdr (oldstate);
2512 tem = Fcar (oldstate);
2513 state.incomment = (!NILP (tem)
2514 ? (INTEGERP (tem) ? XINT (tem) : -1)
2515 : 0);
2517 oldstate = Fcdr (oldstate);
2518 tem = Fcar (oldstate);
2519 start_quoted = !NILP (tem);
2521 /* if the eighth element of the list is nil, we are in comment
2522 style a. If it is non-nil, we are in comment style b */
2523 oldstate = Fcdr (oldstate);
2524 oldstate = Fcdr (oldstate);
2525 tem = Fcar (oldstate);
2526 state.comstyle = NILP (tem) ? 0 : (EQ (tem, Qsyntax_table)
2527 ? ST_COMMENT_STYLE : 1);
2529 oldstate = Fcdr (oldstate);
2530 tem = Fcar (oldstate);
2531 state.comstr_start = NILP (tem) ? -1 : XINT (tem) ;
2532 oldstate = Fcdr (oldstate);
2533 tem = Fcar (oldstate);
2534 while (!NILP (tem)) /* >= second enclosing sexps. */
2536 /* curlevel++->last ran into compiler bug on Apollo */
2537 curlevel->last = XINT (Fcar (tem));
2538 if (++curlevel == endlevel)
2539 curlevel--; /* error ("Nesting too deep for parser"); */
2540 curlevel->prev = -1;
2541 curlevel->last = -1;
2542 tem = Fcdr (tem);
2545 state.quoted = 0;
2546 mindepth = depth;
2548 curlevel->prev = -1;
2549 curlevel->last = -1;
2551 SETUP_SYNTAX_TABLE (prev_from, 1);
2552 prev_from_syntax = SYNTAX_WITH_FLAGS (FETCH_CHAR (prev_from_byte));
2553 UPDATE_SYNTAX_TABLE_FORWARD (from);
2555 /* Enter the loop at a place appropriate for initial state. */
2557 if (state.incomment)
2558 goto startincomment;
2559 if (state.instring >= 0)
2561 nofence = state.instring != ST_STRING_STYLE;
2562 if (start_quoted)
2563 goto startquotedinstring;
2564 goto startinstring;
2566 else if (start_quoted)
2567 goto startquoted;
2569 #if 0 /* This seems to be redundant with the identical code above. */
2570 SETUP_SYNTAX_TABLE (prev_from, 1);
2571 prev_from_syntax = SYNTAX_WITH_FLAGS (FETCH_CHAR (prev_from_byte));
2572 UPDATE_SYNTAX_TABLE_FORWARD (from);
2573 #endif
2575 while (from < end)
2577 INC_FROM;
2578 code = prev_from_syntax & 0xff;
2580 if (code == Scomment)
2582 state.comstyle = SYNTAX_FLAGS_COMMENT_STYLE (prev_from_syntax);
2583 state.incomment = (SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax) ?
2584 1 : -1);
2585 state.comstr_start = prev_from;
2587 else if (code == Scomment_fence)
2589 /* Record the comment style we have entered so that only
2590 the comment-end sequence of the same style actually
2591 terminates the comment section. */
2592 state.comstyle = ST_COMMENT_STYLE;
2593 state.incomment = -1;
2594 state.comstr_start = prev_from;
2595 code = Scomment;
2597 else if (from < end)
2598 if (SYNTAX_FLAGS_COMSTART_FIRST (prev_from_syntax))
2599 if (c1 = FETCH_CHAR (from_byte),
2600 SYNTAX_COMSTART_SECOND (c1))
2601 /* Duplicate code to avoid a complex if-expression
2602 which causes trouble for the SGI compiler. */
2604 /* Record the comment style we have entered so that only
2605 the comment-end sequence of the same style actually
2606 terminates the comment section. */
2607 state.comstyle = SYNTAX_COMMENT_STYLE (FETCH_CHAR (from_byte));
2608 comnested = SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax);
2609 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
2610 state.incomment = comnested ? 1 : -1;
2611 state.comstr_start = prev_from;
2612 INC_FROM;
2613 code = Scomment;
2616 if (SYNTAX_FLAGS_PREFIX (prev_from_syntax))
2617 continue;
2618 switch (SWITCH_ENUM_CAST (code))
2620 case Sescape:
2621 case Scharquote:
2622 if (stopbefore) goto stop; /* this arg means stop at sexp start */
2623 curlevel->last = prev_from;
2624 startquoted:
2625 if (from == end) goto endquoted;
2626 INC_FROM;
2627 goto symstarted;
2628 /* treat following character as a word constituent */
2629 case Sword:
2630 case Ssymbol:
2631 if (stopbefore) goto stop; /* this arg means stop at sexp start */
2632 curlevel->last = prev_from;
2633 symstarted:
2634 while (from < end)
2636 /* Some compilers can't handle this inside the switch. */
2637 temp = SYNTAX (FETCH_CHAR (from_byte));
2638 switch (temp)
2640 case Scharquote:
2641 case Sescape:
2642 INC_FROM;
2643 if (from == end) goto endquoted;
2644 break;
2645 case Sword:
2646 case Ssymbol:
2647 case Squote:
2648 break;
2649 default:
2650 goto symdone;
2652 INC_FROM;
2654 symdone:
2655 curlevel->prev = curlevel->last;
2656 break;
2658 case Scomment:
2659 if (commentstop || boundary_stop) goto done;
2660 startincomment:
2661 /* The (from == BEGV) test was to enter the loop in the middle so
2662 that we find a 2-char comment ender even if we start in the
2663 middle of it. We don't want to do that if we're just at the
2664 beginning of the comment (think of (*) ... (*)). */
2665 found = forw_comment (from, from_byte, end,
2666 state.incomment, state.comstyle,
2667 (from == BEGV || from < state.comstr_start + 3)
2668 ? 0 : prev_from_syntax,
2669 &out_charpos, &out_bytepos, &state.incomment);
2670 from = out_charpos; from_byte = out_bytepos;
2671 /* Beware! prev_from and friends are invalid now.
2672 Luckily, the `done' doesn't use them and the INC_FROM
2673 sets them to a sane value without looking at them. */
2674 if (!found) goto done;
2675 INC_FROM;
2676 state.incomment = 0;
2677 state.comstyle = 0; /* reset the comment style */
2678 if (boundary_stop) goto done;
2679 break;
2681 case Sopen:
2682 if (stopbefore) goto stop; /* this arg means stop at sexp start */
2683 depth++;
2684 /* curlevel++->last ran into compiler bug on Apollo */
2685 curlevel->last = prev_from;
2686 if (++curlevel == endlevel)
2687 curlevel--; /* error ("Nesting too deep for parser"); */
2688 curlevel->prev = -1;
2689 curlevel->last = -1;
2690 if (targetdepth == depth) goto done;
2691 break;
2693 case Sclose:
2694 depth--;
2695 if (depth < mindepth)
2696 mindepth = depth;
2697 if (curlevel != levelstart)
2698 curlevel--;
2699 curlevel->prev = curlevel->last;
2700 if (targetdepth == depth) goto done;
2701 break;
2703 case Sstring:
2704 case Sstring_fence:
2705 state.comstr_start = from - 1;
2706 if (stopbefore) goto stop; /* this arg means stop at sexp start */
2707 curlevel->last = prev_from;
2708 state.instring = (code == Sstring
2709 ? (FETCH_CHAR (prev_from_byte))
2710 : ST_STRING_STYLE);
2711 if (boundary_stop) goto done;
2712 startinstring:
2714 nofence = state.instring != ST_STRING_STYLE;
2716 while (1)
2718 int c;
2720 if (from >= end) goto done;
2721 c = FETCH_CHAR (from_byte);
2722 /* Some compilers can't handle this inside the switch. */
2723 temp = SYNTAX (c);
2725 /* Check TEMP here so that if the char has
2726 a syntax-table property which says it is NOT
2727 a string character, it does not end the string. */
2728 if (nofence && c == state.instring && temp == Sstring)
2729 break;
2731 switch (temp)
2733 case Sstring_fence:
2734 if (!nofence) goto string_end;
2735 break;
2736 case Scharquote:
2737 case Sescape:
2738 INC_FROM;
2739 startquotedinstring:
2740 if (from >= end) goto endquoted;
2742 INC_FROM;
2745 string_end:
2746 state.instring = -1;
2747 curlevel->prev = curlevel->last;
2748 INC_FROM;
2749 if (boundary_stop) goto done;
2750 break;
2752 case Smath:
2753 break;
2756 goto done;
2758 stop: /* Here if stopping before start of sexp. */
2759 from = prev_from; /* We have just fetched the char that starts it; */
2760 goto done; /* but return the position before it. */
2762 endquoted:
2763 state.quoted = 1;
2764 done:
2765 state.depth = depth;
2766 state.mindepth = mindepth;
2767 state.thislevelstart = curlevel->prev;
2768 state.prevlevelstart
2769 = (curlevel == levelstart) ? -1 : (curlevel - 1)->last;
2770 state.location = from;
2771 state.levelstarts = Qnil;
2772 while (--curlevel >= levelstart)
2773 state.levelstarts = Fcons (make_number (curlevel->last),
2774 state.levelstarts);
2775 immediate_quit = 0;
2777 *stateptr = state;
2780 /* This comment supplies the doc string for parse-partial-sexp,
2781 for make-docfile to see. We cannot put this in the real DEFUN
2782 due to limits in the Unix cpp.
2784 DEFUN ("parse-partial-sexp", Ffoo, Sfoo, 2, 6, 0,
2785 "Parse Lisp syntax starting at FROM until TO; return status of parse at TO.\n\
2786 Parsing stops at TO or when certain criteria are met;\n\
2787 point is set to where parsing stops.\n\
2788 If fifth arg STATE is omitted or nil,\n\
2789 parsing assumes that FROM is the beginning of a function.\n\
2790 Value is a list of ten elements describing final state of parsing:\n\
2791 0. depth in parens.\n\
2792 1. character address of start of innermost containing list; nil if none.\n\
2793 2. character address of start of last complete sexp terminated.\n\
2794 3. non-nil if inside a string.\n\
2795 (it is the character that will terminate the string,\n\
2796 or t if the string should be terminated by a generic string delimiter.)\n\
2797 4. nil if outside a comment, t if inside a non-nestable comment, \n\
2798 else an integer (the current comment nesting).\n\
2799 5. t if following a quote character.\n\
2800 6. the minimum paren-depth encountered during this scan.\n\
2801 7. t if in a comment of style b; symbol `syntax-table' if the comment\n\
2802 should be terminated by a generic comment delimiter.\n\
2803 8. character address of start of comment or string; nil if not in one.\n\
2804 9. Intermediate data for continuation of parsing (subject to change).\n\
2805 If third arg TARGETDEPTH is non-nil, parsing stops if the depth\n\
2806 in parentheses becomes equal to TARGETDEPTH.\n\
2807 Fourth arg STOPBEFORE non-nil means stop when come to\n\
2808 any character that starts a sexp.\n\
2809 Fifth arg STATE is a nine-element list like what this function returns.\n\
2810 It is used to initialize the state of the parse. Elements number 1, 2, 6\n\
2811 and 8 are ignored; you can leave off element 8 (the last) entirely.\n\
2812 Sixth arg COMMENTSTOP non-nil means stop at the start of a comment.\n\
2813 If it is symbol `syntax-table', stop after the start of a comment or a\n\
2814 string, or after end of a comment or a string.")
2815 (from, to, targetdepth, stopbefore, state, commentstop)
2818 DEFUN ("parse-partial-sexp", Fparse_partial_sexp, Sparse_partial_sexp, 2, 6, 0,
2819 0 /* See immediately above */)
2820 (from, to, targetdepth, stopbefore, oldstate, commentstop)
2821 Lisp_Object from, to, targetdepth, stopbefore, oldstate, commentstop;
2823 struct lisp_parse_state state;
2824 int target;
2826 if (!NILP (targetdepth))
2828 CHECK_NUMBER (targetdepth, 3);
2829 target = XINT (targetdepth);
2831 else
2832 target = -100000; /* We won't reach this depth */
2834 validate_region (&from, &to);
2835 scan_sexps_forward (&state, XINT (from), CHAR_TO_BYTE (XINT (from)),
2836 XINT (to),
2837 target, !NILP (stopbefore), oldstate,
2838 (NILP (commentstop)
2839 ? 0 : (EQ (commentstop, Qsyntax_table) ? -1 : 1)));
2841 SET_PT (state.location);
2843 return Fcons (make_number (state.depth),
2844 Fcons (state.prevlevelstart < 0 ? Qnil : make_number (state.prevlevelstart),
2845 Fcons (state.thislevelstart < 0 ? Qnil : make_number (state.thislevelstart),
2846 Fcons (state.instring >= 0
2847 ? (state.instring == ST_STRING_STYLE
2848 ? Qt : make_number (state.instring)) : Qnil,
2849 Fcons (state.incomment < 0 ? Qt :
2850 (state.incomment == 0 ? Qnil :
2851 make_number (state.incomment)),
2852 Fcons (state.quoted ? Qt : Qnil,
2853 Fcons (make_number (state.mindepth),
2854 Fcons ((state.comstyle
2855 ? (state.comstyle == ST_COMMENT_STYLE
2856 ? Qsyntax_table : Qt) :
2857 Qnil),
2858 Fcons (((state.incomment
2859 || (state.instring >= 0))
2860 ? make_number (state.comstr_start)
2861 : Qnil),
2862 Fcons (state.levelstarts, Qnil))))))))));
2865 void
2866 init_syntax_once ()
2868 register int i, c;
2869 Lisp_Object temp;
2871 /* This has to be done here, before we call Fmake_char_table. */
2872 Qsyntax_table = intern ("syntax-table");
2873 staticpro (&Qsyntax_table);
2875 /* Intern this now in case it isn't already done.
2876 Setting this variable twice is harmless.
2877 But don't staticpro it here--that is done in alloc.c. */
2878 Qchar_table_extra_slots = intern ("char-table-extra-slots");
2880 /* Create objects which can be shared among syntax tables. */
2881 Vsyntax_code_object = Fmake_vector (make_number (13), Qnil);
2882 for (i = 0; i < XVECTOR (Vsyntax_code_object)->size; i++)
2883 XVECTOR (Vsyntax_code_object)->contents[i]
2884 = Fcons (make_number (i), Qnil);
2886 /* Now we are ready to set up this property, so we can
2887 create syntax tables. */
2888 Fput (Qsyntax_table, Qchar_table_extra_slots, make_number (0));
2890 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Swhitespace];
2892 Vstandard_syntax_table = Fmake_char_table (Qsyntax_table, temp);
2894 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Sword];
2895 for (i = 'a'; i <= 'z'; i++)
2896 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
2897 for (i = 'A'; i <= 'Z'; i++)
2898 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
2899 for (i = '0'; i <= '9'; i++)
2900 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
2902 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '$', temp);
2903 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '%', temp);
2905 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '(',
2906 Fcons (make_number (Sopen), make_number (')')));
2907 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ')',
2908 Fcons (make_number (Sclose), make_number ('(')));
2909 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '[',
2910 Fcons (make_number (Sopen), make_number (']')));
2911 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ']',
2912 Fcons (make_number (Sclose), make_number ('[')));
2913 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '{',
2914 Fcons (make_number (Sopen), make_number ('}')));
2915 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '}',
2916 Fcons (make_number (Sclose), make_number ('{')));
2917 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '"',
2918 Fcons (make_number ((int) Sstring), Qnil));
2919 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\\',
2920 Fcons (make_number ((int) Sescape), Qnil));
2922 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Ssymbol];
2923 for (i = 0; i < 10; i++)
2925 c = "_-+*/&|<>="[i];
2926 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
2929 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Spunct];
2930 for (i = 0; i < 12; i++)
2932 c = ".,;:?!#@~^'`"[i];
2933 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
2936 /* All multibyte characters have syntax `word' by default. */
2937 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Sword];
2938 for (i = CHAR_TABLE_SINGLE_BYTE_SLOTS; i < CHAR_TABLE_ORDINARY_SLOTS; i++)
2939 XCHAR_TABLE (Vstandard_syntax_table)->contents[i] = temp;
2942 void
2943 syms_of_syntax ()
2945 Qsyntax_table_p = intern ("syntax-table-p");
2946 staticpro (&Qsyntax_table_p);
2948 staticpro (&Vsyntax_code_object);
2950 Qscan_error = intern ("scan-error");
2951 staticpro (&Qscan_error);
2952 Fput (Qscan_error, Qerror_conditions,
2953 Fcons (Qscan_error, Fcons (Qerror, Qnil)));
2954 Fput (Qscan_error, Qerror_message,
2955 build_string ("Scan error"));
2957 DEFVAR_BOOL ("parse-sexp-ignore-comments", &parse_sexp_ignore_comments,
2958 "Non-nil means `forward-sexp', etc., should treat comments as whitespace.");
2960 DEFVAR_BOOL ("parse-sexp-lookup-properties", &parse_sexp_lookup_properties,
2961 "Non-nil means `forward-sexp', etc., grant `syntax-table' property.\n\
2962 The value of this property should be either a syntax table, or a cons\n\
2963 of the form (SYNTAXCODE . MATCHCHAR), SYNTAXCODE being the numeric\n\
2964 syntax code, MATCHCHAR being nil or the character to match (which is\n\
2965 relevant only for open/close type.");
2967 words_include_escapes = 0;
2968 DEFVAR_BOOL ("words-include-escapes", &words_include_escapes,
2969 "Non-nil means `forward-word', etc., should treat escape chars part of words.");
2971 DEFVAR_BOOL ("multibyte-syntax-as-symbol", &multibyte_syntax_as_symbol,
2972 "Non-nil means `scan-sexps' treats all multibyte characters as symbol.");
2973 multibyte_syntax_as_symbol = 0;
2975 DEFVAR_BOOL ("open-paren-in-column-0-is-defun-start",
2976 &open_paren_in_column_0_is_defun_start,
2977 "Non-nil means an open paren in column 0 denotes the start of a defun.");
2978 open_paren_in_column_0_is_defun_start = 1;
2980 defsubr (&Ssyntax_table_p);
2981 defsubr (&Ssyntax_table);
2982 defsubr (&Sstandard_syntax_table);
2983 defsubr (&Scopy_syntax_table);
2984 defsubr (&Sset_syntax_table);
2985 defsubr (&Schar_syntax);
2986 defsubr (&Smatching_paren);
2987 defsubr (&Smodify_syntax_entry);
2988 defsubr (&Sdescribe_syntax);
2990 defsubr (&Sforward_word);
2992 defsubr (&Sskip_chars_forward);
2993 defsubr (&Sskip_chars_backward);
2994 defsubr (&Sskip_syntax_forward);
2995 defsubr (&Sskip_syntax_backward);
2997 defsubr (&Sforward_comment);
2998 defsubr (&Sscan_lists);
2999 defsubr (&Sscan_sexps);
3000 defsubr (&Sbackward_prefix_chars);
3001 defsubr (&Sparse_partial_sexp);