(set-auto-mode): New arg JUST-FROM-FILE-NAME.
[emacs.git] / src / syntax.c
blobf396b24e087f003e8e9e486a2c75fe20339443b8
1 /* GNU Emacs routines to deal with syntax tables; also word and list parsing.
2 Copyright (C) 1985, 87, 93, 94, 95, 1997 Free Software Foundation, Inc.
4 This file is part of GNU Emacs.
6 GNU Emacs is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
11 GNU Emacs is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GNU Emacs; see the file COPYING. If not, write to
18 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
22 #include <config.h>
23 #include <ctype.h>
24 #include "lisp.h"
25 #include "commands.h"
26 #include "buffer.h"
27 #include "charset.h"
29 /* Make syntax table lookup grant data in gl_state. */
30 #define SYNTAX_ENTRY_VIA_PROPERTY
32 #include "syntax.h"
33 #include "intervals.h"
35 /* We use these constants in place for comment-style and
36 string-ender-char to distinguish comments/strings started by
37 comment_fence and string_fence codes. */
39 #define ST_COMMENT_STYLE (256 + 1)
40 #define ST_STRING_STYLE (256 + 2)
41 #include "category.h"
43 Lisp_Object Qsyntax_table_p, Qsyntax_table, Qscan_error;
45 static void scan_sexps_forward ();
46 static int char_quoted ();
48 int words_include_escapes;
49 int parse_sexp_lookup_properties;
51 /* Used as a temporary in SYNTAX_ENTRY and other macros in syntax.h,
52 if not compiled with GCC. No need to mark it, since it is used
53 only very temporarily. */
54 Lisp_Object syntax_temp;
56 /* This is the internal form of the parse state used in parse-partial-sexp. */
58 struct lisp_parse_state
60 int depth; /* Depth at end of parsing. */
61 int instring; /* -1 if not within string, else desired terminator. */
62 int incomment; /* Nonzero if within a comment at end of parsing. */
63 int comstyle; /* comment style a=0, or b=1, or ST_COMMENT_STYLE. */
64 int quoted; /* Nonzero if just after an escape char at end of parsing */
65 int thislevelstart; /* Char number of most recent start-of-expression at current level */
66 int prevlevelstart; /* Char number of start of containing expression */
67 int location; /* Char number at which parsing stopped. */
68 int mindepth; /* Minimum depth seen while scanning. */
69 int comstr_start; /* Position just after last comment/string starter. */
72 /* These variables are a cache for finding the start of a defun.
73 find_start_pos is the place for which the defun start was found.
74 find_start_value is the defun start position found for it.
75 find_start_buffer is the buffer it was found in.
76 find_start_begv is the BEGV value when it was found.
77 find_start_modiff is the value of MODIFF when it was found. */
79 static int find_start_pos;
80 static int find_start_value;
81 static struct buffer *find_start_buffer;
82 static int find_start_begv;
83 static int find_start_modiff;
86 struct gl_state_s gl_state; /* Global state of syntax parser. */
88 INTERVAL interval_of ();
89 #define INTERVALS_AT_ONCE 10 /* 1 + max-number of intervals
90 to scan to property-change. */
92 /*
93 Update gl_state to an appropriate interval which contains POS. The
94 sign of COUNT give the relative position of POS wrt the previously
95 valid interval. If INIT, only [be]_property fields of gl_state are
96 valid at start, the rest is filled basing on OBJECT.
98 `gl_state.*_i' are the intervals, and pos is further in the search
99 direction than the intervals - or in an interval. We update the
100 current syntax-table basing on the property of this interval, and
101 update the interval to start further than POS - or be
102 NULL_INTERVAL. We also update lim_property to be the next value of
103 pos to call this subroutine again - or be before/after the
104 start/end of OBJECT. */
106 void
107 update_syntax_table (pos, count, init, object)
108 int pos, count, init;
109 Lisp_Object object;
111 Lisp_Object tmp_table;
112 int cnt = 0, doing_extra = 0, invalidate = 1;
113 INTERVAL i, oldi;
115 if (init)
117 gl_state.start = gl_state.b_property;
118 gl_state.stop = gl_state.e_property;
119 gl_state.forward_i = interval_of (pos, object);
120 i = gl_state.backward_i = gl_state.forward_i;
121 gl_state.left_ok = gl_state.right_ok = 1;
122 invalidate = 0;
123 if (NULL_INTERVAL_P (i))
124 return;
125 gl_state.b_property = i->position - 1;
126 gl_state.e_property = INTERVAL_LAST_POS (i);
127 goto update;
129 oldi = i = count > 0 ? gl_state.forward_i : gl_state.backward_i;
131 /* We are guarantied to be called with pos either in i, of further off. */
132 if (NULL_INTERVAL_P (i))
133 error ("Error in syntax_table logic for to-the-end intervals");
134 else if (pos < i->position) /* Move left. */
136 if (count > 0)
137 error ("Error in syntax_table logic for intervals <-.");
138 /* Update the interval. */
139 i = update_interval (i, pos);
140 if (oldi->position != INTERVAL_LAST_POS (i))
142 invalidate = 0;
143 gl_state.right_ok = 1; /* Invalidate the other end. */
144 gl_state.forward_i = i;
145 gl_state.e_property = INTERVAL_LAST_POS (i);
148 else if (pos >= INTERVAL_LAST_POS (i)) /* Move right. */
150 if (count < 0)
151 error ("Error in syntax_table logic for intervals ->.");
152 /* Update the interval. */
153 i = update_interval (i, pos);
154 if (i->position != INTERVAL_LAST_POS (oldi))
156 invalidate = 0;
157 gl_state.left_ok = 1; /* Invalidate the other end. */
158 gl_state.backward_i = i;
159 gl_state.b_property = i->position - 1;
162 else if (count > 0 ? gl_state.right_ok : gl_state.left_ok)
164 /* We do not need to recalculate tmp_table. */
165 tmp_table = gl_state.old_prop;
168 update:
169 tmp_table = textget (i->plist, Qsyntax_table);
171 if (invalidate)
172 invalidate = !EQ (tmp_table, gl_state.old_prop); /* Need to invalidate? */
174 if (invalidate) /* Did not get to adjacent interval. */
175 { /* with the same table => */
176 /* invalidate the old range. */
177 if (count > 0)
179 gl_state.backward_i = i;
180 gl_state.left_ok = 1; /* Invalidate the other end. */
181 gl_state.b_property = i->position - 1;
183 else
185 gl_state.forward_i = i;
186 gl_state.right_ok = 1; /* Invalidate the other end. */
187 gl_state.e_property = INTERVAL_LAST_POS (i);
191 gl_state.current_syntax_table = tmp_table;
192 gl_state.old_prop = tmp_table;
193 if (Fsyntax_table_p (tmp_table) == Qt)
195 gl_state.use_global = 0;
197 else if (CONSP (tmp_table))
199 gl_state.use_global = 1;
200 gl_state.global_code = tmp_table;
202 else
204 gl_state.use_global = 0;
205 gl_state.current_syntax_table = current_buffer->syntax_table;
208 while (!NULL_INTERVAL_P (i))
210 if (cnt && !EQ (tmp_table, textget (i->plist, Qsyntax_table)))
212 if (count > 0)
213 gl_state.right_ok = 0;
214 else
215 gl_state.left_ok = 0;
216 break;
218 else if (cnt == INTERVALS_AT_ONCE)
220 if (count > 0)
221 gl_state.right_ok = 1;
222 else
223 gl_state.left_ok = 1;
224 break;
226 cnt++;
227 i = count > 0 ? next_interval (i) : previous_interval (i);
229 if (NULL_INTERVAL_P (i))
230 { /* This property goes to the end. */
231 if (count > 0)
232 gl_state.e_property = gl_state.stop;
233 else
234 gl_state.b_property = gl_state.start;
236 else
238 if (count > 0)
240 gl_state.e_property = i->position;
241 gl_state.forward_i = i;
243 else
245 gl_state.b_property = i->position + LENGTH (i) - 1;
246 gl_state.backward_i = i;
251 /* Returns TRUE if char at POS is quoted.
252 Global syntax-table data should be set up already to be good at pos
253 or after. On return global syntax data is good for lookup at POS. */
255 static int
256 char_quoted (pos)
257 register int pos;
259 register enum syntaxcode code;
260 register int beg = BEGV;
261 register int quoted = 0;
262 int temp_pos = pos;
264 DEC_POS (temp_pos);
265 while (temp_pos >= beg
266 && ( UPDATE_SYNTAX_TABLE_BACKWARD (temp_pos), 1)
267 && ((code = SYNTAX (FETCH_CHAR (temp_pos))) == Scharquote
268 || code == Sescape))
270 temp_pos--, quoted = !quoted;
272 UPDATE_SYNTAX_TABLE (pos);
273 return quoted;
276 /* Find a defun-start that is the last one before POS (or nearly the last).
277 We record what we find, so that another call in the same area
278 can return the same value right away.
280 There is no promise at which position the global syntax data is
281 valid on return from the subroutine, so the caller should explicitly
282 update the global data. */
284 static int
285 find_defun_start (pos)
286 int pos;
288 int tem;
289 int shortage;
291 /* Use previous finding, if it's valid and applies to this inquiry. */
292 if (current_buffer == find_start_buffer
293 /* Reuse the defun-start even if POS is a little farther on.
294 POS might be in the next defun, but that's ok.
295 Our value may not be the best possible, but will still be usable. */
296 && pos <= find_start_pos + 1000
297 && pos >= find_start_value
298 && BEGV == find_start_begv
299 && MODIFF == find_start_modiff)
300 return find_start_value;
302 /* Back up to start of line. */
303 tem = scan_buffer ('\n', pos, BEGV, -1, &shortage, 1);
305 /* We optimize syntax-table lookup for rare updates. Thus we accept
306 only those `^\s(' which are good in global _and_ text-property
307 syntax-tables. */
308 gl_state.current_syntax_table = current_buffer->syntax_table;
309 gl_state.use_global = 0;
310 while (tem > BEGV)
312 /* Open-paren at start of line means we found our defun-start. */
313 if (SYNTAX (FETCH_CHAR (tem)) == Sopen)
315 SETUP_SYNTAX_TABLE (tem + 1, -1); /* Try again... */
316 if (SYNTAX (FETCH_CHAR (tem)) == Sopen)
317 break;
318 /* Now fallback to the default value. */
319 gl_state.current_syntax_table = current_buffer->syntax_table;
320 gl_state.use_global = 0;
322 /* Move to beg of previous line. */
323 tem = scan_buffer ('\n', tem, BEGV, -2, &shortage, 1);
326 /* Record what we found, for the next try. */
327 find_start_value = tem;
328 find_start_buffer = current_buffer;
329 find_start_modiff = MODIFF;
330 find_start_begv = BEGV;
331 find_start_pos = pos;
333 return find_start_value;
336 /* Checks whether FROM is the end of comment. Does not try to
337 fallback more than to STOP.
338 Returns -1 if cannot find comment ending at from, otherwise start
339 of comment. Global syntax data remains valid for
340 backward search starting at the returned value (or at FROM, if
341 the search was not successful). */
343 static int
344 back_comment (from, stop)
345 int from, stop;
347 /* Look back, counting the parity of string-quotes,
348 and recording the comment-starters seen.
349 When we reach a safe place, assume that's not in a string;
350 then step the main scan to the earliest comment-starter seen
351 an even number of string quotes away from the safe place.
353 OFROM[I] is position of the earliest comment-starter seen
354 which is I+2X quotes from the comment-end.
355 PARITY is current parity of quotes from the comment end. */
356 int parity = 0;
357 int my_stringend = 0;
358 int string_lossage = 0;
359 int comment_end = from;
360 int comstart_pos = 0;
361 int comstart_parity = 0;
362 int scanstart = from - 1;
363 register enum syntaxcode code;
364 int c, comstyle = 0;
366 /* At beginning of range to scan, we're outside of strings;
367 that determines quote parity to the comment-end. */
368 while (from != stop)
370 /* Move back and examine a character. */
371 DEC_POS (from);
372 UPDATE_SYNTAX_TABLE_BACKWARD (from);
374 c = FETCH_CHAR (from);
375 code = SYNTAX (c);
377 /* If this char is the second of a 2-char comment sequence,
378 back up and give the pair the appropriate syntax. */
379 if (from > stop && SYNTAX_COMEND_SECOND (c)
380 && SYNTAX_COMEND_FIRST (FETCH_CHAR (from - 1)))
382 code = Sendcomment;
383 DEC_POS (from);
384 /* This is apparently the best we can do: */
385 UPDATE_SYNTAX_TABLE_BACKWARD (from);
386 c = FETCH_CHAR (from);
389 /* If this char starts a 2-char comment start sequence,
390 treat it like a 1-char comment starter. */
391 if (from < scanstart && SYNTAX_COMSTART_FIRST (c)
392 && SYNTAX_COMSTART_SECOND (FETCH_CHAR (from + 1))
393 && comstyle == SYNTAX_COMMENT_STYLE (FETCH_CHAR (from + 1)))
394 code = Scomment;
396 /* Ignore escaped characters. */
397 if (char_quoted (from))
398 continue;
400 /* Track parity of quotes. */
401 if (code == Sstring)
403 parity ^= 1;
404 if (my_stringend == 0)
405 my_stringend = c;
406 /* If we have two kinds of string delimiters.
407 There's no way to grok this scanning backwards. */
408 else if (my_stringend != c)
409 string_lossage = 1;
412 if (code == Sstring_fence || code == Scomment_fence)
414 parity ^= 1;
415 if (my_stringend == 0)
416 my_stringend =
417 code == Sstring_fence ? ST_STRING_STYLE : ST_COMMENT_STYLE;
418 /* If we have two kinds of string delimiters.
419 There's no way to grok this scanning backwards. */
420 else if (my_stringend != (code == Sstring_fence
421 ? ST_STRING_STYLE : ST_COMMENT_STYLE))
422 string_lossage = 1;
425 /* Record comment-starters according to that
426 quote-parity to the comment-end. */
427 if (code == Scomment)
429 comstart_parity = parity;
430 comstart_pos = from;
433 /* If we find another earlier comment-ender,
434 any comment-starts earlier than that don't count
435 (because they go with the earlier comment-ender). */
436 if (code == Sendcomment
437 && SYNTAX_COMMENT_STYLE (FETCH_CHAR (from)) == comstyle)
438 break;
440 /* Assume a defun-start point is outside of strings. */
441 if (code == Sopen
442 && (from == stop || FETCH_CHAR (from - 1) == '\n'))
443 break;
446 if (comstart_pos == 0)
448 from = comment_end;
449 UPDATE_SYNTAX_TABLE_FORWARD (comment_end - 1);
451 /* If the earliest comment starter
452 is followed by uniform paired string quotes or none,
453 we know it can't be inside a string
454 since if it were then the comment ender would be inside one.
455 So it does start a comment. Skip back to it. */
456 else if (comstart_parity == 0 && !string_lossage)
458 from = comstart_pos;
459 /* Globals are correct now. */
461 else
463 /* We had two kinds of string delimiters mixed up
464 together. Decode this going forwards.
465 Scan fwd from the previous comment ender
466 to the one in question; this records where we
467 last passed a comment starter. */
468 struct lisp_parse_state state;
469 scan_sexps_forward (&state, find_defun_start (comment_end),
470 comment_end - 1, -10000, 0, Qnil, 0);
471 if (state.incomment)
473 /* scan_sexps_forward changed the direction of search in
474 global variables, so we need to update it completely. */
476 from = state.comstr_start;
478 else
480 from = comment_end;
482 UPDATE_SYNTAX_TABLE_FORWARD (from - 1);
485 return from;
488 DEFUN ("syntax-table-p", Fsyntax_table_p, Ssyntax_table_p, 1, 1, 0,
489 "Return t if OBJECT is a syntax table.\n\
490 Currently, any char-table counts as a syntax table.")
491 (object)
492 Lisp_Object object;
494 if (CHAR_TABLE_P (object)
495 && XCHAR_TABLE (object)->purpose == Qsyntax_table)
496 return Qt;
497 return Qnil;
500 static void
501 check_syntax_table (obj)
502 Lisp_Object obj;
504 if (!(CHAR_TABLE_P (obj)
505 && XCHAR_TABLE (obj)->purpose == Qsyntax_table))
506 wrong_type_argument (Qsyntax_table_p, obj);
509 DEFUN ("syntax-table", Fsyntax_table, Ssyntax_table, 0, 0, 0,
510 "Return the current syntax table.\n\
511 This is the one specified by the current buffer.")
514 return current_buffer->syntax_table;
517 DEFUN ("standard-syntax-table", Fstandard_syntax_table,
518 Sstandard_syntax_table, 0, 0, 0,
519 "Return the standard syntax table.\n\
520 This is the one used for new buffers.")
523 return Vstandard_syntax_table;
526 DEFUN ("copy-syntax-table", Fcopy_syntax_table, Scopy_syntax_table, 0, 1, 0,
527 "Construct a new syntax table and return it.\n\
528 It is a copy of the TABLE, which defaults to the standard syntax table.")
529 (table)
530 Lisp_Object table;
532 Lisp_Object copy;
534 if (!NILP (table))
535 check_syntax_table (table);
536 else
537 table = Vstandard_syntax_table;
539 copy = Fcopy_sequence (table);
541 /* Only the standard syntax table should have a default element.
542 Other syntax tables should inherit from parents instead. */
543 XCHAR_TABLE (copy)->defalt = Qnil;
545 /* Copied syntax tables should all have parents.
546 If we copied one with no parent, such as the standard syntax table,
547 use the standard syntax table as the copy's parent. */
548 if (NILP (XCHAR_TABLE (copy)->parent))
549 Fset_char_table_parent (copy, Vstandard_syntax_table);
550 return copy;
553 DEFUN ("set-syntax-table", Fset_syntax_table, Sset_syntax_table, 1, 1, 0,
554 "Select a new syntax table for the current buffer.\n\
555 One argument, a syntax table.")
556 (table)
557 Lisp_Object table;
559 check_syntax_table (table);
560 current_buffer->syntax_table = table;
561 /* Indicate that this buffer now has a specified syntax table. */
562 current_buffer->local_var_flags
563 |= XFASTINT (buffer_local_flags.syntax_table);
564 return table;
567 /* Convert a letter which signifies a syntax code
568 into the code it signifies.
569 This is used by modify-syntax-entry, and other things. */
571 unsigned char syntax_spec_code[0400] =
572 { 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
573 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
574 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
575 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
576 (char) Swhitespace, (char) Scomment_fence, (char) Sstring, 0377,
577 (char) Smath, 0377, 0377, (char) Squote,
578 (char) Sopen, (char) Sclose, 0377, 0377,
579 0377, (char) Swhitespace, (char) Spunct, (char) Scharquote,
580 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
581 0377, 0377, 0377, 0377,
582 (char) Scomment, 0377, (char) Sendcomment, 0377,
583 (char) Sinherit, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* @, A ... */
584 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
585 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
586 0377, 0377, 0377, 0377, (char) Sescape, 0377, 0377, (char) Ssymbol,
587 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* `, a, ... */
588 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
589 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
590 0377, 0377, 0377, 0377, (char) Sstring_fence, 0377, 0377, 0377
593 /* Indexed by syntax code, give the letter that describes it. */
595 char syntax_code_spec[16] =
597 ' ', '.', 'w', '_', '(', ')', '\'', '\"', '$', '\\', '/', '<', '>', '@',
598 '!', '|'
601 /* Indexed by syntax code, give the object (cons of syntax code and
602 nil) to be stored in syntax table. Since these objects can be
603 shared among syntax tables, we generate them in advance. By
604 sharing objects, the function `describe-syntax' can give a more
605 compact listing. */
606 static Lisp_Object Vsyntax_code_object;
609 /* Look up the value for CHARACTER in syntax table TABLE's parent
610 and its parents. SYNTAX_ENTRY calls this, when TABLE itself has nil
611 for CHARACTER. It's actually used only when not compiled with GCC. */
613 Lisp_Object
614 syntax_parent_lookup (table, character)
615 Lisp_Object table;
616 int character;
618 Lisp_Object value;
620 while (1)
622 table = XCHAR_TABLE (table)->parent;
623 if (NILP (table))
624 return Qnil;
626 value = XCHAR_TABLE (table)->contents[character];
627 if (!NILP (value))
628 return value;
632 DEFUN ("char-syntax", Fchar_syntax, Schar_syntax, 1, 1, 0,
633 "Return the syntax code of CHARACTER, described by a character.\n\
634 For example, if CHARACTER is a word constituent,\n\
635 the character `w' is returned.\n\
636 The characters that correspond to various syntax codes\n\
637 are listed in the documentation of `modify-syntax-entry'.")
638 (character)
639 Lisp_Object character;
641 int char_int;
642 gl_state.current_syntax_table = current_buffer->syntax_table;
644 gl_state.use_global = 0;
645 CHECK_NUMBER (character, 0);
646 char_int = XINT (character);
647 return make_number (syntax_code_spec[(int) SYNTAX (char_int)]);
650 DEFUN ("matching-paren", Fmatching_paren, Smatching_paren, 1, 1, 0,
651 "Return the matching parenthesis of CHARACTER, or nil if none.")
652 (character)
653 Lisp_Object character;
655 int char_int, code;
656 gl_state.current_syntax_table = current_buffer->syntax_table;
657 gl_state.use_global = 0;
658 CHECK_NUMBER (character, 0);
659 char_int = XINT (character);
660 code = SYNTAX (char_int);
661 if (code == Sopen || code == Sclose)
662 return make_number (SYNTAX_MATCH (char_int));
663 return Qnil;
666 /* This comment supplies the doc string for modify-syntax-entry,
667 for make-docfile to see. We cannot put this in the real DEFUN
668 due to limits in the Unix cpp.
670 DEFUN ("modify-syntax-entry", foo, bar, 2, 3, 0,
671 "Set syntax for character CHAR according to string S.\n\
672 The syntax is changed only for table TABLE, which defaults to\n\
673 the current buffer's syntax table.\n\
674 The first character of S should be one of the following:\n\
675 Space or - whitespace syntax. w word constituent.\n\
676 _ symbol constituent. . punctuation.\n\
677 ( open-parenthesis. ) close-parenthesis.\n\
678 \" string quote. \\ escape.\n\
679 $ paired delimiter. ' expression quote or prefix operator.\n\
680 < comment starter. > comment ender.\n\
681 / character-quote. @ inherit from `standard-syntax-table'.\n\
683 Only single-character comment start and end sequences are represented thus.\n\
684 Two-character sequences are represented as described below.\n\
685 The second character of S is the matching parenthesis,\n\
686 used only if the first character is `(' or `)'.\n\
687 Any additional characters are flags.\n\
688 Defined flags are the characters 1, 2, 3, 4, b, and p.\n\
689 1 means CHAR is the start of a two-char comment start sequence.\n\
690 2 means CHAR is the second character of such a sequence.\n\
691 3 means CHAR is the start of a two-char comment end sequence.\n\
692 4 means CHAR is the second character of such a sequence.\n\
694 There can be up to two orthogonal comment sequences. This is to support\n\
695 language modes such as C++. By default, all comment sequences are of style\n\
696 a, but you can set the comment sequence style to b (on the second character\n\
697 of a comment-start, or the first character of a comment-end sequence) using\n\
698 this flag:\n\
699 b means CHAR is part of comment sequence b.\n\
701 p means CHAR is a prefix character for `backward-prefix-chars';\n\
702 such characters are treated as whitespace when they occur\n\
703 between expressions.")
704 (char, s, table)
707 DEFUN ("modify-syntax-entry", Fmodify_syntax_entry, Smodify_syntax_entry, 2, 3,
708 /* I really don't know why this is interactive
709 help-form should at least be made useful whilst reading the second arg
711 "cSet syntax for character: \nsSet syntax for %s to: ",
712 0 /* See immediately above */)
713 (c, newentry, syntax_table)
714 Lisp_Object c, newentry, syntax_table;
716 register unsigned char *p;
717 register enum syntaxcode code;
718 int val;
719 Lisp_Object match;
721 CHECK_NUMBER (c, 0);
722 CHECK_STRING (newentry, 1);
724 if (NILP (syntax_table))
725 syntax_table = current_buffer->syntax_table;
726 else
727 check_syntax_table (syntax_table);
729 p = XSTRING (newentry)->data;
730 code = (enum syntaxcode) syntax_spec_code[*p++];
731 if (((int) code & 0377) == 0377)
732 error ("invalid syntax description letter: %c", c);
734 if (code == Sinherit)
736 SET_RAW_SYNTAX_ENTRY (syntax_table, c, Qnil);
737 return Qnil;
740 if (*p)
742 int len;
743 int character = STRING_CHAR_AND_LENGTH (p, XSTRING (newentry)->size - 1,
744 len);
745 XSETINT (match, character);
746 if (XFASTINT (match) == ' ')
747 match = Qnil;
748 p += len;
750 else
751 match = Qnil;
753 val = (int) code;
754 while (*p)
755 switch (*p++)
757 case '1':
758 val |= 1 << 16;
759 break;
761 case '2':
762 val |= 1 << 17;
763 break;
765 case '3':
766 val |= 1 << 18;
767 break;
769 case '4':
770 val |= 1 << 19;
771 break;
773 case 'p':
774 val |= 1 << 20;
775 break;
777 case 'b':
778 val |= 1 << 21;
779 break;
782 if (val < XVECTOR (Vsyntax_code_object)->size && NILP (match))
783 newentry = XVECTOR (Vsyntax_code_object)->contents[val];
784 else
785 /* Since we can't use a shared object, let's make a new one. */
786 newentry = Fcons (make_number (val), match);
788 SET_RAW_SYNTAX_ENTRY (syntax_table, c, newentry);
790 return Qnil;
793 /* Dump syntax table to buffer in human-readable format */
795 static void
796 describe_syntax (value)
797 Lisp_Object value;
799 register enum syntaxcode code;
800 char desc, match, start1, start2, end1, end2, prefix, comstyle;
801 char str[2];
802 Lisp_Object first, match_lisp;
804 Findent_to (make_number (16), make_number (1));
806 if (NILP (value))
808 insert_string ("default\n");
809 return;
812 if (CHAR_TABLE_P (value))
814 insert_string ("deeper char-table ...\n");
815 return;
818 if (!CONSP (value))
820 insert_string ("invalid\n");
821 return;
824 first = XCONS (value)->car;
825 match_lisp = XCONS (value)->cdr;
827 if (!INTEGERP (first) || !(NILP (match_lisp) || INTEGERP (match_lisp)))
829 insert_string ("invalid\n");
830 return;
833 code = (enum syntaxcode) (first & 0377);
834 start1 = (XINT (first) >> 16) & 1;
835 start2 = (XINT (first) >> 17) & 1;
836 end1 = (XINT (first) >> 18) & 1;
837 end2 = (XINT (first) >> 19) & 1;
838 prefix = (XINT (first) >> 20) & 1;
839 comstyle = (XINT (first) >> 21) & 1;
841 if ((int) code < 0 || (int) code >= (int) Smax)
843 insert_string ("invalid");
844 return;
846 desc = syntax_code_spec[(int) code];
848 str[0] = desc, str[1] = 0;
849 insert (str, 1);
851 if (NILP (match_lisp))
852 insert (" ", 1);
853 else
854 insert_char (XINT (match_lisp));
856 if (start1)
857 insert ("1", 1);
858 if (start2)
859 insert ("2", 1);
861 if (end1)
862 insert ("3", 1);
863 if (end2)
864 insert ("4", 1);
866 if (prefix)
867 insert ("p", 1);
868 if (comstyle)
869 insert ("b", 1);
871 insert_string ("\twhich means: ");
873 switch (SWITCH_ENUM_CAST (code))
875 case Swhitespace:
876 insert_string ("whitespace"); break;
877 case Spunct:
878 insert_string ("punctuation"); break;
879 case Sword:
880 insert_string ("word"); break;
881 case Ssymbol:
882 insert_string ("symbol"); break;
883 case Sopen:
884 insert_string ("open"); break;
885 case Sclose:
886 insert_string ("close"); break;
887 case Squote:
888 insert_string ("quote"); break;
889 case Sstring:
890 insert_string ("string"); break;
891 case Smath:
892 insert_string ("math"); break;
893 case Sescape:
894 insert_string ("escape"); break;
895 case Scharquote:
896 insert_string ("charquote"); break;
897 case Scomment:
898 insert_string ("comment"); break;
899 case Sendcomment:
900 insert_string ("endcomment"); break;
901 default:
902 insert_string ("invalid");
903 return;
906 if (!NILP (match_lisp))
908 insert_string (", matches ");
909 insert_char (XINT (match_lisp));
912 if (start1)
913 insert_string (",\n\t is the first character of a comment-start sequence");
914 if (start2)
915 insert_string (",\n\t is the second character of a comment-start sequence");
917 if (end1)
918 insert_string (",\n\t is the first character of a comment-end sequence");
919 if (end2)
920 insert_string (",\n\t is the second character of a comment-end sequence");
921 if (comstyle)
922 insert_string (" (comment style b)");
924 if (prefix)
925 insert_string (",\n\t is a prefix character for `backward-prefix-chars'");
927 insert_string ("\n");
930 static Lisp_Object
931 describe_syntax_1 (vector)
932 Lisp_Object vector;
934 struct buffer *old = current_buffer;
935 set_buffer_internal (XBUFFER (Vstandard_output));
936 describe_vector (vector, Qnil, describe_syntax, 0, Qnil, Qnil);
937 while (! NILP (XCHAR_TABLE (vector)->parent))
939 vector = XCHAR_TABLE (vector)->parent;
940 insert_string ("\nThe parent syntax table is:");
941 describe_vector (vector, Qnil, describe_syntax, 0, Qnil, Qnil);
944 call0 (intern ("help-mode"));
945 set_buffer_internal (old);
946 return Qnil;
949 DEFUN ("describe-syntax", Fdescribe_syntax, Sdescribe_syntax, 0, 0, "",
950 "Describe the syntax specifications in the syntax table.\n\
951 The descriptions are inserted in a buffer, which is then displayed.")
954 internal_with_output_to_temp_buffer
955 ("*Help*", describe_syntax_1, current_buffer->syntax_table);
957 return Qnil;
960 int parse_sexp_ignore_comments;
962 /* Return the position across COUNT words from FROM.
963 If that many words cannot be found before the end of the buffer, return 0.
964 COUNT negative means scan backward and stop at word beginning. */
966 scan_words (from, count)
967 register int from, count;
969 register int beg = BEGV;
970 register int end = ZV;
971 register enum syntaxcode code;
972 int ch0, ch1;
973 int temp_pos;
975 immediate_quit = 1;
976 QUIT;
978 SETUP_SYNTAX_TABLE (from, count);
980 while (count > 0)
982 while (1)
984 if (from == end)
986 immediate_quit = 0;
987 return 0;
989 UPDATE_SYNTAX_TABLE_FORWARD (from);
990 ch0 = FETCH_CHAR (from);
991 code = SYNTAX (ch0);
992 INC_POS (from);
993 if (words_include_escapes
994 && (code == Sescape || code == Scharquote))
995 break;
996 if (code == Sword)
997 break;
999 /* Now CH0 is a character which begins a word and FROM is the
1000 position of the next character. */
1001 while (1)
1003 if (from == end) break;
1004 UPDATE_SYNTAX_TABLE_FORWARD (from);
1005 ch1 = FETCH_CHAR (from);
1006 code = SYNTAX (ch1);
1007 if (!(words_include_escapes
1008 && (code == Sescape || code == Scharquote)))
1009 if (code != Sword || WORD_BOUNDARY_P (ch0, ch1))
1010 break;
1011 INC_POS (from);
1012 ch0 = ch1;
1014 count--;
1016 while (count < 0)
1018 while (1)
1020 if (from == beg)
1022 immediate_quit = 0;
1023 return 0;
1025 DEC_POS (from);
1026 UPDATE_SYNTAX_TABLE_BACKWARD (from);
1027 ch1 = FETCH_CHAR (from);
1028 code = SYNTAX (ch1);
1029 if (words_include_escapes
1030 && (code == Sescape || code == Scharquote))
1031 break;
1032 if (code == Sword)
1033 break;
1035 /* Now CH1 is a character which ends a word and FROM is the
1036 position of it. */
1037 while (1)
1039 if (from == beg) break;
1040 temp_pos = from;
1041 DEC_POS (temp_pos);
1042 UPDATE_SYNTAX_TABLE_BACKWARD (from);
1043 ch0 = FETCH_CHAR (temp_pos);
1044 code = SYNTAX (ch0);
1045 if (!(words_include_escapes
1046 && (code == Sescape || code == Scharquote)))
1047 if (code != Sword || WORD_BOUNDARY_P (ch0, ch1))
1048 break;
1049 from = temp_pos;
1050 ch1 = ch0;
1052 count++;
1055 immediate_quit = 0;
1057 return from;
1060 DEFUN ("forward-word", Fforward_word, Sforward_word, 1, 1, "p",
1061 "Move point forward ARG words (backward if ARG is negative).\n\
1062 Normally returns t.\n\
1063 If an edge of the buffer is reached, point is left there\n\
1064 and nil is returned.")
1065 (count)
1066 Lisp_Object count;
1068 int val;
1069 CHECK_NUMBER (count, 0);
1071 if (!(val = scan_words (PT, XINT (count))))
1073 SET_PT (XINT (count) > 0 ? ZV : BEGV);
1074 return Qnil;
1076 SET_PT (val);
1077 return Qt;
1080 Lisp_Object skip_chars ();
1082 DEFUN ("skip-chars-forward", Fskip_chars_forward, Sskip_chars_forward, 1, 2, 0,
1083 "Move point forward, stopping before a char not in STRING, or at pos LIM.\n\
1084 STRING is like the inside of a `[...]' in a regular expression\n\
1085 except that `]' is never special and `\\' quotes `^', `-' or `\\'.\n\
1086 Thus, with arg \"a-zA-Z\", this skips letters stopping before first nonletter.\n\
1087 With arg \"^a-zA-Z\", skips nonletters stopping before first letter.\n\
1088 Returns the distance traveled, either zero or positive.")
1089 (string, lim)
1090 Lisp_Object string, lim;
1092 return skip_chars (1, 0, string, lim);
1095 DEFUN ("skip-chars-backward", Fskip_chars_backward, Sskip_chars_backward, 1, 2, 0,
1096 "Move point backward, stopping after a char not in STRING, or at pos LIM.\n\
1097 See `skip-chars-forward' for details.\n\
1098 Returns the distance traveled, either zero or negative.")
1099 (string, lim)
1100 Lisp_Object string, lim;
1102 return skip_chars (0, 0, string, lim);
1105 DEFUN ("skip-syntax-forward", Fskip_syntax_forward, Sskip_syntax_forward, 1, 2, 0,
1106 "Move point forward across chars in specified syntax classes.\n\
1107 SYNTAX is a string of syntax code characters.\n\
1108 Stop before a char whose syntax is not in SYNTAX, or at position LIM.\n\
1109 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.\n\
1110 This function returns the distance traveled, either zero or positive.")
1111 (syntax, lim)
1112 Lisp_Object syntax, lim;
1114 return skip_chars (1, 1, syntax, lim);
1117 DEFUN ("skip-syntax-backward", Fskip_syntax_backward, Sskip_syntax_backward, 1, 2, 0,
1118 "Move point backward across chars in specified syntax classes.\n\
1119 SYNTAX is a string of syntax code characters.\n\
1120 Stop on reaching a char whose syntax is not in SYNTAX, or at position LIM.\n\
1121 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.\n\
1122 This function returns the distance traveled, either zero or negative.")
1123 (syntax, lim)
1124 Lisp_Object syntax, lim;
1126 return skip_chars (0, 1, syntax, lim);
1129 Lisp_Object
1130 skip_chars (forwardp, syntaxp, string, lim)
1131 int forwardp, syntaxp;
1132 Lisp_Object string, lim;
1134 register unsigned char *p, *pend;
1135 register unsigned int c;
1136 register int ch;
1137 unsigned char fastmap[0400];
1138 /* If SYNTAXP is 0, STRING may contain multi-byte form of characters
1139 of which codes don't fit in FASTMAP. In that case, we set the
1140 first byte of multibyte form (i.e. base leading-code) in FASTMAP
1141 and set the actual ranges of characters in CHAR_RANGES. In the
1142 form "X-Y" of STRING, both X and Y must belong to the same
1143 character set because a range striding across character sets is
1144 meaningless. */
1145 int *char_ranges
1146 = (int *) alloca (XSTRING (string)->size * (sizeof (int)) * 2);
1147 int n_char_ranges = 0;
1148 int negate = 0;
1149 register int i;
1150 int multibyte = !NILP (current_buffer->enable_multibyte_characters);
1152 CHECK_STRING (string, 0);
1154 if (NILP (lim))
1155 XSETINT (lim, forwardp ? ZV : BEGV);
1156 else
1157 CHECK_NUMBER_COERCE_MARKER (lim, 1);
1159 /* In any case, don't allow scan outside bounds of buffer. */
1160 /* jla turned this off, for no known reason.
1161 bfox turned the ZV part on, and rms turned the
1162 BEGV part back on. */
1163 if (XINT (lim) > ZV)
1164 XSETFASTINT (lim, ZV);
1165 if (XINT (lim) < BEGV)
1166 XSETFASTINT (lim, BEGV);
1168 p = XSTRING (string)->data;
1169 pend = p + XSTRING (string)->size;
1170 bzero (fastmap, sizeof fastmap);
1172 if (p != pend && *p == '^')
1174 negate = 1; p++;
1177 /* Find the characters specified and set their elements of fastmap.
1178 If syntaxp, each character counts as itself.
1179 Otherwise, handle backslashes and ranges specially. */
1181 while (p != pend)
1183 c = *p;
1184 if (multibyte)
1186 ch = STRING_CHAR (p, pend - p);
1187 p += BYTES_BY_CHAR_HEAD (*p);
1189 else
1191 ch = c;
1192 p++;
1194 if (syntaxp)
1195 fastmap[syntax_spec_code[c]] = 1;
1196 else
1198 if (c == '\\')
1200 if (p == pend) break;
1201 c = *p++;
1203 if (p != pend && *p == '-')
1205 unsigned int ch2;
1207 p++;
1208 if (p == pend) break;
1209 if (SINGLE_BYTE_CHAR_P (ch))
1210 while (c <= *p)
1212 fastmap[c] = 1;
1213 c++;
1215 else
1217 fastmap[c] = 1; /* C is the base leading-code. */
1218 ch2 = STRING_CHAR (p, pend - p);
1219 if (ch <= ch2)
1220 char_ranges[n_char_ranges++] = ch,
1221 char_ranges[n_char_ranges++] = ch2;
1223 p += multibyte ? BYTES_BY_CHAR_HEAD (*p) : 1;
1225 else
1227 fastmap[c] = 1;
1228 if (!SINGLE_BYTE_CHAR_P (ch))
1230 char_ranges[n_char_ranges++] = ch;
1231 char_ranges[n_char_ranges++] = ch;
1237 /* If ^ was the first character, complement the fastmap. In
1238 addition, as all multibyte characters have possibility of
1239 matching, set all entries for base leading codes, which is
1240 harmless even if SYNTAXP is 1. */
1242 if (negate)
1243 for (i = 0; i < sizeof fastmap; i++)
1245 if (!multibyte || !BASE_LEADING_CODE_P (i))
1246 fastmap[i] ^= 1;
1247 else
1248 fastmap[i] = 1;
1252 int start_point = PT;
1253 int pos = PT;
1255 immediate_quit = 1;
1256 if (syntaxp)
1258 SETUP_SYNTAX_TABLE (pos, forwardp ? 1 : -1);
1259 if (forwardp)
1261 if (multibyte)
1263 while (pos < XINT (lim)
1264 && fastmap[(int) SYNTAX (FETCH_CHAR (pos))])
1266 INC_POS (pos);
1267 UPDATE_SYNTAX_TABLE_FORWARD (pos);
1270 else
1272 while (pos < XINT (lim)
1273 && fastmap[(int) SYNTAX (FETCH_BYTE (pos))])
1275 pos++;
1276 UPDATE_SYNTAX_TABLE_FORWARD (pos);
1280 else
1282 if (multibyte)
1284 while (pos > XINT (lim))
1286 int savepos = pos;
1287 DEC_POS (pos);
1288 UPDATE_SYNTAX_TABLE_BACKWARD (pos);
1289 if (!fastmap[(int) SYNTAX (FETCH_CHAR (pos))])
1291 pos = savepos;
1292 break;
1296 else
1298 while (pos > XINT (lim))
1300 pos--;
1301 UPDATE_SYNTAX_TABLE_BACKWARD (pos);
1302 if (!fastmap[(int) SYNTAX (FETCH_BYTE (pos))])
1304 pos++;
1305 break;
1311 else
1313 if (forwardp)
1315 if (multibyte)
1316 while (pos < XINT (lim) && fastmap[(c = FETCH_BYTE (pos))])
1318 if (!BASE_LEADING_CODE_P (c))
1319 pos++;
1320 else if (n_char_ranges)
1322 /* We much check CHAR_RANGES for a multibyte
1323 character. */
1324 ch = FETCH_MULTIBYTE_CHAR (pos);
1325 for (i = 0; i < n_char_ranges; i += 2)
1326 if ((ch >= char_ranges[i] && ch <= char_ranges[i + 1]))
1327 break;
1328 if (!(negate ^ (i < n_char_ranges)))
1329 break;
1331 INC_POS (pos);
1333 else
1335 if (!negate) break;
1336 INC_POS (pos);
1339 else
1340 while (pos < XINT (lim) && fastmap[FETCH_BYTE (pos)])
1341 pos++;
1343 else
1345 if (multibyte)
1346 while (pos > XINT (lim))
1348 int savepos = pos;
1349 DEC_POS (pos);
1350 if (fastmap[(c = FETCH_BYTE (pos))])
1352 if (!BASE_LEADING_CODE_P (c))
1354 else if (n_char_ranges)
1356 /* We much check CHAR_RANGES for a multibyte
1357 character. */
1358 ch = FETCH_MULTIBYTE_CHAR (pos);
1359 for (i = 0; i < n_char_ranges; i += 2)
1360 if (ch >= char_ranges[i] && ch <= char_ranges[i + 1])
1361 break;
1362 if (!(negate ^ (i < n_char_ranges)))
1364 pos = savepos;
1365 break;
1368 else
1369 if (!negate)
1371 pos = savepos;
1372 break;
1375 else
1377 pos = savepos;
1378 break;
1381 else
1382 while (pos > XINT (lim) && fastmap[FETCH_BYTE (pos - 1)])
1383 pos--;
1387 if (multibyte
1388 /* INC_POS or DEC_POS might have moved POS over LIM. */
1389 && (forwardp ? (pos > XINT (lim)) : (pos < XINT (lim))))
1390 pos = XINT (lim);
1392 SET_PT (pos);
1393 immediate_quit = 0;
1395 return make_number (PT - start_point);
1399 DEFUN ("forward-comment", Fforward_comment, Sforward_comment, 1, 1, 0,
1400 "Move forward across up to N comments. If N is negative, move backward.\n\
1401 Stop scanning if we find something other than a comment or whitespace.\n\
1402 Set point to where scanning stops.\n\
1403 If N comments are found as expected, with nothing except whitespace\n\
1404 between them, return t; otherwise return nil.")
1405 (count)
1406 Lisp_Object count;
1408 register int from;
1409 register int stop;
1410 register int c, c1;
1411 register enum syntaxcode code;
1412 int comstyle = 0; /* style of comment encountered */
1413 int found;
1414 int count1;
1415 int temp_pos;
1417 CHECK_NUMBER (count, 0);
1418 count1 = XINT (count);
1419 stop = count1 > 0 ? ZV : BEGV;
1421 immediate_quit = 1;
1422 QUIT;
1424 from = PT;
1426 SETUP_SYNTAX_TABLE (from, count1);
1427 while (count1 > 0)
1431 if (from == stop)
1433 SET_PT (from);
1434 immediate_quit = 0;
1435 return Qnil;
1437 UPDATE_SYNTAX_TABLE_FORWARD (from);
1438 c = FETCH_CHAR (from);
1439 code = SYNTAX (c);
1440 INC_POS (from);
1441 comstyle = 0;
1442 if (from < stop && SYNTAX_COMSTART_FIRST (c)
1443 && (c1 = FETCH_CHAR (from),
1444 SYNTAX_COMSTART_SECOND (c1)))
1446 /* We have encountered a comment start sequence and we
1447 are ignoring all text inside comments. We must record
1448 the comment style this sequence begins so that later,
1449 only a comment end of the same style actually ends
1450 the comment section. */
1451 code = Scomment;
1452 comstyle = SYNTAX_COMMENT_STYLE (c1);
1453 INC_POS (from);
1456 while (code == Swhitespace || code == Sendcomment);
1457 if (code != Scomment && code != Scomment_fence)
1459 immediate_quit = 0;
1460 DEC_POS (from);
1461 SET_PT (from);
1462 return Qnil;
1464 /* We're at the start of a comment. */
1465 while (1)
1467 if (from == stop)
1469 immediate_quit = 0;
1470 SET_PT (from);
1471 return Qnil;
1473 UPDATE_SYNTAX_TABLE_FORWARD (from);
1474 c = FETCH_CHAR (from);
1475 INC_POS (from);
1476 if (SYNTAX (c) == Sendcomment
1477 && SYNTAX_COMMENT_STYLE (c) == comstyle)
1478 /* we have encountered a comment end of the same style
1479 as the comment sequence which began this comment
1480 section */
1481 break;
1482 if (SYNTAX (c) == Scomment_fence
1483 && comstyle == ST_COMMENT_STYLE)
1484 /* we have encountered a comment end of the same style
1485 as the comment sequence which began this comment
1486 section. */
1487 break;
1488 if (from < stop && SYNTAX_COMEND_FIRST (c)
1489 && (c1 = FETCH_CHAR (from),
1490 SYNTAX_COMEND_SECOND (c1))
1491 && SYNTAX_COMMENT_STYLE (c) == comstyle)
1492 /* we have encountered a comment end of the same style
1493 as the comment sequence which began this comment
1494 section */
1495 { INC_POS (from); break; }
1497 /* We have skipped one comment. */
1498 count1--;
1501 while (count1 < 0)
1503 while (from > stop)
1505 int quoted;
1507 DEC_POS (from);
1508 quoted = char_quoted (from);
1509 if (quoted)
1511 DEC_POS (from);
1512 goto leave; /* ????? XXXXX */
1514 UPDATE_SYNTAX_TABLE_BACKWARD (from);
1515 c = FETCH_CHAR (from);
1516 code = SYNTAX (c);
1517 comstyle = 0;
1518 if (code == Sendcomment)
1519 comstyle = SYNTAX_COMMENT_STYLE (c);
1520 temp_pos = from;
1521 DEC_POS (temp_pos);
1522 if (from > stop && SYNTAX_COMEND_SECOND (c)
1523 && (c1 = FETCH_CHAR (temp_pos),
1524 SYNTAX_COMEND_FIRST (c1))
1525 && !char_quoted (temp_pos))
1527 /* We must record the comment style encountered so that
1528 later, we can match only the proper comment begin
1529 sequence of the same style. */
1530 code = Sendcomment;
1531 comstyle = SYNTAX_COMMENT_STYLE (c1);
1532 from = temp_pos;
1535 if (code == Scomment_fence)
1537 /* Skip until first preceding unquoted comment_fence. */
1538 int found = 0, ini = from;
1540 while (--from != stop)
1542 UPDATE_SYNTAX_TABLE_BACKWARD (from);
1543 c = FETCH_CHAR (from);
1544 if (SYNTAX (c) == Scomment_fence && !char_quoted (from))
1546 found = 1;
1547 break;
1550 if (found == 0)
1552 from = ini; /* Set point to ini + 1. */
1553 goto leave;
1556 else if (code == Sendcomment)
1558 #if 0
1559 if (code != SYNTAX (c))
1560 /* For a two-char comment ender, we can assume
1561 it does end a comment. So scan back in a simple way. */
1563 if (from != stop) DEC_POS (from);
1564 while (1)
1566 if ((c = FETCH_CHAR (from),
1567 SYNTAX (c) == Scomment)
1568 && SYNTAX_COMMENT_STYLE (c) == comstyle)
1569 break;
1570 if (from == stop)
1572 immediate_quit = 0;
1573 SET_PT (from);
1574 return Qnil;
1576 DEC_POS (from);
1577 if (SYNTAX_COMSTART_SECOND (c)
1578 && (c1 = FETCH_CHAR (from),
1579 SYNTAX_COMSTART_FIRST (c1))
1580 && SYNTAX_COMMENT_STYLE (c) == comstyle
1581 && !char_quoted (from))
1582 break;
1584 break;
1586 #endif /* 0 */
1587 found = back_comment (from, stop);
1588 if (found != -1) from = found;
1589 #if 0
1590 /* Look back, counting the parity of string-quotes,
1591 and recording the comment-starters seen.
1592 When we reach a safe place, assume that's not in a string;
1593 then step the main scan to the earliest comment-starter seen
1594 an even number of string quotes away from the safe place.
1596 OFROM[I] is position of the earliest comment-starter seen
1597 which is I+2X quotes from the comment-end.
1598 PARITY is current parity of quotes from the comment end. */
1600 int parity = 0;
1601 char my_stringend = 0;
1602 int string_lossage = 0;
1603 int comment_end = from;
1604 int comstart_pos = 0;
1605 int comstart_parity = 0;
1606 int scanstart = from;
1608 DEC_POS (scanstart);
1609 /* At beginning of range to scan, we're outside of strings;
1610 that determines quote parity to the comment-end. */
1611 while (from != stop)
1613 /* Move back and examine a character. */
1614 DEC_POS (from);
1616 UPDATE_SYNTAX_TABLE_BACKWARD (from);
1617 c = FETCH_CHAR (from);
1618 code = SYNTAX (c);
1620 /* If this char is the second of a 2-char comment sequence,
1621 back up and give the pair the appropriate syntax. */
1622 temp_pos = from;
1623 DEC_POS (temp_pos);
1624 if (from > stop && SYNTAX_COMEND_SECOND (c)
1625 && (c1 = FETCH_CHAR (temp_pos),
1626 SYNTAX_COMEND_FIRST (c1)))
1628 code = Sendcomment;
1629 from = temp_pos;
1630 c = c1;
1633 temp_pos = from;
1634 INC_POS (temp_pos);
1635 /* If this char starts a 2-char comment start sequence,
1636 treat it like a 1-char comment starter. */
1637 if (from < scanstart && SYNTAX_COMSTART_FIRST (c)
1638 && (c1 = FETCH_CHAR (temp_pos),
1639 SYNTAX_COMSTART_SECOND (c1))
1640 && comstyle == SYNTAX_COMMENT_STYLE (c1))
1641 code = Scomment;
1643 /* Ignore escaped characters. */
1644 if (char_quoted (from))
1645 continue;
1647 /* Track parity of quotes. */
1648 if (code == Sstring)
1650 parity ^= 1;
1651 if (my_stringend == 0)
1652 my_stringend = c;
1653 /* If we have two kinds of string delimiters.
1654 There's no way to grok this scanning backwards. */
1655 else if (my_stringend != c)
1656 string_lossage = 1;
1659 /* Record comment-starters according to that
1660 quote-parity to the comment-end. */
1661 if (code == Scomment)
1663 comstart_parity = parity;
1664 comstart_pos = from;
1667 /* If we find another earlier comment-ender,
1668 any comment-starts earlier than that don't count
1669 (because they go with the earlier comment-ender). */
1670 if (code == Sendcomment
1671 && SYNTAX_COMMENT_STYLE (FETCH_CHAR (from)) == comstyle)
1672 break;
1674 /* Assume a defun-start point is outside of strings. */
1675 if (code == Sopen
1676 && (from == stop || FETCH_BYTE (from - 1) == '\n'))
1677 break;
1680 if (comstart_pos == 0)
1681 from = comment_end;
1682 /* If the earliest comment starter
1683 is followed by uniform paired string quotes or none,
1684 we know it can't be inside a string
1685 since if it were then the comment ender would be inside one.
1686 So it does start a comment. Skip back to it. */
1687 else if (comstart_parity == 0 && !string_lossage)
1688 from = comstart_pos;
1689 else
1691 /* We had two kinds of string delimiters mixed up
1692 together. Decode this going forwards.
1693 Scan fwd from the previous comment ender
1694 to the one in question; this records where we
1695 last passed a comment starter. */
1696 struct lisp_parse_state state;
1697 scan_sexps_forward (&state, find_defun_start (comment_end),
1698 comment_end - 1, -10000, 0, Qnil, 0);
1699 if (state.incomment)
1700 from = state.comstr_start;
1701 else
1702 /* We can't grok this as a comment; scan it normally. */
1703 from = comment_end;
1706 #endif /* 0 */
1707 /* We have skipped one comment. */
1708 break;
1710 else if (code != Swhitespace && code != Scomment)
1712 leave:
1713 immediate_quit = 0;
1714 INC_POS (from);
1715 SET_PT (from);
1716 return Qnil;
1720 count1++;
1723 SET_PT (from);
1724 immediate_quit = 0;
1725 return Qt;
1728 Lisp_Object
1729 scan_lists (from, count, depth, sexpflag)
1730 register int from;
1731 int count, depth, sexpflag;
1733 Lisp_Object val;
1734 register int stop = count > 0 ? ZV : BEGV;
1735 register int c, c1;
1736 int stringterm;
1737 int quoted;
1738 int mathexit = 0;
1739 register enum syntaxcode code, temp_code;
1740 int min_depth = depth; /* Err out if depth gets less than this. */
1741 int comstyle = 0; /* style of comment encountered */
1742 int temp_pos;
1743 int last_good = from;
1744 int found;
1746 if (depth > 0) min_depth = 0;
1748 immediate_quit = 1;
1749 QUIT;
1751 SETUP_SYNTAX_TABLE (from, count);
1752 while (count > 0)
1754 while (from < stop)
1756 UPDATE_SYNTAX_TABLE_FORWARD (from);
1757 c = FETCH_CHAR (from);
1758 code = SYNTAX (c);
1759 if (depth == min_depth)
1760 last_good = from;
1761 INC_POS (from);
1762 UPDATE_SYNTAX_TABLE_FORWARD (from);
1763 if (from < stop && SYNTAX_COMSTART_FIRST (c)
1764 && SYNTAX_COMSTART_SECOND (FETCH_CHAR (from))
1765 && parse_sexp_ignore_comments)
1767 /* we have encountered a comment start sequence and we
1768 are ignoring all text inside comments. We must record
1769 the comment style this sequence begins so that later,
1770 only a comment end of the same style actually ends
1771 the comment section */
1772 code = Scomment;
1773 comstyle = SYNTAX_COMMENT_STYLE (FETCH_CHAR (from));
1774 INC_POS (from);
1777 UPDATE_SYNTAX_TABLE_FORWARD (from);
1778 if (SYNTAX_PREFIX (c))
1779 continue;
1781 switch (SWITCH_ENUM_CAST (code))
1783 case Sescape:
1784 case Scharquote:
1785 if (from == stop) goto lose;
1786 INC_POS (from);
1787 /* treat following character as a word constituent */
1788 case Sword:
1789 case Ssymbol:
1790 if (depth || !sexpflag) break;
1791 /* This word counts as a sexp; return at end of it. */
1792 while (from < stop)
1794 UPDATE_SYNTAX_TABLE_FORWARD (from);
1795 switch (SWITCH_ENUM_CAST (SYNTAX (FETCH_CHAR (from))))
1797 case Scharquote:
1798 case Sescape:
1799 INC_POS (from);
1800 if (from == stop) goto lose;
1801 break;
1802 case Sword:
1803 case Ssymbol:
1804 case Squote:
1805 break;
1806 default:
1807 goto done;
1809 INC_POS (from);
1811 goto done;
1813 case Scomment:
1814 case Scomment_fence:
1815 if (!parse_sexp_ignore_comments) break;
1816 while (1)
1818 if (from == stop)
1820 if (depth == 0)
1821 goto done;
1822 goto lose;
1824 UPDATE_SYNTAX_TABLE_FORWARD (from);
1825 c = FETCH_CHAR (from);
1826 if (code == Scomment
1827 ? (SYNTAX (c) == Sendcomment
1828 && SYNTAX_COMMENT_STYLE (c) == comstyle)
1829 : (SYNTAX (c) == Scomment_fence))
1830 /* we have encountered a comment end of the same style
1831 as the comment sequence which began this comment
1832 section */
1833 break;
1834 INC_POS (from);
1835 if (from < stop && SYNTAX_COMEND_FIRST (c)
1836 && SYNTAX_COMEND_SECOND (FETCH_CHAR (from))
1837 && SYNTAX_COMMENT_STYLE (c) == comstyle
1838 && code == Scomment)
1839 /* we have encountered a comment end of the same style
1840 as the comment sequence which began this comment
1841 section */
1842 { INC_POS (from); break; }
1844 break;
1846 case Smath:
1847 if (!sexpflag)
1848 break;
1849 if (from != stop && c == FETCH_CHAR (from))
1850 INC_POS (from);
1851 if (mathexit)
1853 mathexit = 0;
1854 goto close1;
1856 mathexit = 1;
1858 case Sopen:
1859 if (!++depth) goto done;
1860 break;
1862 case Sclose:
1863 close1:
1864 if (!--depth) goto done;
1865 if (depth < min_depth)
1866 Fsignal (Qscan_error,
1867 Fcons (build_string ("Containing expression ends prematurely"),
1868 Fcons (make_number (last_good),
1869 Fcons (make_number (from), Qnil))));
1870 break;
1872 case Sstring:
1873 case Sstring_fence:
1874 temp_pos = from;
1875 DEC_POS (temp_pos);
1876 stringterm = FETCH_CHAR (temp_pos);
1877 while (1)
1879 if (from >= stop) goto lose;
1880 UPDATE_SYNTAX_TABLE_FORWARD (from);
1881 if (code == Sstring
1882 ? (FETCH_CHAR (from) == stringterm)
1883 : SYNTAX (FETCH_CHAR (from)) == Sstring_fence)
1884 break;
1885 switch (SWITCH_ENUM_CAST (SYNTAX (FETCH_CHAR (from))))
1887 case Scharquote:
1888 case Sescape:
1889 INC_POS (from);
1891 INC_POS (from);
1893 INC_POS (from);
1894 if (!depth && sexpflag) goto done;
1895 break;
1899 /* Reached end of buffer. Error if within object, return nil if between */
1900 if (depth) goto lose;
1902 immediate_quit = 0;
1903 return Qnil;
1905 /* End of object reached */
1906 done:
1907 count--;
1910 while (count < 0)
1912 while (from > stop)
1914 DEC_POS (from);
1915 UPDATE_SYNTAX_TABLE_BACKWARD (from);
1916 if (quoted = char_quoted (from))
1918 DEC_POS (from);
1919 UPDATE_SYNTAX_TABLE_BACKWARD (from);
1921 c = FETCH_CHAR (from);
1922 code = SYNTAX (c);
1923 if (depth == min_depth)
1924 last_good = from;
1925 comstyle = 0;
1926 if (code == Sendcomment)
1927 comstyle = SYNTAX_COMMENT_STYLE (c);
1928 temp_pos = from;
1929 DEC_POS (temp_pos);
1930 if (from > stop && SYNTAX_COMEND_SECOND (c)
1931 && (c1 = FETCH_CHAR (temp_pos), SYNTAX_COMEND_FIRST (c1))
1932 && !char_quoted (temp_pos)
1933 && parse_sexp_ignore_comments)
1935 /* we must record the comment style encountered so that
1936 later, we can match only the proper comment begin
1937 sequence of the same style */
1938 code = Sendcomment;
1939 comstyle = SYNTAX_COMMENT_STYLE (c1);
1940 from = temp_pos;
1943 if (SYNTAX_PREFIX (c))
1944 continue;
1946 switch (SWITCH_ENUM_CAST (quoted ? Sword : code))
1948 case Sword:
1949 case Ssymbol:
1950 if (depth || !sexpflag) break;
1951 /* This word counts as a sexp; count object finished
1952 after passing it. */
1953 while (from > stop)
1955 temp_pos = from;
1956 DEC_POS (temp_pos);
1957 UPDATE_SYNTAX_TABLE_BACKWARD (temp_pos);
1958 quoted = char_quoted (temp_pos);
1959 if (quoted)
1961 from = temp_pos;
1962 DEC_POS (temp_pos);
1963 UPDATE_SYNTAX_TABLE_BACKWARD (temp_pos);
1965 c1 = FETCH_CHAR (temp_pos);
1966 temp_code = SYNTAX (c1);
1967 if (! (quoted || temp_code == Sword
1968 || temp_code == Ssymbol
1969 || temp_code == Squote))
1970 goto done2;
1971 from = temp_pos;
1973 goto done2;
1975 case Smath:
1976 if (!sexpflag)
1977 break;
1978 temp_pos = from;
1979 DEC_POS (temp_pos);
1980 UPDATE_SYNTAX_TABLE_BACKWARD (temp_pos);
1981 if (from != stop && c == FETCH_CHAR (temp_pos))
1982 from = temp_pos;
1983 if (mathexit)
1985 mathexit = 0;
1986 goto open2;
1988 mathexit = 1;
1990 case Sclose:
1991 if (!++depth) goto done2;
1992 break;
1994 case Sopen:
1995 open2:
1996 if (!--depth) goto done2;
1997 if (depth < min_depth)
1998 Fsignal (Qscan_error,
1999 Fcons (build_string ("Containing expression ends prematurely"),
2000 Fcons (make_number (last_good),
2001 Fcons (make_number (from), Qnil))));
2002 break;
2004 case Sendcomment:
2005 if (!parse_sexp_ignore_comments)
2006 break;
2007 #if 0
2008 if (code != SYNTAX (c))
2009 /* For a two-char comment ender, we can assume
2010 it does end a comment. So scan back in a simple way. */
2012 if (from != stop) DEC_POS (from);
2013 while (1)
2015 if (SYNTAX (c = FETCH_CHAR (from)) == Scomment
2016 && SYNTAX_COMMENT_STYLE (c) == comstyle)
2017 break;
2018 if (from == stop)
2020 if (depth == 0)
2021 goto done2;
2022 goto lose;
2024 DEC_POS (from);
2025 if (SYNTAX_COMSTART_SECOND (c)
2026 && SYNTAX_COMSTART_FIRST (FETCH_CHAR (from))
2027 && SYNTAX_COMMENT_STYLE (c) == comstyle
2028 && !char_quoted (from))
2029 break;
2031 break;
2033 #endif /* 0 */
2034 found = back_comment (from, stop);
2035 if (found != -1) from = found;
2036 #if 0
2037 /* Look back, counting the parity of string-quotes,
2038 and recording the comment-starters seen.
2039 When we reach a safe place, assume that's not in a string;
2040 then step the main scan to the earliest comment-starter seen
2041 an even number of string quotes away from the safe place.
2043 OFROM[I] is position of the earliest comment-starter seen
2044 which is I+2X quotes from the comment-end.
2045 PARITY is current parity of quotes from the comment end. */
2047 int parity = 0;
2048 char my_stringend = 0;
2049 int string_lossage = 0;
2050 int comment_end = from;
2051 int comstart_pos = 0;
2052 int comstart_parity = 0;
2053 int scanstart = from;
2055 DEC_POS (scanstart);
2057 /* At beginning of range to scan, we're outside of strings;
2058 that determines quote parity to the comment-end. */
2059 while (from != stop)
2061 /* Move back and examine a character. */
2062 DEC_POS (from);
2064 c = FETCH_CHAR (from);
2065 code = SYNTAX (c);
2067 /* If this char is the second of a 2-char comment sequence,
2068 back up and give the pair the appropriate syntax. */
2069 temp_pos = from;
2070 DEC_POS (temp_pos);
2071 if (from > stop && SYNTAX_COMEND_SECOND (c)
2072 && (c1 = FETCH_CHAR (temp_pos),
2073 SYNTAX_COMEND_FIRST (c1)))
2075 code = Sendcomment;
2076 from = temp_pos;
2077 c = c1;
2080 /* If this char starts a 2-char comment start sequence,
2081 treat it like a 1-char comment starter. */
2082 temp_pos = from;
2083 INC_POS (temp_pos);
2084 if (from < scanstart && SYNTAX_COMSTART_FIRST (c)
2085 && (c1 = FETCH_CHAR (temp_pos),
2086 SYNTAX_COMSTART_SECOND (c1))
2087 && comstyle == SYNTAX_COMMENT_STYLE (c1))
2088 code = Scomment;
2090 /* Ignore escaped characters. */
2091 if (char_quoted (from))
2092 continue;
2094 /* Track parity of quotes. */
2095 if (code == Sstring)
2097 parity ^= 1;
2098 if (my_stringend == 0)
2099 my_stringend = c;
2100 /* If we have two kinds of string delimiters.
2101 There's no way to grok this scanning backwards. */
2102 else if (my_stringend != c)
2103 string_lossage = 1;
2106 /* Record comment-starters according to that
2107 quote-parity to the comment-end. */
2108 if (code == Scomment)
2110 comstart_parity = parity;
2111 comstart_pos = from;
2114 /* If we find another earlier comment-ender,
2115 any comment-starts earlier than that don't count
2116 (because they go with the earlier comment-ender). */
2117 if (code == Sendcomment
2118 && SYNTAX_COMMENT_STYLE (FETCH_CHAR (from)) == comstyle)
2119 break;
2121 /* Assume a defun-start point is outside of strings. */
2122 if (code == Sopen
2123 && (from == stop || FETCH_BYTE (from - 1) == '\n'))
2124 break;
2127 if (comstart_pos == 0)
2128 from = comment_end;
2129 /* If the earliest comment starter
2130 is followed by uniform paired string quotes or none,
2131 we know it can't be inside a string
2132 since if it were then the comment ender would be inside one.
2133 So it does start a comment. Skip back to it. */
2134 else if (comstart_parity == 0 && !string_lossage)
2135 from = comstart_pos;
2136 else
2138 /* We had two kinds of string delimiters mixed up
2139 together. Decode this going forwards.
2140 Scan fwd from the previous comment ender
2141 to the one in question; this records where we
2142 last passed a comment starter. */
2143 struct lisp_parse_state state;
2144 scan_sexps_forward (&state, find_defun_start (comment_end),
2145 comment_end - 1, -10000, 0, Qnil, 0);
2146 if (state.incomment)
2147 from = state.comstr_start;
2148 else
2149 /* We can't grok this as a comment; scan it normally. */
2150 from = comment_end;
2153 #endif /* 0 */
2154 break;
2156 case Scomment_fence:
2157 case Sstring_fence:
2158 while (1)
2160 DEC_POS (from);
2161 if (from == stop) goto lose;
2162 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2163 if (!char_quoted (from)
2164 && SYNTAX (FETCH_CHAR (from)) == code)
2165 break;
2167 if (code == Sstring_fence && !depth && sexpflag) goto done2;
2168 break;
2170 case Sstring:
2171 stringterm = FETCH_CHAR (from);
2172 while (1)
2174 if (from == stop) goto lose;
2175 temp_pos = from;
2176 DEC_POS (temp_pos);
2177 UPDATE_SYNTAX_TABLE_BACKWARD (temp_pos);
2178 if (!char_quoted (temp_pos)
2179 && stringterm == FETCH_CHAR (temp_pos))
2180 break;
2181 from = temp_pos;
2183 DEC_POS (from);
2184 if (!depth && sexpflag) goto done2;
2185 break;
2189 /* Reached start of buffer. Error if within object, return nil if between */
2190 if (depth) goto lose;
2192 immediate_quit = 0;
2193 return Qnil;
2195 done2:
2196 count++;
2200 immediate_quit = 0;
2201 XSETFASTINT (val, from);
2202 return val;
2204 lose:
2205 Fsignal (Qscan_error,
2206 Fcons (build_string ("Unbalanced parentheses"),
2207 Fcons (make_number (last_good),
2208 Fcons (make_number (from), Qnil))));
2210 /* NOTREACHED */
2213 DEFUN ("scan-lists", Fscan_lists, Sscan_lists, 3, 3, 0,
2214 "Scan from character number FROM by COUNT lists.\n\
2215 Returns the character number of the position thus found.\n\
2217 If DEPTH is nonzero, paren depth begins counting from that value,\n\
2218 only places where the depth in parentheses becomes zero\n\
2219 are candidates for stopping; COUNT such places are counted.\n\
2220 Thus, a positive value for DEPTH means go out levels.\n\
2222 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.\n\
2224 If the beginning or end of (the accessible part of) the buffer is reached\n\
2225 and the depth is wrong, an error is signaled.\n\
2226 If the depth is right but the count is not used up, nil is returned.")
2227 (from, count, depth)
2228 Lisp_Object from, count, depth;
2230 CHECK_NUMBER (from, 0);
2231 CHECK_NUMBER (count, 1);
2232 CHECK_NUMBER (depth, 2);
2234 return scan_lists (XINT (from), XINT (count), XINT (depth), 0);
2237 DEFUN ("scan-sexps", Fscan_sexps, Sscan_sexps, 2, 2, 0,
2238 "Scan from character number FROM by COUNT balanced expressions.\n\
2239 If COUNT is negative, scan backwards.\n\
2240 Returns the character number of the position thus found.\n\
2242 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.\n\
2244 If the beginning or end of (the accessible part of) the buffer is reached\n\
2245 in the middle of a parenthetical grouping, an error is signaled.\n\
2246 If the beginning or end is reached between groupings\n\
2247 but before count is used up, nil is returned.")
2248 (from, count)
2249 Lisp_Object from, count;
2251 CHECK_NUMBER (from, 0);
2252 CHECK_NUMBER (count, 1);
2254 return scan_lists (XINT (from), XINT (count), 0, 1);
2257 DEFUN ("backward-prefix-chars", Fbackward_prefix_chars, Sbackward_prefix_chars,
2258 0, 0, 0,
2259 "Move point backward over any number of chars with prefix syntax.\n\
2260 This includes chars with \"quote\" or \"prefix\" syntax (' or p).")
2263 int beg = BEGV;
2264 int pos = PT;
2265 int c;
2266 int temp_pos = pos;
2268 if (pos > beg)
2270 SETUP_SYNTAX_TABLE (pos, -1);
2272 DEC_POS (temp_pos);
2274 while (pos > beg && !char_quoted (temp_pos)
2275 /* Previous statement updates syntax table. */
2276 && ((c = FETCH_CHAR (temp_pos), SYNTAX (c) == Squote)
2277 || SYNTAX_PREFIX (c)))
2279 pos = temp_pos;
2280 DEC_POS (temp_pos);
2283 SET_PT (pos);
2285 return Qnil;
2288 /* Parse forward from FROM to END,
2289 assuming that FROM has state OLDSTATE (nil means FROM is start of function),
2290 and return a description of the state of the parse at END.
2291 If STOPBEFORE is nonzero, stop at the start of an atom.
2292 If COMMENTSTOP is nonzero, stop at the start of a comment. */
2294 static void
2295 scan_sexps_forward (stateptr, from, end, targetdepth,
2296 stopbefore, oldstate, commentstop)
2297 struct lisp_parse_state *stateptr;
2298 register int from;
2299 int end, targetdepth, stopbefore;
2300 Lisp_Object oldstate;
2301 int commentstop;
2303 struct lisp_parse_state state;
2305 register enum syntaxcode code;
2306 struct level { int last, prev; };
2307 struct level levelstart[100];
2308 register struct level *curlevel = levelstart;
2309 struct level *endlevel = levelstart + 100;
2310 int prev;
2311 register int depth; /* Paren depth of current scanning location.
2312 level - levelstart equals this except
2313 when the depth becomes negative. */
2314 int mindepth; /* Lowest DEPTH value seen. */
2315 int start_quoted = 0; /* Nonzero means starting after a char quote */
2316 Lisp_Object tem;
2317 int prev_from; /* Keep one character before FROM. */
2318 int boundary_stop = commentstop == -1;
2319 int nofence;
2321 prev_from = from;
2322 DEC_POS (prev_from);
2324 /* Use this macro instead of `from++'. */
2325 #define INC_FROM do { prev_from = from; INC_POS (from); } while (0)
2327 immediate_quit = 1;
2328 QUIT;
2330 SETUP_SYNTAX_TABLE (from, 1);
2332 if (NILP (oldstate))
2334 depth = 0;
2335 state.instring = -1;
2336 state.incomment = 0;
2337 state.comstyle = 0; /* comment style a by default. */
2338 state.comstr_start = -1; /* no comment/string seen. */
2340 else
2342 tem = Fcar (oldstate);
2343 if (!NILP (tem))
2344 depth = XINT (tem);
2345 else
2346 depth = 0;
2348 oldstate = Fcdr (oldstate);
2349 oldstate = Fcdr (oldstate);
2350 oldstate = Fcdr (oldstate);
2351 tem = Fcar (oldstate);
2352 /* Check whether we are inside string_fence-style string: */
2353 state.instring = ( !NILP (tem)
2354 ? ( INTEGERP (tem) ? XINT (tem) : ST_STRING_STYLE)
2355 : -1);
2357 oldstate = Fcdr (oldstate);
2358 tem = Fcar (oldstate);
2359 state.incomment = !NILP (tem);
2361 oldstate = Fcdr (oldstate);
2362 tem = Fcar (oldstate);
2363 start_quoted = !NILP (tem);
2365 /* if the eight element of the list is nil, we are in comment
2366 style a. If it is non-nil, we are in comment style b */
2367 oldstate = Fcdr (oldstate);
2368 oldstate = Fcdr (oldstate);
2369 tem = Fcar (oldstate);
2370 state.comstyle = NILP (tem) ? 0 : ( EQ (tem, Qsyntax_table)
2371 ? ST_COMMENT_STYLE : 1 );
2373 oldstate = Fcdr (oldstate);
2374 tem = Fcar (oldstate);
2375 state.comstr_start = NILP (tem) ? -1 : XINT (tem) ;
2377 state.quoted = 0;
2378 mindepth = depth;
2380 curlevel->prev = -1;
2381 curlevel->last = -1;
2383 /* Enter the loop at a place appropriate for initial state. */
2385 if (state.incomment) goto startincomment;
2386 if (state.instring >= 0)
2388 nofence = state.instring != ST_STRING_STYLE;
2389 if (start_quoted) goto startquotedinstring;
2390 goto startinstring;
2392 if (start_quoted) goto startquoted;
2394 while (from < end)
2396 UPDATE_SYNTAX_TABLE_FORWARD (from);
2397 code = SYNTAX (FETCH_CHAR (from));
2398 INC_FROM;
2399 if (code == Scomment)
2400 state.comstr_start = prev_from;
2402 else if (code == Scomment_fence
2403 || (from < end && SYNTAX_COMSTART_FIRST (FETCH_CHAR (prev_from))
2404 && SYNTAX_COMSTART_SECOND (FETCH_CHAR (from))))
2406 /* Record the comment style we have entered so that only
2407 the comment-end sequence of the same style actually
2408 terminates the comment section. */
2409 state.comstyle = ( code == Scomment_fence
2410 ? ST_COMMENT_STYLE
2411 : SYNTAX_COMMENT_STYLE (FETCH_CHAR (from)));
2412 state.comstr_start = prev_from;
2413 if (code != Scomment_fence) INC_FROM;
2414 code = Scomment;
2417 if (SYNTAX_PREFIX (FETCH_CHAR (prev_from)))
2418 continue;
2419 switch (SWITCH_ENUM_CAST (code))
2421 case Sescape:
2422 case Scharquote:
2423 if (stopbefore) goto stop; /* this arg means stop at sexp start */
2424 curlevel->last = prev_from;
2425 startquoted:
2426 if (from == end) goto endquoted;
2427 INC_FROM;
2428 goto symstarted;
2429 /* treat following character as a word constituent */
2430 case Sword:
2431 case Ssymbol:
2432 if (stopbefore) goto stop; /* this arg means stop at sexp start */
2433 curlevel->last = prev_from;
2434 symstarted:
2435 while (from < end)
2437 UPDATE_SYNTAX_TABLE_FORWARD (from);
2438 switch (SWITCH_ENUM_CAST (SYNTAX (FETCH_CHAR (from))))
2440 case Scharquote:
2441 case Sescape:
2442 INC_FROM;
2443 if (from == end) goto endquoted;
2444 break;
2445 case Sword:
2446 case Ssymbol:
2447 case Squote:
2448 break;
2449 default:
2450 goto symdone;
2452 INC_FROM;
2454 symdone:
2455 curlevel->prev = curlevel->last;
2456 break;
2458 startincomment:
2459 if (commentstop == 1)
2460 goto done;
2461 if (from != BEGV)
2463 /* Enter the loop in the middle so that we find
2464 a 2-char comment ender if we start in the middle of it. */
2465 prev = FETCH_CHAR (prev_from);
2466 goto startincomment_1;
2468 /* At beginning of buffer, enter the loop the ordinary way. */
2469 state.incomment = 1;
2470 goto commentloop;
2472 case Scomment:
2473 state.incomment = 1;
2474 if (commentstop || boundary_stop) goto done;
2475 commentloop:
2476 while (1)
2478 if (from == end) goto done;
2479 UPDATE_SYNTAX_TABLE_FORWARD (from);
2480 prev = FETCH_CHAR (from);
2481 if (SYNTAX (prev) == Sendcomment
2482 && SYNTAX_COMMENT_STYLE (prev) == state.comstyle)
2483 /* Only terminate the comment section if the endcomment
2484 of the same style as the start sequence has been
2485 encountered. */
2486 break;
2487 if (state.comstyle == ST_COMMENT_STYLE
2488 && SYNTAX (prev) == Scomment_fence)
2489 break;
2490 INC_FROM;
2491 startincomment_1:
2492 if (from < end && SYNTAX_COMEND_FIRST (prev)
2493 && SYNTAX_COMEND_SECOND (FETCH_CHAR (from))
2494 && SYNTAX_COMMENT_STYLE (prev) == state.comstyle)
2495 /* Only terminate the comment section if the end-comment
2496 sequence of the same style as the start sequence has
2497 been encountered. */
2498 { break; }
2500 INC_FROM;
2501 state.incomment = 0;
2502 state.comstyle = 0; /* reset the comment style */
2503 if (boundary_stop) goto done;
2504 break;
2506 case Sopen:
2507 if (stopbefore) goto stop; /* this arg means stop at sexp start */
2508 depth++;
2509 /* curlevel++->last ran into compiler bug on Apollo */
2510 curlevel->last = prev_from;
2511 if (++curlevel == endlevel)
2512 error ("Nesting too deep for parser");
2513 curlevel->prev = -1;
2514 curlevel->last = -1;
2515 if (targetdepth == depth) goto done;
2516 break;
2518 case Sclose:
2519 depth--;
2520 if (depth < mindepth)
2521 mindepth = depth;
2522 if (curlevel != levelstart)
2523 curlevel--;
2524 curlevel->prev = curlevel->last;
2525 if (targetdepth == depth) goto done;
2526 break;
2528 case Sstring:
2529 case Sstring_fence:
2530 state.comstr_start = from - 1;
2531 if (stopbefore) goto stop; /* this arg means stop at sexp start */
2532 curlevel->last = prev_from;
2533 state.instring = (code == Sstring
2534 ? (FETCH_CHAR (prev_from))
2535 : ST_STRING_STYLE);
2536 if (boundary_stop) goto done;
2537 startinstring:
2539 nofence = state.instring != ST_STRING_STYLE;
2541 while (1)
2543 int c;
2545 if (from >= end) goto done;
2546 c = FETCH_CHAR (from);
2547 if (nofence && c == state.instring) break;
2548 UPDATE_SYNTAX_TABLE_FORWARD (from);
2549 switch (SWITCH_ENUM_CAST (SYNTAX (c)))
2551 case Sstring_fence:
2552 if (!nofence) goto string_end;
2553 break;
2554 case Scharquote:
2555 case Sescape:
2556 INC_FROM;
2557 startquotedinstring:
2558 if (from >= end) goto endquoted;
2560 INC_FROM;
2563 string_end:
2564 state.instring = -1;
2565 curlevel->prev = curlevel->last;
2566 INC_FROM;
2567 if (boundary_stop) goto done;
2568 break;
2570 case Smath:
2571 break;
2574 goto done;
2576 stop: /* Here if stopping before start of sexp. */
2577 from = prev_from; /* We have just fetched the char that starts it; */
2578 goto done; /* but return the position before it. */
2580 endquoted:
2581 state.quoted = 1;
2582 done:
2583 state.depth = depth;
2584 state.mindepth = mindepth;
2585 state.thislevelstart = curlevel->prev;
2586 state.prevlevelstart
2587 = (curlevel == levelstart) ? -1 : (curlevel - 1)->last;
2588 state.location = from;
2589 immediate_quit = 0;
2591 *stateptr = state;
2594 /* This comment supplies the doc string for parse-partial-sexp,
2595 for make-docfile to see. We cannot put this in the real DEFUN
2596 due to limits in the Unix cpp.
2598 DEFUN ("parse-partial-sexp", Ffoo, Sfoo, 2, 6, 0,
2599 "Parse Lisp syntax starting at FROM until TO; return status of parse at TO.\n\
2600 Parsing stops at TO or when certain criteria are met;\n\
2601 point is set to where parsing stops.\n\
2602 If fifth arg STATE is omitted or nil,\n\
2603 parsing assumes that FROM is the beginning of a function.\n\
2604 Value is a list of nine elements describing final state of parsing:\n\
2605 0. depth in parens.\n\
2606 1. character address of start of innermost containing list; nil if none.\n\
2607 2. character address of start of last complete sexp terminated.\n\
2608 3. non-nil if inside a string.\n\
2609 (it is the character that will terminate the string,\n\
2610 or t if the string should be terminated by an explicit\n\
2611 `syntax-table' property.)\n\
2612 4. t if inside a comment.\n\
2613 5. t if following a quote character.\n\
2614 6. the minimum paren-depth encountered during this scan.\n\
2615 7. t if in a comment of style `b'; `syntax-table' if given by an explicit\n\
2616 `syntax-table' property.\n\
2617 8. character address of start of last comment or string; nil if none.\n\
2618 If third arg TARGETDEPTH is non-nil, parsing stops if the depth\n\
2619 in parentheses becomes equal to TARGETDEPTH.\n\
2620 Fourth arg STOPBEFORE non-nil means stop when come to\n\
2621 any character that starts a sexp.\n\
2622 Fifth arg STATE is an eight-list like what this function returns.\n\
2623 It is used to initialize the state of the parse. Its second and third
2624 elements are ignored.
2625 Sixth arg COMMENTSTOP non-nil means stop at the start of a comment. If\n\
2626 it is `syntax-table', stop after the start of a comment or a string, or\n\
2627 after end of a comment or a string.")
2628 (from, to, targetdepth, stopbefore, state, commentstop)
2631 DEFUN ("parse-partial-sexp", Fparse_partial_sexp, Sparse_partial_sexp, 2, 6, 0,
2632 0 /* See immediately above */)
2633 (from, to, targetdepth, stopbefore, oldstate, commentstop)
2634 Lisp_Object from, to, targetdepth, stopbefore, oldstate, commentstop;
2636 struct lisp_parse_state state;
2637 int target;
2639 if (!NILP (targetdepth))
2641 CHECK_NUMBER (targetdepth, 3);
2642 target = XINT (targetdepth);
2644 else
2645 target = -100000; /* We won't reach this depth */
2647 validate_region (&from, &to);
2648 scan_sexps_forward (&state, XINT (from), XINT (to),
2649 target, !NILP (stopbefore), oldstate,
2650 (NILP (commentstop)
2651 ? 0 : (EQ (commentstop, Qsyntax_table) ? -1 : 1)));
2653 SET_PT (state.location);
2655 return Fcons (make_number (state.depth),
2656 Fcons (state.prevlevelstart < 0 ? Qnil : make_number (state.prevlevelstart),
2657 Fcons (state.thislevelstart < 0 ? Qnil : make_number (state.thislevelstart),
2658 Fcons (state.instring >= 0
2659 ? (state.instring == ST_STRING_STYLE
2660 ? Qt : make_number (state.instring)) : Qnil,
2661 Fcons (state.incomment ? Qt : Qnil,
2662 Fcons (state.quoted ? Qt : Qnil,
2663 Fcons (make_number (state.mindepth),
2664 Fcons (state.comstyle
2665 ? (state.comstyle == ST_COMMENT_STYLE
2666 ? Qsyntax_table : Qt) : Qnil,
2667 Fcons (state.comstr_start != -1 ? make_number (state.comstr_start) : Qnil,
2668 Qnil)))))))));
2671 init_syntax_once ()
2673 register int i, c;
2674 Lisp_Object temp;
2676 /* This has to be done here, before we call Fmake_char_table. */
2677 Qsyntax_table = intern ("syntax-table");
2678 staticpro (&Qsyntax_table);
2680 /* Intern this now in case it isn't already done.
2681 Setting this variable twice is harmless.
2682 But don't staticpro it here--that is done in alloc.c. */
2683 Qchar_table_extra_slots = intern ("char-table-extra-slots");
2685 /* Create objects which can be shared among syntax tables. */
2686 Vsyntax_code_object = Fmake_vector (13, Qnil);
2687 for (i = 0; i < XVECTOR (Vsyntax_code_object)->size; i++)
2688 XVECTOR (Vsyntax_code_object)->contents[i]
2689 = Fcons (make_number (i), Qnil);
2691 /* Now we are ready to set up this property, so we can
2692 create syntax tables. */
2693 Fput (Qsyntax_table, Qchar_table_extra_slots, make_number (0));
2695 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Swhitespace];
2697 Vstandard_syntax_table = Fmake_char_table (Qsyntax_table, temp);
2699 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Sword];
2700 for (i = 'a'; i <= 'z'; i++)
2701 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
2702 for (i = 'A'; i <= 'Z'; i++)
2703 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
2704 for (i = '0'; i <= '9'; i++)
2705 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
2707 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '$', temp);
2708 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '%', temp);
2710 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '(',
2711 Fcons (make_number (Sopen), make_number (')')));
2712 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ')',
2713 Fcons (make_number (Sclose), make_number ('(')));
2714 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '[',
2715 Fcons (make_number (Sopen), make_number (']')));
2716 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ']',
2717 Fcons (make_number (Sclose), make_number ('[')));
2718 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '{',
2719 Fcons (make_number (Sopen), make_number ('}')));
2720 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '}',
2721 Fcons (make_number (Sclose), make_number ('{')));
2722 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '"',
2723 Fcons (make_number ((int) Sstring), Qnil));
2724 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\\',
2725 Fcons (make_number ((int) Sescape), Qnil));
2727 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Ssymbol];
2728 for (i = 0; i < 10; i++)
2730 c = "_-+*/&|<>="[i];
2731 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
2734 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Spunct];
2735 for (i = 0; i < 12; i++)
2737 c = ".,;:?!#@~^'`"[i];
2738 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
2742 syms_of_syntax ()
2744 Qsyntax_table_p = intern ("syntax-table-p");
2745 staticpro (&Qsyntax_table_p);
2747 staticpro (&Vsyntax_code_object);
2749 Qscan_error = intern ("scan-error");
2750 staticpro (&Qscan_error);
2751 Fput (Qscan_error, Qerror_conditions,
2752 Fcons (Qerror, Qnil));
2753 Fput (Qscan_error, Qerror_message,
2754 build_string ("Scan error"));
2756 DEFVAR_BOOL ("parse-sexp-ignore-comments", &parse_sexp_ignore_comments,
2757 "Non-nil means `forward-sexp', etc., should treat comments as whitespace.");
2759 DEFVAR_BOOL ("parse-sexp-lookup-properties", &parse_sexp_lookup_properties,
2760 "Non-nil means `forward-sexp', etc., grant `syntax-table' property.\n\
2761 The value of this property should be either a syntax table, or a cons\n\
2762 of the form (SYNTAXCODE . MATCHCHAR), SYNTAXCODE being the numeric\n\
2763 syntax code, MATCHCHAR being nil or the character to match (which is\n\
2764 relevant only for open/close type.");
2766 words_include_escapes = 0;
2767 DEFVAR_BOOL ("words-include-escapes", &words_include_escapes,
2768 "Non-nil means `forward-word', etc., should treat escape chars part of words.");
2770 defsubr (&Ssyntax_table_p);
2771 defsubr (&Ssyntax_table);
2772 defsubr (&Sstandard_syntax_table);
2773 defsubr (&Scopy_syntax_table);
2774 defsubr (&Sset_syntax_table);
2775 defsubr (&Schar_syntax);
2776 defsubr (&Smatching_paren);
2777 defsubr (&Smodify_syntax_entry);
2778 defsubr (&Sdescribe_syntax);
2780 defsubr (&Sforward_word);
2782 defsubr (&Sskip_chars_forward);
2783 defsubr (&Sskip_chars_backward);
2784 defsubr (&Sskip_syntax_forward);
2785 defsubr (&Sskip_syntax_backward);
2787 defsubr (&Sforward_comment);
2788 defsubr (&Sscan_lists);
2789 defsubr (&Sscan_sexps);
2790 defsubr (&Sbackward_prefix_chars);
2791 defsubr (&Sparse_partial_sexp);