new version
[emacs.git] / src / syntax.c
blob371ddedbd74b3b279f2ef8489df37ec7879f7999
1 /* GNU Emacs routines to deal with syntax tables; also word and list parsing.
2 Copyright (C) 1985, 87, 93, 94, 95, 1997 Free Software Foundation, Inc.
4 This file is part of GNU Emacs.
6 GNU Emacs is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
11 GNU Emacs is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GNU Emacs; see the file COPYING. If not, write to
18 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
22 #include <config.h>
23 #include <ctype.h>
24 #include "lisp.h"
25 #include "commands.h"
26 #include "buffer.h"
27 #include "charset.h"
29 /* Make syntax table lookup grant data in gl_state. */
30 #define SYNTAX_ENTRY_VIA_PROPERTY
32 #include "syntax.h"
33 #include "intervals.h"
35 /* We use these constants in place for comment-style and
36 string-ender-char to distinguish comments/strings started by
37 comment_fence and string_fence codes. */
39 #define ST_COMMENT_STYLE (256 + 1)
40 #define ST_STRING_STYLE (256 + 2)
41 #include "category.h"
43 Lisp_Object Qsyntax_table_p, Qsyntax_table, Qscan_error;
45 static void scan_sexps_forward ();
46 static int char_quoted ();
48 int words_include_escapes;
49 int parse_sexp_lookup_properties;
51 /* Used as a temporary in SYNTAX_ENTRY and other macros in syntax.h,
52 if not compiled with GCC. No need to mark it, since it is used
53 only very temporarily. */
54 Lisp_Object syntax_temp;
56 /* This is the internal form of the parse state used in parse-partial-sexp. */
58 struct lisp_parse_state
60 int depth; /* Depth at end of parsing. */
61 int instring; /* -1 if not within string, else desired terminator. */
62 int incomment; /* Nonzero if within a comment at end of parsing. */
63 int comstyle; /* comment style a=0, or b=1, or ST_COMMENT_STYLE. */
64 int quoted; /* Nonzero if just after an escape char at end of parsing */
65 int thislevelstart; /* Char number of most recent start-of-expression at current level */
66 int prevlevelstart; /* Char number of start of containing expression */
67 int location; /* Char number at which parsing stopped. */
68 int mindepth; /* Minimum depth seen while scanning. */
69 int comstr_start; /* Position just after last comment/string starter. */
72 /* These variables are a cache for finding the start of a defun.
73 find_start_pos is the place for which the defun start was found.
74 find_start_value is the defun start position found for it.
75 find_start_buffer is the buffer it was found in.
76 find_start_begv is the BEGV value when it was found.
77 find_start_modiff is the value of MODIFF when it was found. */
79 static int find_start_pos;
80 static int find_start_value;
81 static struct buffer *find_start_buffer;
82 static int find_start_begv;
83 static int find_start_modiff;
86 struct gl_state_s gl_state; /* Global state of syntax parser. */
88 INTERVAL interval_of ();
89 #define INTERVALS_AT_ONCE 10 /* 1 + max-number of intervals
90 to scan to property-change. */
92 /*
93 Update gl_state to an appropriate interval which contains POS. The
94 sign of COUNT give the relative position of POS wrt the previously
95 valid interval. If INIT, only [be]_property fields of gl_state are
96 valid at start, the rest is filled basing on OBJECT.
98 `gl_state.*_i' are the intervals, and pos is further in the search
99 direction than the intervals - or in an interval. We update the
100 current syntax-table basing on the property of this interval, and
101 update the interval to start further than POS - or be
102 NULL_INTERVAL. We also update lim_property to be the next value of
103 pos to call this subroutine again - or be before/after the
104 start/end of OBJECT. */
106 void
107 update_syntax_table (pos, count, init, object)
108 int pos, count, init;
109 Lisp_Object object;
111 Lisp_Object tmp_table;
112 int cnt = 0, doing_extra = 0, invalidate = 1;
113 INTERVAL i, oldi;
115 if (init)
117 gl_state.start = gl_state.b_property;
118 gl_state.stop = gl_state.e_property;
119 gl_state.forward_i = interval_of (pos, object);
120 i = gl_state.backward_i = gl_state.forward_i;
121 gl_state.left_ok = gl_state.right_ok = 1;
122 invalidate = 0;
123 if (NULL_INTERVAL_P (i))
124 return;
125 gl_state.b_property = i->position - 1;
126 gl_state.e_property = INTERVAL_LAST_POS (i);
127 goto update;
129 oldi = i = count > 0 ? gl_state.forward_i : gl_state.backward_i;
131 /* We are guarantied to be called with pos either in i, of further off. */
132 if (NULL_INTERVAL_P (i))
133 error ("Error in syntax_table logic for to-the-end intervals");
134 else if (pos < i->position) /* Move left. */
136 if (count > 0)
137 error ("Error in syntax_table logic for intervals <-.");
138 /* Update the interval. */
139 i = update_interval (i, pos);
140 if (oldi->position != INTERVAL_LAST_POS (i))
142 invalidate = 0;
143 gl_state.right_ok = 1; /* Invalidate the other end. */
144 gl_state.forward_i = i;
145 gl_state.e_property = INTERVAL_LAST_POS (i);
148 else if (pos >= INTERVAL_LAST_POS (i)) /* Move right. */
150 if (count < 0)
151 error ("Error in syntax_table logic for intervals ->.");
152 /* Update the interval. */
153 i = update_interval (i, pos);
154 if (i->position != INTERVAL_LAST_POS (oldi))
156 invalidate = 0;
157 gl_state.left_ok = 1; /* Invalidate the other end. */
158 gl_state.backward_i = i;
159 gl_state.b_property = i->position - 1;
162 else if (count > 0 ? gl_state.right_ok : gl_state.left_ok)
164 /* We do not need to recalculate tmp_table. */
165 tmp_table = gl_state.old_prop;
168 update:
169 tmp_table = textget (i->plist, Qsyntax_table);
171 if (invalidate)
172 invalidate = !EQ (tmp_table, gl_state.old_prop); /* Need to invalidate? */
174 if (invalidate) /* Did not get to adjacent interval. */
175 { /* with the same table => */
176 /* invalidate the old range. */
177 if (count > 0)
179 gl_state.backward_i = i;
180 gl_state.left_ok = 1; /* Invalidate the other end. */
181 gl_state.b_property = i->position - 1;
183 else
185 gl_state.forward_i = i;
186 gl_state.right_ok = 1; /* Invalidate the other end. */
187 gl_state.e_property = INTERVAL_LAST_POS (i);
191 gl_state.current_syntax_table = tmp_table;
192 gl_state.old_prop = tmp_table;
193 if (EQ (Fsyntax_table_p (tmp_table), Qt))
195 gl_state.use_global = 0;
197 else if (CONSP (tmp_table))
199 gl_state.use_global = 1;
200 gl_state.global_code = tmp_table;
202 else
204 gl_state.use_global = 0;
205 gl_state.current_syntax_table = current_buffer->syntax_table;
208 while (!NULL_INTERVAL_P (i))
210 if (cnt && !EQ (tmp_table, textget (i->plist, Qsyntax_table)))
212 if (count > 0)
213 gl_state.right_ok = 0;
214 else
215 gl_state.left_ok = 0;
216 break;
218 else if (cnt == INTERVALS_AT_ONCE)
220 if (count > 0)
221 gl_state.right_ok = 1;
222 else
223 gl_state.left_ok = 1;
224 break;
226 cnt++;
227 i = count > 0 ? next_interval (i) : previous_interval (i);
229 if (NULL_INTERVAL_P (i))
230 { /* This property goes to the end. */
231 if (count > 0)
232 gl_state.e_property = gl_state.stop;
233 else
234 gl_state.b_property = gl_state.start;
236 else
238 if (count > 0)
240 gl_state.e_property = i->position;
241 gl_state.forward_i = i;
243 else
245 gl_state.b_property = i->position + LENGTH (i) - 1;
246 gl_state.backward_i = i;
251 /* Returns TRUE if char at POS is quoted.
252 Global syntax-table data should be set up already to be good at pos
253 or after. On return global syntax data is good for lookup at POS. */
255 static int
256 char_quoted (pos)
257 register int pos;
259 register enum syntaxcode code;
260 register int beg = BEGV;
261 register int quoted = 0;
262 int temp_pos = pos;
264 DEC_POS (temp_pos);
265 while (temp_pos >= beg
266 && ( UPDATE_SYNTAX_TABLE_BACKWARD (temp_pos), 1)
267 && ((code = SYNTAX (FETCH_CHAR (temp_pos))) == Scharquote
268 || code == Sescape))
270 temp_pos--, quoted = !quoted;
272 UPDATE_SYNTAX_TABLE (pos);
273 return quoted;
276 /* Find a defun-start that is the last one before POS (or nearly the last).
277 We record what we find, so that another call in the same area
278 can return the same value right away.
280 There is no promise at which position the global syntax data is
281 valid on return from the subroutine, so the caller should explicitly
282 update the global data. */
284 static int
285 find_defun_start (pos)
286 int pos;
288 int tem;
289 int shortage;
291 /* Use previous finding, if it's valid and applies to this inquiry. */
292 if (current_buffer == find_start_buffer
293 /* Reuse the defun-start even if POS is a little farther on.
294 POS might be in the next defun, but that's ok.
295 Our value may not be the best possible, but will still be usable. */
296 && pos <= find_start_pos + 1000
297 && pos >= find_start_value
298 && BEGV == find_start_begv
299 && MODIFF == find_start_modiff)
300 return find_start_value;
302 /* Back up to start of line. */
303 tem = scan_buffer ('\n', pos, BEGV, -1, &shortage, 1);
305 /* We optimize syntax-table lookup for rare updates. Thus we accept
306 only those `^\s(' which are good in global _and_ text-property
307 syntax-tables. */
308 gl_state.current_syntax_table = current_buffer->syntax_table;
309 gl_state.use_global = 0;
310 while (tem > BEGV)
312 /* Open-paren at start of line means we found our defun-start. */
313 if (SYNTAX (FETCH_CHAR (tem)) == Sopen)
315 SETUP_SYNTAX_TABLE (tem + 1, -1); /* Try again... */
316 if (SYNTAX (FETCH_CHAR (tem)) == Sopen)
317 break;
318 /* Now fallback to the default value. */
319 gl_state.current_syntax_table = current_buffer->syntax_table;
320 gl_state.use_global = 0;
322 /* Move to beg of previous line. */
323 tem = scan_buffer ('\n', tem, BEGV, -2, &shortage, 1);
326 /* Record what we found, for the next try. */
327 find_start_value = tem;
328 find_start_buffer = current_buffer;
329 find_start_modiff = MODIFF;
330 find_start_begv = BEGV;
331 find_start_pos = pos;
333 return find_start_value;
336 /* Checks whether FROM is the end of comment. Does not try to
337 fallback more than to STOP.
338 Returns -1 if cannot find comment ending at from, otherwise start
339 of comment. Global syntax data remains valid for
340 backward search starting at the returned value (or at FROM, if
341 the search was not successful). */
343 static int
344 back_comment (from, stop, comstyle)
345 int from, stop, comstyle;
347 /* Look back, counting the parity of string-quotes,
348 and recording the comment-starters seen.
349 When we reach a safe place, assume that's not in a string;
350 then step the main scan to the earliest comment-starter seen
351 an even number of string quotes away from the safe place.
353 OFROM[I] is position of the earliest comment-starter seen
354 which is I+2X quotes from the comment-end.
355 PARITY is current parity of quotes from the comment end. */
356 int parity = 0;
357 int my_stringend = 0;
358 int string_lossage = 0;
359 int comment_end = from;
360 int comstart_pos = 0;
361 int comstart_parity = 0;
362 int scanstart = from - 1;
363 register enum syntaxcode code;
364 int c;
366 /* At beginning of range to scan, we're outside of strings;
367 that determines quote parity to the comment-end. */
368 while (from != stop)
370 /* Move back and examine a character. */
371 DEC_POS (from);
372 UPDATE_SYNTAX_TABLE_BACKWARD (from);
374 c = FETCH_CHAR (from);
375 code = SYNTAX (c);
377 /* If this char is the second of a 2-char comment sequence,
378 back up and give the pair the appropriate syntax. */
379 if (from > stop && SYNTAX_COMEND_SECOND (c)
380 && SYNTAX_COMEND_FIRST (FETCH_CHAR (from - 1)))
382 code = Sendcomment;
383 DEC_POS (from);
384 /* This is apparently the best we can do: */
385 UPDATE_SYNTAX_TABLE_BACKWARD (from);
386 c = FETCH_CHAR (from);
389 /* If this char starts a 2-char comment start sequence,
390 treat it like a 1-char comment starter. */
391 if (from < scanstart && SYNTAX_COMSTART_SECOND (c)
392 && SYNTAX_COMSTART_FIRST (FETCH_CHAR (from - 1))
393 && comstyle == SYNTAX_COMMENT_STYLE (c))
395 code = Scomment;
396 DEC_POS (from);
397 /* This is apparently the best we can do: */
398 UPDATE_SYNTAX_TABLE_BACKWARD (from);
399 c = FETCH_CHAR (from);
402 /* Ignore escaped characters. */
403 if (char_quoted (from))
404 continue;
406 /* Track parity of quotes. */
407 if (code == Sstring)
409 parity ^= 1;
410 if (my_stringend == 0)
411 my_stringend = c;
412 /* If we have two kinds of string delimiters.
413 There's no way to grok this scanning backwards. */
414 else if (my_stringend != c)
415 string_lossage = 1;
418 if (code == Sstring_fence || code == Scomment_fence)
420 parity ^= 1;
421 if (my_stringend == 0)
422 my_stringend =
423 code == Sstring_fence ? ST_STRING_STYLE : ST_COMMENT_STYLE;
424 /* If we have two kinds of string delimiters.
425 There's no way to grok this scanning backwards. */
426 else if (my_stringend != (code == Sstring_fence
427 ? ST_STRING_STYLE : ST_COMMENT_STYLE))
428 string_lossage = 1;
431 /* Record comment-starters according to that
432 quote-parity to the comment-end. */
433 if (code == Scomment)
435 comstart_parity = parity;
436 comstart_pos = from;
439 /* If we find another earlier comment-ender,
440 any comment-starts earlier than that don't count
441 (because they go with the earlier comment-ender). */
442 if (code == Sendcomment
443 && SYNTAX_COMMENT_STYLE (FETCH_CHAR (from)) == comstyle)
444 break;
446 /* Assume a defun-start point is outside of strings. */
447 if (code == Sopen
448 && (from == stop || FETCH_CHAR (from - 1) == '\n'))
449 break;
452 if (comstart_pos == 0)
454 from = comment_end;
455 UPDATE_SYNTAX_TABLE_FORWARD (comment_end - 1);
457 /* If the earliest comment starter
458 is followed by uniform paired string quotes or none,
459 we know it can't be inside a string
460 since if it were then the comment ender would be inside one.
461 So it does start a comment. Skip back to it. */
462 else if (comstart_parity == 0 && !string_lossage)
464 from = comstart_pos;
465 /* Globals are correct now. */
467 else
469 /* We had two kinds of string delimiters mixed up
470 together. Decode this going forwards.
471 Scan fwd from the previous comment ender
472 to the one in question; this records where we
473 last passed a comment starter. */
474 struct lisp_parse_state state;
475 scan_sexps_forward (&state, find_defun_start (comment_end),
476 comment_end - 1, -10000, 0, Qnil, 0);
477 if (state.incomment)
479 /* scan_sexps_forward changed the direction of search in
480 global variables, so we need to update it completely. */
482 from = state.comstr_start;
484 else
486 from = comment_end;
488 UPDATE_SYNTAX_TABLE_FORWARD (from - 1);
491 return from;
494 DEFUN ("syntax-table-p", Fsyntax_table_p, Ssyntax_table_p, 1, 1, 0,
495 "Return t if OBJECT is a syntax table.\n\
496 Currently, any char-table counts as a syntax table.")
497 (object)
498 Lisp_Object object;
500 if (CHAR_TABLE_P (object)
501 && EQ (XCHAR_TABLE (object)->purpose, Qsyntax_table))
502 return Qt;
503 return Qnil;
506 static void
507 check_syntax_table (obj)
508 Lisp_Object obj;
510 if (!(CHAR_TABLE_P (obj)
511 && EQ (XCHAR_TABLE (obj)->purpose, Qsyntax_table)))
512 wrong_type_argument (Qsyntax_table_p, obj);
515 DEFUN ("syntax-table", Fsyntax_table, Ssyntax_table, 0, 0, 0,
516 "Return the current syntax table.\n\
517 This is the one specified by the current buffer.")
520 return current_buffer->syntax_table;
523 DEFUN ("standard-syntax-table", Fstandard_syntax_table,
524 Sstandard_syntax_table, 0, 0, 0,
525 "Return the standard syntax table.\n\
526 This is the one used for new buffers.")
529 return Vstandard_syntax_table;
532 DEFUN ("copy-syntax-table", Fcopy_syntax_table, Scopy_syntax_table, 0, 1, 0,
533 "Construct a new syntax table and return it.\n\
534 It is a copy of the TABLE, which defaults to the standard syntax table.")
535 (table)
536 Lisp_Object table;
538 Lisp_Object copy;
540 if (!NILP (table))
541 check_syntax_table (table);
542 else
543 table = Vstandard_syntax_table;
545 copy = Fcopy_sequence (table);
547 /* Only the standard syntax table should have a default element.
548 Other syntax tables should inherit from parents instead. */
549 XCHAR_TABLE (copy)->defalt = Qnil;
551 /* Copied syntax tables should all have parents.
552 If we copied one with no parent, such as the standard syntax table,
553 use the standard syntax table as the copy's parent. */
554 if (NILP (XCHAR_TABLE (copy)->parent))
555 Fset_char_table_parent (copy, Vstandard_syntax_table);
556 return copy;
559 DEFUN ("set-syntax-table", Fset_syntax_table, Sset_syntax_table, 1, 1, 0,
560 "Select a new syntax table for the current buffer.\n\
561 One argument, a syntax table.")
562 (table)
563 Lisp_Object table;
565 check_syntax_table (table);
566 current_buffer->syntax_table = table;
567 /* Indicate that this buffer now has a specified syntax table. */
568 current_buffer->local_var_flags
569 |= XFASTINT (buffer_local_flags.syntax_table);
570 return table;
573 /* Convert a letter which signifies a syntax code
574 into the code it signifies.
575 This is used by modify-syntax-entry, and other things. */
577 unsigned char syntax_spec_code[0400] =
578 { 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
579 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
580 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
581 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
582 (char) Swhitespace, (char) Scomment_fence, (char) Sstring, 0377,
583 (char) Smath, 0377, 0377, (char) Squote,
584 (char) Sopen, (char) Sclose, 0377, 0377,
585 0377, (char) Swhitespace, (char) Spunct, (char) Scharquote,
586 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
587 0377, 0377, 0377, 0377,
588 (char) Scomment, 0377, (char) Sendcomment, 0377,
589 (char) Sinherit, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* @, A ... */
590 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
591 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
592 0377, 0377, 0377, 0377, (char) Sescape, 0377, 0377, (char) Ssymbol,
593 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* `, a, ... */
594 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
595 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
596 0377, 0377, 0377, 0377, (char) Sstring_fence, 0377, 0377, 0377
599 /* Indexed by syntax code, give the letter that describes it. */
601 char syntax_code_spec[16] =
603 ' ', '.', 'w', '_', '(', ')', '\'', '\"', '$', '\\', '/', '<', '>', '@',
604 '!', '|'
607 /* Indexed by syntax code, give the object (cons of syntax code and
608 nil) to be stored in syntax table. Since these objects can be
609 shared among syntax tables, we generate them in advance. By
610 sharing objects, the function `describe-syntax' can give a more
611 compact listing. */
612 static Lisp_Object Vsyntax_code_object;
615 /* Look up the value for CHARACTER in syntax table TABLE's parent
616 and its parents. SYNTAX_ENTRY calls this, when TABLE itself has nil
617 for CHARACTER. It's actually used only when not compiled with GCC. */
619 Lisp_Object
620 syntax_parent_lookup (table, character)
621 Lisp_Object table;
622 int character;
624 Lisp_Object value;
626 while (1)
628 table = XCHAR_TABLE (table)->parent;
629 if (NILP (table))
630 return Qnil;
632 value = XCHAR_TABLE (table)->contents[character];
633 if (!NILP (value))
634 return value;
638 DEFUN ("char-syntax", Fchar_syntax, Schar_syntax, 1, 1, 0,
639 "Return the syntax code of CHARACTER, described by a character.\n\
640 For example, if CHARACTER is a word constituent,\n\
641 the character `w' is returned.\n\
642 The characters that correspond to various syntax codes\n\
643 are listed in the documentation of `modify-syntax-entry'.")
644 (character)
645 Lisp_Object character;
647 int char_int;
648 gl_state.current_syntax_table = current_buffer->syntax_table;
650 gl_state.use_global = 0;
651 CHECK_NUMBER (character, 0);
652 char_int = XINT (character);
653 return make_number (syntax_code_spec[(int) SYNTAX (char_int)]);
656 DEFUN ("matching-paren", Fmatching_paren, Smatching_paren, 1, 1, 0,
657 "Return the matching parenthesis of CHARACTER, or nil if none.")
658 (character)
659 Lisp_Object character;
661 int char_int, code;
662 gl_state.current_syntax_table = current_buffer->syntax_table;
663 gl_state.use_global = 0;
664 CHECK_NUMBER (character, 0);
665 char_int = XINT (character);
666 code = SYNTAX (char_int);
667 if (code == Sopen || code == Sclose)
668 return SYNTAX_MATCH (char_int);
669 return Qnil;
672 /* This comment supplies the doc string for modify-syntax-entry,
673 for make-docfile to see. We cannot put this in the real DEFUN
674 due to limits in the Unix cpp.
676 DEFUN ("modify-syntax-entry", foo, bar, 2, 3, 0,
677 "Set syntax for character CHAR according to string S.\n\
678 The syntax is changed only for table TABLE, which defaults to\n\
679 the current buffer's syntax table.\n\
680 The first character of S should be one of the following:\n\
681 Space or - whitespace syntax. w word constituent.\n\
682 _ symbol constituent. . punctuation.\n\
683 ( open-parenthesis. ) close-parenthesis.\n\
684 \" string quote. \\ escape.\n\
685 $ paired delimiter. ' expression quote or prefix operator.\n\
686 < comment starter. > comment ender.\n\
687 / character-quote. @ inherit from `standard-syntax-table'.\n\
689 Only single-character comment start and end sequences are represented thus.\n\
690 Two-character sequences are represented as described below.\n\
691 The second character of S is the matching parenthesis,\n\
692 used only if the first character is `(' or `)'.\n\
693 Any additional characters are flags.\n\
694 Defined flags are the characters 1, 2, 3, 4, b, and p.\n\
695 1 means CHAR is the start of a two-char comment start sequence.\n\
696 2 means CHAR is the second character of such a sequence.\n\
697 3 means CHAR is the start of a two-char comment end sequence.\n\
698 4 means CHAR is the second character of such a sequence.\n\
700 There can be up to two orthogonal comment sequences. This is to support\n\
701 language modes such as C++. By default, all comment sequences are of style\n\
702 a, but you can set the comment sequence style to b (on the second character\n\
703 of a comment-start, or the first character of a comment-end sequence) using\n\
704 this flag:\n\
705 b means CHAR is part of comment sequence b.\n\
707 p means CHAR is a prefix character for `backward-prefix-chars';\n\
708 such characters are treated as whitespace when they occur\n\
709 between expressions.")
710 (char, s, table)
713 DEFUN ("modify-syntax-entry", Fmodify_syntax_entry, Smodify_syntax_entry, 2, 3,
714 /* I really don't know why this is interactive
715 help-form should at least be made useful whilst reading the second arg
717 "cSet syntax for character: \nsSet syntax for %s to: ",
718 0 /* See immediately above */)
719 (c, newentry, syntax_table)
720 Lisp_Object c, newentry, syntax_table;
722 register unsigned char *p;
723 register enum syntaxcode code;
724 int val;
725 Lisp_Object match;
727 CHECK_NUMBER (c, 0);
728 CHECK_STRING (newentry, 1);
730 if (NILP (syntax_table))
731 syntax_table = current_buffer->syntax_table;
732 else
733 check_syntax_table (syntax_table);
735 p = XSTRING (newentry)->data;
736 code = (enum syntaxcode) syntax_spec_code[*p++];
737 if (((int) code & 0377) == 0377)
738 error ("invalid syntax description letter: %c", p[-1]);
740 if (code == Sinherit)
742 SET_RAW_SYNTAX_ENTRY (syntax_table, XINT (c), Qnil);
743 return Qnil;
746 if (*p)
748 int len;
749 int character = STRING_CHAR_AND_LENGTH (p, XSTRING (newentry)->size - 1,
750 len);
751 XSETINT (match, character);
752 if (XFASTINT (match) == ' ')
753 match = Qnil;
754 p += len;
756 else
757 match = Qnil;
759 val = (int) code;
760 while (*p)
761 switch (*p++)
763 case '1':
764 val |= 1 << 16;
765 break;
767 case '2':
768 val |= 1 << 17;
769 break;
771 case '3':
772 val |= 1 << 18;
773 break;
775 case '4':
776 val |= 1 << 19;
777 break;
779 case 'p':
780 val |= 1 << 20;
781 break;
783 case 'b':
784 val |= 1 << 21;
785 break;
788 if (val < XVECTOR (Vsyntax_code_object)->size && NILP (match))
789 newentry = XVECTOR (Vsyntax_code_object)->contents[val];
790 else
791 /* Since we can't use a shared object, let's make a new one. */
792 newentry = Fcons (make_number (val), match);
794 SET_RAW_SYNTAX_ENTRY (syntax_table, XINT (c), newentry);
796 return Qnil;
799 /* Dump syntax table to buffer in human-readable format */
801 static void
802 describe_syntax (value)
803 Lisp_Object value;
805 register enum syntaxcode code;
806 char desc, match, start1, start2, end1, end2, prefix, comstyle;
807 char str[2];
808 Lisp_Object first, match_lisp;
810 Findent_to (make_number (16), make_number (1));
812 if (NILP (value))
814 insert_string ("default\n");
815 return;
818 if (CHAR_TABLE_P (value))
820 insert_string ("deeper char-table ...\n");
821 return;
824 if (!CONSP (value))
826 insert_string ("invalid\n");
827 return;
830 first = XCONS (value)->car;
831 match_lisp = XCONS (value)->cdr;
833 if (!INTEGERP (first) || !(NILP (match_lisp) || INTEGERP (match_lisp)))
835 insert_string ("invalid\n");
836 return;
839 code = (enum syntaxcode) (XINT (first) & 0377);
840 start1 = (XINT (first) >> 16) & 1;
841 start2 = (XINT (first) >> 17) & 1;
842 end1 = (XINT (first) >> 18) & 1;
843 end2 = (XINT (first) >> 19) & 1;
844 prefix = (XINT (first) >> 20) & 1;
845 comstyle = (XINT (first) >> 21) & 1;
847 if ((int) code < 0 || (int) code >= (int) Smax)
849 insert_string ("invalid");
850 return;
852 desc = syntax_code_spec[(int) code];
854 str[0] = desc, str[1] = 0;
855 insert (str, 1);
857 if (NILP (match_lisp))
858 insert (" ", 1);
859 else
860 insert_char (XINT (match_lisp));
862 if (start1)
863 insert ("1", 1);
864 if (start2)
865 insert ("2", 1);
867 if (end1)
868 insert ("3", 1);
869 if (end2)
870 insert ("4", 1);
872 if (prefix)
873 insert ("p", 1);
874 if (comstyle)
875 insert ("b", 1);
877 insert_string ("\twhich means: ");
879 switch (SWITCH_ENUM_CAST (code))
881 case Swhitespace:
882 insert_string ("whitespace"); break;
883 case Spunct:
884 insert_string ("punctuation"); break;
885 case Sword:
886 insert_string ("word"); break;
887 case Ssymbol:
888 insert_string ("symbol"); break;
889 case Sopen:
890 insert_string ("open"); break;
891 case Sclose:
892 insert_string ("close"); break;
893 case Squote:
894 insert_string ("quote"); break;
895 case Sstring:
896 insert_string ("string"); break;
897 case Smath:
898 insert_string ("math"); break;
899 case Sescape:
900 insert_string ("escape"); break;
901 case Scharquote:
902 insert_string ("charquote"); break;
903 case Scomment:
904 insert_string ("comment"); break;
905 case Sendcomment:
906 insert_string ("endcomment"); break;
907 default:
908 insert_string ("invalid");
909 return;
912 if (!NILP (match_lisp))
914 insert_string (", matches ");
915 insert_char (XINT (match_lisp));
918 if (start1)
919 insert_string (",\n\t is the first character of a comment-start sequence");
920 if (start2)
921 insert_string (",\n\t is the second character of a comment-start sequence");
923 if (end1)
924 insert_string (",\n\t is the first character of a comment-end sequence");
925 if (end2)
926 insert_string (",\n\t is the second character of a comment-end sequence");
927 if (comstyle)
928 insert_string (" (comment style b)");
930 if (prefix)
931 insert_string (",\n\t is a prefix character for `backward-prefix-chars'");
933 insert_string ("\n");
936 static Lisp_Object
937 describe_syntax_1 (vector)
938 Lisp_Object vector;
940 struct buffer *old = current_buffer;
941 set_buffer_internal (XBUFFER (Vstandard_output));
942 describe_vector (vector, Qnil, describe_syntax, 0, Qnil, Qnil, (int *) 0, 0);
943 while (! NILP (XCHAR_TABLE (vector)->parent))
945 vector = XCHAR_TABLE (vector)->parent;
946 insert_string ("\nThe parent syntax table is:");
947 describe_vector (vector, Qnil, describe_syntax, 0, Qnil, Qnil,
948 (int *) 0, 0);
951 call0 (intern ("help-mode"));
952 set_buffer_internal (old);
953 return Qnil;
956 DEFUN ("describe-syntax", Fdescribe_syntax, Sdescribe_syntax, 0, 0, "",
957 "Describe the syntax specifications in the syntax table.\n\
958 The descriptions are inserted in a buffer, which is then displayed.")
961 internal_with_output_to_temp_buffer
962 ("*Help*", describe_syntax_1, current_buffer->syntax_table);
964 return Qnil;
967 int parse_sexp_ignore_comments;
969 /* Return the position across COUNT words from FROM.
970 If that many words cannot be found before the end of the buffer, return 0.
971 COUNT negative means scan backward and stop at word beginning. */
973 scan_words (from, count)
974 register int from, count;
976 register int beg = BEGV;
977 register int end = ZV;
978 register enum syntaxcode code;
979 int ch0, ch1;
980 int temp_pos;
982 immediate_quit = 1;
983 QUIT;
985 SETUP_SYNTAX_TABLE (from, count);
987 while (count > 0)
989 while (1)
991 if (from == end)
993 immediate_quit = 0;
994 return 0;
996 UPDATE_SYNTAX_TABLE_FORWARD (from);
997 ch0 = FETCH_CHAR (from);
998 code = SYNTAX (ch0);
999 INC_POS (from);
1000 if (words_include_escapes
1001 && (code == Sescape || code == Scharquote))
1002 break;
1003 if (code == Sword)
1004 break;
1006 /* Now CH0 is a character which begins a word and FROM is the
1007 position of the next character. */
1008 while (1)
1010 if (from == end) break;
1011 UPDATE_SYNTAX_TABLE_FORWARD (from);
1012 ch1 = FETCH_CHAR (from);
1013 code = SYNTAX (ch1);
1014 if (!(words_include_escapes
1015 && (code == Sescape || code == Scharquote)))
1016 if (code != Sword || WORD_BOUNDARY_P (ch0, ch1))
1017 break;
1018 INC_POS (from);
1019 ch0 = ch1;
1021 count--;
1023 while (count < 0)
1025 while (1)
1027 if (from == beg)
1029 immediate_quit = 0;
1030 return 0;
1032 DEC_POS (from);
1033 UPDATE_SYNTAX_TABLE_BACKWARD (from);
1034 ch1 = FETCH_CHAR (from);
1035 code = SYNTAX (ch1);
1036 if (words_include_escapes
1037 && (code == Sescape || code == Scharquote))
1038 break;
1039 if (code == Sword)
1040 break;
1042 /* Now CH1 is a character which ends a word and FROM is the
1043 position of it. */
1044 while (1)
1046 if (from == beg) break;
1047 temp_pos = from;
1048 DEC_POS (temp_pos);
1049 UPDATE_SYNTAX_TABLE_BACKWARD (from);
1050 ch0 = FETCH_CHAR (temp_pos);
1051 code = SYNTAX (ch0);
1052 if (!(words_include_escapes
1053 && (code == Sescape || code == Scharquote)))
1054 if (code != Sword || WORD_BOUNDARY_P (ch0, ch1))
1055 break;
1056 from = temp_pos;
1057 ch1 = ch0;
1059 count++;
1062 immediate_quit = 0;
1064 return from;
1067 DEFUN ("forward-word", Fforward_word, Sforward_word, 1, 1, "p",
1068 "Move point forward ARG words (backward if ARG is negative).\n\
1069 Normally returns t.\n\
1070 If an edge of the buffer is reached, point is left there\n\
1071 and nil is returned.")
1072 (count)
1073 Lisp_Object count;
1075 int val;
1076 CHECK_NUMBER (count, 0);
1078 if (!(val = scan_words (PT, XINT (count))))
1080 SET_PT (XINT (count) > 0 ? ZV : BEGV);
1081 return Qnil;
1083 SET_PT (val);
1084 return Qt;
1087 Lisp_Object skip_chars ();
1089 DEFUN ("skip-chars-forward", Fskip_chars_forward, Sskip_chars_forward, 1, 2, 0,
1090 "Move point forward, stopping before a char not in STRING, or at pos LIM.\n\
1091 STRING is like the inside of a `[...]' in a regular expression\n\
1092 except that `]' is never special and `\\' quotes `^', `-' or `\\'.\n\
1093 Thus, with arg \"a-zA-Z\", this skips letters stopping before first nonletter.\n\
1094 With arg \"^a-zA-Z\", skips nonletters stopping before first letter.\n\
1095 Returns the distance traveled, either zero or positive.")
1096 (string, lim)
1097 Lisp_Object string, lim;
1099 return skip_chars (1, 0, string, lim);
1102 DEFUN ("skip-chars-backward", Fskip_chars_backward, Sskip_chars_backward, 1, 2, 0,
1103 "Move point backward, stopping after a char not in STRING, or at pos LIM.\n\
1104 See `skip-chars-forward' for details.\n\
1105 Returns the distance traveled, either zero or negative.")
1106 (string, lim)
1107 Lisp_Object string, lim;
1109 return skip_chars (0, 0, string, lim);
1112 DEFUN ("skip-syntax-forward", Fskip_syntax_forward, Sskip_syntax_forward, 1, 2, 0,
1113 "Move point forward across chars in specified syntax classes.\n\
1114 SYNTAX is a string of syntax code characters.\n\
1115 Stop before a char whose syntax is not in SYNTAX, or at position LIM.\n\
1116 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.\n\
1117 This function returns the distance traveled, either zero or positive.")
1118 (syntax, lim)
1119 Lisp_Object syntax, lim;
1121 return skip_chars (1, 1, syntax, lim);
1124 DEFUN ("skip-syntax-backward", Fskip_syntax_backward, Sskip_syntax_backward, 1, 2, 0,
1125 "Move point backward across chars in specified syntax classes.\n\
1126 SYNTAX is a string of syntax code characters.\n\
1127 Stop on reaching a char whose syntax is not in SYNTAX, or at position LIM.\n\
1128 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.\n\
1129 This function returns the distance traveled, either zero or negative.")
1130 (syntax, lim)
1131 Lisp_Object syntax, lim;
1133 return skip_chars (0, 1, syntax, lim);
1136 Lisp_Object
1137 skip_chars (forwardp, syntaxp, string, lim)
1138 int forwardp, syntaxp;
1139 Lisp_Object string, lim;
1141 register unsigned char *p, *pend;
1142 register unsigned int c;
1143 register int ch;
1144 unsigned char fastmap[0400];
1145 /* If SYNTAXP is 0, STRING may contain multi-byte form of characters
1146 of which codes don't fit in FASTMAP. In that case, we set the
1147 first byte of multibyte form (i.e. base leading-code) in FASTMAP
1148 and set the actual ranges of characters in CHAR_RANGES. In the
1149 form "X-Y" of STRING, both X and Y must belong to the same
1150 character set because a range striding across character sets is
1151 meaningless. */
1152 int *char_ranges
1153 = (int *) alloca (XSTRING (string)->size * (sizeof (int)) * 2);
1154 int n_char_ranges = 0;
1155 int negate = 0;
1156 register int i;
1157 int multibyte = !NILP (current_buffer->enable_multibyte_characters);
1159 CHECK_STRING (string, 0);
1161 if (NILP (lim))
1162 XSETINT (lim, forwardp ? ZV : BEGV);
1163 else
1164 CHECK_NUMBER_COERCE_MARKER (lim, 1);
1166 /* In any case, don't allow scan outside bounds of buffer. */
1167 /* jla turned this off, for no known reason.
1168 bfox turned the ZV part on, and rms turned the
1169 BEGV part back on. */
1170 if (XINT (lim) > ZV)
1171 XSETFASTINT (lim, ZV);
1172 if (XINT (lim) < BEGV)
1173 XSETFASTINT (lim, BEGV);
1175 p = XSTRING (string)->data;
1176 pend = p + XSTRING (string)->size;
1177 bzero (fastmap, sizeof fastmap);
1179 if (p != pend && *p == '^')
1181 negate = 1; p++;
1184 /* Find the characters specified and set their elements of fastmap.
1185 If syntaxp, each character counts as itself.
1186 Otherwise, handle backslashes and ranges specially. */
1188 while (p != pend)
1190 c = *p;
1191 if (multibyte)
1193 ch = STRING_CHAR (p, pend - p);
1194 p += BYTES_BY_CHAR_HEAD (*p);
1196 else
1198 ch = c;
1199 p++;
1201 if (syntaxp)
1202 fastmap[syntax_spec_code[c]] = 1;
1203 else
1205 if (c == '\\')
1207 if (p == pend) break;
1208 c = *p++;
1210 if (p != pend && *p == '-')
1212 unsigned int ch2;
1214 p++;
1215 if (p == pend) break;
1216 if (SINGLE_BYTE_CHAR_P (ch))
1217 while (c <= *p)
1219 fastmap[c] = 1;
1220 c++;
1222 else
1224 fastmap[c] = 1; /* C is the base leading-code. */
1225 ch2 = STRING_CHAR (p, pend - p);
1226 if (ch <= ch2)
1227 char_ranges[n_char_ranges++] = ch,
1228 char_ranges[n_char_ranges++] = ch2;
1230 p += multibyte ? BYTES_BY_CHAR_HEAD (*p) : 1;
1232 else
1234 fastmap[c] = 1;
1235 if (!SINGLE_BYTE_CHAR_P (ch))
1237 char_ranges[n_char_ranges++] = ch;
1238 char_ranges[n_char_ranges++] = ch;
1244 /* If ^ was the first character, complement the fastmap. In
1245 addition, as all multibyte characters have possibility of
1246 matching, set all entries for base leading codes, which is
1247 harmless even if SYNTAXP is 1. */
1249 if (negate)
1250 for (i = 0; i < sizeof fastmap; i++)
1252 if (!multibyte || !BASE_LEADING_CODE_P (i))
1253 fastmap[i] ^= 1;
1254 else
1255 fastmap[i] = 1;
1259 int start_point = PT;
1260 int pos = PT;
1262 immediate_quit = 1;
1263 if (syntaxp)
1265 SETUP_SYNTAX_TABLE (pos, forwardp ? 1 : -1);
1266 if (forwardp)
1268 if (multibyte)
1270 while (pos < XINT (lim)
1271 && fastmap[(int) SYNTAX (FETCH_CHAR (pos))])
1273 INC_POS (pos);
1274 UPDATE_SYNTAX_TABLE_FORWARD (pos);
1277 else
1279 while (pos < XINT (lim)
1280 && fastmap[(int) SYNTAX (FETCH_BYTE (pos))])
1282 pos++;
1283 UPDATE_SYNTAX_TABLE_FORWARD (pos);
1287 else
1289 if (multibyte)
1291 while (pos > XINT (lim))
1293 int savepos = pos;
1294 DEC_POS (pos);
1295 UPDATE_SYNTAX_TABLE_BACKWARD (pos);
1296 if (!fastmap[(int) SYNTAX (FETCH_CHAR (pos))])
1298 pos = savepos;
1299 break;
1303 else
1305 while (pos > XINT (lim))
1307 pos--;
1308 UPDATE_SYNTAX_TABLE_BACKWARD (pos);
1309 if (!fastmap[(int) SYNTAX (FETCH_BYTE (pos))])
1311 pos++;
1312 break;
1318 else
1320 if (forwardp)
1322 if (multibyte)
1323 while (pos < XINT (lim) && fastmap[(c = FETCH_BYTE (pos))])
1325 if (!BASE_LEADING_CODE_P (c))
1326 pos++;
1327 else if (n_char_ranges)
1329 /* We much check CHAR_RANGES for a multibyte
1330 character. */
1331 ch = FETCH_MULTIBYTE_CHAR (pos);
1332 for (i = 0; i < n_char_ranges; i += 2)
1333 if ((ch >= char_ranges[i] && ch <= char_ranges[i + 1]))
1334 break;
1335 if (!(negate ^ (i < n_char_ranges)))
1336 break;
1338 INC_POS (pos);
1340 else
1342 if (!negate) break;
1343 INC_POS (pos);
1346 else
1347 while (pos < XINT (lim) && fastmap[FETCH_BYTE (pos)])
1348 pos++;
1350 else
1352 if (multibyte)
1353 while (pos > XINT (lim))
1355 int savepos = pos;
1356 DEC_POS (pos);
1357 if (fastmap[(c = FETCH_BYTE (pos))])
1359 if (!BASE_LEADING_CODE_P (c))
1361 else if (n_char_ranges)
1363 /* We much check CHAR_RANGES for a multibyte
1364 character. */
1365 ch = FETCH_MULTIBYTE_CHAR (pos);
1366 for (i = 0; i < n_char_ranges; i += 2)
1367 if (ch >= char_ranges[i] && ch <= char_ranges[i + 1])
1368 break;
1369 if (!(negate ^ (i < n_char_ranges)))
1371 pos = savepos;
1372 break;
1375 else
1376 if (!negate)
1378 pos = savepos;
1379 break;
1382 else
1384 pos = savepos;
1385 break;
1388 else
1389 while (pos > XINT (lim) && fastmap[FETCH_BYTE (pos - 1)])
1390 pos--;
1394 if (multibyte
1395 /* INC_POS or DEC_POS might have moved POS over LIM. */
1396 && (forwardp ? (pos > XINT (lim)) : (pos < XINT (lim))))
1397 pos = XINT (lim);
1399 SET_PT (pos);
1400 immediate_quit = 0;
1402 return make_number (PT - start_point);
1406 DEFUN ("forward-comment", Fforward_comment, Sforward_comment, 1, 1, 0,
1407 "Move forward across up to N comments. If N is negative, move backward.\n\
1408 Stop scanning if we find something other than a comment or whitespace.\n\
1409 Set point to where scanning stops.\n\
1410 If N comments are found as expected, with nothing except whitespace\n\
1411 between them, return t; otherwise return nil.")
1412 (count)
1413 Lisp_Object count;
1415 register int from;
1416 register int stop;
1417 register int c, c1;
1418 register enum syntaxcode code;
1419 int comstyle = 0; /* style of comment encountered */
1420 int found;
1421 int count1;
1422 int temp_pos;
1424 CHECK_NUMBER (count, 0);
1425 count1 = XINT (count);
1426 stop = count1 > 0 ? ZV : BEGV;
1428 immediate_quit = 1;
1429 QUIT;
1431 from = PT;
1433 SETUP_SYNTAX_TABLE (from, count1);
1434 while (count1 > 0)
1438 if (from == stop)
1440 SET_PT (from);
1441 immediate_quit = 0;
1442 return Qnil;
1444 UPDATE_SYNTAX_TABLE_FORWARD (from);
1445 c = FETCH_CHAR (from);
1446 code = SYNTAX (c);
1447 INC_POS (from);
1448 comstyle = 0;
1449 if (from < stop && SYNTAX_COMSTART_FIRST (c)
1450 && (c1 = FETCH_CHAR (from),
1451 SYNTAX_COMSTART_SECOND (c1)))
1453 /* We have encountered a comment start sequence and we
1454 are ignoring all text inside comments. We must record
1455 the comment style this sequence begins so that later,
1456 only a comment end of the same style actually ends
1457 the comment section. */
1458 code = Scomment;
1459 comstyle = SYNTAX_COMMENT_STYLE (c1);
1460 INC_POS (from);
1463 while (code == Swhitespace || code == Sendcomment);
1464 if (code != Scomment && code != Scomment_fence)
1466 immediate_quit = 0;
1467 DEC_POS (from);
1468 SET_PT (from);
1469 return Qnil;
1471 /* We're at the start of a comment. */
1472 while (1)
1474 if (from == stop)
1476 immediate_quit = 0;
1477 SET_PT (from);
1478 return Qnil;
1480 UPDATE_SYNTAX_TABLE_FORWARD (from);
1481 c = FETCH_CHAR (from);
1482 INC_POS (from);
1483 if (SYNTAX (c) == Sendcomment
1484 && SYNTAX_COMMENT_STYLE (c) == comstyle)
1485 /* we have encountered a comment end of the same style
1486 as the comment sequence which began this comment
1487 section */
1488 break;
1489 if (SYNTAX (c) == Scomment_fence
1490 && comstyle == ST_COMMENT_STYLE)
1491 /* we have encountered a comment end of the same style
1492 as the comment sequence which began this comment
1493 section. */
1494 break;
1495 if (from < stop && SYNTAX_COMEND_FIRST (c)
1496 && (c1 = FETCH_CHAR (from),
1497 SYNTAX_COMEND_SECOND (c1))
1498 && SYNTAX_COMMENT_STYLE (c) == comstyle)
1499 /* we have encountered a comment end of the same style
1500 as the comment sequence which began this comment
1501 section */
1502 { INC_POS (from); break; }
1504 /* We have skipped one comment. */
1505 count1--;
1508 while (count1 < 0)
1510 while (1)
1512 int quoted;
1513 if (from <= stop)
1515 SET_PT (stop);
1516 immediate_quit = 0;
1517 return Qnil;
1520 DEC_POS (from);
1521 quoted = char_quoted (from);
1522 if (quoted)
1524 DEC_POS (from);
1525 goto leave; /* ????? XXXXX */
1527 UPDATE_SYNTAX_TABLE_BACKWARD (from);
1528 c = FETCH_CHAR (from);
1529 code = SYNTAX (c);
1530 comstyle = 0;
1531 if (code == Sendcomment)
1532 comstyle = SYNTAX_COMMENT_STYLE (c);
1533 temp_pos = from;
1534 DEC_POS (temp_pos);
1535 if (from > stop && SYNTAX_COMEND_SECOND (c)
1536 && (c1 = FETCH_CHAR (temp_pos),
1537 SYNTAX_COMEND_FIRST (c1))
1538 && !char_quoted (temp_pos))
1540 /* We must record the comment style encountered so that
1541 later, we can match only the proper comment begin
1542 sequence of the same style. */
1543 code = Sendcomment;
1544 comstyle = SYNTAX_COMMENT_STYLE (c1);
1545 from = temp_pos;
1547 if (from > stop && SYNTAX_COMSTART_SECOND (c)
1548 && (c1 = FETCH_CHAR (temp_pos),
1549 SYNTAX_COMSTART_FIRST (c1))
1550 && !char_quoted (temp_pos))
1552 /* We must record the comment style encountered so that
1553 later, we can match only the proper comment begin
1554 sequence of the same style. */
1555 code = Scomment;
1556 from = temp_pos;
1559 if (code == Scomment_fence)
1561 /* Skip until first preceding unquoted comment_fence. */
1562 int found = 0, ini = from;
1564 while (--from != stop)
1566 UPDATE_SYNTAX_TABLE_BACKWARD (from);
1567 c = FETCH_CHAR (from);
1568 if (SYNTAX (c) == Scomment_fence && !char_quoted (from))
1570 found = 1;
1571 break;
1574 if (found == 0)
1576 from = ini; /* Set point to ini + 1. */
1577 goto leave;
1580 else if (code == Sendcomment)
1582 #if 0
1583 if (code != SYNTAX (c))
1584 /* For a two-char comment ender, we can assume
1585 it does end a comment. So scan back in a simple way. */
1587 if (from != stop) DEC_POS (from);
1588 while (1)
1590 if ((c = FETCH_CHAR (from),
1591 SYNTAX (c) == Scomment)
1592 && SYNTAX_COMMENT_STYLE (c) == comstyle)
1593 break;
1594 if (from == stop)
1596 immediate_quit = 0;
1597 SET_PT (from);
1598 return Qnil;
1600 DEC_POS (from);
1601 if (SYNTAX_COMSTART_SECOND (c)
1602 && (c1 = FETCH_CHAR (from),
1603 SYNTAX_COMSTART_FIRST (c1))
1604 && SYNTAX_COMMENT_STYLE (c) == comstyle
1605 && !char_quoted (from))
1606 break;
1608 break;
1610 #endif /* 0 */
1611 found = back_comment (from, stop, comstyle);
1612 if (found != -1) from = found;
1613 #if 0
1614 /* Look back, counting the parity of string-quotes,
1615 and recording the comment-starters seen.
1616 When we reach a safe place, assume that's not in a string;
1617 then step the main scan to the earliest comment-starter seen
1618 an even number of string quotes away from the safe place.
1620 OFROM[I] is position of the earliest comment-starter seen
1621 which is I+2X quotes from the comment-end.
1622 PARITY is current parity of quotes from the comment end. */
1624 int parity = 0;
1625 char my_stringend = 0;
1626 int string_lossage = 0;
1627 int comment_end = from;
1628 int comstart_pos = 0;
1629 int comstart_parity = 0;
1630 int scanstart = from;
1632 DEC_POS (scanstart);
1633 /* At beginning of range to scan, we're outside of strings;
1634 that determines quote parity to the comment-end. */
1635 while (from != stop)
1637 /* Move back and examine a character. */
1638 DEC_POS (from);
1640 UPDATE_SYNTAX_TABLE_BACKWARD (from);
1641 c = FETCH_CHAR (from);
1642 code = SYNTAX (c);
1644 /* If this char is the second of a 2-char comment sequence,
1645 back up and give the pair the appropriate syntax. */
1646 temp_pos = from;
1647 DEC_POS (temp_pos);
1648 if (from > stop && SYNTAX_COMEND_SECOND (c)
1649 && (c1 = FETCH_CHAR (temp_pos),
1650 SYNTAX_COMEND_FIRST (c1)))
1652 code = Sendcomment;
1653 from = temp_pos;
1654 c = c1;
1657 temp_pos = from;
1658 INC_POS (temp_pos);
1659 /* If this char starts a 2-char comment start sequence,
1660 treat it like a 1-char comment starter. */
1661 if (from < scanstart && SYNTAX_COMSTART_FIRST (c)
1662 && (c1 = FETCH_CHAR (temp_pos),
1663 SYNTAX_COMSTART_SECOND (c1))
1664 && comstyle == SYNTAX_COMMENT_STYLE (c1))
1665 code = Scomment;
1667 /* Ignore escaped characters. */
1668 if (char_quoted (from))
1669 continue;
1671 /* Track parity of quotes. */
1672 if (code == Sstring)
1674 parity ^= 1;
1675 if (my_stringend == 0)
1676 my_stringend = c;
1677 /* If we have two kinds of string delimiters.
1678 There's no way to grok this scanning backwards. */
1679 else if (my_stringend != c)
1680 string_lossage = 1;
1683 /* Record comment-starters according to that
1684 quote-parity to the comment-end. */
1685 if (code == Scomment)
1687 comstart_parity = parity;
1688 comstart_pos = from;
1691 /* If we find another earlier comment-ender,
1692 any comment-starts earlier than that don't count
1693 (because they go with the earlier comment-ender). */
1694 if (code == Sendcomment
1695 && SYNTAX_COMMENT_STYLE (FETCH_CHAR (from)) == comstyle)
1696 break;
1698 /* Assume a defun-start point is outside of strings. */
1699 if (code == Sopen
1700 && (from == stop || FETCH_BYTE (from - 1) == '\n'))
1701 break;
1704 if (comstart_pos == 0)
1705 from = comment_end;
1706 /* If the earliest comment starter
1707 is followed by uniform paired string quotes or none,
1708 we know it can't be inside a string
1709 since if it were then the comment ender would be inside one.
1710 So it does start a comment. Skip back to it. */
1711 else if (comstart_parity == 0 && !string_lossage)
1712 from = comstart_pos;
1713 else
1715 /* We had two kinds of string delimiters mixed up
1716 together. Decode this going forwards.
1717 Scan fwd from the previous comment ender
1718 to the one in question; this records where we
1719 last passed a comment starter. */
1720 struct lisp_parse_state state;
1721 scan_sexps_forward (&state, find_defun_start (comment_end),
1722 comment_end - 1, -10000, 0, Qnil, 0);
1723 if (state.incomment)
1724 from = state.comstr_start;
1725 else
1726 /* We can't grok this as a comment; scan it normally. */
1727 from = comment_end;
1730 #endif /* 0 */
1731 /* We have skipped one comment. */
1732 break;
1734 else if (code != Swhitespace && code != Scomment)
1736 leave:
1737 immediate_quit = 0;
1738 INC_POS (from);
1739 SET_PT (from);
1740 return Qnil;
1744 count1++;
1747 SET_PT (from);
1748 immediate_quit = 0;
1749 return Qt;
1752 Lisp_Object
1753 scan_lists (from, count, depth, sexpflag)
1754 register int from;
1755 int count, depth, sexpflag;
1757 Lisp_Object val;
1758 register int stop = count > 0 ? ZV : BEGV;
1759 register int c, c1;
1760 int stringterm;
1761 int quoted;
1762 int mathexit = 0;
1763 register enum syntaxcode code, temp_code;
1764 int min_depth = depth; /* Err out if depth gets less than this. */
1765 int comstyle = 0; /* style of comment encountered */
1766 int temp_pos;
1767 int last_good = from;
1768 int found;
1770 if (depth > 0) min_depth = 0;
1772 immediate_quit = 1;
1773 QUIT;
1775 SETUP_SYNTAX_TABLE (from, count);
1776 while (count > 0)
1778 while (from < stop)
1780 UPDATE_SYNTAX_TABLE_FORWARD (from);
1781 c = FETCH_CHAR (from);
1782 code = SYNTAX (c);
1783 if (depth == min_depth)
1784 last_good = from;
1785 INC_POS (from);
1786 UPDATE_SYNTAX_TABLE_FORWARD (from);
1787 if (from < stop && SYNTAX_COMSTART_FIRST (c)
1788 && SYNTAX_COMSTART_SECOND (FETCH_CHAR (from))
1789 && parse_sexp_ignore_comments)
1791 /* we have encountered a comment start sequence and we
1792 are ignoring all text inside comments. We must record
1793 the comment style this sequence begins so that later,
1794 only a comment end of the same style actually ends
1795 the comment section */
1796 code = Scomment;
1797 comstyle = SYNTAX_COMMENT_STYLE (FETCH_CHAR (from));
1798 INC_POS (from);
1801 UPDATE_SYNTAX_TABLE_FORWARD (from);
1802 if (SYNTAX_PREFIX (c))
1803 continue;
1805 switch (SWITCH_ENUM_CAST (code))
1807 case Sescape:
1808 case Scharquote:
1809 if (from == stop) goto lose;
1810 INC_POS (from);
1811 /* treat following character as a word constituent */
1812 case Sword:
1813 case Ssymbol:
1814 if (depth || !sexpflag) break;
1815 /* This word counts as a sexp; return at end of it. */
1816 while (from < stop)
1818 UPDATE_SYNTAX_TABLE_FORWARD (from);
1819 switch (SWITCH_ENUM_CAST (SYNTAX (FETCH_CHAR (from))))
1821 case Scharquote:
1822 case Sescape:
1823 INC_POS (from);
1824 if (from == stop) goto lose;
1825 break;
1826 case Sword:
1827 case Ssymbol:
1828 case Squote:
1829 break;
1830 default:
1831 goto done;
1833 INC_POS (from);
1835 goto done;
1837 case Scomment:
1838 case Scomment_fence:
1839 if (!parse_sexp_ignore_comments) break;
1840 while (1)
1842 if (from == stop)
1844 if (depth == 0)
1845 goto done;
1846 goto lose;
1848 UPDATE_SYNTAX_TABLE_FORWARD (from);
1849 c = FETCH_CHAR (from);
1850 if (code == Scomment
1851 ? (SYNTAX (c) == Sendcomment
1852 && SYNTAX_COMMENT_STYLE (c) == comstyle)
1853 : (SYNTAX (c) == Scomment_fence))
1854 /* we have encountered a comment end of the same style
1855 as the comment sequence which began this comment
1856 section */
1857 break;
1858 INC_POS (from);
1859 if (from < stop && SYNTAX_COMEND_FIRST (c)
1860 && SYNTAX_COMEND_SECOND (FETCH_CHAR (from))
1861 && SYNTAX_COMMENT_STYLE (c) == comstyle
1862 && code == Scomment)
1863 /* we have encountered a comment end of the same style
1864 as the comment sequence which began this comment
1865 section */
1866 { INC_POS (from); break; }
1868 break;
1870 case Smath:
1871 if (!sexpflag)
1872 break;
1873 if (from != stop && c == FETCH_CHAR (from))
1874 INC_POS (from);
1875 if (mathexit)
1877 mathexit = 0;
1878 goto close1;
1880 mathexit = 1;
1882 case Sopen:
1883 if (!++depth) goto done;
1884 break;
1886 case Sclose:
1887 close1:
1888 if (!--depth) goto done;
1889 if (depth < min_depth)
1890 Fsignal (Qscan_error,
1891 Fcons (build_string ("Containing expression ends prematurely"),
1892 Fcons (make_number (last_good),
1893 Fcons (make_number (from), Qnil))));
1894 break;
1896 case Sstring:
1897 case Sstring_fence:
1898 temp_pos = from;
1899 DEC_POS (temp_pos);
1900 stringterm = FETCH_CHAR (temp_pos);
1901 while (1)
1903 if (from >= stop) goto lose;
1904 UPDATE_SYNTAX_TABLE_FORWARD (from);
1905 if (code == Sstring
1906 ? (FETCH_CHAR (from) == stringterm)
1907 : SYNTAX (FETCH_CHAR (from)) == Sstring_fence)
1908 break;
1909 switch (SWITCH_ENUM_CAST (SYNTAX (FETCH_CHAR (from))))
1911 case Scharquote:
1912 case Sescape:
1913 INC_POS (from);
1915 INC_POS (from);
1917 INC_POS (from);
1918 if (!depth && sexpflag) goto done;
1919 break;
1923 /* Reached end of buffer. Error if within object, return nil if between */
1924 if (depth) goto lose;
1926 immediate_quit = 0;
1927 return Qnil;
1929 /* End of object reached */
1930 done:
1931 count--;
1934 while (count < 0)
1936 while (from > stop)
1938 DEC_POS (from);
1939 UPDATE_SYNTAX_TABLE_BACKWARD (from);
1940 if (quoted = char_quoted (from))
1942 DEC_POS (from);
1943 UPDATE_SYNTAX_TABLE_BACKWARD (from);
1945 c = FETCH_CHAR (from);
1946 code = SYNTAX (c);
1947 if (depth == min_depth)
1948 last_good = from;
1949 comstyle = 0;
1950 if (code == Sendcomment)
1951 comstyle = SYNTAX_COMMENT_STYLE (c);
1952 temp_pos = from;
1953 DEC_POS (temp_pos);
1954 if (from > stop && SYNTAX_COMEND_SECOND (c)
1955 && (c1 = FETCH_CHAR (temp_pos), SYNTAX_COMEND_FIRST (c1))
1956 && !char_quoted (temp_pos)
1957 && parse_sexp_ignore_comments)
1959 /* we must record the comment style encountered so that
1960 later, we can match only the proper comment begin
1961 sequence of the same style */
1962 code = Sendcomment;
1963 comstyle = SYNTAX_COMMENT_STYLE (c1);
1964 from = temp_pos;
1967 if (SYNTAX_PREFIX (c))
1968 continue;
1970 switch (SWITCH_ENUM_CAST (quoted ? Sword : code))
1972 case Sword:
1973 case Ssymbol:
1974 if (depth || !sexpflag) break;
1975 /* This word counts as a sexp; count object finished
1976 after passing it. */
1977 while (from > stop)
1979 temp_pos = from;
1980 DEC_POS (temp_pos);
1981 UPDATE_SYNTAX_TABLE_BACKWARD (temp_pos);
1982 quoted = char_quoted (temp_pos);
1983 if (quoted)
1985 from = temp_pos;
1986 DEC_POS (temp_pos);
1987 UPDATE_SYNTAX_TABLE_BACKWARD (temp_pos);
1989 c1 = FETCH_CHAR (temp_pos);
1990 temp_code = SYNTAX (c1);
1991 if (! (quoted || temp_code == Sword
1992 || temp_code == Ssymbol
1993 || temp_code == Squote))
1994 goto done2;
1995 from = temp_pos;
1997 goto done2;
1999 case Smath:
2000 if (!sexpflag)
2001 break;
2002 temp_pos = from;
2003 DEC_POS (temp_pos);
2004 UPDATE_SYNTAX_TABLE_BACKWARD (temp_pos);
2005 if (from != stop && c == FETCH_CHAR (temp_pos))
2006 from = temp_pos;
2007 if (mathexit)
2009 mathexit = 0;
2010 goto open2;
2012 mathexit = 1;
2014 case Sclose:
2015 if (!++depth) goto done2;
2016 break;
2018 case Sopen:
2019 open2:
2020 if (!--depth) goto done2;
2021 if (depth < min_depth)
2022 Fsignal (Qscan_error,
2023 Fcons (build_string ("Containing expression ends prematurely"),
2024 Fcons (make_number (last_good),
2025 Fcons (make_number (from), Qnil))));
2026 break;
2028 case Sendcomment:
2029 if (!parse_sexp_ignore_comments)
2030 break;
2031 #if 0
2032 if (code != SYNTAX (c))
2033 /* For a two-char comment ender, we can assume
2034 it does end a comment. So scan back in a simple way. */
2036 if (from != stop) DEC_POS (from);
2037 while (1)
2039 if (SYNTAX (c = FETCH_CHAR (from)) == Scomment
2040 && SYNTAX_COMMENT_STYLE (c) == comstyle)
2041 break;
2042 if (from == stop)
2044 if (depth == 0)
2045 goto done2;
2046 goto lose;
2048 DEC_POS (from);
2049 if (SYNTAX_COMSTART_SECOND (c)
2050 && SYNTAX_COMSTART_FIRST (FETCH_CHAR (from))
2051 && SYNTAX_COMMENT_STYLE (c) == comstyle
2052 && !char_quoted (from))
2053 break;
2055 break;
2057 #endif /* 0 */
2058 found = back_comment (from, stop, comstyle);
2059 if (found != -1) from = found;
2060 #if 0
2061 /* Look back, counting the parity of string-quotes,
2062 and recording the comment-starters seen.
2063 When we reach a safe place, assume that's not in a string;
2064 then step the main scan to the earliest comment-starter seen
2065 an even number of string quotes away from the safe place.
2067 OFROM[I] is position of the earliest comment-starter seen
2068 which is I+2X quotes from the comment-end.
2069 PARITY is current parity of quotes from the comment end. */
2071 int parity = 0;
2072 char my_stringend = 0;
2073 int string_lossage = 0;
2074 int comment_end = from;
2075 int comstart_pos = 0;
2076 int comstart_parity = 0;
2077 int scanstart = from;
2079 DEC_POS (scanstart);
2081 /* At beginning of range to scan, we're outside of strings;
2082 that determines quote parity to the comment-end. */
2083 while (from != stop)
2085 /* Move back and examine a character. */
2086 DEC_POS (from);
2088 c = FETCH_CHAR (from);
2089 code = SYNTAX (c);
2091 /* If this char is the second of a 2-char comment sequence,
2092 back up and give the pair the appropriate syntax. */
2093 temp_pos = from;
2094 DEC_POS (temp_pos);
2095 if (from > stop && SYNTAX_COMEND_SECOND (c)
2096 && (c1 = FETCH_CHAR (temp_pos),
2097 SYNTAX_COMEND_FIRST (c1)))
2099 code = Sendcomment;
2100 from = temp_pos;
2101 c = c1;
2104 /* If this char starts a 2-char comment start sequence,
2105 treat it like a 1-char comment starter. */
2106 temp_pos = from;
2107 INC_POS (temp_pos);
2108 if (from < scanstart && SYNTAX_COMSTART_FIRST (c)
2109 && (c1 = FETCH_CHAR (temp_pos),
2110 SYNTAX_COMSTART_SECOND (c1))
2111 && comstyle == SYNTAX_COMMENT_STYLE (c1))
2112 code = Scomment;
2114 /* Ignore escaped characters. */
2115 if (char_quoted (from))
2116 continue;
2118 /* Track parity of quotes. */
2119 if (code == Sstring)
2121 parity ^= 1;
2122 if (my_stringend == 0)
2123 my_stringend = c;
2124 /* If we have two kinds of string delimiters.
2125 There's no way to grok this scanning backwards. */
2126 else if (my_stringend != c)
2127 string_lossage = 1;
2130 /* Record comment-starters according to that
2131 quote-parity to the comment-end. */
2132 if (code == Scomment)
2134 comstart_parity = parity;
2135 comstart_pos = from;
2138 /* If we find another earlier comment-ender,
2139 any comment-starts earlier than that don't count
2140 (because they go with the earlier comment-ender). */
2141 if (code == Sendcomment
2142 && SYNTAX_COMMENT_STYLE (FETCH_CHAR (from)) == comstyle)
2143 break;
2145 /* Assume a defun-start point is outside of strings. */
2146 if (code == Sopen
2147 && (from == stop || FETCH_BYTE (from - 1) == '\n'))
2148 break;
2151 if (comstart_pos == 0)
2152 from = comment_end;
2153 /* If the earliest comment starter
2154 is followed by uniform paired string quotes or none,
2155 we know it can't be inside a string
2156 since if it were then the comment ender would be inside one.
2157 So it does start a comment. Skip back to it. */
2158 else if (comstart_parity == 0 && !string_lossage)
2159 from = comstart_pos;
2160 else
2162 /* We had two kinds of string delimiters mixed up
2163 together. Decode this going forwards.
2164 Scan fwd from the previous comment ender
2165 to the one in question; this records where we
2166 last passed a comment starter. */
2167 struct lisp_parse_state state;
2168 scan_sexps_forward (&state, find_defun_start (comment_end),
2169 comment_end - 1, -10000, 0, Qnil, 0);
2170 if (state.incomment)
2171 from = state.comstr_start;
2172 else
2173 /* We can't grok this as a comment; scan it normally. */
2174 from = comment_end;
2177 #endif /* 0 */
2178 break;
2180 case Scomment_fence:
2181 case Sstring_fence:
2182 while (1)
2184 DEC_POS (from);
2185 if (from == stop) goto lose;
2186 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2187 if (!char_quoted (from)
2188 && SYNTAX (FETCH_CHAR (from)) == code)
2189 break;
2191 if (code == Sstring_fence && !depth && sexpflag) goto done2;
2192 break;
2194 case Sstring:
2195 stringterm = FETCH_CHAR (from);
2196 while (1)
2198 if (from == stop) goto lose;
2199 temp_pos = from;
2200 DEC_POS (temp_pos);
2201 UPDATE_SYNTAX_TABLE_BACKWARD (temp_pos);
2202 if (!char_quoted (temp_pos)
2203 && stringterm == FETCH_CHAR (temp_pos))
2204 break;
2205 from = temp_pos;
2207 DEC_POS (from);
2208 if (!depth && sexpflag) goto done2;
2209 break;
2213 /* Reached start of buffer. Error if within object, return nil if between */
2214 if (depth) goto lose;
2216 immediate_quit = 0;
2217 return Qnil;
2219 done2:
2220 count++;
2224 immediate_quit = 0;
2225 XSETFASTINT (val, from);
2226 return val;
2228 lose:
2229 Fsignal (Qscan_error,
2230 Fcons (build_string ("Unbalanced parentheses"),
2231 Fcons (make_number (last_good),
2232 Fcons (make_number (from), Qnil))));
2234 /* NOTREACHED */
2237 DEFUN ("scan-lists", Fscan_lists, Sscan_lists, 3, 3, 0,
2238 "Scan from character number FROM by COUNT lists.\n\
2239 Returns the character number of the position thus found.\n\
2241 If DEPTH is nonzero, paren depth begins counting from that value,\n\
2242 only places where the depth in parentheses becomes zero\n\
2243 are candidates for stopping; COUNT such places are counted.\n\
2244 Thus, a positive value for DEPTH means go out levels.\n\
2246 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.\n\
2248 If the beginning or end of (the accessible part of) the buffer is reached\n\
2249 and the depth is wrong, an error is signaled.\n\
2250 If the depth is right but the count is not used up, nil is returned.")
2251 (from, count, depth)
2252 Lisp_Object from, count, depth;
2254 CHECK_NUMBER (from, 0);
2255 CHECK_NUMBER (count, 1);
2256 CHECK_NUMBER (depth, 2);
2258 return scan_lists (XINT (from), XINT (count), XINT (depth), 0);
2261 DEFUN ("scan-sexps", Fscan_sexps, Sscan_sexps, 2, 2, 0,
2262 "Scan from character number FROM by COUNT balanced expressions.\n\
2263 If COUNT is negative, scan backwards.\n\
2264 Returns the character number of the position thus found.\n\
2266 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.\n\
2268 If the beginning or end of (the accessible part of) the buffer is reached\n\
2269 in the middle of a parenthetical grouping, an error is signaled.\n\
2270 If the beginning or end is reached between groupings\n\
2271 but before count is used up, nil is returned.")
2272 (from, count)
2273 Lisp_Object from, count;
2275 CHECK_NUMBER (from, 0);
2276 CHECK_NUMBER (count, 1);
2278 return scan_lists (XINT (from), XINT (count), 0, 1);
2281 DEFUN ("backward-prefix-chars", Fbackward_prefix_chars, Sbackward_prefix_chars,
2282 0, 0, 0,
2283 "Move point backward over any number of chars with prefix syntax.\n\
2284 This includes chars with \"quote\" or \"prefix\" syntax (' or p).")
2287 int beg = BEGV;
2288 int pos = PT;
2289 int c;
2290 int temp_pos = pos;
2292 if (pos > beg)
2294 SETUP_SYNTAX_TABLE (pos, -1);
2296 DEC_POS (temp_pos);
2298 while (pos > beg && !char_quoted (temp_pos)
2299 /* Previous statement updates syntax table. */
2300 && ((c = FETCH_CHAR (temp_pos), SYNTAX (c) == Squote)
2301 || SYNTAX_PREFIX (c)))
2303 pos = temp_pos;
2304 DEC_POS (temp_pos);
2307 SET_PT (pos);
2309 return Qnil;
2312 /* Parse forward from FROM to END,
2313 assuming that FROM has state OLDSTATE (nil means FROM is start of function),
2314 and return a description of the state of the parse at END.
2315 If STOPBEFORE is nonzero, stop at the start of an atom.
2316 If COMMENTSTOP is nonzero, stop at the start of a comment. */
2318 static void
2319 scan_sexps_forward (stateptr, from, end, targetdepth,
2320 stopbefore, oldstate, commentstop)
2321 struct lisp_parse_state *stateptr;
2322 register int from;
2323 int end, targetdepth, stopbefore;
2324 Lisp_Object oldstate;
2325 int commentstop;
2327 struct lisp_parse_state state;
2329 register enum syntaxcode code;
2330 struct level { int last, prev; };
2331 struct level levelstart[100];
2332 register struct level *curlevel = levelstart;
2333 struct level *endlevel = levelstart + 100;
2334 int prev;
2335 register int depth; /* Paren depth of current scanning location.
2336 level - levelstart equals this except
2337 when the depth becomes negative. */
2338 int mindepth; /* Lowest DEPTH value seen. */
2339 int start_quoted = 0; /* Nonzero means starting after a char quote */
2340 Lisp_Object tem;
2341 int prev_from; /* Keep one character before FROM. */
2342 int boundary_stop = commentstop == -1;
2343 int nofence;
2345 prev_from = from;
2346 DEC_POS (prev_from);
2348 /* Use this macro instead of `from++'. */
2349 #define INC_FROM do { prev_from = from; INC_POS (from); } while (0)
2351 immediate_quit = 1;
2352 QUIT;
2354 SETUP_SYNTAX_TABLE (from, 1);
2356 if (NILP (oldstate))
2358 depth = 0;
2359 state.instring = -1;
2360 state.incomment = 0;
2361 state.comstyle = 0; /* comment style a by default. */
2362 state.comstr_start = -1; /* no comment/string seen. */
2364 else
2366 tem = Fcar (oldstate);
2367 if (!NILP (tem))
2368 depth = XINT (tem);
2369 else
2370 depth = 0;
2372 oldstate = Fcdr (oldstate);
2373 oldstate = Fcdr (oldstate);
2374 oldstate = Fcdr (oldstate);
2375 tem = Fcar (oldstate);
2376 /* Check whether we are inside string_fence-style string: */
2377 state.instring = ( !NILP (tem)
2378 ? ( INTEGERP (tem) ? XINT (tem) : ST_STRING_STYLE)
2379 : -1);
2381 oldstate = Fcdr (oldstate);
2382 tem = Fcar (oldstate);
2383 state.incomment = !NILP (tem);
2385 oldstate = Fcdr (oldstate);
2386 tem = Fcar (oldstate);
2387 start_quoted = !NILP (tem);
2389 /* if the eight element of the list is nil, we are in comment
2390 style a. If it is non-nil, we are in comment style b */
2391 oldstate = Fcdr (oldstate);
2392 oldstate = Fcdr (oldstate);
2393 tem = Fcar (oldstate);
2394 state.comstyle = NILP (tem) ? 0 : ( EQ (tem, Qsyntax_table)
2395 ? ST_COMMENT_STYLE : 1 );
2397 oldstate = Fcdr (oldstate);
2398 tem = Fcar (oldstate);
2399 state.comstr_start = NILP (tem) ? -1 : XINT (tem) ;
2401 state.quoted = 0;
2402 mindepth = depth;
2404 curlevel->prev = -1;
2405 curlevel->last = -1;
2407 /* Enter the loop at a place appropriate for initial state. */
2409 if (state.incomment) goto startincomment;
2410 if (state.instring >= 0)
2412 nofence = state.instring != ST_STRING_STYLE;
2413 if (start_quoted) goto startquotedinstring;
2414 goto startinstring;
2416 if (start_quoted) goto startquoted;
2418 while (from < end)
2420 UPDATE_SYNTAX_TABLE_FORWARD (from);
2421 code = SYNTAX (FETCH_CHAR (from));
2422 INC_FROM;
2424 if (code == Scomment)
2425 state.comstr_start = prev_from;
2426 else if (code == Scomment_fence)
2428 /* Record the comment style we have entered so that only
2429 the comment-end sequence of the same style actually
2430 terminates the comment section. */
2431 state.comstyle = ( code == Scomment_fence
2432 ? ST_COMMENT_STYLE
2433 : SYNTAX_COMMENT_STYLE (FETCH_CHAR (from)));
2434 state.comstr_start = prev_from;
2435 if (code != Scomment_fence) INC_FROM;
2436 code = Scomment;
2438 else if (from < end)
2439 if (SYNTAX_COMSTART_FIRST (FETCH_CHAR (prev_from)))
2440 if (SYNTAX_COMSTART_SECOND (FETCH_CHAR (from)))
2441 /* Duplicate code to avoid a very complex if-expression
2442 which causes trouble for the SGI compiler. */
2444 /* Record the comment style we have entered so that only
2445 the comment-end sequence of the same style actually
2446 terminates the comment section. */
2447 state.comstyle = ( code == Scomment_fence
2448 ? ST_COMMENT_STYLE
2449 : SYNTAX_COMMENT_STYLE (FETCH_CHAR (from)));
2450 state.comstr_start = prev_from;
2451 if (code != Scomment_fence) INC_FROM;
2452 code = Scomment;
2455 if (SYNTAX_PREFIX (FETCH_CHAR (prev_from)))
2456 continue;
2457 switch (SWITCH_ENUM_CAST (code))
2459 case Sescape:
2460 case Scharquote:
2461 if (stopbefore) goto stop; /* this arg means stop at sexp start */
2462 curlevel->last = prev_from;
2463 startquoted:
2464 if (from == end) goto endquoted;
2465 INC_FROM;
2466 goto symstarted;
2467 /* treat following character as a word constituent */
2468 case Sword:
2469 case Ssymbol:
2470 if (stopbefore) goto stop; /* this arg means stop at sexp start */
2471 curlevel->last = prev_from;
2472 symstarted:
2473 while (from < end)
2475 UPDATE_SYNTAX_TABLE_FORWARD (from);
2476 switch (SWITCH_ENUM_CAST (SYNTAX (FETCH_CHAR (from))))
2478 case Scharquote:
2479 case Sescape:
2480 INC_FROM;
2481 if (from == end) goto endquoted;
2482 break;
2483 case Sword:
2484 case Ssymbol:
2485 case Squote:
2486 break;
2487 default:
2488 goto symdone;
2490 INC_FROM;
2492 symdone:
2493 curlevel->prev = curlevel->last;
2494 break;
2496 startincomment:
2497 if (commentstop == 1)
2498 goto done;
2499 if (from != BEGV)
2501 /* Enter the loop in the middle so that we find
2502 a 2-char comment ender if we start in the middle of it. */
2503 prev = FETCH_CHAR (prev_from);
2504 goto startincomment_1;
2506 /* At beginning of buffer, enter the loop the ordinary way. */
2507 state.incomment = 1;
2508 goto commentloop;
2510 case Scomment:
2511 state.incomment = 1;
2512 if (commentstop || boundary_stop) goto done;
2513 commentloop:
2514 while (1)
2516 if (from == end) goto done;
2517 UPDATE_SYNTAX_TABLE_FORWARD (from);
2518 prev = FETCH_CHAR (from);
2519 if (SYNTAX (prev) == Sendcomment
2520 && SYNTAX_COMMENT_STYLE (prev) == state.comstyle)
2521 /* Only terminate the comment section if the endcomment
2522 of the same style as the start sequence has been
2523 encountered. */
2524 break;
2525 if (state.comstyle == ST_COMMENT_STYLE
2526 && SYNTAX (prev) == Scomment_fence)
2527 break;
2528 INC_FROM;
2529 startincomment_1:
2530 if (from < end && SYNTAX_COMEND_FIRST (prev)
2531 && SYNTAX_COMEND_SECOND (FETCH_CHAR (from))
2532 && SYNTAX_COMMENT_STYLE (prev) == state.comstyle)
2533 /* Only terminate the comment section if the end-comment
2534 sequence of the same style as the start sequence has
2535 been encountered. */
2536 { break; }
2538 INC_FROM;
2539 state.incomment = 0;
2540 state.comstyle = 0; /* reset the comment style */
2541 if (boundary_stop) goto done;
2542 break;
2544 case Sopen:
2545 if (stopbefore) goto stop; /* this arg means stop at sexp start */
2546 depth++;
2547 /* curlevel++->last ran into compiler bug on Apollo */
2548 curlevel->last = prev_from;
2549 if (++curlevel == endlevel)
2550 error ("Nesting too deep for parser");
2551 curlevel->prev = -1;
2552 curlevel->last = -1;
2553 if (targetdepth == depth) goto done;
2554 break;
2556 case Sclose:
2557 depth--;
2558 if (depth < mindepth)
2559 mindepth = depth;
2560 if (curlevel != levelstart)
2561 curlevel--;
2562 curlevel->prev = curlevel->last;
2563 if (targetdepth == depth) goto done;
2564 break;
2566 case Sstring:
2567 case Sstring_fence:
2568 state.comstr_start = from - 1;
2569 if (stopbefore) goto stop; /* this arg means stop at sexp start */
2570 curlevel->last = prev_from;
2571 state.instring = (code == Sstring
2572 ? (FETCH_CHAR (prev_from))
2573 : ST_STRING_STYLE);
2574 if (boundary_stop) goto done;
2575 startinstring:
2577 nofence = state.instring != ST_STRING_STYLE;
2579 while (1)
2581 int c;
2583 if (from >= end) goto done;
2584 c = FETCH_CHAR (from);
2585 if (nofence && c == state.instring) break;
2586 UPDATE_SYNTAX_TABLE_FORWARD (from);
2587 switch (SWITCH_ENUM_CAST (SYNTAX (c)))
2589 case Sstring_fence:
2590 if (!nofence) goto string_end;
2591 break;
2592 case Scharquote:
2593 case Sescape:
2594 INC_FROM;
2595 startquotedinstring:
2596 if (from >= end) goto endquoted;
2598 INC_FROM;
2601 string_end:
2602 state.instring = -1;
2603 curlevel->prev = curlevel->last;
2604 INC_FROM;
2605 if (boundary_stop) goto done;
2606 break;
2608 case Smath:
2609 break;
2612 goto done;
2614 stop: /* Here if stopping before start of sexp. */
2615 from = prev_from; /* We have just fetched the char that starts it; */
2616 goto done; /* but return the position before it. */
2618 endquoted:
2619 state.quoted = 1;
2620 done:
2621 state.depth = depth;
2622 state.mindepth = mindepth;
2623 state.thislevelstart = curlevel->prev;
2624 state.prevlevelstart
2625 = (curlevel == levelstart) ? -1 : (curlevel - 1)->last;
2626 state.location = from;
2627 immediate_quit = 0;
2629 *stateptr = state;
2632 /* This comment supplies the doc string for parse-partial-sexp,
2633 for make-docfile to see. We cannot put this in the real DEFUN
2634 due to limits in the Unix cpp.
2636 DEFUN ("parse-partial-sexp", Ffoo, Sfoo, 2, 6, 0,
2637 "Parse Lisp syntax starting at FROM until TO; return status of parse at TO.\n\
2638 Parsing stops at TO or when certain criteria are met;\n\
2639 point is set to where parsing stops.\n\
2640 If fifth arg STATE is omitted or nil,\n\
2641 parsing assumes that FROM is the beginning of a function.\n\
2642 Value is a list of nine elements describing final state of parsing:\n\
2643 0. depth in parens.\n\
2644 1. character address of start of innermost containing list; nil if none.\n\
2645 2. character address of start of last complete sexp terminated.\n\
2646 3. non-nil if inside a string.\n\
2647 (it is the character that will terminate the string,\n\
2648 or t if the string should be terminated by an explicit\n\
2649 `syntax-table' property.)\n\
2650 4. t if inside a comment.\n\
2651 5. t if following a quote character.\n\
2652 6. the minimum paren-depth encountered during this scan.\n\
2653 7. t if in a comment of style `b'; `syntax-table' if given by an explicit\n\
2654 `syntax-table' property.\n\
2655 8. character address of start of last comment or string; nil if none.\n\
2656 If third arg TARGETDEPTH is non-nil, parsing stops if the depth\n\
2657 in parentheses becomes equal to TARGETDEPTH.\n\
2658 Fourth arg STOPBEFORE non-nil means stop when come to\n\
2659 any character that starts a sexp.\n\
2660 Fifth arg STATE is an eight-list like what this function returns.\n\
2661 It is used to initialize the state of the parse. Its second and third
2662 elements are ignored.
2663 Sixth arg COMMENTSTOP non-nil means stop at the start of a comment. If\n\
2664 it is `syntax-table', stop after the start of a comment or a string, or\n\
2665 after end of a comment or a string.")
2666 (from, to, targetdepth, stopbefore, state, commentstop)
2669 DEFUN ("parse-partial-sexp", Fparse_partial_sexp, Sparse_partial_sexp, 2, 6, 0,
2670 0 /* See immediately above */)
2671 (from, to, targetdepth, stopbefore, oldstate, commentstop)
2672 Lisp_Object from, to, targetdepth, stopbefore, oldstate, commentstop;
2674 struct lisp_parse_state state;
2675 int target;
2677 if (!NILP (targetdepth))
2679 CHECK_NUMBER (targetdepth, 3);
2680 target = XINT (targetdepth);
2682 else
2683 target = -100000; /* We won't reach this depth */
2685 validate_region (&from, &to);
2686 scan_sexps_forward (&state, XINT (from), XINT (to),
2687 target, !NILP (stopbefore), oldstate,
2688 (NILP (commentstop)
2689 ? 0 : (EQ (commentstop, Qsyntax_table) ? -1 : 1)));
2691 SET_PT (state.location);
2693 return Fcons (make_number (state.depth),
2694 Fcons (state.prevlevelstart < 0 ? Qnil : make_number (state.prevlevelstart),
2695 Fcons (state.thislevelstart < 0 ? Qnil : make_number (state.thislevelstart),
2696 Fcons (state.instring >= 0
2697 ? (state.instring == ST_STRING_STYLE
2698 ? Qt : make_number (state.instring)) : Qnil,
2699 Fcons (state.incomment ? Qt : Qnil,
2700 Fcons (state.quoted ? Qt : Qnil,
2701 Fcons (make_number (state.mindepth),
2702 Fcons (state.comstyle
2703 ? (state.comstyle == ST_COMMENT_STYLE
2704 ? Qsyntax_table : Qt) : Qnil,
2705 Fcons (state.comstr_start != -1 ? make_number (state.comstr_start) : Qnil,
2706 Qnil)))))))));
2709 init_syntax_once ()
2711 register int i, c;
2712 Lisp_Object temp;
2714 /* This has to be done here, before we call Fmake_char_table. */
2715 Qsyntax_table = intern ("syntax-table");
2716 staticpro (&Qsyntax_table);
2718 /* Intern this now in case it isn't already done.
2719 Setting this variable twice is harmless.
2720 But don't staticpro it here--that is done in alloc.c. */
2721 Qchar_table_extra_slots = intern ("char-table-extra-slots");
2723 /* Create objects which can be shared among syntax tables. */
2724 Vsyntax_code_object = Fmake_vector (make_number (13), Qnil);
2725 for (i = 0; i < XVECTOR (Vsyntax_code_object)->size; i++)
2726 XVECTOR (Vsyntax_code_object)->contents[i]
2727 = Fcons (make_number (i), Qnil);
2729 /* Now we are ready to set up this property, so we can
2730 create syntax tables. */
2731 Fput (Qsyntax_table, Qchar_table_extra_slots, make_number (0));
2733 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Swhitespace];
2735 Vstandard_syntax_table = Fmake_char_table (Qsyntax_table, temp);
2737 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Sword];
2738 for (i = 'a'; i <= 'z'; i++)
2739 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
2740 for (i = 'A'; i <= 'Z'; i++)
2741 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
2742 for (i = '0'; i <= '9'; i++)
2743 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
2745 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '$', temp);
2746 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '%', temp);
2748 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '(',
2749 Fcons (make_number (Sopen), make_number (')')));
2750 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ')',
2751 Fcons (make_number (Sclose), make_number ('(')));
2752 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '[',
2753 Fcons (make_number (Sopen), make_number (']')));
2754 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ']',
2755 Fcons (make_number (Sclose), make_number ('[')));
2756 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '{',
2757 Fcons (make_number (Sopen), make_number ('}')));
2758 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '}',
2759 Fcons (make_number (Sclose), make_number ('{')));
2760 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '"',
2761 Fcons (make_number ((int) Sstring), Qnil));
2762 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\\',
2763 Fcons (make_number ((int) Sescape), Qnil));
2765 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Ssymbol];
2766 for (i = 0; i < 10; i++)
2768 c = "_-+*/&|<>="[i];
2769 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
2772 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Spunct];
2773 for (i = 0; i < 12; i++)
2775 c = ".,;:?!#@~^'`"[i];
2776 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
2780 syms_of_syntax ()
2782 Qsyntax_table_p = intern ("syntax-table-p");
2783 staticpro (&Qsyntax_table_p);
2785 staticpro (&Vsyntax_code_object);
2787 Qscan_error = intern ("scan-error");
2788 staticpro (&Qscan_error);
2789 Fput (Qscan_error, Qerror_conditions,
2790 Fcons (Qerror, Qnil));
2791 Fput (Qscan_error, Qerror_message,
2792 build_string ("Scan error"));
2794 DEFVAR_BOOL ("parse-sexp-ignore-comments", &parse_sexp_ignore_comments,
2795 "Non-nil means `forward-sexp', etc., should treat comments as whitespace.");
2797 DEFVAR_BOOL ("parse-sexp-lookup-properties", &parse_sexp_lookup_properties,
2798 "Non-nil means `forward-sexp', etc., grant `syntax-table' property.\n\
2799 The value of this property should be either a syntax table, or a cons\n\
2800 of the form (SYNTAXCODE . MATCHCHAR), SYNTAXCODE being the numeric\n\
2801 syntax code, MATCHCHAR being nil or the character to match (which is\n\
2802 relevant only for open/close type.");
2804 words_include_escapes = 0;
2805 DEFVAR_BOOL ("words-include-escapes", &words_include_escapes,
2806 "Non-nil means `forward-word', etc., should treat escape chars part of words.");
2808 defsubr (&Ssyntax_table_p);
2809 defsubr (&Ssyntax_table);
2810 defsubr (&Sstandard_syntax_table);
2811 defsubr (&Scopy_syntax_table);
2812 defsubr (&Sset_syntax_table);
2813 defsubr (&Schar_syntax);
2814 defsubr (&Smatching_paren);
2815 defsubr (&Smodify_syntax_entry);
2816 defsubr (&Sdescribe_syntax);
2818 defsubr (&Sforward_word);
2820 defsubr (&Sskip_chars_forward);
2821 defsubr (&Sskip_chars_backward);
2822 defsubr (&Sskip_syntax_forward);
2823 defsubr (&Sskip_syntax_backward);
2825 defsubr (&Sforward_comment);
2826 defsubr (&Sscan_lists);
2827 defsubr (&Sscan_sexps);
2828 defsubr (&Sbackward_prefix_chars);
2829 defsubr (&Sparse_partial_sexp);