1 /* GNU Emacs routines to deal with syntax tables; also word and list parsing.
2 Copyright (C) 1985, 1987, 1993, 1994, 1995 Free Software Foundation, Inc.
4 This file is part of GNU Emacs.
6 GNU Emacs is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
11 GNU Emacs is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GNU Emacs; see the file COPYING. If not, write to
18 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
28 Lisp_Object Qsyntax_table_p
;
30 static void scan_sexps_forward ();
31 static int char_quoted ();
33 int words_include_escapes
;
35 /* This is the internal form of the parse state used in parse-partial-sexp. */
37 struct lisp_parse_state
39 int depth
; /* Depth at end of parsing */
40 int instring
; /* -1 if not within string, else desired terminator. */
41 int incomment
; /* Nonzero if within a comment at end of parsing */
42 int comstyle
; /* comment style a=0, or b=1 */
43 int quoted
; /* Nonzero if just after an escape char at end of parsing */
44 int thislevelstart
; /* Char number of most recent start-of-expression at current level */
45 int prevlevelstart
; /* Char number of start of containing expression */
46 int location
; /* Char number at which parsing stopped. */
47 int mindepth
; /* Minimum depth seen while scanning. */
48 int comstart
; /* Position just after last comment starter. */
51 /* These variables are a cache for finding the start of a defun.
52 find_start_pos is the place for which the defun start was found.
53 find_start_value is the defun start position found for it.
54 find_start_buffer is the buffer it was found in.
55 find_start_begv is the BEGV value when it was found.
56 find_start_modiff is the value of MODIFF when it was found. */
58 static int find_start_pos
;
59 static int find_start_value
;
60 static struct buffer
*find_start_buffer
;
61 static int find_start_begv
;
62 static int find_start_modiff
;
64 /* Find a defun-start that is the last one before POS (or nearly the last).
65 We record what we find, so that another call in the same area
66 can return the same value right away. */
69 find_defun_start (pos
)
75 /* Use previous finding, if it's valid and applies to this inquiry. */
76 if (current_buffer
== find_start_buffer
77 /* Reuse the defun-start even if POS is a little farther on.
78 POS might be in the next defun, but that's ok.
79 Our value may not be the best possible, but will still be usable. */
80 && pos
<= find_start_pos
+ 1000
81 && pos
>= find_start_value
82 && BEGV
== find_start_begv
83 && MODIFF
== find_start_modiff
)
84 return find_start_value
;
86 /* Back up to start of line. */
87 tem
= scan_buffer ('\n', pos
, BEGV
, -1, &shortage
, 1);
91 /* Open-paren at start of line means we found our defun-start. */
92 if (SYNTAX (FETCH_CHAR (tem
)) == Sopen
)
94 /* Move to beg of previous line. */
95 tem
= scan_buffer ('\n', tem
, BEGV
, -2, &shortage
, 1);
98 /* Record what we found, for the next try. */
99 find_start_value
= tem
;
100 find_start_buffer
= current_buffer
;
101 find_start_modiff
= MODIFF
;
102 find_start_begv
= BEGV
;
103 find_start_pos
= pos
;
105 return find_start_value
;
108 DEFUN ("syntax-table-p", Fsyntax_table_p
, Ssyntax_table_p
, 1, 1, 0,
109 "Return t if ARG is a syntax table.\n\
110 Any vector of 256 elements will do.")
114 if (VECTORP (obj
) && XVECTOR (obj
)->size
== 0400)
120 check_syntax_table (obj
)
123 register Lisp_Object tem
;
124 while (tem
= Fsyntax_table_p (obj
),
126 obj
= wrong_type_argument (Qsyntax_table_p
, obj
);
131 DEFUN ("syntax-table", Fsyntax_table
, Ssyntax_table
, 0, 0, 0,
132 "Return the current syntax table.\n\
133 This is the one specified by the current buffer.")
136 return current_buffer
->syntax_table
;
139 DEFUN ("standard-syntax-table", Fstandard_syntax_table
,
140 Sstandard_syntax_table
, 0, 0, 0,
141 "Return the standard syntax table.\n\
142 This is the one used for new buffers.")
145 return Vstandard_syntax_table
;
148 DEFUN ("copy-syntax-table", Fcopy_syntax_table
, Scopy_syntax_table
, 0, 1, 0,
149 "Construct a new syntax table and return it.\n\
150 It is a copy of the TABLE, which defaults to the standard syntax table.")
154 Lisp_Object size
, val
;
155 XSETFASTINT (size
, 0400);
156 XSETFASTINT (val
, 0);
157 val
= Fmake_vector (size
, val
);
159 table
= check_syntax_table (table
);
160 else if (NILP (Vstandard_syntax_table
))
161 /* Can only be null during initialization */
163 else table
= Vstandard_syntax_table
;
165 bcopy (XVECTOR (table
)->contents
,
166 XVECTOR (val
)->contents
, 0400 * sizeof (Lisp_Object
));
170 DEFUN ("set-syntax-table", Fset_syntax_table
, Sset_syntax_table
, 1, 1, 0,
171 "Select a new syntax table for the current buffer.\n\
172 One argument, a syntax table.")
176 table
= check_syntax_table (table
);
177 current_buffer
->syntax_table
= table
;
178 /* Indicate that this buffer now has a specified syntax table. */
179 current_buffer
->local_var_flags
180 |= XFASTINT (buffer_local_flags
.syntax_table
);
184 /* Convert a letter which signifies a syntax code
185 into the code it signifies.
186 This is used by modify-syntax-entry, and other things. */
188 unsigned char syntax_spec_code
[0400] =
189 { 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
190 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
191 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
192 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
193 (char) Swhitespace
, 0377, (char) Sstring
, 0377,
194 (char) Smath
, 0377, 0377, (char) Squote
,
195 (char) Sopen
, (char) Sclose
, 0377, 0377,
196 0377, (char) Swhitespace
, (char) Spunct
, (char) Scharquote
,
197 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
198 0377, 0377, 0377, 0377,
199 (char) Scomment
, 0377, (char) Sendcomment
, 0377,
200 (char) Sinherit
, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* @, A ... */
201 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
202 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword
,
203 0377, 0377, 0377, 0377, (char) Sescape
, 0377, 0377, (char) Ssymbol
,
204 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* `, a, ... */
205 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
206 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword
,
207 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377
210 /* Indexed by syntax code, give the letter that describes it. */
212 char syntax_code_spec
[14] =
214 ' ', '.', 'w', '_', '(', ')', '\'', '\"', '$', '\\', '/', '<', '>', '@'
217 DEFUN ("char-syntax", Fchar_syntax
, Schar_syntax
, 1, 1, 0,
218 "Return the syntax code of CHAR, described by a character.\n\
219 For example, if CHAR is a word constituent, the character `?w' is returned.\n\
220 The characters that correspond to various syntax codes\n\
221 are listed in the documentation of `modify-syntax-entry'.")
225 CHECK_NUMBER (ch
, 0);
226 return make_number (syntax_code_spec
[(int) SYNTAX (XINT (ch
))]);
229 DEFUN ("matching-paren", Fmatching_paren
, Smatching_paren
, 1, 1, 0,
230 "Return the matching parenthesis of CHAR, or nil if none.")
235 CHECK_NUMBER (ch
, 0);
236 code
= SYNTAX (XINT (ch
));
237 if (code
== Sopen
|| code
== Sclose
)
238 return make_number (SYNTAX_MATCH (XINT (ch
)));
242 /* This comment supplies the doc string for modify-syntax-entry,
243 for make-docfile to see. We cannot put this in the real DEFUN
244 due to limits in the Unix cpp.
246 DEFUN ("modify-syntax-entry", foo, bar, 2, 3, 0,
247 "Set syntax for character CHAR according to string S.\n\
248 The syntax is changed only for table TABLE, which defaults to\n\
249 the current buffer's syntax table.\n\
250 The first character of S should be one of the following:\n\
251 Space or - whitespace syntax. w word constituent.\n\
252 _ symbol constituent. . punctuation.\n\
253 ( open-parenthesis. ) close-parenthesis.\n\
254 \" string quote. \\ escape.\n\
255 $ paired delimiter. ' expression quote or prefix operator.\n\
256 < comment starter. > comment ender.\n\
257 / character-quote. @ inherit from `standard-syntax-table'.\n\
259 Only single-character comment start and end sequences are represented thus.\n\
260 Two-character sequences are represented as described below.\n\
261 The second character of S is the matching parenthesis,\n\
262 used only if the first character is `(' or `)'.\n\
263 Any additional characters are flags.\n\
264 Defined flags are the characters 1, 2, 3, 4, b, and p.\n\
265 1 means C is the start of a two-char comment start sequence.\n\
266 2 means C is the second character of such a sequence.\n\
267 3 means C is the start of a two-char comment end sequence.\n\
268 4 means C is the second character of such a sequence.\n\
270 There can be up to two orthogonal comment sequences. This is to support\n\
271 language modes such as C++. By default, all comment sequences are of style\n\
272 a, but you can set the comment sequence style to b (on the second character\n\
273 of a comment-start, or the first character of a comment-end sequence) using\n\
275 b means C is part of comment sequence b.\n\
277 p means C is a prefix character for `backward-prefix-chars';\n\
278 such characters are treated as whitespace when they occur\n\
279 between expressions.")
283 DEFUN ("modify-syntax-entry", Fmodify_syntax_entry
, Smodify_syntax_entry
, 2, 3,
284 /* I really don't know why this is interactive
285 help-form should at least be made useful whilst reading the second arg
287 "cSet syntax for character: \nsSet syntax for %s to: ",
288 0 /* See immediately above */)
289 (c
, newentry
, syntax_table
)
290 Lisp_Object c
, newentry
, syntax_table
;
292 register unsigned char *p
, match
;
293 register enum syntaxcode code
;
297 CHECK_STRING (newentry
, 1);
298 if (NILP (syntax_table
))
299 syntax_table
= current_buffer
->syntax_table
;
301 syntax_table
= check_syntax_table (syntax_table
);
303 p
= XSTRING (newentry
)->data
;
304 code
= (enum syntaxcode
) syntax_spec_code
[*p
++];
305 if (((int) code
& 0377) == 0377)
306 error ("invalid syntax description letter: %c", c
);
310 if (match
== ' ') match
= 0;
312 val
= (match
<< 8) + (int) code
;
341 XSETFASTINT (XVECTOR (syntax_table
)->contents
[0xFF & XINT (c
)], val
);
346 /* Dump syntax table to buffer in human-readable format */
349 describe_syntax (value
)
352 register enum syntaxcode code
;
353 char desc
, match
, start1
, start2
, end1
, end2
, prefix
, comstyle
;
356 Findent_to (make_number (16), make_number (1));
358 if (!INTEGERP (value
))
360 insert_string ("invalid");
364 code
= (enum syntaxcode
) (XINT (value
) & 0377);
365 match
= (XINT (value
) >> 8) & 0377;
366 start1
= (XINT (value
) >> 16) & 1;
367 start2
= (XINT (value
) >> 17) & 1;
368 end1
= (XINT (value
) >> 18) & 1;
369 end2
= (XINT (value
) >> 19) & 1;
370 prefix
= (XINT (value
) >> 20) & 1;
371 comstyle
= (XINT (value
) >> 21) & 1;
373 if ((int) code
< 0 || (int) code
>= (int) Smax
)
375 insert_string ("invalid");
378 desc
= syntax_code_spec
[(int) code
];
380 str
[0] = desc
, str
[1] = 0;
383 str
[0] = match
? match
: ' ';
402 insert_string ("\twhich means: ");
404 switch (SWITCH_ENUM_CAST (code
))
407 insert_string ("whitespace"); break;
409 insert_string ("punctuation"); break;
411 insert_string ("word"); break;
413 insert_string ("symbol"); break;
415 insert_string ("open"); break;
417 insert_string ("close"); break;
419 insert_string ("quote"); break;
421 insert_string ("string"); break;
423 insert_string ("math"); break;
425 insert_string ("escape"); break;
427 insert_string ("charquote"); break;
429 insert_string ("comment"); break;
431 insert_string ("endcomment"); break;
433 insert_string ("inherit"); break;
435 insert_string ("invalid");
441 insert_string (", matches ");
446 insert_string (",\n\t is the first character of a comment-start sequence");
448 insert_string (",\n\t is the second character of a comment-start sequence");
451 insert_string (",\n\t is the first character of a comment-end sequence");
453 insert_string (",\n\t is the second character of a comment-end sequence");
455 insert_string (" (comment style b)");
458 insert_string (",\n\t is a prefix character for `backward-prefix-chars'");
460 insert_string ("\n");
464 describe_syntax_1 (vector
)
467 struct buffer
*old
= current_buffer
;
468 set_buffer_internal (XBUFFER (Vstandard_output
));
469 describe_vector (vector
, Qnil
, describe_syntax
, 0, Qnil
);
470 call0 (intern ("help-mode"));
471 set_buffer_internal (old
);
475 DEFUN ("describe-syntax", Fdescribe_syntax
, Sdescribe_syntax
, 0, 0, "",
476 "Describe the syntax specifications in the syntax table.\n\
477 The descriptions are inserted in a buffer, which is then displayed.")
480 internal_with_output_to_temp_buffer
481 ("*Help*", describe_syntax_1
, current_buffer
->syntax_table
);
486 /* Return the position across COUNT words from FROM.
487 If that many words cannot be found before the end of the buffer, return 0.
488 COUNT negative means scan backward and stop at word beginning. */
490 scan_words (from
, count
)
491 register int from
, count
;
493 register int beg
= BEGV
;
494 register int end
= ZV
;
509 code
= SYNTAX (FETCH_CHAR (from
));
510 if (words_include_escapes
511 && (code
== Sescape
|| code
== Scharquote
))
519 if (from
== end
) break;
520 code
= SYNTAX (FETCH_CHAR (from
));
521 if (!(words_include_escapes
522 && (code
== Sescape
|| code
== Scharquote
)))
538 code
= SYNTAX (FETCH_CHAR (from
- 1));
539 if (words_include_escapes
540 && (code
== Sescape
|| code
== Scharquote
))
548 if (from
== beg
) break;
549 code
= SYNTAX (FETCH_CHAR (from
- 1));
550 if (!(words_include_escapes
551 && (code
== Sescape
|| code
== Scharquote
)))
564 DEFUN ("forward-word", Fforward_word
, Sforward_word
, 1, 1, "p",
565 "Move point forward ARG words (backward if ARG is negative).\n\
566 Normally returns t.\n\
567 If an edge of the buffer is reached, point is left there\n\
568 and nil is returned.")
573 CHECK_NUMBER (count
, 0);
575 if (!(val
= scan_words (point
, XINT (count
))))
577 SET_PT (XINT (count
) > 0 ? ZV
: BEGV
);
584 DEFUN ("forward-comment", Fforward_comment
, Sforward_comment
, 1, 1, 0,
585 "Move forward across up to N comments. If N is negative, move backward.\n\
586 Stop scanning if we find something other than a comment or whitespace.\n\
587 Set point to where scanning stops.\n\
588 If N comments are found as expected, with nothing except whitespace\n\
589 between them, return t; otherwise return nil.")
596 register enum syntaxcode code
;
597 int comstyle
= 0; /* style of comment encountered */
601 CHECK_NUMBER (count
, 0);
602 count1
= XINT (count
);
620 c
= FETCH_CHAR (from
);
624 if (from
< stop
&& SYNTAX_COMSTART_FIRST (c
)
625 && SYNTAX_COMSTART_SECOND (FETCH_CHAR (from
)))
627 /* We have encountered a comment start sequence and we
628 are ignoring all text inside comments. We must record
629 the comment style this sequence begins so that later,
630 only a comment end of the same style actually ends
631 the comment section. */
633 comstyle
= SYNTAX_COMMENT_STYLE (FETCH_CHAR (from
));
637 while (code
== Swhitespace
|| code
== Sendcomment
);
638 if (code
!= Scomment
)
644 /* We're at the start of a comment. */
653 c
= FETCH_CHAR (from
);
655 if (SYNTAX (c
) == Sendcomment
656 && SYNTAX_COMMENT_STYLE (c
) == comstyle
)
657 /* we have encountered a comment end of the same style
658 as the comment sequence which began this comment
661 if (from
< stop
&& SYNTAX_COMEND_FIRST (c
)
662 && SYNTAX_COMEND_SECOND (FETCH_CHAR (from
))
663 && SYNTAX_COMMENT_STYLE (c
) == comstyle
)
664 /* we have encountered a comment end of the same style
665 as the comment sequence which began this comment
669 /* We have skipped one comment. */
681 quoted
= char_quoted (from
);
684 c
= FETCH_CHAR (from
);
687 if (code
== Sendcomment
)
688 comstyle
= SYNTAX_COMMENT_STYLE (c
);
689 if (from
> stop
&& SYNTAX_COMEND_SECOND (c
)
690 && SYNTAX_COMEND_FIRST (FETCH_CHAR (from
- 1))
691 && !char_quoted (from
- 1))
693 /* We must record the comment style encountered so that
694 later, we can match only the proper comment begin
695 sequence of the same style. */
697 comstyle
= SYNTAX_COMMENT_STYLE (FETCH_CHAR (from
- 1));
701 if (code
== Sendcomment
&& !quoted
)
704 if (code
!= SYNTAX (c
))
705 /* For a two-char comment ender, we can assume
706 it does end a comment. So scan back in a simple way. */
708 if (from
!= stop
) from
--;
711 if (SYNTAX (c
= FETCH_CHAR (from
)) == Scomment
712 && SYNTAX_COMMENT_STYLE (c
) == comstyle
)
721 if (SYNTAX_COMSTART_SECOND (c
)
722 && SYNTAX_COMSTART_FIRST (FETCH_CHAR (from
))
723 && SYNTAX_COMMENT_STYLE (c
) == comstyle
724 && !char_quoted (from
))
731 /* Look back, counting the parity of string-quotes,
732 and recording the comment-starters seen.
733 When we reach a safe place, assume that's not in a string;
734 then step the main scan to the earliest comment-starter seen
735 an even number of string quotes away from the safe place.
737 OFROM[I] is position of the earliest comment-starter seen
738 which is I+2X quotes from the comment-end.
739 PARITY is current parity of quotes from the comment end. */
742 char my_stringend
= 0;
743 int string_lossage
= 0;
744 int comment_end
= from
;
745 int comstart_pos
= 0;
746 int comstart_parity
= 0;
747 int scanstart
= from
- 1;
749 /* At beginning of range to scan, we're outside of strings;
750 that determines quote parity to the comment-end. */
753 /* Move back and examine a character. */
756 c
= FETCH_CHAR (from
);
759 /* If this char is the second of a 2-char comment sequence,
760 back up and give the pair the appropriate syntax. */
761 if (from
> stop
&& SYNTAX_COMEND_SECOND (c
)
762 && SYNTAX_COMEND_FIRST (FETCH_CHAR (from
- 1)))
766 c
= FETCH_CHAR (from
);
769 /* If this char starts a 2-char comment start sequence,
770 treat it like a 1-char comment starter. */
771 if (from
< scanstart
&& SYNTAX_COMSTART_FIRST (c
)
772 && SYNTAX_COMSTART_SECOND (FETCH_CHAR (from
+ 1))
773 && comstyle
== SYNTAX_COMMENT_STYLE (FETCH_CHAR (from
+ 1)))
776 /* Ignore escaped characters. */
777 if (char_quoted (from
))
780 /* Track parity of quotes. */
784 if (my_stringend
== 0)
786 /* If we have two kinds of string delimiters.
787 There's no way to grok this scanning backwards. */
788 else if (my_stringend
!= c
)
792 /* Record comment-starters according to that
793 quote-parity to the comment-end. */
794 if (code
== Scomment
)
796 comstart_parity
= parity
;
800 /* If we find another earlier comment-ender,
801 any comment-starts earlier than that don't count
802 (because they go with the earlier comment-ender). */
803 if (code
== Sendcomment
804 && SYNTAX_COMMENT_STYLE (FETCH_CHAR (from
)) == comstyle
)
807 /* Assume a defun-start point is outside of strings. */
809 && (from
== stop
|| FETCH_CHAR (from
- 1) == '\n'))
813 if (comstart_pos
== 0)
815 /* If the earliest comment starter
816 is followed by uniform paired string quotes or none,
817 we know it can't be inside a string
818 since if it were then the comment ender would be inside one.
819 So it does start a comment. Skip back to it. */
820 else if (comstart_parity
== 0 && !string_lossage
)
824 /* We had two kinds of string delimiters mixed up
825 together. Decode this going forwards.
826 Scan fwd from the previous comment ender
827 to the one in question; this records where we
828 last passed a comment starter. */
829 struct lisp_parse_state state
;
830 scan_sexps_forward (&state
, find_defun_start (comment_end
),
831 comment_end
- 1, -10000, 0, Qnil
, 0);
833 from
= state
.comstart
;
835 /* We can't grok this as a comment; scan it normally. */
839 /* We have skipped one comment. */
842 else if ((code
!= Swhitespace
&& code
!= Scomment
) || quoted
)
858 int parse_sexp_ignore_comments
;
861 scan_lists (from
, count
, depth
, sexpflag
)
863 int count
, depth
, sexpflag
;
871 register enum syntaxcode code
;
872 int min_depth
= depth
; /* Err out if depth gets less than this. */
873 int comstyle
= 0; /* style of comment encountered */
875 if (depth
> 0) min_depth
= 0;
885 c
= FETCH_CHAR (from
);
888 if (from
< stop
&& SYNTAX_COMSTART_FIRST (c
)
889 && SYNTAX_COMSTART_SECOND (FETCH_CHAR (from
))
890 && parse_sexp_ignore_comments
)
892 /* we have encountered a comment start sequence and we
893 are ignoring all text inside comments. we must record
894 the comment style this sequence begins so that later,
895 only a comment end of the same style actually ends
896 the comment section */
898 comstyle
= SYNTAX_COMMENT_STYLE (FETCH_CHAR (from
));
902 if (SYNTAX_PREFIX (c
))
905 switch (SWITCH_ENUM_CAST (code
))
909 if (from
== stop
) goto lose
;
911 /* treat following character as a word constituent */
914 if (depth
|| !sexpflag
) break;
915 /* This word counts as a sexp; return at end of it. */
918 switch (SWITCH_ENUM_CAST (SYNTAX (FETCH_CHAR (from
))))
923 if (from
== stop
) goto lose
;
937 if (!parse_sexp_ignore_comments
) break;
946 c
= FETCH_CHAR (from
);
947 if (SYNTAX (c
) == Sendcomment
948 && SYNTAX_COMMENT_STYLE (c
) == comstyle
)
949 /* we have encountered a comment end of the same style
950 as the comment sequence which began this comment
954 if (from
< stop
&& SYNTAX_COMEND_FIRST (c
)
955 && SYNTAX_COMEND_SECOND (FETCH_CHAR (from
))
956 && SYNTAX_COMMENT_STYLE (c
) == comstyle
)
957 /* we have encountered a comment end of the same style
958 as the comment sequence which began this comment
967 if (from
!= stop
&& c
== FETCH_CHAR (from
))
977 if (!++depth
) goto done
;
982 if (!--depth
) goto done
;
983 if (depth
< min_depth
)
984 error ("Containing expression ends prematurely");
988 stringterm
= FETCH_CHAR (from
- 1);
991 if (from
>= stop
) goto lose
;
992 if (FETCH_CHAR (from
) == stringterm
) break;
993 switch (SWITCH_ENUM_CAST (SYNTAX (FETCH_CHAR (from
))))
1002 if (!depth
&& sexpflag
) goto done
;
1007 /* Reached end of buffer. Error if within object, return nil if between */
1008 if (depth
) goto lose
;
1013 /* End of object reached */
1024 if (quoted
= char_quoted (from
))
1026 c
= FETCH_CHAR (from
);
1029 if (code
== Sendcomment
)
1030 comstyle
= SYNTAX_COMMENT_STYLE (c
);
1031 if (from
> stop
&& SYNTAX_COMEND_SECOND (c
)
1032 && SYNTAX_COMEND_FIRST (FETCH_CHAR (from
- 1))
1033 && !char_quoted (from
- 1)
1034 && parse_sexp_ignore_comments
)
1036 /* we must record the comment style encountered so that
1037 later, we can match only the proper comment begin
1038 sequence of the same style */
1040 comstyle
= SYNTAX_COMMENT_STYLE (FETCH_CHAR (from
- 1));
1044 if (SYNTAX_PREFIX (c
))
1047 switch (SWITCH_ENUM_CAST (quoted
? Sword
: code
))
1051 if (depth
|| !sexpflag
) break;
1052 /* This word counts as a sexp; count object finished after passing it. */
1055 quoted
= char_quoted (from
- 1);
1058 if (! (quoted
|| SYNTAX (FETCH_CHAR (from
- 1)) == Sword
1059 || SYNTAX (FETCH_CHAR (from
- 1)) == Ssymbol
1060 || SYNTAX (FETCH_CHAR (from
- 1)) == Squote
))
1069 if (from
!= stop
&& c
== FETCH_CHAR (from
- 1))
1079 if (!++depth
) goto done2
;
1084 if (!--depth
) goto done2
;
1085 if (depth
< min_depth
)
1086 error ("Containing expression ends prematurely");
1090 if (!parse_sexp_ignore_comments
)
1093 if (code
!= SYNTAX (c
))
1094 /* For a two-char comment ender, we can assume
1095 it does end a comment. So scan back in a simple way. */
1097 if (from
!= stop
) from
--;
1100 if (SYNTAX (c
= FETCH_CHAR (from
)) == Scomment
1101 && SYNTAX_COMMENT_STYLE (c
) == comstyle
)
1110 if (SYNTAX_COMSTART_SECOND (c
)
1111 && SYNTAX_COMSTART_FIRST (FETCH_CHAR (from
))
1112 && SYNTAX_COMMENT_STYLE (c
) == comstyle
1113 && !char_quoted (from
))
1120 /* Look back, counting the parity of string-quotes,
1121 and recording the comment-starters seen.
1122 When we reach a safe place, assume that's not in a string;
1123 then step the main scan to the earliest comment-starter seen
1124 an even number of string quotes away from the safe place.
1126 OFROM[I] is position of the earliest comment-starter seen
1127 which is I+2X quotes from the comment-end.
1128 PARITY is current parity of quotes from the comment end. */
1131 char my_stringend
= 0;
1132 int string_lossage
= 0;
1133 int comment_end
= from
;
1134 int comstart_pos
= 0;
1135 int comstart_parity
= 0;
1136 int scanstart
= from
- 1;
1138 /* At beginning of range to scan, we're outside of strings;
1139 that determines quote parity to the comment-end. */
1140 while (from
!= stop
)
1142 /* Move back and examine a character. */
1145 c
= FETCH_CHAR (from
);
1148 /* If this char is the second of a 2-char comment sequence,
1149 back up and give the pair the appropriate syntax. */
1150 if (from
> stop
&& SYNTAX_COMEND_SECOND (c
)
1151 && SYNTAX_COMEND_FIRST (FETCH_CHAR (from
- 1)))
1155 c
= FETCH_CHAR (from
);
1158 /* If this char starts a 2-char comment start sequence,
1159 treat it like a 1-char comment starter. */
1160 if (from
< scanstart
&& SYNTAX_COMSTART_FIRST (c
)
1161 && SYNTAX_COMSTART_SECOND (FETCH_CHAR (from
+ 1))
1162 && comstyle
== SYNTAX_COMMENT_STYLE (FETCH_CHAR (from
+ 1)))
1165 /* Ignore escaped characters. */
1166 if (char_quoted (from
))
1169 /* Track parity of quotes. */
1170 if (code
== Sstring
)
1173 if (my_stringend
== 0)
1175 /* If we have two kinds of string delimiters.
1176 There's no way to grok this scanning backwards. */
1177 else if (my_stringend
!= c
)
1181 /* Record comment-starters according to that
1182 quote-parity to the comment-end. */
1183 if (code
== Scomment
)
1185 comstart_parity
= parity
;
1186 comstart_pos
= from
;
1189 /* If we find another earlier comment-ender,
1190 any comment-starts earlier than that don't count
1191 (because they go with the earlier comment-ender). */
1192 if (code
== Sendcomment
1193 && SYNTAX_COMMENT_STYLE (FETCH_CHAR (from
)) == comstyle
)
1196 /* Assume a defun-start point is outside of strings. */
1198 && (from
== stop
|| FETCH_CHAR (from
- 1) == '\n'))
1202 if (comstart_pos
== 0)
1204 /* If the earliest comment starter
1205 is followed by uniform paired string quotes or none,
1206 we know it can't be inside a string
1207 since if it were then the comment ender would be inside one.
1208 So it does start a comment. Skip back to it. */
1209 else if (comstart_parity
== 0 && !string_lossage
)
1210 from
= comstart_pos
;
1213 /* We had two kinds of string delimiters mixed up
1214 together. Decode this going forwards.
1215 Scan fwd from the previous comment ender
1216 to the one in question; this records where we
1217 last passed a comment starter. */
1218 struct lisp_parse_state state
;
1219 scan_sexps_forward (&state
, find_defun_start (comment_end
),
1220 comment_end
- 1, -10000, 0, Qnil
, 0);
1221 if (state
.incomment
)
1222 from
= state
.comstart
;
1224 /* We can't grok this as a comment; scan it normally. */
1231 stringterm
= FETCH_CHAR (from
);
1234 if (from
== stop
) goto lose
;
1235 if (!char_quoted (from
- 1)
1236 && stringterm
== FETCH_CHAR (from
- 1))
1241 if (!depth
&& sexpflag
) goto done2
;
1246 /* Reached start of buffer. Error if within object, return nil if between */
1247 if (depth
) goto lose
;
1258 XSETFASTINT (val
, from
);
1262 error ("Unbalanced parentheses");
1270 register enum syntaxcode code
;
1271 register int beg
= BEGV
;
1272 register int quoted
= 0;
1275 && ((code
= SYNTAX (FETCH_CHAR (pos
- 1))) == Scharquote
1276 || code
== Sescape
))
1277 pos
--, quoted
= !quoted
;
1281 DEFUN ("scan-lists", Fscan_lists
, Sscan_lists
, 3, 3, 0,
1282 "Scan from character number FROM by COUNT lists.\n\
1283 Returns the character number of the position thus found.\n\
1285 If DEPTH is nonzero, paren depth begins counting from that value,\n\
1286 only places where the depth in parentheses becomes zero\n\
1287 are candidates for stopping; COUNT such places are counted.\n\
1288 Thus, a positive value for DEPTH means go out levels.\n\
1290 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.\n\
1292 If the beginning or end of (the accessible part of) the buffer is reached\n\
1293 and the depth is wrong, an error is signaled.\n\
1294 If the depth is right but the count is not used up, nil is returned.")
1295 (from
, count
, depth
)
1296 Lisp_Object from
, count
, depth
;
1298 CHECK_NUMBER (from
, 0);
1299 CHECK_NUMBER (count
, 1);
1300 CHECK_NUMBER (depth
, 2);
1302 return scan_lists (XINT (from
), XINT (count
), XINT (depth
), 0);
1305 DEFUN ("scan-sexps", Fscan_sexps
, Sscan_sexps
, 2, 2, 0,
1306 "Scan from character number FROM by COUNT balanced expressions.\n\
1307 If COUNT is negative, scan backwards.\n\
1308 Returns the character number of the position thus found.\n\
1310 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.\n\
1312 If the beginning or end of (the accessible part of) the buffer is reached\n\
1313 in the middle of a parenthetical grouping, an error is signaled.\n\
1314 If the beginning or end is reached between groupings\n\
1315 but before count is used up, nil is returned.")
1317 Lisp_Object from
, count
;
1319 CHECK_NUMBER (from
, 0);
1320 CHECK_NUMBER (count
, 1);
1322 return scan_lists (XINT (from
), XINT (count
), 0, 1);
1325 DEFUN ("backward-prefix-chars", Fbackward_prefix_chars
, Sbackward_prefix_chars
,
1327 "Move point backward over any number of chars with prefix syntax.\n\
1328 This includes chars with \"quote\" or \"prefix\" syntax (' or p).")
1334 while (pos
> beg
&& !char_quoted (pos
- 1)
1335 && (SYNTAX (FETCH_CHAR (pos
- 1)) == Squote
1336 || SYNTAX_PREFIX (FETCH_CHAR (pos
- 1))))
1344 /* Parse forward from FROM to END,
1345 assuming that FROM has state OLDSTATE (nil means FROM is start of function),
1346 and return a description of the state of the parse at END.
1347 If STOPBEFORE is nonzero, stop at the start of an atom.
1348 If COMMENTSTOP is nonzero, stop at the start of a comment. */
1351 scan_sexps_forward (stateptr
, from
, end
, targetdepth
,
1352 stopbefore
, oldstate
, commentstop
)
1353 struct lisp_parse_state
*stateptr
;
1355 int end
, targetdepth
, stopbefore
;
1356 Lisp_Object oldstate
;
1359 struct lisp_parse_state state
;
1361 register enum syntaxcode code
;
1362 struct level
{ int last
, prev
; };
1363 struct level levelstart
[100];
1364 register struct level
*curlevel
= levelstart
;
1365 struct level
*endlevel
= levelstart
+ 100;
1367 register int depth
; /* Paren depth of current scanning location.
1368 level - levelstart equals this except
1369 when the depth becomes negative. */
1370 int mindepth
; /* Lowest DEPTH value seen. */
1371 int start_quoted
= 0; /* Nonzero means starting after a char quote */
1377 if (NILP (oldstate
))
1380 state
.instring
= -1;
1381 state
.incomment
= 0;
1382 state
.comstyle
= 0; /* comment style a by default */
1386 tem
= Fcar (oldstate
);
1392 oldstate
= Fcdr (oldstate
);
1393 oldstate
= Fcdr (oldstate
);
1394 oldstate
= Fcdr (oldstate
);
1395 tem
= Fcar (oldstate
);
1396 state
.instring
= !NILP (tem
) ? XINT (tem
) : -1;
1398 oldstate
= Fcdr (oldstate
);
1399 tem
= Fcar (oldstate
);
1400 state
.incomment
= !NILP (tem
);
1402 oldstate
= Fcdr (oldstate
);
1403 tem
= Fcar (oldstate
);
1404 start_quoted
= !NILP (tem
);
1406 /* if the eight element of the list is nil, we are in comment
1407 style a. if it is non-nil, we are in comment style b */
1408 oldstate
= Fcdr (oldstate
);
1409 oldstate
= Fcdr (oldstate
);
1410 tem
= Fcar (oldstate
);
1411 state
.comstyle
= !NILP (tem
);
1416 curlevel
->prev
= -1;
1417 curlevel
->last
= -1;
1419 /* Enter the loop at a place appropriate for initial state. */
1421 if (state
.incomment
) goto startincomment
;
1422 if (state
.instring
>= 0)
1424 if (start_quoted
) goto startquotedinstring
;
1427 if (start_quoted
) goto startquoted
;
1431 code
= SYNTAX (FETCH_CHAR (from
));
1433 if (code
== Scomment
)
1434 state
.comstart
= from
-1;
1436 else if (from
< end
&& SYNTAX_COMSTART_FIRST (FETCH_CHAR (from
- 1))
1437 && SYNTAX_COMSTART_SECOND (FETCH_CHAR (from
)))
1439 /* Record the comment style we have entered so that only
1440 the comment-end sequence of the same style actually
1441 terminates the comment section. */
1443 state
.comstyle
= SYNTAX_COMMENT_STYLE (FETCH_CHAR (from
));
1444 state
.comstart
= from
-1;
1448 if (SYNTAX_PREFIX (FETCH_CHAR (from
- 1)))
1450 switch (SWITCH_ENUM_CAST (code
))
1454 if (stopbefore
) goto stop
; /* this arg means stop at sexp start */
1455 curlevel
->last
= from
- 1;
1457 if (from
== end
) goto endquoted
;
1460 /* treat following character as a word constituent */
1463 if (stopbefore
) goto stop
; /* this arg means stop at sexp start */
1464 curlevel
->last
= from
- 1;
1468 switch (SWITCH_ENUM_CAST (SYNTAX (FETCH_CHAR (from
))))
1473 if (from
== end
) goto endquoted
;
1485 curlevel
->prev
= curlevel
->last
;
1493 /* Enter the loop in the middle so that we find
1494 a 2-char comment ender if we start in the middle of it. */
1495 prev
= FETCH_CHAR (from
- 1);
1496 goto startincomment_1
;
1498 /* At beginning of buffer, enter the loop the ordinary way. */
1501 state
.incomment
= 1;
1506 if (from
== end
) goto done
;
1507 prev
= FETCH_CHAR (from
);
1508 if (SYNTAX (prev
) == Sendcomment
1509 && SYNTAX_COMMENT_STYLE (prev
) == state
.comstyle
)
1510 /* Only terminate the comment section if the endcomment
1511 of the same style as the start sequence has been
1516 if (from
< end
&& SYNTAX_COMEND_FIRST (prev
)
1517 && SYNTAX_COMEND_SECOND (FETCH_CHAR (from
))
1518 && SYNTAX_COMMENT_STYLE (prev
) == state
.comstyle
)
1519 /* Only terminate the comment section if the end-comment
1520 sequence of the same style as the start sequence has
1521 been encountered. */
1524 state
.incomment
= 0;
1525 state
.comstyle
= 0; /* reset the comment style */
1529 if (stopbefore
) goto stop
; /* this arg means stop at sexp start */
1531 /* curlevel++->last ran into compiler bug on Apollo */
1532 curlevel
->last
= from
- 1;
1533 if (++curlevel
== endlevel
)
1534 error ("Nesting too deep for parser");
1535 curlevel
->prev
= -1;
1536 curlevel
->last
= -1;
1537 if (!--targetdepth
) goto done
;
1542 if (depth
< mindepth
)
1544 if (curlevel
!= levelstart
)
1546 curlevel
->prev
= curlevel
->last
;
1547 if (!++targetdepth
) goto done
;
1551 if (stopbefore
) goto stop
; /* this arg means stop at sexp start */
1552 curlevel
->last
= from
- 1;
1553 state
.instring
= FETCH_CHAR (from
- 1);
1557 if (from
>= end
) goto done
;
1558 if (FETCH_CHAR (from
) == state
.instring
) break;
1559 switch (SWITCH_ENUM_CAST (SYNTAX (FETCH_CHAR (from
))))
1564 startquotedinstring
:
1565 if (from
>= end
) goto endquoted
;
1569 state
.instring
= -1;
1570 curlevel
->prev
= curlevel
->last
;
1580 stop
: /* Here if stopping before start of sexp. */
1581 from
--; /* We have just fetched the char that starts it; */
1582 goto done
; /* but return the position before it. */
1587 state
.depth
= depth
;
1588 state
.mindepth
= mindepth
;
1589 state
.thislevelstart
= curlevel
->prev
;
1590 state
.prevlevelstart
1591 = (curlevel
== levelstart
) ? -1 : (curlevel
- 1)->last
;
1592 state
.location
= from
;
1598 /* This comment supplies the doc string for parse-partial-sexp,
1599 for make-docfile to see. We cannot put this in the real DEFUN
1600 due to limits in the Unix cpp.
1602 DEFUN ("parse-partial-sexp", Ffoo, Sfoo, 2, 6, 0,
1603 "Parse Lisp syntax starting at FROM until TO; return status of parse at TO.\n\
1604 Parsing stops at TO or when certain criteria are met;\n\
1605 point is set to where parsing stops.\n\
1606 If fifth arg STATE is omitted or nil,\n\
1607 parsing assumes that FROM is the beginning of a function.\n\
1608 Value is a list of eight elements describing final state of parsing:\n\
1609 0. depth in parens.\n\
1610 1. character address of start of innermost containing list; nil if none.\n\
1611 2. character address of start of last complete sexp terminated.\n\
1612 3. non-nil if inside a string.\n\
1613 (it is the character that will terminate the string.)\n\
1614 4. t if inside a comment.\n\
1615 5. t if following a quote character.\n\
1616 6. the minimum paren-depth encountered during this scan.\n\
1617 7. t if in a comment of style `b'.\n\
1618 If third arg TARGETDEPTH is non-nil, parsing stops if the depth\n\
1619 in parentheses becomes equal to TARGETDEPTH.\n\
1620 Fourth arg STOPBEFORE non-nil means stop when come to\n\
1621 any character that starts a sexp.\n\
1622 Fifth arg STATE is an eight-list like what this function returns.\n\
1623 It is used to initialize the state of the parse. Its second and third
1624 elements are ignored.
1625 Sixth args COMMENTSTOP non-nil means stop at the start of a comment.")
1626 (from, to, targetdepth, stopbefore, state, commentstop)
1629 DEFUN ("parse-partial-sexp", Fparse_partial_sexp
, Sparse_partial_sexp
, 2, 6, 0,
1630 0 /* See immediately above */)
1631 (from
, to
, targetdepth
, stopbefore
, oldstate
, commentstop
)
1632 Lisp_Object from
, to
, targetdepth
, stopbefore
, oldstate
, commentstop
;
1634 struct lisp_parse_state state
;
1637 if (!NILP (targetdepth
))
1639 CHECK_NUMBER (targetdepth
, 3);
1640 target
= XINT (targetdepth
);
1643 target
= -100000; /* We won't reach this depth */
1645 validate_region (&from
, &to
);
1646 scan_sexps_forward (&state
, XINT (from
), XINT (to
),
1647 target
, !NILP (stopbefore
), oldstate
,
1648 !NILP (commentstop
));
1650 SET_PT (state
.location
);
1652 return Fcons (make_number (state
.depth
),
1653 Fcons (state
.prevlevelstart
< 0 ? Qnil
: make_number (state
.prevlevelstart
),
1654 Fcons (state
.thislevelstart
< 0 ? Qnil
: make_number (state
.thislevelstart
),
1655 Fcons (state
.instring
>= 0 ? make_number (state
.instring
) : Qnil
,
1656 Fcons (state
.incomment
? Qt
: Qnil
,
1657 Fcons (state
.quoted
? Qt
: Qnil
,
1658 Fcons (make_number (state
.mindepth
),
1659 Fcons (state
.comstyle
? Qt
: Qnil
,
1666 register struct Lisp_Vector
*v
;
1668 /* Set this now, so first buffer creation can refer to it. */
1669 /* Make it nil before calling copy-syntax-table
1670 so that copy-syntax-table will know not to try to copy from garbage */
1671 Vstandard_syntax_table
= Qnil
;
1672 Vstandard_syntax_table
= Fcopy_syntax_table (Qnil
);
1674 v
= XVECTOR (Vstandard_syntax_table
);
1676 for (i
= 'a'; i
<= 'z'; i
++)
1677 XSETFASTINT (v
->contents
[i
], (int) Sword
);
1678 for (i
= 'A'; i
<= 'Z'; i
++)
1679 XSETFASTINT (v
->contents
[i
], (int) Sword
);
1680 for (i
= '0'; i
<= '9'; i
++)
1681 XSETFASTINT (v
->contents
[i
], (int) Sword
);
1682 XSETFASTINT (v
->contents
['$'], (int) Sword
);
1683 XSETFASTINT (v
->contents
['%'], (int) Sword
);
1685 XSETFASTINT (v
->contents
['('], (int) Sopen
+ (')' << 8));
1686 XSETFASTINT (v
->contents
[')'], (int) Sclose
+ ('(' << 8));
1687 XSETFASTINT (v
->contents
['['], (int) Sopen
+ (']' << 8));
1688 XSETFASTINT (v
->contents
[']'], (int) Sclose
+ ('[' << 8));
1689 XSETFASTINT (v
->contents
['{'], (int) Sopen
+ ('}' << 8));
1690 XSETFASTINT (v
->contents
['}'], (int) Sclose
+ ('{' << 8));
1691 XSETFASTINT (v
->contents
['"'], (int) Sstring
);
1692 XSETFASTINT (v
->contents
['\\'], (int) Sescape
);
1694 for (i
= 0; i
< 10; i
++)
1695 XSETFASTINT (v
->contents
["_-+*/&|<>="[i
]], (int) Ssymbol
);
1697 for (i
= 0; i
< 12; i
++)
1698 XSETFASTINT (v
->contents
[".,;:?!#@~^'`"[i
]], (int) Spunct
);
1703 Qsyntax_table_p
= intern ("syntax-table-p");
1704 staticpro (&Qsyntax_table_p
);
1706 DEFVAR_BOOL ("parse-sexp-ignore-comments", &parse_sexp_ignore_comments
,
1707 "Non-nil means `forward-sexp', etc., should treat comments as whitespace.");
1709 words_include_escapes
= 0;
1710 DEFVAR_BOOL ("words-include-escapes", &words_include_escapes
,
1711 "Non-nil means `forward-word', etc., should treat escape chars part of words.");
1713 defsubr (&Ssyntax_table_p
);
1714 defsubr (&Ssyntax_table
);
1715 defsubr (&Sstandard_syntax_table
);
1716 defsubr (&Scopy_syntax_table
);
1717 defsubr (&Sset_syntax_table
);
1718 defsubr (&Schar_syntax
);
1719 defsubr (&Smatching_paren
);
1720 defsubr (&Smodify_syntax_entry
);
1721 defsubr (&Sdescribe_syntax
);
1723 defsubr (&Sforward_word
);
1725 defsubr (&Sforward_comment
);
1726 defsubr (&Sscan_lists
);
1727 defsubr (&Sscan_sexps
);
1728 defsubr (&Sbackward_prefix_chars
);
1729 defsubr (&Sparse_partial_sexp
);