1 /* GNU Emacs routines to deal with syntax tables; also word and list parsing.
2 Copyright (C) 1985, 87, 93, 94, 95, 97, 1998, 1999 Free Software Foundation, Inc.
4 This file is part of GNU Emacs.
6 GNU Emacs is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
11 GNU Emacs is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GNU Emacs; see the file COPYING. If not, write to
18 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
29 /* Make syntax table lookup grant data in gl_state. */
30 #define SYNTAX_ENTRY_VIA_PROPERTY
33 #include "intervals.h"
35 /* We use these constants in place for comment-style and
36 string-ender-char to distinguish comments/strings started by
37 comment_fence and string_fence codes. */
39 #define ST_COMMENT_STYLE (256 + 1)
40 #define ST_STRING_STYLE (256 + 2)
43 Lisp_Object Qsyntax_table_p
, Qsyntax_table
, Qscan_error
;
45 int words_include_escapes
;
46 int parse_sexp_lookup_properties
;
48 /* Nonzero means `scan-sexps' treat all multibyte characters as symbol. */
49 int multibyte_syntax_as_symbol
;
51 /* Used as a temporary in SYNTAX_ENTRY and other macros in syntax.h,
52 if not compiled with GCC. No need to mark it, since it is used
53 only very temporarily. */
54 Lisp_Object syntax_temp
;
56 /* Non-zero means an open parenthesis in column 0 is always considered
57 to be the start of a defun. Zero means an open parenthesis in
58 column 0 has no special meaning. */
60 int open_paren_in_column_0_is_defun_start
;
62 /* This is the internal form of the parse state used in parse-partial-sexp. */
64 struct lisp_parse_state
66 int depth
; /* Depth at end of parsing. */
67 int instring
; /* -1 if not within string, else desired terminator. */
68 int incomment
; /* -1 if in unnestable comment else comment nesting */
69 int comstyle
; /* comment style a=0, or b=1, or ST_COMMENT_STYLE. */
70 int quoted
; /* Nonzero if just after an escape char at end of parsing */
71 int thislevelstart
; /* Char number of most recent start-of-expression at current level */
72 int prevlevelstart
; /* Char number of start of containing expression */
73 int location
; /* Char number at which parsing stopped. */
74 int mindepth
; /* Minimum depth seen while scanning. */
75 int comstr_start
; /* Position just after last comment/string starter. */
76 Lisp_Object levelstarts
; /* Char numbers of starts-of-expression
77 of levels (starting from outermost). */
80 /* These variables are a cache for finding the start of a defun.
81 find_start_pos is the place for which the defun start was found.
82 find_start_value is the defun start position found for it.
83 find_start_value_byte is the corresponding byte position.
84 find_start_buffer is the buffer it was found in.
85 find_start_begv is the BEGV value when it was found.
86 find_start_modiff is the value of MODIFF when it was found. */
88 static int find_start_pos
;
89 static int find_start_value
;
90 static int find_start_value_byte
;
91 static struct buffer
*find_start_buffer
;
92 static int find_start_begv
;
93 static int find_start_modiff
;
96 static int find_defun_start
P_ ((int, int));
97 static int back_comment
P_ ((int, int, int, int, int, int *, int *));
98 static int char_quoted
P_ ((int, int));
99 static Lisp_Object skip_chars
P_ ((int, int, Lisp_Object
, Lisp_Object
));
100 static Lisp_Object scan_lists
P_ ((int, int, int, int));
101 static void scan_sexps_forward
P_ ((struct lisp_parse_state
*,
103 int, Lisp_Object
, int));
106 struct gl_state_s gl_state
; /* Global state of syntax parser. */
108 INTERVAL
interval_of ();
109 #define INTERVALS_AT_ONCE 10 /* 1 + max-number of intervals
110 to scan to property-change. */
112 /* Update gl_state to an appropriate interval which contains CHARPOS. The
113 sign of COUNT give the relative position of CHARPOS wrt the previously
114 valid interval. If INIT, only [be]_property fields of gl_state are
115 valid at start, the rest is filled basing on OBJECT.
117 `gl_state.*_i' are the intervals, and CHARPOS is further in the search
118 direction than the intervals - or in an interval. We update the
119 current syntax-table basing on the property of this interval, and
120 update the interval to start further than CHARPOS - or be
121 NULL_INTERVAL. We also update lim_property to be the next value of
122 charpos to call this subroutine again - or be before/after the
123 start/end of OBJECT. */
126 update_syntax_table (charpos
, count
, init
, object
)
127 int charpos
, count
, init
;
130 Lisp_Object tmp_table
;
131 int cnt
= 0, invalidate
= 1;
136 gl_state
.start
= gl_state
.b_property
;
137 gl_state
.stop
= gl_state
.e_property
;
138 gl_state
.forward_i
= interval_of (charpos
, object
);
139 i
= gl_state
.backward_i
= gl_state
.forward_i
;
140 gl_state
.left_ok
= gl_state
.right_ok
= 1;
142 if (NULL_INTERVAL_P (i
))
144 /* interval_of updates only ->position of the return value, so
145 update the parents manually to speed up update_interval. */
146 while (!NULL_PARENT (i
))
148 if (AM_RIGHT_CHILD (i
))
149 INTERVAL_PARENT (i
)->position
= i
->position
150 - LEFT_TOTAL_LENGTH (i
) + TOTAL_LENGTH (i
) /* right end */
151 - TOTAL_LENGTH (INTERVAL_PARENT (i
))
152 + LEFT_TOTAL_LENGTH (INTERVAL_PARENT (i
));
154 INTERVAL_PARENT (i
)->position
= i
->position
- LEFT_TOTAL_LENGTH (i
)
156 i
= INTERVAL_PARENT (i
);
158 i
= gl_state
.forward_i
;
159 gl_state
.b_property
= i
->position
- 1 - gl_state
.offset
;
160 gl_state
.e_property
= INTERVAL_LAST_POS (i
) - gl_state
.offset
;
163 oldi
= i
= count
> 0 ? gl_state
.forward_i
: gl_state
.backward_i
;
165 /* We are guarantied to be called with CHARPOS either in i,
167 if (NULL_INTERVAL_P (i
))
168 error ("Error in syntax_table logic for to-the-end intervals");
169 else if (charpos
< i
->position
) /* Move left. */
172 error ("Error in syntax_table logic for intervals <-");
173 /* Update the interval. */
174 i
= update_interval (i
, charpos
);
175 if (oldi
->position
!= INTERVAL_LAST_POS (i
))
178 gl_state
.right_ok
= 1; /* Invalidate the other end. */
179 gl_state
.forward_i
= i
;
180 gl_state
.e_property
= INTERVAL_LAST_POS (i
) - gl_state
.offset
;
183 else if (charpos
>= INTERVAL_LAST_POS (i
)) /* Move right. */
186 error ("Error in syntax_table logic for intervals ->");
187 /* Update the interval. */
188 i
= update_interval (i
, charpos
);
189 if (i
->position
!= INTERVAL_LAST_POS (oldi
))
192 gl_state
.left_ok
= 1; /* Invalidate the other end. */
193 gl_state
.backward_i
= i
;
194 gl_state
.b_property
= i
->position
- 1 - gl_state
.offset
;
197 else if (count
> 0 ? gl_state
.right_ok
: gl_state
.left_ok
)
199 /* We do not need to recalculate tmp_table. */
200 tmp_table
= gl_state
.old_prop
;
204 tmp_table
= textget (i
->plist
, Qsyntax_table
);
207 invalidate
= !EQ (tmp_table
, gl_state
.old_prop
); /* Need to invalidate? */
209 if (invalidate
) /* Did not get to adjacent interval. */
210 { /* with the same table => */
211 /* invalidate the old range. */
214 gl_state
.backward_i
= i
;
215 gl_state
.left_ok
= 1; /* Invalidate the other end. */
216 gl_state
.b_property
= i
->position
- 1 - gl_state
.offset
;
220 gl_state
.forward_i
= i
;
221 gl_state
.right_ok
= 1; /* Invalidate the other end. */
222 gl_state
.e_property
= INTERVAL_LAST_POS (i
) - gl_state
.offset
;
226 gl_state
.current_syntax_table
= tmp_table
;
227 gl_state
.old_prop
= tmp_table
;
228 if (EQ (Fsyntax_table_p (tmp_table
), Qt
))
230 gl_state
.use_global
= 0;
232 else if (CONSP (tmp_table
))
234 gl_state
.use_global
= 1;
235 gl_state
.global_code
= tmp_table
;
239 gl_state
.use_global
= 0;
240 gl_state
.current_syntax_table
= current_buffer
->syntax_table
;
243 while (!NULL_INTERVAL_P (i
))
245 if (cnt
&& !EQ (tmp_table
, textget (i
->plist
, Qsyntax_table
)))
248 gl_state
.right_ok
= 0;
250 gl_state
.left_ok
= 0;
253 else if (cnt
== INTERVALS_AT_ONCE
)
256 gl_state
.right_ok
= 1;
258 gl_state
.left_ok
= 1;
262 i
= count
> 0 ? next_interval (i
) : previous_interval (i
);
264 if (NULL_INTERVAL_P (i
))
265 { /* This property goes to the end. */
267 gl_state
.e_property
= gl_state
.stop
;
269 gl_state
.b_property
= gl_state
.start
;
275 gl_state
.e_property
= i
->position
- gl_state
.offset
;
276 gl_state
.forward_i
= i
;
280 gl_state
.b_property
= i
->position
+ LENGTH (i
) - 1 - gl_state
.offset
;
281 gl_state
.backward_i
= i
;
286 /* Returns TRUE if char at CHARPOS is quoted.
287 Global syntax-table data should be set up already to be good at CHARPOS
288 or after. On return global syntax data is good for lookup at CHARPOS. */
291 char_quoted (charpos
, bytepos
)
292 register int charpos
, bytepos
;
294 register enum syntaxcode code
;
295 register int beg
= BEGV
;
296 register int quoted
= 0;
299 DEC_BOTH (charpos
, bytepos
);
301 while (bytepos
>= beg
)
303 UPDATE_SYNTAX_TABLE_BACKWARD (charpos
);
304 code
= SYNTAX (FETCH_CHAR (bytepos
));
305 if (! (code
== Scharquote
|| code
== Sescape
))
308 DEC_BOTH (charpos
, bytepos
);
312 UPDATE_SYNTAX_TABLE (orig
);
316 /* Return the bytepos one character after BYTEPOS.
317 We assume that BYTEPOS is not at the end of the buffer. */
320 inc_bytepos (bytepos
)
323 if (NILP (current_buffer
->enable_multibyte_characters
))
330 /* Return the bytepos one character before BYTEPOS.
331 We assume that BYTEPOS is not at the start of the buffer. */
334 dec_bytepos (bytepos
)
337 if (NILP (current_buffer
->enable_multibyte_characters
))
344 /* Find a defun-start that is the last one before POS (or nearly the last).
345 We record what we find, so that another call in the same area
346 can return the same value right away.
348 There is no promise at which position the global syntax data is
349 valid on return from the subroutine, so the caller should explicitly
350 update the global data. */
353 find_defun_start (pos
, pos_byte
)
356 int opoint
= PT
, opoint_byte
= PT_BYTE
;
358 /* Use previous finding, if it's valid and applies to this inquiry. */
359 if (current_buffer
== find_start_buffer
360 /* Reuse the defun-start even if POS is a little farther on.
361 POS might be in the next defun, but that's ok.
362 Our value may not be the best possible, but will still be usable. */
363 && pos
<= find_start_pos
+ 1000
364 && pos
>= find_start_value
365 && BEGV
== find_start_begv
366 && MODIFF
== find_start_modiff
)
367 return find_start_value
;
369 /* Back up to start of line. */
370 scan_newline (pos
, pos_byte
, BEGV
, BEGV_BYTE
, -1, 1);
372 /* We optimize syntax-table lookup for rare updates. Thus we accept
373 only those `^\s(' which are good in global _and_ text-property
375 gl_state
.current_syntax_table
= current_buffer
->syntax_table
;
376 gl_state
.use_global
= 0;
377 if (open_paren_in_column_0_is_defun_start
)
381 /* Open-paren at start of line means we may have found our
383 if (SYNTAX (FETCH_CHAR (PT_BYTE
)) == Sopen
)
385 SETUP_SYNTAX_TABLE (PT
+ 1, -1); /* Try again... */
386 if (SYNTAX (FETCH_CHAR (PT_BYTE
)) == Sopen
)
388 /* Now fallback to the default value. */
389 gl_state
.current_syntax_table
= current_buffer
->syntax_table
;
390 gl_state
.use_global
= 0;
392 /* Move to beg of previous line. */
393 scan_newline (PT
, PT_BYTE
, BEGV
, BEGV_BYTE
, -2, 1);
397 /* Record what we found, for the next try. */
398 find_start_value
= PT
;
399 find_start_value_byte
= PT_BYTE
;
400 find_start_buffer
= current_buffer
;
401 find_start_modiff
= MODIFF
;
402 find_start_begv
= BEGV
;
403 find_start_pos
= pos
;
405 TEMP_SET_PT_BOTH (opoint
, opoint_byte
);
407 return find_start_value
;
410 /* Return the SYNTAX_COMEND_FIRST of the character before POS, POS_BYTE. */
413 prev_char_comend_first (pos
, pos_byte
)
418 DEC_BOTH (pos
, pos_byte
);
419 UPDATE_SYNTAX_TABLE_BACKWARD (pos
);
420 c
= FETCH_CHAR (pos_byte
);
421 val
= SYNTAX_COMEND_FIRST (c
);
422 UPDATE_SYNTAX_TABLE_FORWARD (pos
+ 1);
426 /* Return the SYNTAX_COMSTART_FIRST of the character before POS, POS_BYTE. */
429 prev_char_comstart_first (pos
, pos_byte
)
434 DEC_BOTH (pos
, pos_byte
);
435 UPDATE_SYNTAX_TABLE_BACKWARD (pos
);
436 c
= FETCH_CHAR (pos_byte
);
437 val
= SYNTAX_COMSTART_FIRST (c
);
438 UPDATE_SYNTAX_TABLE_FORWARD (pos
+ 1);
442 /* Checks whether charpos FROM is at the end of a comment.
443 FROM_BYTE is the bytepos corresponding to FROM.
444 Do not move back before STOP.
446 Return a positive value if we find a comment ending at FROM/FROM_BYTE;
449 If successful, store the charpos of the comment's beginning
450 into *CHARPOS_PTR, and the bytepos into *BYTEPOS_PTR.
452 Global syntax data remains valid for backward search starting at
453 the returned value (or at FROM, if the search was not successful). */
456 back_comment (from
, from_byte
, stop
, comnested
, comstyle
, charpos_ptr
, bytepos_ptr
)
457 int from
, from_byte
, stop
;
458 int comnested
, comstyle
;
459 int *charpos_ptr
, *bytepos_ptr
;
461 /* Look back, counting the parity of string-quotes,
462 and recording the comment-starters seen.
463 When we reach a safe place, assume that's not in a string;
464 then step the main scan to the earliest comment-starter seen
465 an even number of string quotes away from the safe place.
467 OFROM[I] is position of the earliest comment-starter seen
468 which is I+2X quotes from the comment-end.
469 PARITY is current parity of quotes from the comment end. */
470 int string_style
= -1; /* Presumed outside of any string. */
471 int string_lossage
= 0;
472 int comment_end
= from
;
473 int comment_end_byte
= from_byte
;
474 int comstart_pos
= 0;
476 /* Value that PARITY had, when we reached the position
478 int scanstart
= from
- 1;
479 /* Place where the containing defun starts,
480 or 0 if we didn't come across it yet. */
482 int defun_start_byte
= 0;
483 register enum syntaxcode code
;
484 int nesting
= 1; /* current comment nesting */
487 /* At beginning of range to scan, we're outside of strings;
488 that determines quote parity to the comment-end. */
493 /* Move back and examine a character. */
494 DEC_BOTH (from
, from_byte
);
495 UPDATE_SYNTAX_TABLE_BACKWARD (from
);
497 c
= FETCH_CHAR (from_byte
);
500 /* If this char is the second of a 2-char comment end sequence,
501 back up and give the pair the appropriate syntax. */
502 if (from
> stop
&& SYNTAX_COMEND_SECOND (c
)
503 && prev_char_comend_first (from
, from_byte
))
506 DEC_BOTH (from
, from_byte
);
507 UPDATE_SYNTAX_TABLE_BACKWARD (from
);
508 c
= FETCH_CHAR (from_byte
);
511 /* If this char starts a 2-char comment start sequence,
512 treat it like a 1-char comment starter. */
513 if (from
< scanstart
&& SYNTAX_COMSTART_FIRST (c
))
515 temp_byte
= inc_bytepos (from_byte
);
516 UPDATE_SYNTAX_TABLE_FORWARD (from
+ 1);
517 if (SYNTAX_COMSTART_SECOND (FETCH_CHAR (temp_byte
))
518 && comstyle
== SYNTAX_COMMENT_STYLE (FETCH_CHAR (temp_byte
)))
520 UPDATE_SYNTAX_TABLE_BACKWARD (from
);
522 else if (code
== Scomment
&& comstyle
!= SYNTAX_COMMENT_STYLE (c
))
523 /* Ignore comment starters of a different style. */
526 /* Ignore escaped characters, except comment-enders. */
527 if (code
!= Sendcomment
&& char_quoted (from
, from_byte
))
534 c
= (code
== Sstring_fence
? ST_STRING_STYLE
: ST_COMMENT_STYLE
);
536 /* Track parity of quotes. */
537 if (string_style
== -1)
538 /* Entering a string. */
540 else if (string_style
== c
)
541 /* Leaving the string. */
544 /* If we have two kinds of string delimiters.
545 There's no way to grok this scanning backwards. */
550 /* We've already checked that it is the relevant comstyle. */
551 if (string_style
!= -1 || string_lossage
)
552 /* There are odd string quotes involved, so let's be careful.
553 Test case in Pascal: " { " a { " } */
556 if (comnested
&& --nesting
<= 0)
557 /* nested comments have to be balanced, so we don't need to
558 keep looking for earlier ones. We use here the same (slightly
559 incorrect) reasoning as below: since it is followed by uniform
560 paired string quotes, this comment-start has to be outside of
561 strings, else the comment-end itself would be inside a string. */
564 /* Record comment-starters according to that
565 quote-parity to the comment-end. */
567 comstart_byte
= from_byte
;
571 if (SYNTAX_COMMENT_STYLE (FETCH_CHAR (from_byte
)) == comstyle
)
572 /* This is the same style of comment ender as ours. */
577 /* Anything before that can't count because it would match
578 this comment-ender rather than ours. */
579 from
= stop
; /* Break out of the loop. */
584 /* Assume a defun-start point is outside of strings. */
585 if (open_paren_in_column_0_is_defun_start
587 || (temp_byte
= dec_bytepos (from_byte
),
588 FETCH_CHAR (temp_byte
) == '\n')))
591 defun_start_byte
= from_byte
;
592 from
= stop
; /* Break out of the loop. */
601 if (comstart_pos
== 0)
604 from_byte
= comment_end_byte
;
605 UPDATE_SYNTAX_TABLE_FORWARD (comment_end
- 1);
607 /* If the earliest comment starter
608 is followed by uniform paired string quotes or none,
609 we know it can't be inside a string
610 since if it were then the comment ender would be inside one.
611 So it does start a comment. Skip back to it. */
615 from_byte
= comstart_byte
;
616 /* Globals are correct now. */
620 struct lisp_parse_state state
;
622 /* We had two kinds of string delimiters mixed up
623 together. Decode this going forwards.
624 Scan fwd from a known safe place (beginning-of-defun)
625 to the one in question; this records where we
626 last passed a comment starter. */
627 /* If we did not already find the defun start, find it now. */
628 if (defun_start
== 0)
630 defun_start
= find_defun_start (comment_end
, comment_end_byte
);
631 defun_start_byte
= find_start_value_byte
;
635 scan_sexps_forward (&state
,
636 defun_start
, defun_start_byte
,
637 comment_end
, -10000, 0, Qnil
, 0);
638 defun_start
= comment_end
;
639 if (state
.incomment
== (comnested
? 1 : -1)
640 && state
.comstyle
== comstyle
)
641 from
= state
.comstr_start
;
646 /* If comment_end is inside some other comment, maybe ours
647 is nested, so we need to try again from within the
648 surrounding comment. Example: { a (* " *) */
650 /* FIXME: We should advance by one or two chars. */
651 defun_start
= state
.comstr_start
+ 2;
652 defun_start_byte
= CHAR_TO_BYTE (defun_start
);
655 } while (defun_start
< comment_end
);
657 from_byte
= CHAR_TO_BYTE (from
);
658 UPDATE_SYNTAX_TABLE_FORWARD (from
- 1);
663 *bytepos_ptr
= from_byte
;
665 return (from
== comment_end
) ? -1 : from
;
668 DEFUN ("syntax-table-p", Fsyntax_table_p
, Ssyntax_table_p
, 1, 1, 0,
669 "Return t if OBJECT is a syntax table.\n\
670 Currently, any char-table counts as a syntax table.")
674 if (CHAR_TABLE_P (object
)
675 && EQ (XCHAR_TABLE (object
)->purpose
, Qsyntax_table
))
681 check_syntax_table (obj
)
684 if (!(CHAR_TABLE_P (obj
)
685 && EQ (XCHAR_TABLE (obj
)->purpose
, Qsyntax_table
)))
686 wrong_type_argument (Qsyntax_table_p
, obj
);
689 DEFUN ("syntax-table", Fsyntax_table
, Ssyntax_table
, 0, 0, 0,
690 "Return the current syntax table.\n\
691 This is the one specified by the current buffer.")
694 return current_buffer
->syntax_table
;
697 DEFUN ("standard-syntax-table", Fstandard_syntax_table
,
698 Sstandard_syntax_table
, 0, 0, 0,
699 "Return the standard syntax table.\n\
700 This is the one used for new buffers.")
703 return Vstandard_syntax_table
;
706 DEFUN ("copy-syntax-table", Fcopy_syntax_table
, Scopy_syntax_table
, 0, 1, 0,
707 "Construct a new syntax table and return it.\n\
708 It is a copy of the TABLE, which defaults to the standard syntax table.")
715 check_syntax_table (table
);
717 table
= Vstandard_syntax_table
;
719 copy
= Fcopy_sequence (table
);
721 /* Only the standard syntax table should have a default element.
722 Other syntax tables should inherit from parents instead. */
723 XCHAR_TABLE (copy
)->defalt
= Qnil
;
725 /* Copied syntax tables should all have parents.
726 If we copied one with no parent, such as the standard syntax table,
727 use the standard syntax table as the copy's parent. */
728 if (NILP (XCHAR_TABLE (copy
)->parent
))
729 Fset_char_table_parent (copy
, Vstandard_syntax_table
);
733 DEFUN ("set-syntax-table", Fset_syntax_table
, Sset_syntax_table
, 1, 1, 0,
734 "Select a new syntax table for the current buffer.\n\
735 One argument, a syntax table.")
740 check_syntax_table (table
);
741 current_buffer
->syntax_table
= table
;
742 /* Indicate that this buffer now has a specified syntax table. */
743 idx
= PER_BUFFER_VAR_IDX (syntax_table
);
744 SET_PER_BUFFER_VALUE_P (current_buffer
, idx
, 1);
748 /* Convert a letter which signifies a syntax code
749 into the code it signifies.
750 This is used by modify-syntax-entry, and other things. */
752 unsigned char syntax_spec_code
[0400] =
753 { 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
754 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
755 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
756 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
757 (char) Swhitespace
, (char) Scomment_fence
, (char) Sstring
, 0377,
758 (char) Smath
, 0377, 0377, (char) Squote
,
759 (char) Sopen
, (char) Sclose
, 0377, 0377,
760 0377, (char) Swhitespace
, (char) Spunct
, (char) Scharquote
,
761 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
762 0377, 0377, 0377, 0377,
763 (char) Scomment
, 0377, (char) Sendcomment
, 0377,
764 (char) Sinherit
, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* @, A ... */
765 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
766 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword
,
767 0377, 0377, 0377, 0377, (char) Sescape
, 0377, 0377, (char) Ssymbol
,
768 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* `, a, ... */
769 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
770 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword
,
771 0377, 0377, 0377, 0377, (char) Sstring_fence
, 0377, 0377, 0377
774 /* Indexed by syntax code, give the letter that describes it. */
776 char syntax_code_spec
[16] =
778 ' ', '.', 'w', '_', '(', ')', '\'', '\"', '$', '\\', '/', '<', '>', '@',
782 /* Indexed by syntax code, give the object (cons of syntax code and
783 nil) to be stored in syntax table. Since these objects can be
784 shared among syntax tables, we generate them in advance. By
785 sharing objects, the function `describe-syntax' can give a more
787 static Lisp_Object Vsyntax_code_object
;
790 /* Look up the value for CHARACTER in syntax table TABLE's parent
791 and its parents. SYNTAX_ENTRY calls this, when TABLE itself has nil
792 for CHARACTER. It's actually used only when not compiled with GCC. */
795 syntax_parent_lookup (table
, character
)
803 table
= XCHAR_TABLE (table
)->parent
;
807 value
= XCHAR_TABLE (table
)->contents
[character
];
813 DEFUN ("char-syntax", Fchar_syntax
, Schar_syntax
, 1, 1, 0,
814 "Return the syntax code of CHARACTER, described by a character.\n\
815 For example, if CHARACTER is a word constituent,\n\
816 the character `w' is returned.\n\
817 The characters that correspond to various syntax codes\n\
818 are listed in the documentation of `modify-syntax-entry'.")
820 Lisp_Object character
;
823 gl_state
.current_syntax_table
= current_buffer
->syntax_table
;
825 gl_state
.use_global
= 0;
826 CHECK_NUMBER (character
, 0);
827 char_int
= XINT (character
);
828 return make_number (syntax_code_spec
[(int) SYNTAX (char_int
)]);
831 DEFUN ("matching-paren", Fmatching_paren
, Smatching_paren
, 1, 1, 0,
832 "Return the matching parenthesis of CHARACTER, or nil if none.")
834 Lisp_Object character
;
837 gl_state
.current_syntax_table
= current_buffer
->syntax_table
;
838 gl_state
.use_global
= 0;
839 CHECK_NUMBER (character
, 0);
840 char_int
= XINT (character
);
841 code
= SYNTAX (char_int
);
842 if (code
== Sopen
|| code
== Sclose
)
843 return SYNTAX_MATCH (char_int
);
847 /* This comment supplies the doc string for modify-syntax-entry,
848 for make-docfile to see. We cannot put this in the real DEFUN
849 due to limits in the Unix cpp.
851 DEFUN ("modify-syntax-entry", foo, bar, 2, 3, 0,
852 "Set syntax for character CHAR according to string S.\n\
853 The syntax is changed only for table TABLE, which defaults to\n\
854 the current buffer's syntax table.\n\
855 The first character of S should be one of the following:\n\
856 Space or - whitespace syntax. w word constituent.\n\
857 _ symbol constituent. . punctuation.\n\
858 ( open-parenthesis. ) close-parenthesis.\n\
859 \" string quote. \\ escape.\n\
860 $ paired delimiter. ' expression quote or prefix operator.\n\
861 < comment starter. > comment ender.\n\
862 / character-quote. @ inherit from `standard-syntax-table'.\n\
864 Only single-character comment start and end sequences are represented thus.\n\
865 Two-character sequences are represented as described below.\n\
866 The second character of S is the matching parenthesis,\n\
867 used only if the first character is `(' or `)'.\n\
868 Any additional characters are flags.\n\
869 Defined flags are the characters 1, 2, 3, 4, b, p, and n.\n\
870 1 means CHAR is the start of a two-char comment start sequence.\n\
871 2 means CHAR is the second character of such a sequence.\n\
872 3 means CHAR is the start of a two-char comment end sequence.\n\
873 4 means CHAR is the second character of such a sequence.\n\
875 There can be up to two orthogonal comment sequences. This is to support\n\
876 language modes such as C++. By default, all comment sequences are of style\n\
877 a, but you can set the comment sequence style to b (on the second character\n\
878 of a comment-start, or the first character of a comment-end sequence) using\n\
880 b means CHAR is part of comment sequence b.\n\
881 n means CHAR is part of a nestable comment sequence.\n\
883 p means CHAR is a prefix character for `backward-prefix-chars';\n\
884 such characters are treated as whitespace when they occur\n\
885 between expressions.")
889 DEFUN ("modify-syntax-entry", Fmodify_syntax_entry
, Smodify_syntax_entry
, 2, 3,
890 /* I really don't know why this is interactive
891 help-form should at least be made useful whilst reading the second arg
893 "cSet syntax for character: \nsSet syntax for %s to: ",
894 0 /* See immediately above */)
895 (c
, newentry
, syntax_table
)
896 Lisp_Object c
, newentry
, syntax_table
;
898 register unsigned char *p
;
899 register enum syntaxcode code
;
904 CHECK_STRING (newentry
, 1);
906 if (NILP (syntax_table
))
907 syntax_table
= current_buffer
->syntax_table
;
909 check_syntax_table (syntax_table
);
911 p
= XSTRING (newentry
)->data
;
912 code
= (enum syntaxcode
) syntax_spec_code
[*p
++];
913 if (((int) code
& 0377) == 0377)
914 error ("invalid syntax description letter: %c", p
[-1]);
916 if (code
== Sinherit
)
918 SET_RAW_SYNTAX_ENTRY (syntax_table
, XINT (c
), Qnil
);
925 int character
= (STRING_CHAR_AND_LENGTH
926 (p
, STRING_BYTES (XSTRING (newentry
)) - 1, len
));
927 XSETINT (match
, character
);
928 if (XFASTINT (match
) == ' ')
968 if (val
< XVECTOR (Vsyntax_code_object
)->size
&& NILP (match
))
969 newentry
= XVECTOR (Vsyntax_code_object
)->contents
[val
];
971 /* Since we can't use a shared object, let's make a new one. */
972 newentry
= Fcons (make_number (val
), match
);
974 SET_RAW_SYNTAX_ENTRY (syntax_table
, XINT (c
), newentry
);
979 /* Dump syntax table to buffer in human-readable format */
982 describe_syntax (value
)
985 register enum syntaxcode code
;
986 char desc
, start1
, start2
, end1
, end2
, prefix
, comstyle
, comnested
;
988 Lisp_Object first
, match_lisp
;
990 Findent_to (make_number (16), make_number (1));
994 insert_string ("default\n");
998 if (CHAR_TABLE_P (value
))
1000 insert_string ("deeper char-table ...\n");
1006 insert_string ("invalid\n");
1010 first
= XCAR (value
);
1011 match_lisp
= XCDR (value
);
1013 if (!INTEGERP (first
) || !(NILP (match_lisp
) || INTEGERP (match_lisp
)))
1015 insert_string ("invalid\n");
1019 code
= (enum syntaxcode
) (XINT (first
) & 0377);
1020 start1
= (XINT (first
) >> 16) & 1;
1021 start2
= (XINT (first
) >> 17) & 1;
1022 end1
= (XINT (first
) >> 18) & 1;
1023 end2
= (XINT (first
) >> 19) & 1;
1024 prefix
= (XINT (first
) >> 20) & 1;
1025 comstyle
= (XINT (first
) >> 21) & 1;
1026 comnested
= (XINT (first
) >> 22) & 1;
1028 if ((int) code
< 0 || (int) code
>= (int) Smax
)
1030 insert_string ("invalid");
1033 desc
= syntax_code_spec
[(int) code
];
1035 str
[0] = desc
, str
[1] = 0;
1038 if (NILP (match_lisp
))
1041 insert_char (XINT (match_lisp
));
1060 insert_string ("\twhich means: ");
1062 switch (SWITCH_ENUM_CAST (code
))
1065 insert_string ("whitespace"); break;
1067 insert_string ("punctuation"); break;
1069 insert_string ("word"); break;
1071 insert_string ("symbol"); break;
1073 insert_string ("open"); break;
1075 insert_string ("close"); break;
1077 insert_string ("quote"); break;
1079 insert_string ("string"); break;
1081 insert_string ("math"); break;
1083 insert_string ("escape"); break;
1085 insert_string ("charquote"); break;
1087 insert_string ("comment"); break;
1089 insert_string ("endcomment"); break;
1091 insert_string ("invalid");
1095 if (!NILP (match_lisp
))
1097 insert_string (", matches ");
1098 insert_char (XINT (match_lisp
));
1102 insert_string (",\n\t is the first character of a comment-start sequence");
1104 insert_string (",\n\t is the second character of a comment-start sequence");
1107 insert_string (",\n\t is the first character of a comment-end sequence");
1109 insert_string (",\n\t is the second character of a comment-end sequence");
1111 insert_string (" (comment style b)");
1113 insert_string (" (nestable)");
1116 insert_string (",\n\t is a prefix character for `backward-prefix-chars'");
1118 insert_string ("\n");
1122 describe_syntax_1 (vector
)
1125 struct buffer
*old
= current_buffer
;
1126 set_buffer_internal (XBUFFER (Vstandard_output
));
1127 describe_vector (vector
, Qnil
, describe_syntax
, 0, Qnil
, Qnil
, (int *) 0, 0);
1128 while (! NILP (XCHAR_TABLE (vector
)->parent
))
1130 vector
= XCHAR_TABLE (vector
)->parent
;
1131 insert_string ("\nThe parent syntax table is:");
1132 describe_vector (vector
, Qnil
, describe_syntax
, 0, Qnil
, Qnil
,
1136 call0 (intern ("help-mode"));
1137 set_buffer_internal (old
);
1141 DEFUN ("describe-syntax", Fdescribe_syntax
, Sdescribe_syntax
, 0, 0, "",
1142 "Describe the syntax specifications in the syntax table.\n\
1143 The descriptions are inserted in a buffer, which is then displayed.")
1146 internal_with_output_to_temp_buffer
1147 ("*Help*", describe_syntax_1
, current_buffer
->syntax_table
);
1152 int parse_sexp_ignore_comments
;
1154 /* Return the position across COUNT words from FROM.
1155 If that many words cannot be found before the end of the buffer, return 0.
1156 COUNT negative means scan backward and stop at word beginning. */
1159 scan_words (from
, count
)
1160 register int from
, count
;
1162 register int beg
= BEGV
;
1163 register int end
= ZV
;
1164 register int from_byte
= CHAR_TO_BYTE (from
);
1165 register enum syntaxcode code
;
1171 SETUP_SYNTAX_TABLE (from
, count
);
1182 UPDATE_SYNTAX_TABLE_FORWARD (from
);
1183 ch0
= FETCH_CHAR (from_byte
);
1184 code
= SYNTAX (ch0
);
1185 INC_BOTH (from
, from_byte
);
1186 if (words_include_escapes
1187 && (code
== Sescape
|| code
== Scharquote
))
1192 /* Now CH0 is a character which begins a word and FROM is the
1193 position of the next character. */
1196 if (from
== end
) break;
1197 UPDATE_SYNTAX_TABLE_FORWARD (from
);
1198 ch1
= FETCH_CHAR (from_byte
);
1199 code
= SYNTAX (ch1
);
1200 if (!(words_include_escapes
1201 && (code
== Sescape
|| code
== Scharquote
)))
1202 if (code
!= Sword
|| WORD_BOUNDARY_P (ch0
, ch1
))
1204 INC_BOTH (from
, from_byte
);
1218 DEC_BOTH (from
, from_byte
);
1219 UPDATE_SYNTAX_TABLE_BACKWARD (from
);
1220 ch1
= FETCH_CHAR (from_byte
);
1221 code
= SYNTAX (ch1
);
1222 if (words_include_escapes
1223 && (code
== Sescape
|| code
== Scharquote
))
1228 /* Now CH1 is a character which ends a word and FROM is the
1236 temp_byte
= dec_bytepos (from_byte
);
1237 UPDATE_SYNTAX_TABLE_BACKWARD (from
);
1238 ch0
= FETCH_CHAR (temp_byte
);
1239 code
= SYNTAX (ch0
);
1240 if (!(words_include_escapes
1241 && (code
== Sescape
|| code
== Scharquote
)))
1242 if (code
!= Sword
|| WORD_BOUNDARY_P (ch0
, ch1
))
1244 DEC_BOTH (from
, from_byte
);
1255 DEFUN ("forward-word", Fforward_word
, Sforward_word
, 1, 1, "p",
1256 "Move point forward ARG words (backward if ARG is negative).\n\
1257 Normally returns t.\n\
1258 If an edge of the buffer or a field boundary is reached, point is left there\n\
1259 and the function returns nil. Field boundaries are not noticed if\n\
1260 `inhibit-field-text-motion' is non-nil.")
1265 CHECK_NUMBER (count
, 0);
1267 val
= orig_val
= scan_words (PT
, XINT (count
));
1269 val
= XINT (count
) > 0 ? ZV
: BEGV
;
1271 /* Avoid jumping out of an input field. */
1272 val
= XFASTINT (Fconstrain_to_field (make_number (val
), make_number (PT
),
1276 return val
== orig_val
? Qt
: Qnil
;
1279 Lisp_Object
skip_chars ();
1281 DEFUN ("skip-chars-forward", Fskip_chars_forward
, Sskip_chars_forward
, 1, 2, 0,
1282 "Move point forward, stopping before a char not in STRING, or at pos LIM.\n\
1283 STRING is like the inside of a `[...]' in a regular expression\n\
1284 except that `]' is never special and `\\' quotes `^', `-' or `\\'\n\
1285 (but not as the end of a range; quoting is never needed there).\n\
1286 Thus, with arg \"a-zA-Z\", this skips letters stopping before first nonletter.\n\
1287 With arg \"^a-zA-Z\", skips nonletters stopping before first letter.\n\
1288 Returns the distance traveled, either zero or positive.")
1290 Lisp_Object string
, lim
;
1292 return skip_chars (1, 0, string
, lim
);
1295 DEFUN ("skip-chars-backward", Fskip_chars_backward
, Sskip_chars_backward
, 1, 2, 0,
1296 "Move point backward, stopping after a char not in STRING, or at pos LIM.\n\
1297 See `skip-chars-forward' for details.\n\
1298 Returns the distance traveled, either zero or negative.")
1300 Lisp_Object string
, lim
;
1302 return skip_chars (0, 0, string
, lim
);
1305 DEFUN ("skip-syntax-forward", Fskip_syntax_forward
, Sskip_syntax_forward
, 1, 2, 0,
1306 "Move point forward across chars in specified syntax classes.\n\
1307 SYNTAX is a string of syntax code characters.\n\
1308 Stop before a char whose syntax is not in SYNTAX, or at position LIM.\n\
1309 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.\n\
1310 This function returns the distance traveled, either zero or positive.")
1312 Lisp_Object syntax
, lim
;
1314 return skip_chars (1, 1, syntax
, lim
);
1317 DEFUN ("skip-syntax-backward", Fskip_syntax_backward
, Sskip_syntax_backward
, 1, 2, 0,
1318 "Move point backward across chars in specified syntax classes.\n\
1319 SYNTAX is a string of syntax code characters.\n\
1320 Stop on reaching a char whose syntax is not in SYNTAX, or at position LIM.\n\
1321 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.\n\
1322 This function returns the distance traveled, either zero or negative.")
1324 Lisp_Object syntax
, lim
;
1326 return skip_chars (0, 1, syntax
, lim
);
1330 skip_chars (forwardp
, syntaxp
, string
, lim
)
1331 int forwardp
, syntaxp
;
1332 Lisp_Object string
, lim
;
1334 register unsigned int c
;
1336 unsigned char fastmap
[0400];
1337 /* If SYNTAXP is 0, STRING may contain multi-byte form of characters
1338 of which codes don't fit in FASTMAP. In that case, we set the
1339 first byte of multibyte form (i.e. base leading-code) in FASTMAP
1340 and set the actual ranges of characters in CHAR_RANGES. In the
1341 form "X-Y" of STRING, both X and Y must belong to the same
1342 character set because a range striding across character sets is
1345 int n_char_ranges
= 0;
1347 register int i
, i_byte
;
1348 int multibyte
= !NILP (current_buffer
->enable_multibyte_characters
);
1349 int string_multibyte
;
1354 CHECK_STRING (string
, 0);
1355 char_ranges
= (int *) alloca (XSTRING (string
)->size
* (sizeof (int)) * 2);
1356 string_multibyte
= STRING_MULTIBYTE (string
);
1357 str
= XSTRING (string
)->data
;
1358 size_byte
= STRING_BYTES (XSTRING (string
));
1360 /* Adjust the multibyteness of the string to that of the buffer. */
1361 if (multibyte
!= string_multibyte
)
1366 nbytes
= count_size_as_multibyte (XSTRING (string
)->data
,
1367 XSTRING (string
)->size
);
1369 nbytes
= XSTRING (string
)->size
;
1370 if (nbytes
!= size_byte
)
1372 str
= (unsigned char *) alloca (nbytes
);
1373 copy_text (XSTRING (string
)->data
, str
, size_byte
,
1374 string_multibyte
, multibyte
);
1380 XSETINT (lim
, forwardp
? ZV
: BEGV
);
1382 CHECK_NUMBER_COERCE_MARKER (lim
, 0);
1384 /* In any case, don't allow scan outside bounds of buffer. */
1385 if (XINT (lim
) > ZV
)
1386 XSETFASTINT (lim
, ZV
);
1387 if (XINT (lim
) < BEGV
)
1388 XSETFASTINT (lim
, BEGV
);
1390 bzero (fastmap
, sizeof fastmap
);
1394 if (i_byte
< size_byte
1395 && XSTRING (string
)->data
[0] == '^')
1397 negate
= 1; i_byte
++;
1400 /* Find the characters specified and set their elements of fastmap.
1401 If syntaxp, each character counts as itself.
1402 Otherwise, handle backslashes and ranges specially. */
1404 while (i_byte
< size_byte
)
1406 int c_leading_code
= str
[i_byte
];
1408 c
= STRING_CHAR_AND_LENGTH (str
+ i_byte
, size_byte
- i_byte
, len
);
1412 fastmap
[syntax_spec_code
[c
& 0377]] = 1;
1417 if (i_byte
== size_byte
)
1420 c_leading_code
= str
[i_byte
];
1421 c
= STRING_CHAR_AND_LENGTH (str
+i_byte
, size_byte
-i_byte
, len
);
1424 if (i_byte
< size_byte
1425 && str
[i_byte
] == '-')
1427 unsigned int c2
, c2_leading_code
;
1429 /* Skip over the dash. */
1432 if (i_byte
== size_byte
)
1435 /* Get the end of the range. */
1436 c2_leading_code
= str
[i_byte
];
1437 c2
=STRING_CHAR_AND_LENGTH (str
+i_byte
, size_byte
-i_byte
, len
);
1440 if (SINGLE_BYTE_CHAR_P (c
))
1442 if (! SINGLE_BYTE_CHAR_P (c2
))
1444 /* Handle a range such as \177-\377 in multibyte
1445 mode. Split that into two ranges, the low
1446 one ending at 0237, and the high one starting
1447 at the smallest character in the charset of
1448 C2 and ending at C2. */
1449 int charset
= CHAR_CHARSET (c2
);
1450 int c1
= MAKE_CHAR (charset
, 0, 0);
1452 fastmap
[c2_leading_code
] = 1;
1453 char_ranges
[n_char_ranges
++] = c1
;
1454 char_ranges
[n_char_ranges
++] = c2
;
1463 else if (! SINGLE_BYTE_CHAR_P (c2
))
1465 if (c_leading_code
!= c2_leading_code
)
1466 error ("Invalid character range: %s",
1467 XSTRING (string
)->data
);
1470 fastmap
[c_leading_code
] = 1;
1471 char_ranges
[n_char_ranges
++] = c
;
1472 char_ranges
[n_char_ranges
++] = c2
;
1478 if (SINGLE_BYTE_CHAR_P (c
))
1482 fastmap
[c_leading_code
] = 1;
1483 char_ranges
[n_char_ranges
++] = c
;
1484 char_ranges
[n_char_ranges
++] = c
;
1490 /* If ^ was the first character, complement the fastmap. In
1491 addition, as all multibyte characters have possibility of
1492 matching, set all entries for base leading codes, which is
1493 harmless even if SYNTAXP is 1. */
1496 for (i
= 0; i
< sizeof fastmap
; i
++)
1498 if (!multibyte
|| !BASE_LEADING_CODE_P (i
))
1505 int start_point
= PT
;
1507 int pos_byte
= PT_BYTE
;
1512 SETUP_SYNTAX_TABLE (pos
, forwardp
? 1 : -1);
1517 if (pos
< XINT (lim
))
1518 while (fastmap
[(int) SYNTAX (FETCH_CHAR (pos_byte
))])
1520 /* Since we already checked for multibyteness,
1521 avoid using INC_BOTH which checks again. */
1524 if (pos
>= XINT (lim
))
1526 UPDATE_SYNTAX_TABLE_FORWARD (pos
);
1531 while (pos
< XINT (lim
)
1532 && fastmap
[(int) SYNTAX (FETCH_BYTE (pos
))])
1535 UPDATE_SYNTAX_TABLE_FORWARD (pos
);
1543 while (pos
> XINT (lim
))
1545 int savepos
= pos_byte
;
1546 /* Since we already checked for multibyteness,
1547 avoid using DEC_BOTH which checks again. */
1550 UPDATE_SYNTAX_TABLE_BACKWARD (pos
);
1551 if (!fastmap
[(int) SYNTAX (FETCH_CHAR (pos_byte
))])
1561 if (pos
> XINT (lim
))
1562 while (fastmap
[(int) SYNTAX (FETCH_BYTE (pos
- 1))])
1565 if (pos
<= XINT (lim
))
1567 UPDATE_SYNTAX_TABLE_BACKWARD (pos
- 1);
1577 while (pos
< XINT (lim
) && fastmap
[(c
= FETCH_BYTE (pos_byte
))])
1579 /* If we are looking at a multibyte character, we
1580 must look up the character in the table
1581 CHAR_RANGES. If there's no data in the table,
1582 that character is not what we want to skip. */
1583 if (BASE_LEADING_CODE_P (c
)
1584 && (c
= FETCH_MULTIBYTE_CHAR (pos_byte
),
1585 ! SINGLE_BYTE_CHAR_P (c
)))
1587 /* The following code do the right thing even if
1588 n_char_ranges is zero (i.e. no data in
1590 for (i
= 0; i
< n_char_ranges
; i
+= 2)
1591 if (c
>= char_ranges
[i
] && c
<= char_ranges
[i
+ 1])
1593 if (!(negate
^ (i
< n_char_ranges
)))
1596 INC_BOTH (pos
, pos_byte
);
1599 while (pos
< XINT (lim
) && fastmap
[FETCH_BYTE (pos
)])
1605 while (pos
> XINT (lim
))
1607 int prev_pos_byte
= pos_byte
;
1609 DEC_POS (prev_pos_byte
);
1610 if (!fastmap
[(c
= FETCH_BYTE (prev_pos_byte
))])
1613 /* See the comment in the previous similar code. */
1614 if (BASE_LEADING_CODE_P (c
)
1615 && (c
= FETCH_MULTIBYTE_CHAR (prev_pos_byte
),
1616 ! SINGLE_BYTE_CHAR_P (c
)))
1618 for (i
= 0; i
< n_char_ranges
; i
+= 2)
1619 if (c
>= char_ranges
[i
] && c
<= char_ranges
[i
+ 1])
1621 if (!(negate
^ (i
< n_char_ranges
)))
1625 pos_byte
= prev_pos_byte
;
1628 while (pos
> XINT (lim
) && fastmap
[FETCH_BYTE (pos
- 1)])
1633 #if 0 /* Not needed now that a position in mid-character
1634 cannot be specified in Lisp. */
1636 /* INC_POS or DEC_POS might have moved POS over LIM. */
1637 && (forwardp
? (pos
> XINT (lim
)) : (pos
< XINT (lim
))))
1644 SET_PT_BOTH (pos
, pos_byte
);
1647 return make_number (PT
- start_point
);
1651 /* Jump over a comment, assuming we are at the beginning of one.
1652 FROM is the current position.
1653 FROM_BYTE is the bytepos corresponding to FROM.
1654 Do not move past STOP (a charpos).
1655 The comment over which we have to jump is of style STYLE
1656 (either SYNTAX_COMMENT_STYLE(foo) or ST_COMMENT_STYLE).
1657 NESTING should be positive to indicate the nesting at the beginning
1658 for nested comments and should be zero or negative else.
1659 ST_COMMENT_STYLE cannot be nested.
1660 PREV_SYNTAX is the SYNTAX_WITH_FLAGS of the previous character
1661 (or 0 If the search cannot start in the middle of a two-character).
1663 If successful, return 1 and store the charpos of the comment's end
1664 into *CHARPOS_PTR and the corresponding bytepos into *BYTEPOS_PTR.
1665 Else, return 0 and store the charpos STOP into *CHARPOS_PTR, the
1666 corresponding bytepos into *BYTEPOS_PTR and the current nesting
1667 (as defined for state.incomment) in *INCOMMENT_PTR.
1669 The comment end is the last character of the comment rather than the
1670 character just after the comment.
1672 Global syntax data is assumed to initially be valid for FROM and
1673 remains valid for forward search starting at the returned position. */
1676 forw_comment (from
, from_byte
, stop
, nesting
, style
, prev_syntax
,
1677 charpos_ptr
, bytepos_ptr
, incomment_ptr
)
1678 int from
, from_byte
, stop
;
1679 int nesting
, style
, prev_syntax
;
1680 int *charpos_ptr
, *bytepos_ptr
, *incomment_ptr
;
1683 register enum syntaxcode code
;
1684 register int syntax
;
1686 if (nesting
<= 0) nesting
= -1;
1688 /* Enter the loop in the middle so that we find
1689 a 2-char comment ender if we start in the middle of it. */
1690 syntax
= prev_syntax
;
1691 if (syntax
!= 0) goto forw_incomment
;
1697 *incomment_ptr
= nesting
;
1698 *charpos_ptr
= from
;
1699 *bytepos_ptr
= from_byte
;
1702 c
= FETCH_CHAR (from_byte
);
1703 syntax
= SYNTAX_WITH_FLAGS (c
);
1704 code
= syntax
& 0xff;
1705 if (code
== Sendcomment
1706 && SYNTAX_FLAGS_COMMENT_STYLE (syntax
) == style
1708 /* we have encountered a comment end of the same style
1709 as the comment sequence which began this comment
1712 if (code
== Scomment_fence
1713 && style
== ST_COMMENT_STYLE
)
1714 /* we have encountered a comment end of the same style
1715 as the comment sequence which began this comment
1720 && SYNTAX_FLAGS_COMMENT_STYLE (syntax
) == style
)
1721 /* we have encountered a nested comment of the same style
1722 as the comment sequence which began this comment section */
1724 INC_BOTH (from
, from_byte
);
1725 UPDATE_SYNTAX_TABLE_FORWARD (from
);
1728 if (from
< stop
&& SYNTAX_FLAGS_COMEND_FIRST (syntax
)
1729 && SYNTAX_FLAGS_COMMENT_STYLE (syntax
) == style
1730 && (c1
= FETCH_CHAR (from_byte
),
1731 SYNTAX_COMEND_SECOND (c1
)))
1734 /* we have encountered a comment end of the same style
1735 as the comment sequence which began this comment
1740 INC_BOTH (from
, from_byte
);
1741 UPDATE_SYNTAX_TABLE_FORWARD (from
);
1746 && SYNTAX_FLAGS_COMSTART_FIRST (syntax
)
1747 && (c1
= FETCH_CHAR (from_byte
),
1748 SYNTAX_COMMENT_STYLE (c1
) == style
1749 && SYNTAX_COMSTART_SECOND (c1
)))
1750 /* we have encountered a nested comment of the same style
1751 as the comment sequence which began this comment
1754 INC_BOTH (from
, from_byte
);
1755 UPDATE_SYNTAX_TABLE_FORWARD (from
);
1759 *charpos_ptr
= from
;
1760 *bytepos_ptr
= from_byte
;
1764 DEFUN ("forward-comment", Fforward_comment
, Sforward_comment
, 1, 1, 0,
1765 "Move forward across up to N comments. If N is negative, move backward.\n\
1766 Stop scanning if we find something other than a comment or whitespace.\n\
1767 Set point to where scanning stops.\n\
1768 If N comments are found as expected, with nothing except whitespace\n\
1769 between them, return t; otherwise return nil.")
1777 register enum syntaxcode code
;
1778 int comstyle
= 0; /* style of comment encountered */
1779 int comnested
= 0; /* whether the comment is nestable or not */
1782 int out_charpos
, out_bytepos
;
1785 CHECK_NUMBER (count
, 0);
1786 count1
= XINT (count
);
1787 stop
= count1
> 0 ? ZV
: BEGV
;
1793 from_byte
= PT_BYTE
;
1795 SETUP_SYNTAX_TABLE (from
, count1
);
1804 SET_PT_BOTH (from
, from_byte
);
1808 c
= FETCH_CHAR (from_byte
);
1810 comstart_first
= SYNTAX_COMSTART_FIRST (c
);
1811 comnested
= SYNTAX_COMMENT_NESTED (c
);
1812 comstyle
= SYNTAX_COMMENT_STYLE (c
);
1813 INC_BOTH (from
, from_byte
);
1814 UPDATE_SYNTAX_TABLE_FORWARD (from
);
1815 if (from
< stop
&& comstart_first
1816 && (c1
= FETCH_CHAR (from_byte
),
1817 SYNTAX_COMSTART_SECOND (c1
)))
1819 /* We have encountered a comment start sequence and we
1820 are ignoring all text inside comments. We must record
1821 the comment style this sequence begins so that later,
1822 only a comment end of the same style actually ends
1823 the comment section. */
1825 comstyle
= SYNTAX_COMMENT_STYLE (c1
);
1826 comnested
= comnested
|| SYNTAX_COMMENT_NESTED (c1
);
1827 INC_BOTH (from
, from_byte
);
1828 UPDATE_SYNTAX_TABLE_FORWARD (from
);
1830 /* FIXME: here we ignore 2-char endcomments while we don't
1831 when going backwards. */
1833 while (code
== Swhitespace
|| code
== Sendcomment
);
1835 if (code
== Scomment_fence
)
1836 comstyle
= ST_COMMENT_STYLE
;
1837 else if (code
!= Scomment
)
1840 DEC_BOTH (from
, from_byte
);
1841 SET_PT_BOTH (from
, from_byte
);
1844 /* We're at the start of a comment. */
1845 found
= forw_comment (from
, from_byte
, stop
, comnested
, comstyle
, 0,
1846 &out_charpos
, &out_bytepos
, &dummy
);
1847 from
= out_charpos
; from_byte
= out_bytepos
;
1851 SET_PT_BOTH (from
, from_byte
);
1854 INC_BOTH (from
, from_byte
);
1855 UPDATE_SYNTAX_TABLE_FORWARD (from
);
1856 /* We have skipped one comment. */
1864 int quoted
, comstart_second
;
1868 SET_PT_BOTH (BEGV
, BEGV_BYTE
);
1873 DEC_BOTH (from
, from_byte
);
1874 /* char_quoted does UPDATE_SYNTAX_TABLE_BACKWARD (from). */
1875 quoted
= char_quoted (from
, from_byte
);
1878 DEC_BOTH (from
, from_byte
);
1881 c
= FETCH_CHAR (from_byte
);
1884 comnested
= SYNTAX_COMMENT_NESTED (c
);
1885 if (code
== Sendcomment
)
1886 comstyle
= SYNTAX_COMMENT_STYLE (c
);
1887 comstart_second
= SYNTAX_COMSTART_SECOND (c
);
1888 if (from
> stop
&& SYNTAX_COMEND_SECOND (c
)
1889 && prev_char_comend_first (from
, from_byte
)
1890 && !char_quoted (from
- 1, dec_bytepos (from_byte
)))
1892 /* We must record the comment style encountered so that
1893 later, we can match only the proper comment begin
1894 sequence of the same style. */
1895 DEC_BOTH (from
, from_byte
);
1897 /* Calling char_quoted, above, set up global syntax position
1898 at the new value of FROM. */
1899 c1
= FETCH_CHAR (from_byte
);
1900 comstyle
= SYNTAX_COMMENT_STYLE (c1
);
1901 comnested
= comnested
|| SYNTAX_COMMENT_NESTED (c1
);
1903 if (from
> stop
&& comstart_second
1904 && prev_char_comstart_first (from
, from_byte
)
1905 && !char_quoted (from
- 1, dec_bytepos (from_byte
)))
1908 DEC_BOTH (from
, from_byte
);
1911 if (code
== Scomment_fence
)
1913 /* Skip until first preceding unquoted comment_fence. */
1914 int found
= 0, ini
= from
, ini_byte
= from_byte
;
1918 DEC_BOTH (from
, from_byte
);
1921 UPDATE_SYNTAX_TABLE_BACKWARD (from
);
1922 c
= FETCH_CHAR (from_byte
);
1923 if (SYNTAX (c
) == Scomment_fence
1924 && !char_quoted (from
, from_byte
))
1932 from
= ini
; /* Set point to ini + 1. */
1933 from_byte
= ini_byte
;
1937 else if (code
== Sendcomment
)
1939 found
= back_comment (from
, from_byte
, stop
, comnested
, comstyle
,
1940 &out_charpos
, &out_bytepos
);
1943 #if 0 /* cc-mode (and maybe others) relies on the bogus behavior. */
1944 /* Failure: we should go back to the end of this
1945 not-quite-endcomment. */
1946 if (SYNTAX(c
) != code
)
1947 /* It was a two-char Sendcomment. */
1948 INC_BOTH (from
, from_byte
);
1953 /* We have skipped one comment. */
1954 from
= out_charpos
, from_byte
= out_bytepos
;
1957 else if (code
!= Swhitespace
&& code
!= Scomment
)
1961 INC_BOTH (from
, from_byte
);
1962 SET_PT_BOTH (from
, from_byte
);
1970 SET_PT_BOTH (from
, from_byte
);
1975 /* Return syntax code of character C if C is a single byte character
1976 or `multibyte_symbol_p' is zero. Otherwise, retrun Ssymbol. */
1978 #define SYNTAX_WITH_MULTIBYTE_CHECK(c) \
1979 ((SINGLE_BYTE_CHAR_P (c) || !multibyte_symbol_p) \
1980 ? SYNTAX (c) : Ssymbol)
1983 scan_lists (from
, count
, depth
, sexpflag
)
1985 int count
, depth
, sexpflag
;
1988 register int stop
= count
> 0 ? ZV
: BEGV
;
1993 register enum syntaxcode code
, temp_code
;
1994 int min_depth
= depth
; /* Err out if depth gets less than this. */
1995 int comstyle
= 0; /* style of comment encountered */
1996 int comnested
= 0; /* whether the comment is nestable or not */
1998 int last_good
= from
;
2001 int out_bytepos
, out_charpos
;
2003 int multibyte_symbol_p
= sexpflag
&& multibyte_syntax_as_symbol
;
2005 if (depth
> 0) min_depth
= 0;
2007 if (from
> ZV
) from
= ZV
;
2008 if (from
< BEGV
) from
= BEGV
;
2010 from_byte
= CHAR_TO_BYTE (from
);
2015 SETUP_SYNTAX_TABLE (from
, count
);
2020 int comstart_first
, prefix
;
2021 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2022 c
= FETCH_CHAR (from_byte
);
2023 code
= SYNTAX_WITH_MULTIBYTE_CHECK (c
);
2024 comstart_first
= SYNTAX_COMSTART_FIRST (c
);
2025 comnested
= SYNTAX_COMMENT_NESTED (c
);
2026 comstyle
= SYNTAX_COMMENT_STYLE (c
);
2027 prefix
= SYNTAX_PREFIX (c
);
2028 if (depth
== min_depth
)
2030 INC_BOTH (from
, from_byte
);
2031 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2032 if (from
< stop
&& comstart_first
2033 && SYNTAX_COMSTART_SECOND (FETCH_CHAR (from_byte
))
2034 && parse_sexp_ignore_comments
)
2036 /* we have encountered a comment start sequence and we
2037 are ignoring all text inside comments. We must record
2038 the comment style this sequence begins so that later,
2039 only a comment end of the same style actually ends
2040 the comment section */
2042 c1
= FETCH_CHAR (from_byte
);
2043 comstyle
= SYNTAX_COMMENT_STYLE (c1
);
2044 comnested
= comnested
|| SYNTAX_COMMENT_NESTED (c1
);
2045 INC_BOTH (from
, from_byte
);
2046 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2052 switch (SWITCH_ENUM_CAST (code
))
2056 if (from
== stop
) goto lose
;
2057 INC_BOTH (from
, from_byte
);
2058 /* treat following character as a word constituent */
2061 if (depth
|| !sexpflag
) break;
2062 /* This word counts as a sexp; return at end of it. */
2065 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2067 /* Some compilers can't handle this inside the switch. */
2068 c
= FETCH_CHAR (from_byte
);
2069 temp
= SYNTAX_WITH_MULTIBYTE_CHECK (c
);
2074 INC_BOTH (from
, from_byte
);
2075 if (from
== stop
) goto lose
;
2084 INC_BOTH (from
, from_byte
);
2088 case Scomment_fence
:
2089 comstyle
= ST_COMMENT_STYLE
;
2092 if (!parse_sexp_ignore_comments
) break;
2093 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2094 found
= forw_comment (from
, from_byte
, stop
,
2095 comnested
, comstyle
, 0,
2096 &out_charpos
, &out_bytepos
, &dummy
);
2097 from
= out_charpos
, from_byte
= out_bytepos
;
2104 INC_BOTH (from
, from_byte
);
2105 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2111 if (from
!= stop
&& c
== FETCH_CHAR (from_byte
))
2113 INC_BOTH (from
, from_byte
);
2123 if (!++depth
) goto done
;
2128 if (!--depth
) goto done
;
2129 if (depth
< min_depth
)
2130 Fsignal (Qscan_error
,
2131 Fcons (build_string ("Containing expression ends prematurely"),
2132 Fcons (make_number (last_good
),
2133 Fcons (make_number (from
), Qnil
))));
2138 temp_pos
= dec_bytepos (from_byte
);
2139 stringterm
= FETCH_CHAR (temp_pos
);
2142 if (from
>= stop
) goto lose
;
2143 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2144 c
= FETCH_CHAR (from_byte
);
2147 : SYNTAX_WITH_MULTIBYTE_CHECK (c
) == Sstring_fence
)
2150 /* Some compilers can't handle this inside the switch. */
2151 temp
= SYNTAX_WITH_MULTIBYTE_CHECK (c
);
2156 INC_BOTH (from
, from_byte
);
2158 INC_BOTH (from
, from_byte
);
2160 INC_BOTH (from
, from_byte
);
2161 if (!depth
&& sexpflag
) goto done
;
2166 /* Reached end of buffer. Error if within object, return nil if between */
2167 if (depth
) goto lose
;
2172 /* End of object reached */
2181 DEC_BOTH (from
, from_byte
);
2182 UPDATE_SYNTAX_TABLE_BACKWARD (from
);
2183 c
= FETCH_CHAR (from_byte
);
2184 code
= SYNTAX_WITH_MULTIBYTE_CHECK (c
);
2185 if (depth
== min_depth
)
2188 comnested
= SYNTAX_COMMENT_NESTED (c
);
2189 if (code
== Sendcomment
)
2190 comstyle
= SYNTAX_COMMENT_STYLE (c
);
2191 if (from
> stop
&& SYNTAX_COMEND_SECOND (c
)
2192 && prev_char_comend_first (from
, from_byte
)
2193 && parse_sexp_ignore_comments
)
2195 /* We must record the comment style encountered so that
2196 later, we can match only the proper comment begin
2197 sequence of the same style. */
2198 DEC_BOTH (from
, from_byte
);
2199 UPDATE_SYNTAX_TABLE_BACKWARD (from
);
2201 c1
= FETCH_CHAR (from_byte
);
2202 comstyle
= SYNTAX_COMMENT_STYLE (c1
);
2203 comnested
= comnested
|| SYNTAX_COMMENT_NESTED (c1
);
2206 /* Quoting turns anything except a comment-ender
2207 into a word character. Note that this cannot be true
2208 if we decremented FROM in the if-statement above. */
2209 if (code
!= Sendcomment
&& char_quoted (from
, from_byte
))
2211 else if (SYNTAX_PREFIX (c
))
2214 switch (SWITCH_ENUM_CAST (code
))
2220 if (depth
|| !sexpflag
) break;
2221 /* This word counts as a sexp; count object finished
2222 after passing it. */
2225 temp_pos
= from_byte
;
2226 if (! NILP (current_buffer
->enable_multibyte_characters
))
2230 UPDATE_SYNTAX_TABLE_BACKWARD (from
- 1);
2231 c1
= FETCH_CHAR (temp_pos
);
2232 temp_code
= SYNTAX_WITH_MULTIBYTE_CHECK (c1
);
2233 /* Don't allow comment-end to be quoted. */
2234 if (temp_code
== Sendcomment
)
2236 quoted
= char_quoted (from
- 1, temp_pos
);
2239 DEC_BOTH (from
, from_byte
);
2240 temp_pos
= dec_bytepos (temp_pos
);
2241 UPDATE_SYNTAX_TABLE_BACKWARD (from
- 1);
2243 c1
= FETCH_CHAR (temp_pos
);
2244 temp_code
= SYNTAX_WITH_MULTIBYTE_CHECK (c1
);
2245 if (! (quoted
|| temp_code
== Sword
2246 || temp_code
== Ssymbol
2247 || temp_code
== Squote
))
2249 DEC_BOTH (from
, from_byte
);
2256 temp_pos
= dec_bytepos (from_byte
);
2257 UPDATE_SYNTAX_TABLE_BACKWARD (from
- 1);
2258 if (from
!= stop
&& c
== FETCH_CHAR (temp_pos
))
2259 DEC_BOTH (from
, from_byte
);
2268 if (!++depth
) goto done2
;
2273 if (!--depth
) goto done2
;
2274 if (depth
< min_depth
)
2275 Fsignal (Qscan_error
,
2276 Fcons (build_string ("Containing expression ends prematurely"),
2277 Fcons (make_number (last_good
),
2278 Fcons (make_number (from
), Qnil
))));
2282 if (!parse_sexp_ignore_comments
)
2284 found
= back_comment (from
, from_byte
, stop
, comnested
, comstyle
,
2285 &out_charpos
, &out_bytepos
);
2286 /* FIXME: if found == -1, then it really wasn't a comment-end.
2287 For single-char Sendcomment, we can't do much about it apart
2288 from skipping the char.
2289 For 2-char endcomments, we could try again, taking both
2290 chars as separate entities, but it's a lot of trouble
2291 for very little gain, so we don't bother either. -sm */
2293 from
= out_charpos
, from_byte
= out_bytepos
;
2296 case Scomment_fence
:
2300 DEC_BOTH (from
, from_byte
);
2301 if (from
== stop
) goto lose
;
2302 UPDATE_SYNTAX_TABLE_BACKWARD (from
);
2303 if (!char_quoted (from
, from_byte
)
2304 && (c
= FETCH_CHAR (from_byte
),
2305 SYNTAX_WITH_MULTIBYTE_CHECK (c
) == code
))
2308 if (code
== Sstring_fence
&& !depth
&& sexpflag
) goto done2
;
2312 stringterm
= FETCH_CHAR (from_byte
);
2315 if (from
== stop
) goto lose
;
2316 temp_pos
= from_byte
;
2317 if (! NILP (current_buffer
->enable_multibyte_characters
))
2321 UPDATE_SYNTAX_TABLE_BACKWARD (from
- 1);
2322 if (!char_quoted (from
- 1, temp_pos
)
2323 && stringterm
== FETCH_CHAR (temp_pos
))
2325 DEC_BOTH (from
, from_byte
);
2327 DEC_BOTH (from
, from_byte
);
2328 if (!depth
&& sexpflag
) goto done2
;
2333 /* Reached start of buffer. Error if within object, return nil if between */
2334 if (depth
) goto lose
;
2345 XSETFASTINT (val
, from
);
2349 Fsignal (Qscan_error
,
2350 Fcons (build_string ("Unbalanced parentheses"),
2351 Fcons (make_number (last_good
),
2352 Fcons (make_number (from
), Qnil
))));
2357 DEFUN ("scan-lists", Fscan_lists
, Sscan_lists
, 3, 3, 0,
2358 "Scan from character number FROM by COUNT lists.\n\
2359 Returns the character number of the position thus found.\n\
2361 If DEPTH is nonzero, paren depth begins counting from that value,\n\
2362 only places where the depth in parentheses becomes zero\n\
2363 are candidates for stopping; COUNT such places are counted.\n\
2364 Thus, a positive value for DEPTH means go out levels.\n\
2366 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.\n\
2368 If the beginning or end of (the accessible part of) the buffer is reached\n\
2369 and the depth is wrong, an error is signaled.\n\
2370 If the depth is right but the count is not used up, nil is returned.")
2371 (from
, count
, depth
)
2372 Lisp_Object from
, count
, depth
;
2374 CHECK_NUMBER (from
, 0);
2375 CHECK_NUMBER (count
, 1);
2376 CHECK_NUMBER (depth
, 2);
2378 return scan_lists (XINT (from
), XINT (count
), XINT (depth
), 0);
2381 DEFUN ("scan-sexps", Fscan_sexps
, Sscan_sexps
, 2, 2, 0,
2382 "Scan from character number FROM by COUNT balanced expressions.\n\
2383 If COUNT is negative, scan backwards.\n\
2384 Returns the character number of the position thus found.\n\
2386 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.\n\
2388 If the beginning or end of (the accessible part of) the buffer is reached\n\
2389 in the middle of a parenthetical grouping, an error is signaled.\n\
2390 If the beginning or end is reached between groupings\n\
2391 but before count is used up, nil is returned.")
2393 Lisp_Object from
, count
;
2395 CHECK_NUMBER (from
, 0);
2396 CHECK_NUMBER (count
, 1);
2398 return scan_lists (XINT (from
), XINT (count
), 0, 1);
2401 DEFUN ("backward-prefix-chars", Fbackward_prefix_chars
, Sbackward_prefix_chars
,
2403 "Move point backward over any number of chars with prefix syntax.\n\
2404 This includes chars with \"quote\" or \"prefix\" syntax (' or p).")
2409 int opoint_byte
= PT_BYTE
;
2411 int pos_byte
= PT_BYTE
;
2416 SET_PT_BOTH (opoint
, opoint_byte
);
2421 SETUP_SYNTAX_TABLE (pos
, -1);
2423 DEC_BOTH (pos
, pos_byte
);
2425 while (!char_quoted (pos
, pos_byte
)
2426 /* Previous statement updates syntax table. */
2427 && ((c
= FETCH_CHAR (pos_byte
), SYNTAX (c
) == Squote
)
2428 || SYNTAX_PREFIX (c
)))
2431 opoint_byte
= pos_byte
;
2434 DEC_BOTH (pos
, pos_byte
);
2437 SET_PT_BOTH (opoint
, opoint_byte
);
2442 /* Parse forward from FROM / FROM_BYTE to END,
2443 assuming that FROM has state OLDSTATE (nil means FROM is start of function),
2444 and return a description of the state of the parse at END.
2445 If STOPBEFORE is nonzero, stop at the start of an atom.
2446 If COMMENTSTOP is 1, stop at the start of a comment.
2447 If COMMENTSTOP is -1, stop at the start or end of a comment,
2448 after the beginning of a string, or after the end of a string. */
2451 scan_sexps_forward (stateptr
, from
, from_byte
, end
, targetdepth
,
2452 stopbefore
, oldstate
, commentstop
)
2453 struct lisp_parse_state
*stateptr
;
2455 int end
, targetdepth
, stopbefore
;
2456 Lisp_Object oldstate
;
2459 struct lisp_parse_state state
;
2461 register enum syntaxcode code
;
2464 struct level
{ int last
, prev
; };
2465 struct level levelstart
[100];
2466 register struct level
*curlevel
= levelstart
;
2467 struct level
*endlevel
= levelstart
+ 100;
2468 register int depth
; /* Paren depth of current scanning location.
2469 level - levelstart equals this except
2470 when the depth becomes negative. */
2471 int mindepth
; /* Lowest DEPTH value seen. */
2472 int start_quoted
= 0; /* Nonzero means starting after a char quote */
2474 int prev_from
; /* Keep one character before FROM. */
2476 int prev_from_syntax
;
2477 int boundary_stop
= commentstop
== -1;
2480 int out_bytepos
, out_charpos
;
2484 prev_from_byte
= from_byte
;
2486 DEC_BOTH (prev_from
, prev_from_byte
);
2488 /* Use this macro instead of `from++'. */
2490 do { prev_from = from; \
2491 prev_from_byte = from_byte; \
2493 = SYNTAX_WITH_FLAGS (FETCH_CHAR (prev_from_byte)); \
2494 INC_BOTH (from, from_byte); \
2495 UPDATE_SYNTAX_TABLE_FORWARD (from); \
2501 if (NILP (oldstate
))
2504 state
.instring
= -1;
2505 state
.incomment
= 0;
2506 state
.comstyle
= 0; /* comment style a by default. */
2507 state
.comstr_start
= -1; /* no comment/string seen. */
2511 tem
= Fcar (oldstate
);
2517 oldstate
= Fcdr (oldstate
);
2518 oldstate
= Fcdr (oldstate
);
2519 oldstate
= Fcdr (oldstate
);
2520 tem
= Fcar (oldstate
);
2521 /* Check whether we are inside string_fence-style string: */
2522 state
.instring
= (!NILP (tem
)
2523 ? (INTEGERP (tem
) ? XINT (tem
) : ST_STRING_STYLE
)
2526 oldstate
= Fcdr (oldstate
);
2527 tem
= Fcar (oldstate
);
2528 state
.incomment
= (!NILP (tem
)
2529 ? (INTEGERP (tem
) ? XINT (tem
) : -1)
2532 oldstate
= Fcdr (oldstate
);
2533 tem
= Fcar (oldstate
);
2534 start_quoted
= !NILP (tem
);
2536 /* if the eighth element of the list is nil, we are in comment
2537 style a. If it is non-nil, we are in comment style b */
2538 oldstate
= Fcdr (oldstate
);
2539 oldstate
= Fcdr (oldstate
);
2540 tem
= Fcar (oldstate
);
2541 state
.comstyle
= NILP (tem
) ? 0 : (EQ (tem
, Qsyntax_table
)
2542 ? ST_COMMENT_STYLE
: 1);
2544 oldstate
= Fcdr (oldstate
);
2545 tem
= Fcar (oldstate
);
2546 state
.comstr_start
= NILP (tem
) ? -1 : XINT (tem
) ;
2547 oldstate
= Fcdr (oldstate
);
2548 tem
= Fcar (oldstate
);
2549 while (!NILP (tem
)) /* >= second enclosing sexps. */
2551 /* curlevel++->last ran into compiler bug on Apollo */
2552 curlevel
->last
= XINT (Fcar (tem
));
2553 if (++curlevel
== endlevel
)
2554 curlevel
--; /* error ("Nesting too deep for parser"); */
2555 curlevel
->prev
= -1;
2556 curlevel
->last
= -1;
2563 curlevel
->prev
= -1;
2564 curlevel
->last
= -1;
2566 SETUP_SYNTAX_TABLE (prev_from
, 1);
2567 prev_from_syntax
= SYNTAX_WITH_FLAGS (FETCH_CHAR (prev_from_byte
));
2568 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2570 /* Enter the loop at a place appropriate for initial state. */
2572 if (state
.incomment
)
2573 goto startincomment
;
2574 if (state
.instring
>= 0)
2576 nofence
= state
.instring
!= ST_STRING_STYLE
;
2578 goto startquotedinstring
;
2581 else if (start_quoted
)
2584 #if 0 /* This seems to be redundant with the identical code above. */
2585 SETUP_SYNTAX_TABLE (prev_from
, 1);
2586 prev_from_syntax
= SYNTAX_WITH_FLAGS (FETCH_CHAR (prev_from_byte
));
2587 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2593 code
= prev_from_syntax
& 0xff;
2595 if (code
== Scomment
)
2597 state
.comstyle
= SYNTAX_FLAGS_COMMENT_STYLE (prev_from_syntax
);
2598 state
.incomment
= (SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax
) ?
2600 state
.comstr_start
= prev_from
;
2602 else if (code
== Scomment_fence
)
2604 /* Record the comment style we have entered so that only
2605 the comment-end sequence of the same style actually
2606 terminates the comment section. */
2607 state
.comstyle
= ST_COMMENT_STYLE
;
2608 state
.incomment
= -1;
2609 state
.comstr_start
= prev_from
;
2612 else if (from
< end
)
2613 if (SYNTAX_FLAGS_COMSTART_FIRST (prev_from_syntax
))
2614 if (c1
= FETCH_CHAR (from_byte
),
2615 SYNTAX_COMSTART_SECOND (c1
))
2616 /* Duplicate code to avoid a complex if-expression
2617 which causes trouble for the SGI compiler. */
2619 /* Record the comment style we have entered so that only
2620 the comment-end sequence of the same style actually
2621 terminates the comment section. */
2622 state
.comstyle
= SYNTAX_COMMENT_STYLE (FETCH_CHAR (from_byte
));
2623 comnested
= SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax
);
2624 comnested
= comnested
|| SYNTAX_COMMENT_NESTED (c1
);
2625 state
.incomment
= comnested
? 1 : -1;
2626 state
.comstr_start
= prev_from
;
2631 if (SYNTAX_FLAGS_PREFIX (prev_from_syntax
))
2633 switch (SWITCH_ENUM_CAST (code
))
2637 if (stopbefore
) goto stop
; /* this arg means stop at sexp start */
2638 curlevel
->last
= prev_from
;
2640 if (from
== end
) goto endquoted
;
2643 /* treat following character as a word constituent */
2646 if (stopbefore
) goto stop
; /* this arg means stop at sexp start */
2647 curlevel
->last
= prev_from
;
2651 /* Some compilers can't handle this inside the switch. */
2652 temp
= SYNTAX (FETCH_CHAR (from_byte
));
2658 if (from
== end
) goto endquoted
;
2670 curlevel
->prev
= curlevel
->last
;
2674 if (commentstop
|| boundary_stop
) goto done
;
2676 /* The (from == BEGV) test was to enter the loop in the middle so
2677 that we find a 2-char comment ender even if we start in the
2678 middle of it. We don't want to do that if we're just at the
2679 beginning of the comment (think of (*) ... (*)). */
2680 found
= forw_comment (from
, from_byte
, end
,
2681 state
.incomment
, state
.comstyle
,
2682 (from
== BEGV
|| from
< state
.comstr_start
+ 3)
2683 ? 0 : prev_from_syntax
,
2684 &out_charpos
, &out_bytepos
, &state
.incomment
);
2685 from
= out_charpos
; from_byte
= out_bytepos
;
2686 /* Beware! prev_from and friends are invalid now.
2687 Luckily, the `done' doesn't use them and the INC_FROM
2688 sets them to a sane value without looking at them. */
2689 if (!found
) goto done
;
2691 state
.incomment
= 0;
2692 state
.comstyle
= 0; /* reset the comment style */
2693 if (boundary_stop
) goto done
;
2697 if (stopbefore
) goto stop
; /* this arg means stop at sexp start */
2699 /* curlevel++->last ran into compiler bug on Apollo */
2700 curlevel
->last
= prev_from
;
2701 if (++curlevel
== endlevel
)
2702 curlevel
--; /* error ("Nesting too deep for parser"); */
2703 curlevel
->prev
= -1;
2704 curlevel
->last
= -1;
2705 if (targetdepth
== depth
) goto done
;
2710 if (depth
< mindepth
)
2712 if (curlevel
!= levelstart
)
2714 curlevel
->prev
= curlevel
->last
;
2715 if (targetdepth
== depth
) goto done
;
2720 state
.comstr_start
= from
- 1;
2721 if (stopbefore
) goto stop
; /* this arg means stop at sexp start */
2722 curlevel
->last
= prev_from
;
2723 state
.instring
= (code
== Sstring
2724 ? (FETCH_CHAR (prev_from_byte
))
2726 if (boundary_stop
) goto done
;
2729 nofence
= state
.instring
!= ST_STRING_STYLE
;
2735 if (from
>= end
) goto done
;
2736 c
= FETCH_CHAR (from_byte
);
2737 /* Some compilers can't handle this inside the switch. */
2740 /* Check TEMP here so that if the char has
2741 a syntax-table property which says it is NOT
2742 a string character, it does not end the string. */
2743 if (nofence
&& c
== state
.instring
&& temp
== Sstring
)
2749 if (!nofence
) goto string_end
;
2754 startquotedinstring
:
2755 if (from
>= end
) goto endquoted
;
2761 state
.instring
= -1;
2762 curlevel
->prev
= curlevel
->last
;
2764 if (boundary_stop
) goto done
;
2773 stop
: /* Here if stopping before start of sexp. */
2774 from
= prev_from
; /* We have just fetched the char that starts it; */
2775 goto done
; /* but return the position before it. */
2780 state
.depth
= depth
;
2781 state
.mindepth
= mindepth
;
2782 state
.thislevelstart
= curlevel
->prev
;
2783 state
.prevlevelstart
2784 = (curlevel
== levelstart
) ? -1 : (curlevel
- 1)->last
;
2785 state
.location
= from
;
2786 state
.levelstarts
= Qnil
;
2787 while (--curlevel
>= levelstart
)
2788 state
.levelstarts
= Fcons (make_number (curlevel
->last
),
2795 /* This comment supplies the doc string for parse-partial-sexp,
2796 for make-docfile to see. We cannot put this in the real DEFUN
2797 due to limits in the Unix cpp.
2799 DEFUN ("parse-partial-sexp", Ffoo, Sfoo, 2, 6, 0,
2800 "Parse Lisp syntax starting at FROM until TO; return status of parse at TO.\n\
2801 Parsing stops at TO or when certain criteria are met;\n\
2802 point is set to where parsing stops.\n\
2803 If fifth arg STATE is omitted or nil,\n\
2804 parsing assumes that FROM is the beginning of a function.\n\
2805 Value is a list of ten elements describing final state of parsing:\n\
2806 0. depth in parens.\n\
2807 1. character address of start of innermost containing list; nil if none.\n\
2808 2. character address of start of last complete sexp terminated.\n\
2809 3. non-nil if inside a string.\n\
2810 (it is the character that will terminate the string,\n\
2811 or t if the string should be terminated by a generic string delimiter.)\n\
2812 4. nil if outside a comment, t if inside a non-nestable comment, \n\
2813 else an integer (the current comment nesting).\n\
2814 5. t if following a quote character.\n\
2815 6. the minimum paren-depth encountered during this scan.\n\
2816 7. t if in a comment of style b; symbol `syntax-table' if the comment\n\
2817 should be terminated by a generic comment delimiter.\n\
2818 8. character address of start of comment or string; nil if not in one.\n\
2819 9. Intermediate data for continuation of parsing (subject to change).\n\
2820 If third arg TARGETDEPTH is non-nil, parsing stops if the depth\n\
2821 in parentheses becomes equal to TARGETDEPTH.\n\
2822 Fourth arg STOPBEFORE non-nil means stop when come to\n\
2823 any character that starts a sexp.\n\
2824 Fifth arg STATE is a nine-element list like what this function returns.\n\
2825 It is used to initialize the state of the parse. Elements number 1, 2, 6\n\
2826 and 8 are ignored; you can leave off element 8 (the last) entirely.\n\
2827 Sixth arg COMMENTSTOP non-nil means stop at the start of a comment.\n\
2828 If it is symbol `syntax-table', stop after the start of a comment or a\n\
2829 string, or after end of a comment or a string.")
2830 (from, to, targetdepth, stopbefore, state, commentstop)
2833 DEFUN ("parse-partial-sexp", Fparse_partial_sexp
, Sparse_partial_sexp
, 2, 6, 0,
2834 0 /* See immediately above */)
2835 (from
, to
, targetdepth
, stopbefore
, oldstate
, commentstop
)
2836 Lisp_Object from
, to
, targetdepth
, stopbefore
, oldstate
, commentstop
;
2838 struct lisp_parse_state state
;
2841 if (!NILP (targetdepth
))
2843 CHECK_NUMBER (targetdepth
, 3);
2844 target
= XINT (targetdepth
);
2847 target
= -100000; /* We won't reach this depth */
2849 validate_region (&from
, &to
);
2850 scan_sexps_forward (&state
, XINT (from
), CHAR_TO_BYTE (XINT (from
)),
2852 target
, !NILP (stopbefore
), oldstate
,
2854 ? 0 : (EQ (commentstop
, Qsyntax_table
) ? -1 : 1)));
2856 SET_PT (state
.location
);
2858 return Fcons (make_number (state
.depth
),
2859 Fcons (state
.prevlevelstart
< 0 ? Qnil
: make_number (state
.prevlevelstart
),
2860 Fcons (state
.thislevelstart
< 0 ? Qnil
: make_number (state
.thislevelstart
),
2861 Fcons (state
.instring
>= 0
2862 ? (state
.instring
== ST_STRING_STYLE
2863 ? Qt
: make_number (state
.instring
)) : Qnil
,
2864 Fcons (state
.incomment
< 0 ? Qt
:
2865 (state
.incomment
== 0 ? Qnil
:
2866 make_number (state
.incomment
)),
2867 Fcons (state
.quoted
? Qt
: Qnil
,
2868 Fcons (make_number (state
.mindepth
),
2869 Fcons ((state
.comstyle
2870 ? (state
.comstyle
== ST_COMMENT_STYLE
2871 ? Qsyntax_table
: Qt
) :
2873 Fcons (((state
.incomment
2874 || (state
.instring
>= 0))
2875 ? make_number (state
.comstr_start
)
2877 Fcons (state
.levelstarts
, Qnil
))))))))));
2886 /* This has to be done here, before we call Fmake_char_table. */
2887 Qsyntax_table
= intern ("syntax-table");
2888 staticpro (&Qsyntax_table
);
2890 /* Intern this now in case it isn't already done.
2891 Setting this variable twice is harmless.
2892 But don't staticpro it here--that is done in alloc.c. */
2893 Qchar_table_extra_slots
= intern ("char-table-extra-slots");
2895 /* Create objects which can be shared among syntax tables. */
2896 Vsyntax_code_object
= Fmake_vector (make_number (13), Qnil
);
2897 for (i
= 0; i
< XVECTOR (Vsyntax_code_object
)->size
; i
++)
2898 XVECTOR (Vsyntax_code_object
)->contents
[i
]
2899 = Fcons (make_number (i
), Qnil
);
2901 /* Now we are ready to set up this property, so we can
2902 create syntax tables. */
2903 Fput (Qsyntax_table
, Qchar_table_extra_slots
, make_number (0));
2905 temp
= XVECTOR (Vsyntax_code_object
)->contents
[(int) Swhitespace
];
2907 Vstandard_syntax_table
= Fmake_char_table (Qsyntax_table
, temp
);
2909 temp
= XVECTOR (Vsyntax_code_object
)->contents
[(int) Sword
];
2910 for (i
= 'a'; i
<= 'z'; i
++)
2911 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, i
, temp
);
2912 for (i
= 'A'; i
<= 'Z'; i
++)
2913 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, i
, temp
);
2914 for (i
= '0'; i
<= '9'; i
++)
2915 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, i
, temp
);
2917 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '$', temp
);
2918 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '%', temp
);
2920 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '(',
2921 Fcons (make_number (Sopen
), make_number (')')));
2922 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, ')',
2923 Fcons (make_number (Sclose
), make_number ('(')));
2924 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '[',
2925 Fcons (make_number (Sopen
), make_number (']')));
2926 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, ']',
2927 Fcons (make_number (Sclose
), make_number ('[')));
2928 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '{',
2929 Fcons (make_number (Sopen
), make_number ('}')));
2930 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '}',
2931 Fcons (make_number (Sclose
), make_number ('{')));
2932 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '"',
2933 Fcons (make_number ((int) Sstring
), Qnil
));
2934 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '\\',
2935 Fcons (make_number ((int) Sescape
), Qnil
));
2937 temp
= XVECTOR (Vsyntax_code_object
)->contents
[(int) Ssymbol
];
2938 for (i
= 0; i
< 10; i
++)
2940 c
= "_-+*/&|<>="[i
];
2941 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, c
, temp
);
2944 temp
= XVECTOR (Vsyntax_code_object
)->contents
[(int) Spunct
];
2945 for (i
= 0; i
< 12; i
++)
2947 c
= ".,;:?!#@~^'`"[i
];
2948 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, c
, temp
);
2951 /* All multibyte characters have syntax `word' by default. */
2952 temp
= XVECTOR (Vsyntax_code_object
)->contents
[(int) Sword
];
2953 for (i
= CHAR_TABLE_SINGLE_BYTE_SLOTS
; i
< CHAR_TABLE_ORDINARY_SLOTS
; i
++)
2954 XCHAR_TABLE (Vstandard_syntax_table
)->contents
[i
] = temp
;
2960 Qsyntax_table_p
= intern ("syntax-table-p");
2961 staticpro (&Qsyntax_table_p
);
2963 staticpro (&Vsyntax_code_object
);
2965 Qscan_error
= intern ("scan-error");
2966 staticpro (&Qscan_error
);
2967 Fput (Qscan_error
, Qerror_conditions
,
2968 Fcons (Qscan_error
, Fcons (Qerror
, Qnil
)));
2969 Fput (Qscan_error
, Qerror_message
,
2970 build_string ("Scan error"));
2972 DEFVAR_BOOL ("parse-sexp-ignore-comments", &parse_sexp_ignore_comments
,
2973 "Non-nil means `forward-sexp', etc., should treat comments as whitespace.");
2975 DEFVAR_BOOL ("parse-sexp-lookup-properties", &parse_sexp_lookup_properties
,
2976 "Non-nil means `forward-sexp', etc., grant `syntax-table' property.\n\
2977 The value of this property should be either a syntax table, or a cons\n\
2978 of the form (SYNTAXCODE . MATCHCHAR), SYNTAXCODE being the numeric\n\
2979 syntax code, MATCHCHAR being nil or the character to match (which is\n\
2980 relevant only for open/close type.");
2982 words_include_escapes
= 0;
2983 DEFVAR_BOOL ("words-include-escapes", &words_include_escapes
,
2984 "Non-nil means `forward-word', etc., should treat escape chars part of words.");
2986 DEFVAR_BOOL ("multibyte-syntax-as-symbol", &multibyte_syntax_as_symbol
,
2987 "Non-nil means `scan-sexps' treats all multibyte characters as symbol.");
2988 multibyte_syntax_as_symbol
= 0;
2990 DEFVAR_BOOL ("open-paren-in-column-0-is-defun-start",
2991 &open_paren_in_column_0_is_defun_start
,
2992 "Non-nil means an open paren in column 0 denotes the start of a defun.");
2993 open_paren_in_column_0_is_defun_start
= 1;
2995 defsubr (&Ssyntax_table_p
);
2996 defsubr (&Ssyntax_table
);
2997 defsubr (&Sstandard_syntax_table
);
2998 defsubr (&Scopy_syntax_table
);
2999 defsubr (&Sset_syntax_table
);
3000 defsubr (&Schar_syntax
);
3001 defsubr (&Smatching_paren
);
3002 defsubr (&Smodify_syntax_entry
);
3003 defsubr (&Sdescribe_syntax
);
3005 defsubr (&Sforward_word
);
3007 defsubr (&Sskip_chars_forward
);
3008 defsubr (&Sskip_chars_backward
);
3009 defsubr (&Sskip_syntax_forward
);
3010 defsubr (&Sskip_syntax_backward
);
3012 defsubr (&Sforward_comment
);
3013 defsubr (&Sscan_lists
);
3014 defsubr (&Sscan_sexps
);
3015 defsubr (&Sbackward_prefix_chars
);
3016 defsubr (&Sparse_partial_sexp
);