1 /* GNU Emacs routines to deal with syntax tables; also word and list parsing.
2 Copyright (C) 1985, 87, 93, 94, 95, 97, 1998, 1999 Free Software Foundation, Inc.
4 This file is part of GNU Emacs.
6 GNU Emacs is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
11 GNU Emacs is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GNU Emacs; see the file COPYING. If not, write to
18 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
29 /* Make syntax table lookup grant data in gl_state. */
30 #define SYNTAX_ENTRY_VIA_PROPERTY
33 #include "intervals.h"
35 /* We use these constants in place for comment-style and
36 string-ender-char to distinguish comments/strings started by
37 comment_fence and string_fence codes. */
39 #define ST_COMMENT_STYLE (256 + 1)
40 #define ST_STRING_STYLE (256 + 2)
43 Lisp_Object Qsyntax_table_p
, Qsyntax_table
, Qscan_error
;
45 int words_include_escapes
;
46 int parse_sexp_lookup_properties
;
48 /* Nonzero means `scan-sexps' treat all multibyte characters as symbol. */
49 int multibyte_syntax_as_symbol
;
51 /* Used as a temporary in SYNTAX_ENTRY and other macros in syntax.h,
52 if not compiled with GCC. No need to mark it, since it is used
53 only very temporarily. */
54 Lisp_Object syntax_temp
;
56 /* Non-zero means an open parenthesis in column 0 is always considered
57 to be the start of a defun. Zero means an open parenthesis in
58 column 0 has no special meaning. */
60 int open_paren_in_column_0_is_defun_start
;
62 /* This is the internal form of the parse state used in parse-partial-sexp. */
64 struct lisp_parse_state
66 int depth
; /* Depth at end of parsing. */
67 int instring
; /* -1 if not within string, else desired terminator. */
68 int incomment
; /* -1 if in unnestable comment else comment nesting */
69 int comstyle
; /* comment style a=0, or b=1, or ST_COMMENT_STYLE. */
70 int quoted
; /* Nonzero if just after an escape char at end of parsing */
71 int thislevelstart
; /* Char number of most recent start-of-expression at current level */
72 int prevlevelstart
; /* Char number of start of containing expression */
73 int location
; /* Char number at which parsing stopped. */
74 int mindepth
; /* Minimum depth seen while scanning. */
75 int comstr_start
; /* Position just after last comment/string starter. */
76 Lisp_Object levelstarts
; /* Char numbers of starts-of-expression
77 of levels (starting from outermost). */
80 /* These variables are a cache for finding the start of a defun.
81 find_start_pos is the place for which the defun start was found.
82 find_start_value is the defun start position found for it.
83 find_start_value_byte is the corresponding byte position.
84 find_start_buffer is the buffer it was found in.
85 find_start_begv is the BEGV value when it was found.
86 find_start_modiff is the value of MODIFF when it was found. */
88 static int find_start_pos
;
89 static int find_start_value
;
90 static int find_start_value_byte
;
91 static struct buffer
*find_start_buffer
;
92 static int find_start_begv
;
93 static int find_start_modiff
;
96 static int find_defun_start
P_ ((int, int));
97 static int back_comment
P_ ((int, int, int, int, int, int *, int *));
98 static int char_quoted
P_ ((int, int));
99 static Lisp_Object skip_chars
P_ ((int, int, Lisp_Object
, Lisp_Object
));
100 static Lisp_Object scan_lists
P_ ((int, int, int, int));
101 static void scan_sexps_forward
P_ ((struct lisp_parse_state
*,
103 int, Lisp_Object
, int));
106 struct gl_state_s gl_state
; /* Global state of syntax parser. */
108 INTERVAL
interval_of ();
109 #define INTERVALS_AT_ONCE 10 /* 1 + max-number of intervals
110 to scan to property-change. */
112 /* Update gl_state to an appropriate interval which contains CHARPOS. The
113 sign of COUNT give the relative position of CHARPOS wrt the previously
114 valid interval. If INIT, only [be]_property fields of gl_state are
115 valid at start, the rest is filled basing on OBJECT.
117 `gl_state.*_i' are the intervals, and CHARPOS is further in the search
118 direction than the intervals - or in an interval. We update the
119 current syntax-table basing on the property of this interval, and
120 update the interval to start further than CHARPOS - or be
121 NULL_INTERVAL. We also update lim_property to be the next value of
122 charpos to call this subroutine again - or be before/after the
123 start/end of OBJECT. */
126 update_syntax_table (charpos
, count
, init
, object
)
127 int charpos
, count
, init
;
130 Lisp_Object tmp_table
;
131 int cnt
= 0, invalidate
= 1;
136 gl_state
.start
= gl_state
.b_property
;
137 gl_state
.stop
= gl_state
.e_property
;
138 gl_state
.forward_i
= interval_of (charpos
, object
);
139 i
= gl_state
.backward_i
= gl_state
.forward_i
;
140 gl_state
.left_ok
= gl_state
.right_ok
= 1;
142 if (NULL_INTERVAL_P (i
))
144 /* interval_of updates only ->position of the return value, so
145 update the parents manually to speed up update_interval. */
146 while (!NULL_PARENT (i
))
148 if (AM_RIGHT_CHILD (i
))
149 INTERVAL_PARENT (i
)->position
= i
->position
150 - LEFT_TOTAL_LENGTH (i
) + TOTAL_LENGTH (i
) /* right end */
151 - TOTAL_LENGTH (INTERVAL_PARENT (i
))
152 + LEFT_TOTAL_LENGTH (INTERVAL_PARENT (i
));
154 INTERVAL_PARENT (i
)->position
= i
->position
- LEFT_TOTAL_LENGTH (i
)
156 i
= INTERVAL_PARENT (i
);
158 i
= gl_state
.forward_i
;
159 gl_state
.b_property
= i
->position
- 1 - gl_state
.offset
;
160 gl_state
.e_property
= INTERVAL_LAST_POS (i
) - gl_state
.offset
;
163 oldi
= i
= count
> 0 ? gl_state
.forward_i
: gl_state
.backward_i
;
165 /* We are guarantied to be called with CHARPOS either in i,
167 if (NULL_INTERVAL_P (i
))
168 error ("Error in syntax_table logic for to-the-end intervals");
169 else if (charpos
< i
->position
) /* Move left. */
172 error ("Error in syntax_table logic for intervals <-");
173 /* Update the interval. */
174 i
= update_interval (i
, charpos
);
175 if (oldi
->position
!= INTERVAL_LAST_POS (i
))
178 gl_state
.right_ok
= 1; /* Invalidate the other end. */
179 gl_state
.forward_i
= i
;
180 gl_state
.e_property
= INTERVAL_LAST_POS (i
) - gl_state
.offset
;
183 else if (charpos
>= INTERVAL_LAST_POS (i
)) /* Move right. */
186 error ("Error in syntax_table logic for intervals ->");
187 /* Update the interval. */
188 i
= update_interval (i
, charpos
);
189 if (i
->position
!= INTERVAL_LAST_POS (oldi
))
192 gl_state
.left_ok
= 1; /* Invalidate the other end. */
193 gl_state
.backward_i
= i
;
194 gl_state
.b_property
= i
->position
- 1 - gl_state
.offset
;
197 else if (count
> 0 ? gl_state
.right_ok
: gl_state
.left_ok
)
199 /* We do not need to recalculate tmp_table. */
200 tmp_table
= gl_state
.old_prop
;
204 tmp_table
= textget (i
->plist
, Qsyntax_table
);
207 invalidate
= !EQ (tmp_table
, gl_state
.old_prop
); /* Need to invalidate? */
209 if (invalidate
) /* Did not get to adjacent interval. */
210 { /* with the same table => */
211 /* invalidate the old range. */
214 gl_state
.backward_i
= i
;
215 gl_state
.left_ok
= 1; /* Invalidate the other end. */
216 gl_state
.b_property
= i
->position
- 1 - gl_state
.offset
;
220 gl_state
.forward_i
= i
;
221 gl_state
.right_ok
= 1; /* Invalidate the other end. */
222 gl_state
.e_property
= INTERVAL_LAST_POS (i
) - gl_state
.offset
;
226 gl_state
.current_syntax_table
= tmp_table
;
227 gl_state
.old_prop
= tmp_table
;
228 if (EQ (Fsyntax_table_p (tmp_table
), Qt
))
230 gl_state
.use_global
= 0;
232 else if (CONSP (tmp_table
))
234 gl_state
.use_global
= 1;
235 gl_state
.global_code
= tmp_table
;
239 gl_state
.use_global
= 0;
240 gl_state
.current_syntax_table
= current_buffer
->syntax_table
;
243 while (!NULL_INTERVAL_P (i
))
245 if (cnt
&& !EQ (tmp_table
, textget (i
->plist
, Qsyntax_table
)))
248 gl_state
.right_ok
= 0;
250 gl_state
.left_ok
= 0;
253 else if (cnt
== INTERVALS_AT_ONCE
)
256 gl_state
.right_ok
= 1;
258 gl_state
.left_ok
= 1;
262 i
= count
> 0 ? next_interval (i
) : previous_interval (i
);
264 if (NULL_INTERVAL_P (i
))
265 { /* This property goes to the end. */
267 gl_state
.e_property
= gl_state
.stop
;
269 gl_state
.b_property
= gl_state
.start
;
275 gl_state
.e_property
= i
->position
- gl_state
.offset
;
276 gl_state
.forward_i
= i
;
280 gl_state
.b_property
= i
->position
+ LENGTH (i
) - 1 - gl_state
.offset
;
281 gl_state
.backward_i
= i
;
286 /* Returns TRUE if char at CHARPOS is quoted.
287 Global syntax-table data should be set up already to be good at CHARPOS
288 or after. On return global syntax data is good for lookup at CHARPOS. */
291 char_quoted (charpos
, bytepos
)
292 register int charpos
, bytepos
;
294 register enum syntaxcode code
;
295 register int beg
= BEGV
;
296 register int quoted
= 0;
299 DEC_BOTH (charpos
, bytepos
);
301 while (bytepos
>= beg
)
303 UPDATE_SYNTAX_TABLE_BACKWARD (charpos
);
304 code
= SYNTAX (FETCH_CHAR (bytepos
));
305 if (! (code
== Scharquote
|| code
== Sescape
))
308 DEC_BOTH (charpos
, bytepos
);
312 UPDATE_SYNTAX_TABLE (orig
);
316 /* Return the bytepos one character after BYTEPOS.
317 We assume that BYTEPOS is not at the end of the buffer. */
320 inc_bytepos (bytepos
)
323 if (NILP (current_buffer
->enable_multibyte_characters
))
330 /* Return the bytepos one character before BYTEPOS.
331 We assume that BYTEPOS is not at the start of the buffer. */
334 dec_bytepos (bytepos
)
337 if (NILP (current_buffer
->enable_multibyte_characters
))
344 /* Find a defun-start that is the last one before POS (or nearly the last).
345 We record what we find, so that another call in the same area
346 can return the same value right away.
348 There is no promise at which position the global syntax data is
349 valid on return from the subroutine, so the caller should explicitly
350 update the global data. */
353 find_defun_start (pos
, pos_byte
)
356 int opoint
= PT
, opoint_byte
= PT_BYTE
;
358 /* Use previous finding, if it's valid and applies to this inquiry. */
359 if (current_buffer
== find_start_buffer
360 /* Reuse the defun-start even if POS is a little farther on.
361 POS might be in the next defun, but that's ok.
362 Our value may not be the best possible, but will still be usable. */
363 && pos
<= find_start_pos
+ 1000
364 && pos
>= find_start_value
365 && BEGV
== find_start_begv
366 && MODIFF
== find_start_modiff
)
367 return find_start_value
;
369 /* Back up to start of line. */
370 scan_newline (pos
, pos_byte
, BEGV
, BEGV_BYTE
, -1, 1);
372 /* We optimize syntax-table lookup for rare updates. Thus we accept
373 only those `^\s(' which are good in global _and_ text-property
375 gl_state
.current_syntax_table
= current_buffer
->syntax_table
;
376 gl_state
.use_global
= 0;
377 if (open_paren_in_column_0_is_defun_start
)
381 /* Open-paren at start of line means we may have found our
383 if (SYNTAX (FETCH_CHAR (PT_BYTE
)) == Sopen
)
385 SETUP_SYNTAX_TABLE (PT
+ 1, -1); /* Try again... */
386 if (SYNTAX (FETCH_CHAR (PT_BYTE
)) == Sopen
)
388 /* Now fallback to the default value. */
389 gl_state
.current_syntax_table
= current_buffer
->syntax_table
;
390 gl_state
.use_global
= 0;
392 /* Move to beg of previous line. */
393 scan_newline (PT
, PT_BYTE
, BEGV
, BEGV_BYTE
, -2, 1);
397 /* Record what we found, for the next try. */
398 find_start_value
= PT
;
399 find_start_value_byte
= PT_BYTE
;
400 find_start_buffer
= current_buffer
;
401 find_start_modiff
= MODIFF
;
402 find_start_begv
= BEGV
;
403 find_start_pos
= pos
;
405 TEMP_SET_PT_BOTH (opoint
, opoint_byte
);
407 return find_start_value
;
410 /* Return the SYNTAX_COMEND_FIRST of the character before POS, POS_BYTE. */
413 prev_char_comend_first (pos
, pos_byte
)
418 DEC_BOTH (pos
, pos_byte
);
419 UPDATE_SYNTAX_TABLE_BACKWARD (pos
);
420 c
= FETCH_CHAR (pos_byte
);
421 val
= SYNTAX_COMEND_FIRST (c
);
422 UPDATE_SYNTAX_TABLE_FORWARD (pos
+ 1);
426 /* Return the SYNTAX_COMSTART_FIRST of the character before POS, POS_BYTE. */
429 prev_char_comstart_first (pos
, pos_byte
)
434 DEC_BOTH (pos
, pos_byte
);
435 UPDATE_SYNTAX_TABLE_BACKWARD (pos
);
436 c
= FETCH_CHAR (pos_byte
);
437 val
= SYNTAX_COMSTART_FIRST (c
);
438 UPDATE_SYNTAX_TABLE_FORWARD (pos
+ 1);
442 /* Checks whether charpos FROM is at the end of a comment.
443 FROM_BYTE is the bytepos corresponding to FROM.
444 Do not move back before STOP.
446 Return a positive value if we find a comment ending at FROM/FROM_BYTE;
449 If successful, store the charpos of the comment's beginning
450 into *CHARPOS_PTR, and the bytepos into *BYTEPOS_PTR.
452 Global syntax data remains valid for backward search starting at
453 the returned value (or at FROM, if the search was not successful). */
456 back_comment (from
, from_byte
, stop
, comnested
, comstyle
, charpos_ptr
, bytepos_ptr
)
457 int from
, from_byte
, stop
;
458 int comnested
, comstyle
;
459 int *charpos_ptr
, *bytepos_ptr
;
461 /* Look back, counting the parity of string-quotes,
462 and recording the comment-starters seen.
463 When we reach a safe place, assume that's not in a string;
464 then step the main scan to the earliest comment-starter seen
465 an even number of string quotes away from the safe place.
467 OFROM[I] is position of the earliest comment-starter seen
468 which is I+2X quotes from the comment-end.
469 PARITY is current parity of quotes from the comment end. */
471 int my_stringend
= 0;
472 int string_lossage
= 0;
473 int comment_end
= from
;
474 int comment_end_byte
= from_byte
;
475 int comstart_pos
= 0;
477 /* Value that PARITY had, when we reached the position
479 int comstart_parity
= 0;
480 int scanstart
= from
- 1;
481 /* Place where the containing defun starts,
482 or 0 if we didn't come across it yet. */
484 int defun_start_byte
= 0;
485 register enum syntaxcode code
;
486 int nesting
= 1; /* current comment nesting */
489 /* At beginning of range to scan, we're outside of strings;
490 that determines quote parity to the comment-end. */
495 /* Move back and examine a character. */
496 DEC_BOTH (from
, from_byte
);
497 UPDATE_SYNTAX_TABLE_BACKWARD (from
);
499 c
= FETCH_CHAR (from_byte
);
502 /* If this char is the second of a 2-char comment end sequence,
503 back up and give the pair the appropriate syntax. */
504 if (from
> stop
&& SYNTAX_COMEND_SECOND (c
)
505 && prev_char_comend_first (from
, from_byte
))
508 DEC_BOTH (from
, from_byte
);
509 UPDATE_SYNTAX_TABLE_BACKWARD (from
);
510 c
= FETCH_CHAR (from_byte
);
513 /* If this char starts a 2-char comment start sequence,
514 treat it like a 1-char comment starter. */
515 if (from
< scanstart
&& SYNTAX_COMSTART_FIRST (c
))
517 temp_byte
= inc_bytepos (from_byte
);
518 UPDATE_SYNTAX_TABLE_FORWARD (from
+ 1);
519 if (SYNTAX_COMSTART_SECOND (FETCH_CHAR (temp_byte
))
520 && comstyle
== SYNTAX_COMMENT_STYLE (FETCH_CHAR (temp_byte
)))
522 UPDATE_SYNTAX_TABLE_BACKWARD (from
);
524 else if (code
== Scomment
&& comstyle
!= SYNTAX_COMMENT_STYLE (c
))
525 /* Ignore comment starters of a different style. */
528 /* Ignore escaped characters, except comment-enders. */
529 if (code
!= Sendcomment
&& char_quoted (from
, from_byte
))
532 /* Track parity of quotes. */
536 if (my_stringend
== 0)
538 /* If we have two kinds of string delimiters.
539 There's no way to grok this scanning backwards. */
540 else if (my_stringend
!= c
)
544 if (code
== Sstring_fence
|| code
== Scomment_fence
)
547 if (my_stringend
== 0)
549 = code
== Sstring_fence
? ST_STRING_STYLE
: ST_COMMENT_STYLE
;
550 /* If we have two kinds of string delimiters.
551 There's no way to grok this scanning backwards. */
552 else if (my_stringend
!= (code
== Sstring_fence
553 ? ST_STRING_STYLE
: ST_COMMENT_STYLE
))
557 if (code
== Scomment
)
558 /* We've already checked that it is the relevant comstyle. */
560 if (comnested
&& --nesting
<= 0 && parity
== 0 && !string_lossage
)
561 /* nested comments have to be balanced, so we don't need to
562 keep looking for earlier ones. We use here the same (slightly
563 incorrect) reasoning as below: since it is followed by uniform
564 paired string quotes, this comment-start has to be outside of
565 strings, else the comment-end itself would be inside a string. */
568 /* Record comment-starters according to that
569 quote-parity to the comment-end. */
570 comstart_parity
= parity
;
572 comstart_byte
= from_byte
;
575 /* If we find another earlier comment-ender,
576 any comment-starts earlier than that don't count
577 (because they go with the earlier comment-ender). */
578 if (code
== Sendcomment
579 && SYNTAX_COMMENT_STYLE (FETCH_CHAR (from_byte
)) == comstyle
)
587 /* Assume a defun-start point is outside of strings. */
590 || (temp_byte
= dec_bytepos (from_byte
),
591 FETCH_CHAR (temp_byte
) == '\n')))
594 defun_start_byte
= from_byte
;
599 if (comstart_pos
== 0)
602 from_byte
= comment_end_byte
;
603 UPDATE_SYNTAX_TABLE_FORWARD (comment_end
- 1);
605 /* If the earliest comment starter
606 is followed by uniform paired string quotes or none,
607 we know it can't be inside a string
608 since if it were then the comment ender would be inside one.
609 So it does start a comment. Skip back to it. */
610 else if (!comnested
&& comstart_parity
== 0 && !string_lossage
)
613 from_byte
= comstart_byte
;
614 /* Globals are correct now. */
618 /* We had two kinds of string delimiters mixed up
619 together. Decode this going forwards.
620 Scan fwd from the previous comment ender
621 to the one in question; this records where we
622 last passed a comment starter. */
623 struct lisp_parse_state state
;
624 /* If we did not already find the defun start, find it now. */
625 if (defun_start
== 0)
627 defun_start
= find_defun_start (comment_end
, comment_end_byte
);
628 defun_start_byte
= find_start_value_byte
;
630 scan_sexps_forward (&state
,
631 defun_start
, defun_start_byte
,
632 comment_end
- 1, -10000, 0, Qnil
, 0);
635 /* scan_sexps_forward changed the direction of search in
636 global variables, so we need to update it completely. */
638 from
= state
.comstr_start
;
644 from_byte
= CHAR_TO_BYTE (from
);
645 UPDATE_SYNTAX_TABLE_FORWARD (from
- 1);
650 *bytepos_ptr
= from_byte
;
652 return (from
== comment_end
) ? -1 : from
;
655 DEFUN ("syntax-table-p", Fsyntax_table_p
, Ssyntax_table_p
, 1, 1, 0,
656 "Return t if OBJECT is a syntax table.\n\
657 Currently, any char-table counts as a syntax table.")
661 if (CHAR_TABLE_P (object
)
662 && EQ (XCHAR_TABLE (object
)->purpose
, Qsyntax_table
))
668 check_syntax_table (obj
)
671 if (!(CHAR_TABLE_P (obj
)
672 && EQ (XCHAR_TABLE (obj
)->purpose
, Qsyntax_table
)))
673 wrong_type_argument (Qsyntax_table_p
, obj
);
676 DEFUN ("syntax-table", Fsyntax_table
, Ssyntax_table
, 0, 0, 0,
677 "Return the current syntax table.\n\
678 This is the one specified by the current buffer.")
681 return current_buffer
->syntax_table
;
684 DEFUN ("standard-syntax-table", Fstandard_syntax_table
,
685 Sstandard_syntax_table
, 0, 0, 0,
686 "Return the standard syntax table.\n\
687 This is the one used for new buffers.")
690 return Vstandard_syntax_table
;
693 DEFUN ("copy-syntax-table", Fcopy_syntax_table
, Scopy_syntax_table
, 0, 1, 0,
694 "Construct a new syntax table and return it.\n\
695 It is a copy of the TABLE, which defaults to the standard syntax table.")
702 check_syntax_table (table
);
704 table
= Vstandard_syntax_table
;
706 copy
= Fcopy_sequence (table
);
708 /* Only the standard syntax table should have a default element.
709 Other syntax tables should inherit from parents instead. */
710 XCHAR_TABLE (copy
)->defalt
= Qnil
;
712 /* Copied syntax tables should all have parents.
713 If we copied one with no parent, such as the standard syntax table,
714 use the standard syntax table as the copy's parent. */
715 if (NILP (XCHAR_TABLE (copy
)->parent
))
716 Fset_char_table_parent (copy
, Vstandard_syntax_table
);
720 DEFUN ("set-syntax-table", Fset_syntax_table
, Sset_syntax_table
, 1, 1, 0,
721 "Select a new syntax table for the current buffer.\n\
722 One argument, a syntax table.")
727 check_syntax_table (table
);
728 current_buffer
->syntax_table
= table
;
729 /* Indicate that this buffer now has a specified syntax table. */
730 idx
= PER_BUFFER_VAR_IDX (syntax_table
);
731 SET_PER_BUFFER_VALUE_P (current_buffer
, idx
, 1);
735 /* Convert a letter which signifies a syntax code
736 into the code it signifies.
737 This is used by modify-syntax-entry, and other things. */
739 unsigned char syntax_spec_code
[0400] =
740 { 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
741 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
742 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
743 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
744 (char) Swhitespace
, (char) Scomment_fence
, (char) Sstring
, 0377,
745 (char) Smath
, 0377, 0377, (char) Squote
,
746 (char) Sopen
, (char) Sclose
, 0377, 0377,
747 0377, (char) Swhitespace
, (char) Spunct
, (char) Scharquote
,
748 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
749 0377, 0377, 0377, 0377,
750 (char) Scomment
, 0377, (char) Sendcomment
, 0377,
751 (char) Sinherit
, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* @, A ... */
752 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
753 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword
,
754 0377, 0377, 0377, 0377, (char) Sescape
, 0377, 0377, (char) Ssymbol
,
755 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* `, a, ... */
756 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
757 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword
,
758 0377, 0377, 0377, 0377, (char) Sstring_fence
, 0377, 0377, 0377
761 /* Indexed by syntax code, give the letter that describes it. */
763 char syntax_code_spec
[16] =
765 ' ', '.', 'w', '_', '(', ')', '\'', '\"', '$', '\\', '/', '<', '>', '@',
769 /* Indexed by syntax code, give the object (cons of syntax code and
770 nil) to be stored in syntax table. Since these objects can be
771 shared among syntax tables, we generate them in advance. By
772 sharing objects, the function `describe-syntax' can give a more
774 static Lisp_Object Vsyntax_code_object
;
777 /* Look up the value for CHARACTER in syntax table TABLE's parent
778 and its parents. SYNTAX_ENTRY calls this, when TABLE itself has nil
779 for CHARACTER. It's actually used only when not compiled with GCC. */
782 syntax_parent_lookup (table
, character
)
790 table
= XCHAR_TABLE (table
)->parent
;
794 value
= XCHAR_TABLE (table
)->contents
[character
];
800 DEFUN ("char-syntax", Fchar_syntax
, Schar_syntax
, 1, 1, 0,
801 "Return the syntax code of CHARACTER, described by a character.\n\
802 For example, if CHARACTER is a word constituent,\n\
803 the character `w' is returned.\n\
804 The characters that correspond to various syntax codes\n\
805 are listed in the documentation of `modify-syntax-entry'.")
807 Lisp_Object character
;
810 gl_state
.current_syntax_table
= current_buffer
->syntax_table
;
812 gl_state
.use_global
= 0;
813 CHECK_NUMBER (character
, 0);
814 char_int
= XINT (character
);
815 return make_number (syntax_code_spec
[(int) SYNTAX (char_int
)]);
818 DEFUN ("matching-paren", Fmatching_paren
, Smatching_paren
, 1, 1, 0,
819 "Return the matching parenthesis of CHARACTER, or nil if none.")
821 Lisp_Object character
;
824 gl_state
.current_syntax_table
= current_buffer
->syntax_table
;
825 gl_state
.use_global
= 0;
826 CHECK_NUMBER (character
, 0);
827 char_int
= XINT (character
);
828 code
= SYNTAX (char_int
);
829 if (code
== Sopen
|| code
== Sclose
)
830 return SYNTAX_MATCH (char_int
);
834 /* This comment supplies the doc string for modify-syntax-entry,
835 for make-docfile to see. We cannot put this in the real DEFUN
836 due to limits in the Unix cpp.
838 DEFUN ("modify-syntax-entry", foo, bar, 2, 3, 0,
839 "Set syntax for character CHAR according to string S.\n\
840 The syntax is changed only for table TABLE, which defaults to\n\
841 the current buffer's syntax table.\n\
842 The first character of S should be one of the following:\n\
843 Space or - whitespace syntax. w word constituent.\n\
844 _ symbol constituent. . punctuation.\n\
845 ( open-parenthesis. ) close-parenthesis.\n\
846 \" string quote. \\ escape.\n\
847 $ paired delimiter. ' expression quote or prefix operator.\n\
848 < comment starter. > comment ender.\n\
849 / character-quote. @ inherit from `standard-syntax-table'.\n\
851 Only single-character comment start and end sequences are represented thus.\n\
852 Two-character sequences are represented as described below.\n\
853 The second character of S is the matching parenthesis,\n\
854 used only if the first character is `(' or `)'.\n\
855 Any additional characters are flags.\n\
856 Defined flags are the characters 1, 2, 3, 4, b, p, and n.\n\
857 1 means CHAR is the start of a two-char comment start sequence.\n\
858 2 means CHAR is the second character of such a sequence.\n\
859 3 means CHAR is the start of a two-char comment end sequence.\n\
860 4 means CHAR is the second character of such a sequence.\n\
862 There can be up to two orthogonal comment sequences. This is to support\n\
863 language modes such as C++. By default, all comment sequences are of style\n\
864 a, but you can set the comment sequence style to b (on the second character\n\
865 of a comment-start, or the first character of a comment-end sequence) using\n\
867 b means CHAR is part of comment sequence b.\n\
868 n means CHAR is part of a nestable comment sequence.\n\
870 p means CHAR is a prefix character for `backward-prefix-chars';\n\
871 such characters are treated as whitespace when they occur\n\
872 between expressions.")
876 DEFUN ("modify-syntax-entry", Fmodify_syntax_entry
, Smodify_syntax_entry
, 2, 3,
877 /* I really don't know why this is interactive
878 help-form should at least be made useful whilst reading the second arg
880 "cSet syntax for character: \nsSet syntax for %s to: ",
881 0 /* See immediately above */)
882 (c
, newentry
, syntax_table
)
883 Lisp_Object c
, newentry
, syntax_table
;
885 register unsigned char *p
;
886 register enum syntaxcode code
;
891 CHECK_STRING (newentry
, 1);
893 if (NILP (syntax_table
))
894 syntax_table
= current_buffer
->syntax_table
;
896 check_syntax_table (syntax_table
);
898 p
= XSTRING (newentry
)->data
;
899 code
= (enum syntaxcode
) syntax_spec_code
[*p
++];
900 if (((int) code
& 0377) == 0377)
901 error ("invalid syntax description letter: %c", p
[-1]);
903 if (code
== Sinherit
)
905 SET_RAW_SYNTAX_ENTRY (syntax_table
, XINT (c
), Qnil
);
912 int character
= (STRING_CHAR_AND_LENGTH
913 (p
, STRING_BYTES (XSTRING (newentry
)) - 1, len
));
914 XSETINT (match
, character
);
915 if (XFASTINT (match
) == ' ')
955 if (val
< XVECTOR (Vsyntax_code_object
)->size
&& NILP (match
))
956 newentry
= XVECTOR (Vsyntax_code_object
)->contents
[val
];
958 /* Since we can't use a shared object, let's make a new one. */
959 newentry
= Fcons (make_number (val
), match
);
961 SET_RAW_SYNTAX_ENTRY (syntax_table
, XINT (c
), newentry
);
966 /* Dump syntax table to buffer in human-readable format */
969 describe_syntax (value
)
972 register enum syntaxcode code
;
973 char desc
, start1
, start2
, end1
, end2
, prefix
, comstyle
;
975 Lisp_Object first
, match_lisp
;
977 Findent_to (make_number (16), make_number (1));
981 insert_string ("default\n");
985 if (CHAR_TABLE_P (value
))
987 insert_string ("deeper char-table ...\n");
993 insert_string ("invalid\n");
997 first
= XCAR (value
);
998 match_lisp
= XCDR (value
);
1000 if (!INTEGERP (first
) || !(NILP (match_lisp
) || INTEGERP (match_lisp
)))
1002 insert_string ("invalid\n");
1006 code
= (enum syntaxcode
) (XINT (first
) & 0377);
1007 start1
= (XINT (first
) >> 16) & 1;
1008 start2
= (XINT (first
) >> 17) & 1;
1009 end1
= (XINT (first
) >> 18) & 1;
1010 end2
= (XINT (first
) >> 19) & 1;
1011 prefix
= (XINT (first
) >> 20) & 1;
1012 comstyle
= (XINT (first
) >> 21) & 1;
1014 if ((int) code
< 0 || (int) code
>= (int) Smax
)
1016 insert_string ("invalid");
1019 desc
= syntax_code_spec
[(int) code
];
1021 str
[0] = desc
, str
[1] = 0;
1024 if (NILP (match_lisp
))
1027 insert_char (XINT (match_lisp
));
1044 insert_string ("\twhich means: ");
1046 switch (SWITCH_ENUM_CAST (code
))
1049 insert_string ("whitespace"); break;
1051 insert_string ("punctuation"); break;
1053 insert_string ("word"); break;
1055 insert_string ("symbol"); break;
1057 insert_string ("open"); break;
1059 insert_string ("close"); break;
1061 insert_string ("quote"); break;
1063 insert_string ("string"); break;
1065 insert_string ("math"); break;
1067 insert_string ("escape"); break;
1069 insert_string ("charquote"); break;
1071 insert_string ("comment"); break;
1073 insert_string ("endcomment"); break;
1075 insert_string ("invalid");
1079 if (!NILP (match_lisp
))
1081 insert_string (", matches ");
1082 insert_char (XINT (match_lisp
));
1086 insert_string (",\n\t is the first character of a comment-start sequence");
1088 insert_string (",\n\t is the second character of a comment-start sequence");
1091 insert_string (",\n\t is the first character of a comment-end sequence");
1093 insert_string (",\n\t is the second character of a comment-end sequence");
1095 insert_string (" (comment style b)");
1098 insert_string (",\n\t is a prefix character for `backward-prefix-chars'");
1100 insert_string ("\n");
1104 describe_syntax_1 (vector
)
1107 struct buffer
*old
= current_buffer
;
1108 set_buffer_internal (XBUFFER (Vstandard_output
));
1109 describe_vector (vector
, Qnil
, describe_syntax
, 0, Qnil
, Qnil
, (int *) 0, 0);
1110 while (! NILP (XCHAR_TABLE (vector
)->parent
))
1112 vector
= XCHAR_TABLE (vector
)->parent
;
1113 insert_string ("\nThe parent syntax table is:");
1114 describe_vector (vector
, Qnil
, describe_syntax
, 0, Qnil
, Qnil
,
1118 call0 (intern ("help-mode"));
1119 set_buffer_internal (old
);
1123 DEFUN ("describe-syntax", Fdescribe_syntax
, Sdescribe_syntax
, 0, 0, "",
1124 "Describe the syntax specifications in the syntax table.\n\
1125 The descriptions are inserted in a buffer, which is then displayed.")
1128 internal_with_output_to_temp_buffer
1129 ("*Help*", describe_syntax_1
, current_buffer
->syntax_table
);
1134 int parse_sexp_ignore_comments
;
1136 /* Return the position across COUNT words from FROM.
1137 If that many words cannot be found before the end of the buffer, return 0.
1138 COUNT negative means scan backward and stop at word beginning. */
1141 scan_words (from
, count
)
1142 register int from
, count
;
1144 register int beg
= BEGV
;
1145 register int end
= ZV
;
1146 register int from_byte
= CHAR_TO_BYTE (from
);
1147 register enum syntaxcode code
;
1153 SETUP_SYNTAX_TABLE (from
, count
);
1164 UPDATE_SYNTAX_TABLE_FORWARD (from
);
1165 ch0
= FETCH_CHAR (from_byte
);
1166 code
= SYNTAX (ch0
);
1167 INC_BOTH (from
, from_byte
);
1168 if (words_include_escapes
1169 && (code
== Sescape
|| code
== Scharquote
))
1174 /* Now CH0 is a character which begins a word and FROM is the
1175 position of the next character. */
1178 if (from
== end
) break;
1179 UPDATE_SYNTAX_TABLE_FORWARD (from
);
1180 ch1
= FETCH_CHAR (from_byte
);
1181 code
= SYNTAX (ch1
);
1182 if (!(words_include_escapes
1183 && (code
== Sescape
|| code
== Scharquote
)))
1184 if (code
!= Sword
|| WORD_BOUNDARY_P (ch0
, ch1
))
1186 INC_BOTH (from
, from_byte
);
1200 DEC_BOTH (from
, from_byte
);
1201 UPDATE_SYNTAX_TABLE_BACKWARD (from
);
1202 ch1
= FETCH_CHAR (from_byte
);
1203 code
= SYNTAX (ch1
);
1204 if (words_include_escapes
1205 && (code
== Sescape
|| code
== Scharquote
))
1210 /* Now CH1 is a character which ends a word and FROM is the
1218 temp_byte
= dec_bytepos (from_byte
);
1219 UPDATE_SYNTAX_TABLE_BACKWARD (from
);
1220 ch0
= FETCH_CHAR (temp_byte
);
1221 code
= SYNTAX (ch0
);
1222 if (!(words_include_escapes
1223 && (code
== Sescape
|| code
== Scharquote
)))
1224 if (code
!= Sword
|| WORD_BOUNDARY_P (ch0
, ch1
))
1226 DEC_BOTH (from
, from_byte
);
1237 DEFUN ("forward-word", Fforward_word
, Sforward_word
, 1, 1, "p",
1238 "Move point forward ARG words (backward if ARG is negative).\n\
1239 Normally returns t.\n\
1240 If an edge of the buffer or a field boundary is reached, point is left there\n\
1241 and the function returns nil. Field boundaries are not noticed if\n\
1242 `inhibit-field-text-motion' is non-nil.")
1247 CHECK_NUMBER (count
, 0);
1249 val
= orig_val
= scan_words (PT
, XINT (count
));
1251 val
= XINT (count
) > 0 ? ZV
: BEGV
;
1253 /* Avoid jumping out of an input field. */
1254 val
= XFASTINT (Fconstrain_to_field (make_number (val
), make_number (PT
),
1258 return val
== orig_val
? Qt
: Qnil
;
1261 Lisp_Object
skip_chars ();
1263 DEFUN ("skip-chars-forward", Fskip_chars_forward
, Sskip_chars_forward
, 1, 2, 0,
1264 "Move point forward, stopping before a char not in STRING, or at pos LIM.\n\
1265 STRING is like the inside of a `[...]' in a regular expression\n\
1266 except that `]' is never special and `\\' quotes `^', `-' or `\\'\n\
1267 (but not as the end of a range; quoting is never needed there).\n\
1268 Thus, with arg \"a-zA-Z\", this skips letters stopping before first nonletter.\n\
1269 With arg \"^a-zA-Z\", skips nonletters stopping before first letter.\n\
1270 Returns the distance traveled, either zero or positive.")
1272 Lisp_Object string
, lim
;
1274 return skip_chars (1, 0, string
, lim
);
1277 DEFUN ("skip-chars-backward", Fskip_chars_backward
, Sskip_chars_backward
, 1, 2, 0,
1278 "Move point backward, stopping after a char not in STRING, or at pos LIM.\n\
1279 See `skip-chars-forward' for details.\n\
1280 Returns the distance traveled, either zero or negative.")
1282 Lisp_Object string
, lim
;
1284 return skip_chars (0, 0, string
, lim
);
1287 DEFUN ("skip-syntax-forward", Fskip_syntax_forward
, Sskip_syntax_forward
, 1, 2, 0,
1288 "Move point forward across chars in specified syntax classes.\n\
1289 SYNTAX is a string of syntax code characters.\n\
1290 Stop before a char whose syntax is not in SYNTAX, or at position LIM.\n\
1291 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.\n\
1292 This function returns the distance traveled, either zero or positive.")
1294 Lisp_Object syntax
, lim
;
1296 return skip_chars (1, 1, syntax
, lim
);
1299 DEFUN ("skip-syntax-backward", Fskip_syntax_backward
, Sskip_syntax_backward
, 1, 2, 0,
1300 "Move point backward across chars in specified syntax classes.\n\
1301 SYNTAX is a string of syntax code characters.\n\
1302 Stop on reaching a char whose syntax is not in SYNTAX, or at position LIM.\n\
1303 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.\n\
1304 This function returns the distance traveled, either zero or negative.")
1306 Lisp_Object syntax
, lim
;
1308 return skip_chars (0, 1, syntax
, lim
);
1312 skip_chars (forwardp
, syntaxp
, string
, lim
)
1313 int forwardp
, syntaxp
;
1314 Lisp_Object string
, lim
;
1316 register unsigned int c
;
1318 unsigned char fastmap
[0400];
1319 /* If SYNTAXP is 0, STRING may contain multi-byte form of characters
1320 of which codes don't fit in FASTMAP. In that case, we set the
1321 first byte of multibyte form (i.e. base leading-code) in FASTMAP
1322 and set the actual ranges of characters in CHAR_RANGES. In the
1323 form "X-Y" of STRING, both X and Y must belong to the same
1324 character set because a range striding across character sets is
1327 int n_char_ranges
= 0;
1329 register int i
, i_byte
;
1330 int multibyte
= !NILP (current_buffer
->enable_multibyte_characters
);
1331 int string_multibyte
;
1334 CHECK_STRING (string
, 0);
1335 char_ranges
= (int *) alloca (XSTRING (string
)->size
* (sizeof (int)) * 2);
1336 string_multibyte
= STRING_MULTIBYTE (string
);
1337 size_byte
= STRING_BYTES (XSTRING (string
));
1340 XSETINT (lim
, forwardp
? ZV
: BEGV
);
1342 CHECK_NUMBER_COERCE_MARKER (lim
, 0);
1344 /* In any case, don't allow scan outside bounds of buffer. */
1345 if (XINT (lim
) > ZV
)
1346 XSETFASTINT (lim
, ZV
);
1347 if (XINT (lim
) < BEGV
)
1348 XSETFASTINT (lim
, BEGV
);
1350 bzero (fastmap
, sizeof fastmap
);
1354 if (i_byte
< size_byte
1355 && XSTRING (string
)->data
[0] == '^')
1357 negate
= 1; i
++, i_byte
++;
1360 /* Find the characters specified and set their elements of fastmap.
1361 If syntaxp, each character counts as itself.
1362 Otherwise, handle backslashes and ranges specially. */
1364 while (i_byte
< size_byte
)
1366 int c_leading_code
= XSTRING (string
)->data
[i_byte
];
1368 FETCH_STRING_CHAR_ADVANCE (c
, string
, i
, i_byte
);
1370 /* Convert multibyteness between what the string has
1371 and what the buffer has. */
1373 c
= unibyte_char_to_multibyte (c
);
1378 fastmap
[syntax_spec_code
[c
& 0377]] = 1;
1383 if (i_byte
== size_byte
)
1386 c_leading_code
= XSTRING (string
)->data
[i_byte
];
1387 FETCH_STRING_CHAR_ADVANCE (c
, string
, i
, i_byte
);
1389 if (i_byte
< size_byte
1390 && XSTRING (string
)->data
[i_byte
] == '-')
1392 unsigned int c2
, c2_leading_code
;
1394 /* Skip over the dash. */
1397 if (i_byte
== size_byte
)
1400 /* Get the end of the range. */
1401 c2_leading_code
= XSTRING (string
)->data
[i_byte
];
1402 FETCH_STRING_CHAR_ADVANCE (c2
, string
, i
, i_byte
);
1404 if (SINGLE_BYTE_CHAR_P (c
))
1406 if (! SINGLE_BYTE_CHAR_P (c2
))
1407 error ("Invalid charcter range: %s",
1408 XSTRING (string
)->data
);
1417 if (c_leading_code
!= c2_leading_code
)
1418 error ("Invalid charcter range: %s",
1419 XSTRING (string
)->data
);
1420 fastmap
[c_leading_code
] = 1;
1423 char_ranges
[n_char_ranges
++] = c
;
1424 char_ranges
[n_char_ranges
++] = c2
;
1430 fastmap
[c_leading_code
] = 1;
1431 if (!SINGLE_BYTE_CHAR_P (c
))
1433 char_ranges
[n_char_ranges
++] = c
;
1434 char_ranges
[n_char_ranges
++] = c
;
1440 /* If ^ was the first character, complement the fastmap. In
1441 addition, as all multibyte characters have possibility of
1442 matching, set all entries for base leading codes, which is
1443 harmless even if SYNTAXP is 1. */
1446 for (i
= 0; i
< sizeof fastmap
; i
++)
1448 if (!multibyte
|| !BASE_LEADING_CODE_P (i
))
1455 int start_point
= PT
;
1457 int pos_byte
= PT_BYTE
;
1462 SETUP_SYNTAX_TABLE (pos
, forwardp
? 1 : -1);
1467 if (pos
< XINT (lim
))
1468 while (fastmap
[(int) SYNTAX (FETCH_CHAR (pos_byte
))])
1470 /* Since we already checked for multibyteness,
1471 avoid using INC_BOTH which checks again. */
1474 if (pos
>= XINT (lim
))
1476 UPDATE_SYNTAX_TABLE_FORWARD (pos
);
1481 while (pos
< XINT (lim
)
1482 && fastmap
[(int) SYNTAX (FETCH_BYTE (pos
))])
1485 UPDATE_SYNTAX_TABLE_FORWARD (pos
);
1493 while (pos
> XINT (lim
))
1495 int savepos
= pos_byte
;
1496 /* Since we already checked for multibyteness,
1497 avoid using DEC_BOTH which checks again. */
1500 UPDATE_SYNTAX_TABLE_BACKWARD (pos
);
1501 if (!fastmap
[(int) SYNTAX (FETCH_CHAR (pos_byte
))])
1511 if (pos
> XINT (lim
))
1512 while (fastmap
[(int) SYNTAX (FETCH_BYTE (pos
- 1))])
1515 if (pos
<= XINT (lim
))
1517 UPDATE_SYNTAX_TABLE_BACKWARD (pos
- 1);
1527 while (pos
< XINT (lim
) && fastmap
[(c
= FETCH_BYTE (pos_byte
))])
1529 if (!BASE_LEADING_CODE_P (c
))
1530 INC_BOTH (pos
, pos_byte
);
1531 else if (n_char_ranges
)
1533 /* We much check CHAR_RANGES for a multibyte
1535 ch
= FETCH_MULTIBYTE_CHAR (pos_byte
);
1536 for (i
= 0; i
< n_char_ranges
; i
+= 2)
1537 if ((ch
>= char_ranges
[i
] && ch
<= char_ranges
[i
+ 1]))
1539 if (!(negate
^ (i
< n_char_ranges
)))
1542 INC_BOTH (pos
, pos_byte
);
1547 INC_BOTH (pos
, pos_byte
);
1551 while (pos
< XINT (lim
) && fastmap
[FETCH_BYTE (pos
)])
1557 while (pos
> XINT (lim
))
1559 int savepos
= pos_byte
;
1560 DEC_BOTH (pos
, pos_byte
);
1561 if (fastmap
[(c
= FETCH_BYTE (pos_byte
))])
1563 if (!BASE_LEADING_CODE_P (c
))
1565 else if (n_char_ranges
)
1567 /* We much check CHAR_RANGES for a multibyte
1569 ch
= FETCH_MULTIBYTE_CHAR (pos_byte
);
1570 for (i
= 0; i
< n_char_ranges
; i
+= 2)
1571 if (ch
>= char_ranges
[i
] && ch
<= char_ranges
[i
+ 1])
1573 if (!(negate
^ (i
< n_char_ranges
)))
1596 while (pos
> XINT (lim
) && fastmap
[FETCH_BYTE (pos
- 1)])
1601 #if 0 /* Not needed now that a position in mid-character
1602 cannot be specified in Lisp. */
1604 /* INC_POS or DEC_POS might have moved POS over LIM. */
1605 && (forwardp
? (pos
> XINT (lim
)) : (pos
< XINT (lim
))))
1612 SET_PT_BOTH (pos
, pos_byte
);
1615 return make_number (PT
- start_point
);
1619 /* Jump over a comment, assuming we are at the beginning of one.
1620 FROM is the current position.
1621 FROM_BYTE is the bytepos corresponding to FROM.
1622 Do not move past STOP (a charpos).
1623 The comment over which we have to jump is of style STYLE
1624 (either SYNTAX_COMMENT_STYLE(foo) or ST_COMMENT_STYLE).
1625 NESTING should be positive to indicate the nesting at the beginning
1626 for nested comments and should be zero or negative else.
1627 ST_COMMENT_STYLE cannot be nested.
1628 PREV_SYNTAX is the SYNTAX_WITH_FLAGS of the previous character
1629 (or 0 If the search cannot start in the middle of a two-character).
1631 If successful, return 1 and store the charpos of the comment's end
1632 into *CHARPOS_PTR and the corresponding bytepos into *BYTEPOS_PTR.
1633 Else, return 0 and store the charpos STOP into *CHARPOS_PTR, the
1634 corresponding bytepos into *BYTEPOS_PTR and the current nesting
1635 (as defined for state.incomment) in *INCOMMENT_PTR.
1637 The comment end is the last character of the comment rather than the
1638 character just after the comment.
1640 Global syntax data is assumed to initially be valid for FROM and
1641 remains valid for forward search starting at the returned position. */
1644 forw_comment (from
, from_byte
, stop
, nesting
, style
, prev_syntax
,
1645 charpos_ptr
, bytepos_ptr
, incomment_ptr
)
1646 int from
, from_byte
, stop
;
1647 int nesting
, style
, prev_syntax
;
1648 int *charpos_ptr
, *bytepos_ptr
, *incomment_ptr
;
1651 register enum syntaxcode code
;
1652 register int syntax
;
1654 if (nesting
<= 0) nesting
= -1;
1656 /* Enter the loop in the middle so that we find
1657 a 2-char comment ender if we start in the middle of it. */
1658 syntax
= prev_syntax
;
1659 if (syntax
!= 0) goto forw_incomment
;
1665 *incomment_ptr
= nesting
;
1666 *charpos_ptr
= from
;
1667 *bytepos_ptr
= from_byte
;
1670 c
= FETCH_CHAR (from_byte
);
1671 syntax
= SYNTAX_WITH_FLAGS (c
);
1672 code
= syntax
& 0xff;
1673 if (code
== Sendcomment
1674 && SYNTAX_FLAGS_COMMENT_STYLE (syntax
) == style
1676 /* we have encountered a comment end of the same style
1677 as the comment sequence which began this comment
1680 if (code
== Scomment_fence
1681 && style
== ST_COMMENT_STYLE
)
1682 /* we have encountered a comment end of the same style
1683 as the comment sequence which began this comment
1688 && SYNTAX_FLAGS_COMMENT_STYLE (syntax
) == style
)
1689 /* we have encountered a nested comment of the same style
1690 as the comment sequence which began this comment section */
1692 INC_BOTH (from
, from_byte
);
1693 UPDATE_SYNTAX_TABLE_FORWARD (from
);
1696 if (from
< stop
&& SYNTAX_FLAGS_COMEND_FIRST (syntax
)
1697 && SYNTAX_FLAGS_COMMENT_STYLE (syntax
) == style
1698 && (c1
= FETCH_CHAR (from_byte
),
1699 SYNTAX_COMEND_SECOND (c1
)))
1702 /* we have encountered a comment end of the same style
1703 as the comment sequence which began this comment
1708 INC_BOTH (from
, from_byte
);
1709 UPDATE_SYNTAX_TABLE_FORWARD (from
);
1714 && SYNTAX_FLAGS_COMSTART_FIRST (syntax
)
1715 && (c1
= FETCH_CHAR (from_byte
),
1716 SYNTAX_COMMENT_STYLE (c1
) == style
1717 && SYNTAX_COMSTART_SECOND (c1
)))
1718 /* we have encountered a nested comment of the same style
1719 as the comment sequence which began this comment
1722 INC_BOTH (from
, from_byte
);
1723 UPDATE_SYNTAX_TABLE_FORWARD (from
);
1727 *charpos_ptr
= from
;
1728 *bytepos_ptr
= from_byte
;
1732 DEFUN ("forward-comment", Fforward_comment
, Sforward_comment
, 1, 1, 0,
1733 "Move forward across up to N comments. If N is negative, move backward.\n\
1734 Stop scanning if we find something other than a comment or whitespace.\n\
1735 Set point to where scanning stops.\n\
1736 If N comments are found as expected, with nothing except whitespace\n\
1737 between them, return t; otherwise return nil.")
1745 register enum syntaxcode code
;
1746 int comstyle
= 0; /* style of comment encountered */
1747 int comnested
= 0; /* whether the comment is nestable or not */
1750 int out_charpos
, out_bytepos
;
1753 CHECK_NUMBER (count
, 0);
1754 count1
= XINT (count
);
1755 stop
= count1
> 0 ? ZV
: BEGV
;
1761 from_byte
= PT_BYTE
;
1763 SETUP_SYNTAX_TABLE (from
, count1
);
1772 SET_PT_BOTH (from
, from_byte
);
1776 c
= FETCH_CHAR (from_byte
);
1778 comstart_first
= SYNTAX_COMSTART_FIRST (c
);
1779 comnested
= SYNTAX_COMMENT_NESTED (c
);
1780 INC_BOTH (from
, from_byte
);
1781 UPDATE_SYNTAX_TABLE_FORWARD (from
);
1783 if (from
< stop
&& comstart_first
1784 && (c1
= FETCH_CHAR (from_byte
),
1785 SYNTAX_COMSTART_SECOND (c1
)))
1787 /* We have encountered a comment start sequence and we
1788 are ignoring all text inside comments. We must record
1789 the comment style this sequence begins so that later,
1790 only a comment end of the same style actually ends
1791 the comment section. */
1793 comstyle
= SYNTAX_COMMENT_STYLE (c1
);
1794 comnested
= comnested
|| SYNTAX_COMMENT_NESTED (c1
);
1795 INC_BOTH (from
, from_byte
);
1796 UPDATE_SYNTAX_TABLE_FORWARD (from
);
1798 /* FIXME: here we ignore 2-char endcomments while we don't
1799 when going backwards. */
1801 while (code
== Swhitespace
|| code
== Sendcomment
);
1803 if (code
== Scomment_fence
)
1804 comstyle
= ST_COMMENT_STYLE
;
1805 else if (code
!= Scomment
)
1808 DEC_BOTH (from
, from_byte
);
1809 SET_PT_BOTH (from
, from_byte
);
1812 /* We're at the start of a comment. */
1813 found
= forw_comment (from
, from_byte
, stop
, comnested
, comstyle
, 0,
1814 &out_charpos
, &out_bytepos
, &dummy
);
1815 from
= out_charpos
; from_byte
= out_bytepos
;
1819 SET_PT_BOTH (from
, from_byte
);
1822 INC_BOTH (from
, from_byte
);
1823 UPDATE_SYNTAX_TABLE_FORWARD (from
);
1824 /* We have skipped one comment. */
1832 int quoted
, comstart_second
;
1836 SET_PT_BOTH (BEGV
, BEGV_BYTE
);
1841 DEC_BOTH (from
, from_byte
);
1842 /* char_quoted does UPDATE_SYNTAX_TABLE_BACKWARD (from). */
1843 quoted
= char_quoted (from
, from_byte
);
1846 DEC_BOTH (from
, from_byte
);
1849 c
= FETCH_CHAR (from_byte
);
1852 comnested
= SYNTAX_COMMENT_NESTED (c
);
1853 if (code
== Sendcomment
)
1854 comstyle
= SYNTAX_COMMENT_STYLE (c
);
1855 comstart_second
= SYNTAX_COMSTART_SECOND (c
);
1856 if (from
> stop
&& SYNTAX_COMEND_SECOND (c
)
1857 && prev_char_comend_first (from
, from_byte
)
1858 && !char_quoted (from
- 1, dec_bytepos (from_byte
)))
1860 /* We must record the comment style encountered so that
1861 later, we can match only the proper comment begin
1862 sequence of the same style. */
1863 DEC_BOTH (from
, from_byte
);
1865 /* Calling char_quoted, above, set up global syntax position
1866 at the new value of FROM. */
1867 c1
= FETCH_CHAR (from_byte
);
1868 comstyle
= SYNTAX_COMMENT_STYLE (c1
);
1869 comnested
= comnested
|| SYNTAX_COMMENT_NESTED (c1
);
1871 if (from
> stop
&& comstart_second
1872 && prev_char_comstart_first (from
, from_byte
)
1873 && !char_quoted (from
- 1, dec_bytepos (from_byte
)))
1876 DEC_BOTH (from
, from_byte
);
1879 if (code
== Scomment_fence
)
1881 /* Skip until first preceding unquoted comment_fence. */
1882 int found
= 0, ini
= from
, ini_byte
= from_byte
;
1886 DEC_BOTH (from
, from_byte
);
1889 UPDATE_SYNTAX_TABLE_BACKWARD (from
);
1890 c
= FETCH_CHAR (from_byte
);
1891 if (SYNTAX (c
) == Scomment_fence
1892 && !char_quoted (from
, from_byte
))
1900 from
= ini
; /* Set point to ini + 1. */
1901 from_byte
= ini_byte
;
1905 else if (code
== Sendcomment
)
1907 found
= back_comment (from
, from_byte
, stop
, comnested
, comstyle
,
1908 &out_charpos
, &out_bytepos
);
1911 #if 0 /* cc-mode (and maybe others) relies on the bogus behavior. */
1912 /* Failure: we should go back to the end of this
1913 not-quite-endcomment. */
1914 if (SYNTAX(c
) != code
)
1915 /* It was a two-char Sendcomment. */
1916 INC_BOTH (from
, from_byte
);
1921 /* We have skipped one comment. */
1922 from
= out_charpos
, from_byte
= out_bytepos
;
1925 else if (code
!= Swhitespace
&& code
!= Scomment
)
1929 INC_BOTH (from
, from_byte
);
1930 SET_PT_BOTH (from
, from_byte
);
1938 SET_PT_BOTH (from
, from_byte
);
1943 /* Return syntax code of character C if C is a single byte character
1944 or `multibyte_symbol_p' is zero. Otherwise, retrun Ssymbol. */
1946 #define SYNTAX_WITH_MULTIBYTE_CHECK(c) \
1947 ((SINGLE_BYTE_CHAR_P (c) || !multibyte_symbol_p) \
1948 ? SYNTAX (c) : Ssymbol)
1951 scan_lists (from
, count
, depth
, sexpflag
)
1953 int count
, depth
, sexpflag
;
1956 register int stop
= count
> 0 ? ZV
: BEGV
;
1961 register enum syntaxcode code
, temp_code
;
1962 int min_depth
= depth
; /* Err out if depth gets less than this. */
1963 int comstyle
= 0; /* style of comment encountered */
1964 int comnested
= 0; /* whether the comment is nestable or not */
1966 int last_good
= from
;
1969 int out_bytepos
, out_charpos
;
1971 int multibyte_symbol_p
= sexpflag
&& multibyte_syntax_as_symbol
;
1973 if (depth
> 0) min_depth
= 0;
1975 if (from
> ZV
) from
= ZV
;
1976 if (from
< BEGV
) from
= BEGV
;
1978 from_byte
= CHAR_TO_BYTE (from
);
1983 SETUP_SYNTAX_TABLE (from
, count
);
1988 int comstart_first
, prefix
;
1989 UPDATE_SYNTAX_TABLE_FORWARD (from
);
1990 c
= FETCH_CHAR (from_byte
);
1991 code
= SYNTAX_WITH_MULTIBYTE_CHECK (c
);
1992 comstart_first
= SYNTAX_COMSTART_FIRST (c
);
1993 comnested
= SYNTAX_COMMENT_NESTED (c
);
1994 prefix
= SYNTAX_PREFIX (c
);
1995 if (depth
== min_depth
)
1997 INC_BOTH (from
, from_byte
);
1998 UPDATE_SYNTAX_TABLE_FORWARD (from
);
1999 if (from
< stop
&& comstart_first
2000 && SYNTAX_COMSTART_SECOND (FETCH_CHAR (from_byte
))
2001 && parse_sexp_ignore_comments
)
2003 /* we have encountered a comment start sequence and we
2004 are ignoring all text inside comments. We must record
2005 the comment style this sequence begins so that later,
2006 only a comment end of the same style actually ends
2007 the comment section */
2009 c1
= FETCH_CHAR (from_byte
);
2010 comstyle
= SYNTAX_COMMENT_STYLE (c1
);
2011 comnested
= comnested
|| SYNTAX_COMMENT_NESTED (c1
);
2012 INC_BOTH (from
, from_byte
);
2013 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2019 switch (SWITCH_ENUM_CAST (code
))
2023 if (from
== stop
) goto lose
;
2024 INC_BOTH (from
, from_byte
);
2025 /* treat following character as a word constituent */
2028 if (depth
|| !sexpflag
) break;
2029 /* This word counts as a sexp; return at end of it. */
2032 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2034 /* Some compilers can't handle this inside the switch. */
2035 c
= FETCH_CHAR (from_byte
);
2036 temp
= SYNTAX_WITH_MULTIBYTE_CHECK (c
);
2041 INC_BOTH (from
, from_byte
);
2042 if (from
== stop
) goto lose
;
2051 INC_BOTH (from
, from_byte
);
2055 case Scomment_fence
:
2056 comstyle
= ST_COMMENT_STYLE
;
2059 if (!parse_sexp_ignore_comments
) break;
2060 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2061 found
= forw_comment (from
, from_byte
, stop
,
2062 comnested
, comstyle
, 0,
2063 &out_charpos
, &out_bytepos
, &dummy
);
2064 from
= out_charpos
, from_byte
= out_bytepos
;
2071 INC_BOTH (from
, from_byte
);
2072 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2078 if (from
!= stop
&& c
== FETCH_CHAR (from_byte
))
2080 INC_BOTH (from
, from_byte
);
2090 if (!++depth
) goto done
;
2095 if (!--depth
) goto done
;
2096 if (depth
< min_depth
)
2097 Fsignal (Qscan_error
,
2098 Fcons (build_string ("Containing expression ends prematurely"),
2099 Fcons (make_number (last_good
),
2100 Fcons (make_number (from
), Qnil
))));
2105 temp_pos
= dec_bytepos (from_byte
);
2106 stringterm
= FETCH_CHAR (temp_pos
);
2109 if (from
>= stop
) goto lose
;
2110 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2111 c
= FETCH_CHAR (from_byte
);
2114 : SYNTAX_WITH_MULTIBYTE_CHECK (c
) == Sstring_fence
)
2117 /* Some compilers can't handle this inside the switch. */
2118 temp
= SYNTAX_WITH_MULTIBYTE_CHECK (c
);
2123 INC_BOTH (from
, from_byte
);
2125 INC_BOTH (from
, from_byte
);
2127 INC_BOTH (from
, from_byte
);
2128 if (!depth
&& sexpflag
) goto done
;
2133 /* Reached end of buffer. Error if within object, return nil if between */
2134 if (depth
) goto lose
;
2139 /* End of object reached */
2148 DEC_BOTH (from
, from_byte
);
2149 UPDATE_SYNTAX_TABLE_BACKWARD (from
);
2150 c
= FETCH_CHAR (from_byte
);
2151 code
= SYNTAX_WITH_MULTIBYTE_CHECK (c
);
2152 if (depth
== min_depth
)
2155 comnested
= SYNTAX_COMMENT_NESTED (c
);
2156 if (code
== Sendcomment
)
2157 comstyle
= SYNTAX_COMMENT_STYLE (c
);
2158 if (from
> stop
&& SYNTAX_COMEND_SECOND (c
)
2159 && prev_char_comend_first (from
, from_byte
)
2160 && parse_sexp_ignore_comments
)
2162 /* We must record the comment style encountered so that
2163 later, we can match only the proper comment begin
2164 sequence of the same style. */
2165 DEC_BOTH (from
, from_byte
);
2166 UPDATE_SYNTAX_TABLE_BACKWARD (from
);
2168 c1
= FETCH_CHAR (from_byte
);
2169 comstyle
= SYNTAX_COMMENT_STYLE (c1
);
2170 comnested
= comnested
|| SYNTAX_COMMENT_NESTED (c1
);
2173 /* Quoting turns anything except a comment-ender
2174 into a word character. Note that this if cannot be true
2175 if we decremented FROM in the if-statement above. */
2176 if (code
!= Sendcomment
&& char_quoted (from
, from_byte
))
2178 else if (SYNTAX_PREFIX (c
))
2181 switch (SWITCH_ENUM_CAST (code
))
2187 if (depth
|| !sexpflag
) break;
2188 /* This word counts as a sexp; count object finished
2189 after passing it. */
2192 temp_pos
= from_byte
;
2193 if (! NILP (current_buffer
->enable_multibyte_characters
))
2197 UPDATE_SYNTAX_TABLE_BACKWARD (from
- 1);
2198 c1
= FETCH_CHAR (temp_pos
);
2199 temp_code
= SYNTAX_WITH_MULTIBYTE_CHECK (c1
);
2200 /* Don't allow comment-end to be quoted. */
2201 if (temp_code
== Sendcomment
)
2203 quoted
= char_quoted (from
- 1, temp_pos
);
2206 DEC_BOTH (from
, from_byte
);
2207 temp_pos
= dec_bytepos (temp_pos
);
2208 UPDATE_SYNTAX_TABLE_BACKWARD (from
- 1);
2210 c1
= FETCH_CHAR (temp_pos
);
2211 temp_code
= SYNTAX_WITH_MULTIBYTE_CHECK (c1
);
2212 if (! (quoted
|| temp_code
== Sword
2213 || temp_code
== Ssymbol
2214 || temp_code
== Squote
))
2216 DEC_BOTH (from
, from_byte
);
2223 temp_pos
= dec_bytepos (from_byte
);
2224 UPDATE_SYNTAX_TABLE_BACKWARD (from
- 1);
2225 if (from
!= stop
&& c
== FETCH_CHAR (temp_pos
))
2226 DEC_BOTH (from
, from_byte
);
2235 if (!++depth
) goto done2
;
2240 if (!--depth
) goto done2
;
2241 if (depth
< min_depth
)
2242 Fsignal (Qscan_error
,
2243 Fcons (build_string ("Containing expression ends prematurely"),
2244 Fcons (make_number (last_good
),
2245 Fcons (make_number (from
), Qnil
))));
2249 if (!parse_sexp_ignore_comments
)
2251 found
= back_comment (from
, from_byte
, stop
, comnested
, comstyle
,
2252 &out_charpos
, &out_bytepos
);
2253 /* FIXME: if found == -1, then it really wasn't a comment-end.
2254 For single-char Sendcomment, we can't do much about it apart
2255 from skipping the char.
2256 For 2-char endcomments, we could try again, taking both
2257 chars as separate entities, but it's a lot of trouble
2258 for very little gain, so we don't bother either. -sm */
2260 from
= out_charpos
, from_byte
= out_bytepos
;
2263 case Scomment_fence
:
2267 DEC_BOTH (from
, from_byte
);
2268 if (from
== stop
) goto lose
;
2269 UPDATE_SYNTAX_TABLE_BACKWARD (from
);
2270 if (!char_quoted (from
, from_byte
)
2271 && (c
= FETCH_CHAR (from_byte
),
2272 SYNTAX_WITH_MULTIBYTE_CHECK (c
) == code
))
2275 if (code
== Sstring_fence
&& !depth
&& sexpflag
) goto done2
;
2279 stringterm
= FETCH_CHAR (from_byte
);
2282 if (from
== stop
) goto lose
;
2283 temp_pos
= from_byte
;
2284 if (! NILP (current_buffer
->enable_multibyte_characters
))
2288 UPDATE_SYNTAX_TABLE_BACKWARD (from
- 1);
2289 if (!char_quoted (from
- 1, temp_pos
)
2290 && stringterm
== FETCH_CHAR (temp_pos
))
2292 DEC_BOTH (from
, from_byte
);
2294 DEC_BOTH (from
, from_byte
);
2295 if (!depth
&& sexpflag
) goto done2
;
2300 /* Reached start of buffer. Error if within object, return nil if between */
2301 if (depth
) goto lose
;
2312 XSETFASTINT (val
, from
);
2316 Fsignal (Qscan_error
,
2317 Fcons (build_string ("Unbalanced parentheses"),
2318 Fcons (make_number (last_good
),
2319 Fcons (make_number (from
), Qnil
))));
2324 DEFUN ("scan-lists", Fscan_lists
, Sscan_lists
, 3, 3, 0,
2325 "Scan from character number FROM by COUNT lists.\n\
2326 Returns the character number of the position thus found.\n\
2328 If DEPTH is nonzero, paren depth begins counting from that value,\n\
2329 only places where the depth in parentheses becomes zero\n\
2330 are candidates for stopping; COUNT such places are counted.\n\
2331 Thus, a positive value for DEPTH means go out levels.\n\
2333 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.\n\
2335 If the beginning or end of (the accessible part of) the buffer is reached\n\
2336 and the depth is wrong, an error is signaled.\n\
2337 If the depth is right but the count is not used up, nil is returned.")
2338 (from
, count
, depth
)
2339 Lisp_Object from
, count
, depth
;
2341 CHECK_NUMBER (from
, 0);
2342 CHECK_NUMBER (count
, 1);
2343 CHECK_NUMBER (depth
, 2);
2345 return scan_lists (XINT (from
), XINT (count
), XINT (depth
), 0);
2348 DEFUN ("scan-sexps", Fscan_sexps
, Sscan_sexps
, 2, 2, 0,
2349 "Scan from character number FROM by COUNT balanced expressions.\n\
2350 If COUNT is negative, scan backwards.\n\
2351 Returns the character number of the position thus found.\n\
2353 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.\n\
2355 If the beginning or end of (the accessible part of) the buffer is reached\n\
2356 in the middle of a parenthetical grouping, an error is signaled.\n\
2357 If the beginning or end is reached between groupings\n\
2358 but before count is used up, nil is returned.")
2360 Lisp_Object from
, count
;
2362 CHECK_NUMBER (from
, 0);
2363 CHECK_NUMBER (count
, 1);
2365 return scan_lists (XINT (from
), XINT (count
), 0, 1);
2368 DEFUN ("backward-prefix-chars", Fbackward_prefix_chars
, Sbackward_prefix_chars
,
2370 "Move point backward over any number of chars with prefix syntax.\n\
2371 This includes chars with \"quote\" or \"prefix\" syntax (' or p).")
2376 int opoint_byte
= PT_BYTE
;
2378 int pos_byte
= PT_BYTE
;
2383 SET_PT_BOTH (opoint
, opoint_byte
);
2388 SETUP_SYNTAX_TABLE (pos
, -1);
2390 DEC_BOTH (pos
, pos_byte
);
2392 while (!char_quoted (pos
, pos_byte
)
2393 /* Previous statement updates syntax table. */
2394 && ((c
= FETCH_CHAR (pos_byte
), SYNTAX (c
) == Squote
)
2395 || SYNTAX_PREFIX (c
)))
2398 opoint_byte
= pos_byte
;
2401 DEC_BOTH (pos
, pos_byte
);
2404 SET_PT_BOTH (opoint
, opoint_byte
);
2409 /* Parse forward from FROM / FROM_BYTE to END,
2410 assuming that FROM has state OLDSTATE (nil means FROM is start of function),
2411 and return a description of the state of the parse at END.
2412 If STOPBEFORE is nonzero, stop at the start of an atom.
2413 If COMMENTSTOP is 1, stop at the start of a comment.
2414 If COMMENTSTOP is -1, stop at the start or end of a comment,
2415 after the beginning of a string, or after the end of a string. */
2418 scan_sexps_forward (stateptr
, from
, from_byte
, end
, targetdepth
,
2419 stopbefore
, oldstate
, commentstop
)
2420 struct lisp_parse_state
*stateptr
;
2422 int end
, targetdepth
, stopbefore
;
2423 Lisp_Object oldstate
;
2426 struct lisp_parse_state state
;
2428 register enum syntaxcode code
;
2431 struct level
{ int last
, prev
; };
2432 struct level levelstart
[100];
2433 register struct level
*curlevel
= levelstart
;
2434 struct level
*endlevel
= levelstart
+ 100;
2435 register int depth
; /* Paren depth of current scanning location.
2436 level - levelstart equals this except
2437 when the depth becomes negative. */
2438 int mindepth
; /* Lowest DEPTH value seen. */
2439 int start_quoted
= 0; /* Nonzero means starting after a char quote */
2441 int prev_from
; /* Keep one character before FROM. */
2443 int prev_from_syntax
;
2444 int boundary_stop
= commentstop
== -1;
2447 int out_bytepos
, out_charpos
;
2451 prev_from_byte
= from_byte
;
2453 DEC_BOTH (prev_from
, prev_from_byte
);
2455 /* Use this macro instead of `from++'. */
2457 do { prev_from = from; \
2458 prev_from_byte = from_byte; \
2460 = SYNTAX_WITH_FLAGS (FETCH_CHAR (prev_from_byte)); \
2461 INC_BOTH (from, from_byte); \
2462 UPDATE_SYNTAX_TABLE_FORWARD (from); \
2468 if (NILP (oldstate
))
2471 state
.instring
= -1;
2472 state
.incomment
= 0;
2473 state
.comstyle
= 0; /* comment style a by default. */
2474 state
.comstr_start
= -1; /* no comment/string seen. */
2478 tem
= Fcar (oldstate
);
2484 oldstate
= Fcdr (oldstate
);
2485 oldstate
= Fcdr (oldstate
);
2486 oldstate
= Fcdr (oldstate
);
2487 tem
= Fcar (oldstate
);
2488 /* Check whether we are inside string_fence-style string: */
2489 state
.instring
= (!NILP (tem
)
2490 ? (INTEGERP (tem
) ? XINT (tem
) : ST_STRING_STYLE
)
2493 oldstate
= Fcdr (oldstate
);
2494 tem
= Fcar (oldstate
);
2495 state
.incomment
= (!NILP (tem
)
2496 ? (INTEGERP (tem
) ? XINT (tem
) : -1)
2499 oldstate
= Fcdr (oldstate
);
2500 tem
= Fcar (oldstate
);
2501 start_quoted
= !NILP (tem
);
2503 /* if the eighth element of the list is nil, we are in comment
2504 style a. If it is non-nil, we are in comment style b */
2505 oldstate
= Fcdr (oldstate
);
2506 oldstate
= Fcdr (oldstate
);
2507 tem
= Fcar (oldstate
);
2508 state
.comstyle
= NILP (tem
) ? 0 : (EQ (tem
, Qsyntax_table
)
2509 ? ST_COMMENT_STYLE
: 1);
2511 oldstate
= Fcdr (oldstate
);
2512 tem
= Fcar (oldstate
);
2513 state
.comstr_start
= NILP (tem
) ? -1 : XINT (tem
) ;
2514 oldstate
= Fcdr (oldstate
);
2515 tem
= Fcar (oldstate
);
2516 while (!NILP (tem
)) /* >= second enclosing sexps. */
2518 /* curlevel++->last ran into compiler bug on Apollo */
2519 curlevel
->last
= XINT (Fcar (tem
));
2520 if (++curlevel
== endlevel
)
2521 error ("Nesting too deep for parser");
2522 curlevel
->prev
= -1;
2523 curlevel
->last
= -1;
2530 curlevel
->prev
= -1;
2531 curlevel
->last
= -1;
2533 SETUP_SYNTAX_TABLE (prev_from
, 1);
2534 prev_from_syntax
= SYNTAX_WITH_FLAGS (FETCH_CHAR (prev_from_byte
));
2535 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2537 /* Enter the loop at a place appropriate for initial state. */
2539 if (state
.incomment
)
2540 goto startincomment
;
2541 if (state
.instring
>= 0)
2543 nofence
= state
.instring
!= ST_STRING_STYLE
;
2545 goto startquotedinstring
;
2548 else if (start_quoted
)
2551 #if 0 /* This seems to be redundant with the identical code above. */
2552 SETUP_SYNTAX_TABLE (prev_from
, 1);
2553 prev_from_syntax
= SYNTAX_WITH_FLAGS (FETCH_CHAR (prev_from_byte
));
2554 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2560 code
= prev_from_syntax
& 0xff;
2562 if (code
== Scomment
)
2564 state
.incomment
= (SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax
) ?
2566 state
.comstr_start
= prev_from
;
2568 else if (code
== Scomment_fence
)
2570 /* Record the comment style we have entered so that only
2571 the comment-end sequence of the same style actually
2572 terminates the comment section. */
2573 state
.comstyle
= ST_COMMENT_STYLE
;
2574 state
.incomment
= -1;
2575 state
.comstr_start
= prev_from
;
2578 else if (from
< end
)
2579 if (SYNTAX_FLAGS_COMSTART_FIRST (prev_from_syntax
))
2580 if (c1
= FETCH_CHAR (from_byte
),
2581 SYNTAX_COMSTART_SECOND (c1
))
2582 /* Duplicate code to avoid a complex if-expression
2583 which causes trouble for the SGI compiler. */
2585 /* Record the comment style we have entered so that only
2586 the comment-end sequence of the same style actually
2587 terminates the comment section. */
2588 state
.comstyle
= SYNTAX_COMMENT_STYLE (FETCH_CHAR (from_byte
));
2589 comnested
= SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax
);
2590 comnested
= comnested
|| SYNTAX_COMMENT_NESTED (c1
);
2591 state
.incomment
= comnested
? 1 : -1;
2592 state
.comstr_start
= prev_from
;
2597 if (SYNTAX_FLAGS_PREFIX (prev_from_syntax
))
2599 switch (SWITCH_ENUM_CAST (code
))
2603 if (stopbefore
) goto stop
; /* this arg means stop at sexp start */
2604 curlevel
->last
= prev_from
;
2606 if (from
== end
) goto endquoted
;
2609 /* treat following character as a word constituent */
2612 if (stopbefore
) goto stop
; /* this arg means stop at sexp start */
2613 curlevel
->last
= prev_from
;
2617 /* Some compilers can't handle this inside the switch. */
2618 temp
= SYNTAX (FETCH_CHAR (from_byte
));
2624 if (from
== end
) goto endquoted
;
2636 curlevel
->prev
= curlevel
->last
;
2640 if (commentstop
== 1)
2645 if (! state
.incomment
)
2647 if (commentstop
|| boundary_stop
) goto done
;
2649 /* The (from == BEGV) test is to enter the loop in the middle so
2650 that we find a 2-char comment ender even if we start in the
2652 found
= forw_comment (from
, from_byte
, end
,
2653 state
.incomment
, state
.comstyle
,
2654 (from
== BEGV
) ? 0 : prev_from_syntax
,
2655 &out_charpos
, &out_bytepos
, &state
.incomment
);
2656 from
= out_charpos
; from_byte
= out_bytepos
;
2657 /* Beware! prev_from and friends are invalid now.
2658 Luckily, the `done' doesn't use them and the INC_FROM
2659 sets them to a sane value without looking at them. */
2660 if (!found
) goto done
;
2662 state
.incomment
= 0;
2663 state
.comstyle
= 0; /* reset the comment style */
2664 if (boundary_stop
) goto done
;
2668 if (stopbefore
) goto stop
; /* this arg means stop at sexp start */
2670 /* curlevel++->last ran into compiler bug on Apollo */
2671 curlevel
->last
= prev_from
;
2672 if (++curlevel
== endlevel
)
2673 error ("Nesting too deep for parser");
2674 curlevel
->prev
= -1;
2675 curlevel
->last
= -1;
2676 if (targetdepth
== depth
) goto done
;
2681 if (depth
< mindepth
)
2683 if (curlevel
!= levelstart
)
2685 curlevel
->prev
= curlevel
->last
;
2686 if (targetdepth
== depth
) goto done
;
2691 state
.comstr_start
= from
- 1;
2692 if (stopbefore
) goto stop
; /* this arg means stop at sexp start */
2693 curlevel
->last
= prev_from
;
2694 state
.instring
= (code
== Sstring
2695 ? (FETCH_CHAR (prev_from_byte
))
2697 if (boundary_stop
) goto done
;
2700 nofence
= state
.instring
!= ST_STRING_STYLE
;
2706 if (from
>= end
) goto done
;
2707 c
= FETCH_CHAR (from_byte
);
2708 /* Some compilers can't handle this inside the switch. */
2711 /* Check TEMP here so that if the char has
2712 a syntax-table property which says it is NOT
2713 a string character, it does not end the string. */
2714 if (nofence
&& c
== state
.instring
&& temp
== Sstring
)
2720 if (!nofence
) goto string_end
;
2725 startquotedinstring
:
2726 if (from
>= end
) goto endquoted
;
2732 state
.instring
= -1;
2733 curlevel
->prev
= curlevel
->last
;
2735 if (boundary_stop
) goto done
;
2744 stop
: /* Here if stopping before start of sexp. */
2745 from
= prev_from
; /* We have just fetched the char that starts it; */
2746 goto done
; /* but return the position before it. */
2751 state
.depth
= depth
;
2752 state
.mindepth
= mindepth
;
2753 state
.thislevelstart
= curlevel
->prev
;
2754 state
.prevlevelstart
2755 = (curlevel
== levelstart
) ? -1 : (curlevel
- 1)->last
;
2756 state
.location
= from
;
2757 state
.levelstarts
= Qnil
;
2758 while (--curlevel
>= levelstart
)
2759 state
.levelstarts
= Fcons (make_number (curlevel
->last
),
2766 /* This comment supplies the doc string for parse-partial-sexp,
2767 for make-docfile to see. We cannot put this in the real DEFUN
2768 due to limits in the Unix cpp.
2770 DEFUN ("parse-partial-sexp", Ffoo, Sfoo, 2, 6, 0,
2771 "Parse Lisp syntax starting at FROM until TO; return status of parse at TO.\n\
2772 Parsing stops at TO or when certain criteria are met;\n\
2773 point is set to where parsing stops.\n\
2774 If fifth arg STATE is omitted or nil,\n\
2775 parsing assumes that FROM is the beginning of a function.\n\
2776 Value is a list of ten elements describing final state of parsing:\n\
2777 0. depth in parens.\n\
2778 1. character address of start of innermost containing list; nil if none.\n\
2779 2. character address of start of last complete sexp terminated.\n\
2780 3. non-nil if inside a string.\n\
2781 (it is the character that will terminate the string,\n\
2782 or t if the string should be terminated by a generic string delimiter.)\n\
2783 4. nil if outside a comment, t if inside a non-nestable comment, \n\
2784 else an integer (the current comment nesting).\n\
2785 5. t if following a quote character.\n\
2786 6. the minimum paren-depth encountered during this scan.\n\
2787 7. t if in a comment of style b; symbol `syntax-table' if the comment\n\
2788 should be terminated by a generic comment delimiter.\n\
2789 8. character address of start of comment or string; nil if not in one.\n\
2790 9. Intermediate data for continuation of parsing (subject to change).\n\
2791 If third arg TARGETDEPTH is non-nil, parsing stops if the depth\n\
2792 in parentheses becomes equal to TARGETDEPTH.\n\
2793 Fourth arg STOPBEFORE non-nil means stop when come to\n\
2794 any character that starts a sexp.\n\
2795 Fifth arg STATE is a nine-element list like what this function returns.\n\
2796 It is used to initialize the state of the parse. Elements number 1, 2, 6\n\
2797 and 8 are ignored; you can leave off element 8 (the last) entirely.\n\
2798 Sixth arg COMMENTSTOP non-nil means stop at the start of a comment.\n\
2799 If it is symbol `syntax-table', stop after the start of a comment or a\n\
2800 string, or after end of a comment or a string.")
2801 (from, to, targetdepth, stopbefore, state, commentstop)
2804 DEFUN ("parse-partial-sexp", Fparse_partial_sexp
, Sparse_partial_sexp
, 2, 6, 0,
2805 0 /* See immediately above */)
2806 (from
, to
, targetdepth
, stopbefore
, oldstate
, commentstop
)
2807 Lisp_Object from
, to
, targetdepth
, stopbefore
, oldstate
, commentstop
;
2809 struct lisp_parse_state state
;
2812 if (!NILP (targetdepth
))
2814 CHECK_NUMBER (targetdepth
, 3);
2815 target
= XINT (targetdepth
);
2818 target
= -100000; /* We won't reach this depth */
2820 validate_region (&from
, &to
);
2821 scan_sexps_forward (&state
, XINT (from
), CHAR_TO_BYTE (XINT (from
)),
2823 target
, !NILP (stopbefore
), oldstate
,
2825 ? 0 : (EQ (commentstop
, Qsyntax_table
) ? -1 : 1)));
2827 SET_PT (state
.location
);
2829 return Fcons (make_number (state
.depth
),
2830 Fcons (state
.prevlevelstart
< 0 ? Qnil
: make_number (state
.prevlevelstart
),
2831 Fcons (state
.thislevelstart
< 0 ? Qnil
: make_number (state
.thislevelstart
),
2832 Fcons (state
.instring
>= 0
2833 ? (state
.instring
== ST_STRING_STYLE
2834 ? Qt
: make_number (state
.instring
)) : Qnil
,
2835 Fcons (state
.incomment
< 0 ? Qt
:
2836 (state
.incomment
== 0 ? Qnil
:
2837 make_number (state
.incomment
)),
2838 Fcons (state
.quoted
? Qt
: Qnil
,
2839 Fcons (make_number (state
.mindepth
),
2840 Fcons ((state
.comstyle
2841 ? (state
.comstyle
== ST_COMMENT_STYLE
2842 ? Qsyntax_table
: Qt
) :
2844 Fcons (((state
.incomment
2845 || (state
.instring
>= 0))
2846 ? make_number (state
.comstr_start
)
2848 Fcons (state
.levelstarts
, Qnil
))))))))));
2857 /* This has to be done here, before we call Fmake_char_table. */
2858 Qsyntax_table
= intern ("syntax-table");
2859 staticpro (&Qsyntax_table
);
2861 /* Intern this now in case it isn't already done.
2862 Setting this variable twice is harmless.
2863 But don't staticpro it here--that is done in alloc.c. */
2864 Qchar_table_extra_slots
= intern ("char-table-extra-slots");
2866 /* Create objects which can be shared among syntax tables. */
2867 Vsyntax_code_object
= Fmake_vector (make_number (13), Qnil
);
2868 for (i
= 0; i
< XVECTOR (Vsyntax_code_object
)->size
; i
++)
2869 XVECTOR (Vsyntax_code_object
)->contents
[i
]
2870 = Fcons (make_number (i
), Qnil
);
2872 /* Now we are ready to set up this property, so we can
2873 create syntax tables. */
2874 Fput (Qsyntax_table
, Qchar_table_extra_slots
, make_number (0));
2876 temp
= XVECTOR (Vsyntax_code_object
)->contents
[(int) Swhitespace
];
2878 Vstandard_syntax_table
= Fmake_char_table (Qsyntax_table
, temp
);
2880 temp
= XVECTOR (Vsyntax_code_object
)->contents
[(int) Sword
];
2881 for (i
= 'a'; i
<= 'z'; i
++)
2882 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, i
, temp
);
2883 for (i
= 'A'; i
<= 'Z'; i
++)
2884 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, i
, temp
);
2885 for (i
= '0'; i
<= '9'; i
++)
2886 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, i
, temp
);
2888 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '$', temp
);
2889 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '%', temp
);
2891 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '(',
2892 Fcons (make_number (Sopen
), make_number (')')));
2893 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, ')',
2894 Fcons (make_number (Sclose
), make_number ('(')));
2895 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '[',
2896 Fcons (make_number (Sopen
), make_number (']')));
2897 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, ']',
2898 Fcons (make_number (Sclose
), make_number ('[')));
2899 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '{',
2900 Fcons (make_number (Sopen
), make_number ('}')));
2901 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '}',
2902 Fcons (make_number (Sclose
), make_number ('{')));
2903 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '"',
2904 Fcons (make_number ((int) Sstring
), Qnil
));
2905 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '\\',
2906 Fcons (make_number ((int) Sescape
), Qnil
));
2908 temp
= XVECTOR (Vsyntax_code_object
)->contents
[(int) Ssymbol
];
2909 for (i
= 0; i
< 10; i
++)
2911 c
= "_-+*/&|<>="[i
];
2912 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, c
, temp
);
2915 temp
= XVECTOR (Vsyntax_code_object
)->contents
[(int) Spunct
];
2916 for (i
= 0; i
< 12; i
++)
2918 c
= ".,;:?!#@~^'`"[i
];
2919 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, c
, temp
);
2922 /* All multibyte characters have syntax `word' by default. */
2923 temp
= XVECTOR (Vsyntax_code_object
)->contents
[(int) Sword
];
2924 for (i
= CHAR_TABLE_SINGLE_BYTE_SLOTS
; i
< CHAR_TABLE_ORDINARY_SLOTS
; i
++)
2925 XCHAR_TABLE (Vstandard_syntax_table
)->contents
[i
] = temp
;
2931 Qsyntax_table_p
= intern ("syntax-table-p");
2932 staticpro (&Qsyntax_table_p
);
2934 staticpro (&Vsyntax_code_object
);
2936 Qscan_error
= intern ("scan-error");
2937 staticpro (&Qscan_error
);
2938 Fput (Qscan_error
, Qerror_conditions
,
2939 Fcons (Qscan_error
, Fcons (Qerror
, Qnil
)));
2940 Fput (Qscan_error
, Qerror_message
,
2941 build_string ("Scan error"));
2943 DEFVAR_BOOL ("parse-sexp-ignore-comments", &parse_sexp_ignore_comments
,
2944 "Non-nil means `forward-sexp', etc., should treat comments as whitespace.");
2946 DEFVAR_BOOL ("parse-sexp-lookup-properties", &parse_sexp_lookup_properties
,
2947 "Non-nil means `forward-sexp', etc., grant `syntax-table' property.\n\
2948 The value of this property should be either a syntax table, or a cons\n\
2949 of the form (SYNTAXCODE . MATCHCHAR), SYNTAXCODE being the numeric\n\
2950 syntax code, MATCHCHAR being nil or the character to match (which is\n\
2951 relevant only for open/close type.");
2953 words_include_escapes
= 0;
2954 DEFVAR_BOOL ("words-include-escapes", &words_include_escapes
,
2955 "Non-nil means `forward-word', etc., should treat escape chars part of words.");
2957 DEFVAR_BOOL ("multibyte-syntax-as-symbol", &multibyte_syntax_as_symbol
,
2958 "Non-nil means `scan-sexps' treats all multibyte characters as symbol.");
2959 multibyte_syntax_as_symbol
= 0;
2961 DEFVAR_BOOL ("open-paren-in-column-0-is-defun-start",
2962 &open_paren_in_column_0_is_defun_start
,
2963 "Non-nil means an open paren in column 0 denotes the start of a defun.");
2964 open_paren_in_column_0_is_defun_start
= 1;
2966 defsubr (&Ssyntax_table_p
);
2967 defsubr (&Ssyntax_table
);
2968 defsubr (&Sstandard_syntax_table
);
2969 defsubr (&Scopy_syntax_table
);
2970 defsubr (&Sset_syntax_table
);
2971 defsubr (&Schar_syntax
);
2972 defsubr (&Smatching_paren
);
2973 defsubr (&Smodify_syntax_entry
);
2974 defsubr (&Sdescribe_syntax
);
2976 defsubr (&Sforward_word
);
2978 defsubr (&Sskip_chars_forward
);
2979 defsubr (&Sskip_chars_backward
);
2980 defsubr (&Sskip_syntax_forward
);
2981 defsubr (&Sskip_syntax_backward
);
2983 defsubr (&Sforward_comment
);
2984 defsubr (&Sscan_lists
);
2985 defsubr (&Sscan_sexps
);
2986 defsubr (&Sbackward_prefix_chars
);
2987 defsubr (&Sparse_partial_sexp
);