1 /* GNU Emacs routines to deal with syntax tables; also word and list parsing.
2 Copyright (C) 1985, 1987, 1993-1995, 1997-1999, 2001-2017 Free
3 Software Foundation, Inc.
5 This file is part of GNU Emacs.
7 GNU Emacs is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or (at
10 your option) any later version.
12 GNU Emacs is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
24 #include "character.h"
28 #include "intervals.h"
31 /* Make syntax table lookup grant data in gl_state. */
32 #define SYNTAX(c) syntax_property (c, 1)
33 #define SYNTAX_ENTRY(c) syntax_property_entry (c, 1)
34 #define SYNTAX_WITH_FLAGS(c) syntax_property_with_flags (c, 1)
36 /* Eight single-bit flags have the following meanings:
37 1. This character is the first of a two-character comment-start sequence.
38 2. This character is the second of a two-character comment-start sequence.
39 3. This character is the first of a two-character comment-end sequence.
40 4. This character is the second of a two-character comment-end sequence.
41 5. This character is a prefix, for backward-prefix-chars.
42 6. The char is part of a delimiter for comments of style "b".
43 7. This character is part of a nestable comment sequence.
44 8. The char is part of a delimiter for comments of style "c".
45 Note that any two-character sequence whose first character has flag 1
46 and whose second character has flag 2 will be interpreted as a comment start.
48 Bits 6 and 8 discriminate among different comment styles.
49 Languages such as C++ allow two orthogonal syntax start/end pairs
50 and bit 6 determines whether a comment-end or Scommentend
51 ends style a or b. Comment markers can start style a, b, c, or bc.
52 Style a is always the default.
53 For 2-char comment markers, the style b flag is looked up only on the second
54 char of the comment marker and on the first char of the comment ender.
55 For style c (like the nested flag), the flag can be placed on any of
58 /* These functions extract specific flags from an integer
59 that holds the syntax code and the flags. */
62 SYNTAX_FLAGS_COMSTART_FIRST (int flags
)
64 return (flags
>> 16) & 1;
67 SYNTAX_FLAGS_COMSTART_SECOND (int flags
)
69 return (flags
>> 17) & 1;
72 SYNTAX_FLAGS_COMEND_FIRST (int flags
)
74 return (flags
>> 18) & 1;
77 SYNTAX_FLAGS_COMEND_SECOND (int flags
)
79 return (flags
>> 19) & 1;
82 SYNTAX_FLAGS_COMSTARTEND_FIRST (int flags
)
84 return (flags
& 0x50000) != 0;
87 SYNTAX_FLAGS_PREFIX (int flags
)
89 return (flags
>> 20) & 1;
92 SYNTAX_FLAGS_COMMENT_STYLEB (int flags
)
94 return (flags
>> 21) & 1;
97 SYNTAX_FLAGS_COMMENT_STYLEC (int flags
)
99 return (flags
>> 23) & 1;
102 SYNTAX_FLAGS_COMMENT_STYLEC2 (int flags
)
104 return (flags
>> 22) & 2; /* SYNTAX_FLAGS_COMMENT_STYLEC (flags) * 2 */
107 SYNTAX_FLAGS_COMMENT_NESTED (int flags
)
109 return (flags
>> 22) & 1;
112 /* FLAGS should be the flags of the main char of the comment marker, e.g.
113 the second for comstart and the first for comend. */
115 SYNTAX_FLAGS_COMMENT_STYLE (int flags
, int other_flags
)
117 return (SYNTAX_FLAGS_COMMENT_STYLEB (flags
)
118 | SYNTAX_FLAGS_COMMENT_STYLEC2 (flags
)
119 | SYNTAX_FLAGS_COMMENT_STYLEC2 (other_flags
));
122 /* Extract a particular flag for a given character. */
125 SYNTAX_COMEND_FIRST (int c
)
127 return SYNTAX_FLAGS_COMEND_FIRST (SYNTAX_WITH_FLAGS (c
));
130 /* We use these constants in place for comment-style and
131 string-ender-char to distinguish comments/strings started by
132 comment_fence and string_fence codes. */
136 ST_COMMENT_STYLE
= 256 + 1,
137 ST_STRING_STYLE
= 256 + 2
140 /* This is the internal form of the parse state used in parse-partial-sexp. */
142 struct lisp_parse_state
144 EMACS_INT depth
; /* Depth at end of parsing. */
145 int instring
; /* -1 if not within string, else desired terminator. */
146 EMACS_INT incomment
; /* -1 if in unnestable comment else comment nesting */
147 int comstyle
; /* comment style a=0, or b=1, or ST_COMMENT_STYLE. */
148 bool quoted
; /* True if just after an escape char at end of parsing. */
149 EMACS_INT mindepth
; /* Minimum depth seen while scanning. */
150 /* Char number of most recent start-of-expression at current level */
151 ptrdiff_t thislevelstart
;
152 /* Char number of start of containing expression */
153 ptrdiff_t prevlevelstart
;
154 ptrdiff_t location
; /* Char number at which parsing stopped. */
155 ptrdiff_t location_byte
; /* Corresponding byte position. */
156 ptrdiff_t comstr_start
; /* Position of last comment/string starter. */
157 Lisp_Object levelstarts
; /* Char numbers of starts-of-expression
158 of levels (starting from outermost). */
159 int prev_syntax
; /* Syntax of previous position scanned, when
160 that position (potentially) holds the first char
161 of a 2-char construct, i.e. comment delimiter
162 or Sescape, etc. Smax otherwise. */
165 /* These variables are a cache for finding the start of a defun.
166 find_start_pos is the place for which the defun start was found.
167 find_start_value is the defun start position found for it.
168 find_start_value_byte is the corresponding byte position.
169 find_start_buffer is the buffer it was found in.
170 find_start_begv is the BEGV value when it was found.
171 find_start_modiff is the value of MODIFF when it was found. */
173 static ptrdiff_t find_start_pos
;
174 static ptrdiff_t find_start_value
;
175 static ptrdiff_t find_start_value_byte
;
176 static struct buffer
*find_start_buffer
;
177 static ptrdiff_t find_start_begv
;
178 static EMACS_INT find_start_modiff
;
181 static Lisp_Object
skip_chars (bool, Lisp_Object
, Lisp_Object
, bool);
182 static Lisp_Object
skip_syntaxes (bool, Lisp_Object
, Lisp_Object
);
183 static Lisp_Object
scan_lists (EMACS_INT
, EMACS_INT
, EMACS_INT
, bool);
184 static void scan_sexps_forward (struct lisp_parse_state
*,
185 ptrdiff_t, ptrdiff_t, ptrdiff_t, EMACS_INT
,
187 static void internalize_parse_state (Lisp_Object
, struct lisp_parse_state
*);
188 static bool in_classes (int, Lisp_Object
);
189 static void parse_sexp_propertize (ptrdiff_t charpos
);
190 static void check_syntax_table (Lisp_Object obj
);
192 /* This setter is used only in this file, so it can be private. */
194 bset_syntax_table (struct buffer
*b
, Lisp_Object val
)
196 b
->syntax_table_
= val
;
199 /* Whether the syntax of the character C has the prefix flag set. */
201 syntax_prefix_flag_p (int c
)
203 return SYNTAX_FLAGS_PREFIX (SYNTAX_WITH_FLAGS (c
));
206 struct gl_state_s gl_state
; /* Global state of syntax parser. */
208 enum { INTERVALS_AT_ONCE
= 10 }; /* 1 + max-number of intervals
209 to scan to property-change. */
211 /* Set the syntax entry VAL for char C in table TABLE. */
214 SET_RAW_SYNTAX_ENTRY (Lisp_Object table
, int c
, Lisp_Object val
)
216 CHAR_TABLE_SET (table
, c
, val
);
219 /* Set the syntax entry VAL for char-range RANGE in table TABLE.
220 RANGE is a cons (FROM . TO) specifying the range of characters. */
223 SET_RAW_SYNTAX_ENTRY_RANGE (Lisp_Object table
, Lisp_Object range
,
226 Fset_char_table_range (table
, range
, val
);
229 /* Extract the information from the entry for character C
230 in the current syntax table. */
235 Lisp_Object ent
= SYNTAX_ENTRY (c
);
236 return CONSP (ent
) ? XCDR (ent
) : Qnil
;
239 /* This should be called with FROM at the start of forward
240 search, or after the last position of the backward search. It
241 makes sure that the first char is picked up with correct table, so
242 one does not need to call UPDATE_SYNTAX_TABLE immediately after the
244 Sign of COUNT gives the direction of the search.
248 SETUP_SYNTAX_TABLE (ptrdiff_t from
, ptrdiff_t count
)
250 SETUP_BUFFER_SYNTAX_TABLE ();
251 gl_state
.b_property
= BEGV
;
252 gl_state
.e_property
= ZV
+ 1;
253 gl_state
.object
= Qnil
;
255 if (parse_sexp_lookup_properties
)
258 update_syntax_table_forward (from
, true, Qnil
);
259 else if (from
> BEGV
)
261 update_syntax_table (from
- 1, count
, true, Qnil
);
262 parse_sexp_propertize (from
- 1);
267 /* Same as above, but in OBJECT. If OBJECT is nil, use current buffer.
268 If it is t (which is only used in fast_c_string_match_ignore_case),
269 ignore properties altogether.
271 This is meant for regex.c to use. For buffers, regex.c passes arguments
272 to the UPDATE_SYNTAX_TABLE functions which are relative to BEGV.
273 So if it is a buffer, we set the offset field to BEGV. */
276 SETUP_SYNTAX_TABLE_FOR_OBJECT (Lisp_Object object
,
277 ptrdiff_t from
, ptrdiff_t count
)
279 SETUP_BUFFER_SYNTAX_TABLE ();
280 gl_state
.object
= object
;
281 if (BUFFERP (gl_state
.object
))
283 struct buffer
*buf
= XBUFFER (gl_state
.object
);
284 gl_state
.b_property
= 1;
285 gl_state
.e_property
= BUF_ZV (buf
) - BUF_BEGV (buf
) + 1;
286 gl_state
.offset
= BUF_BEGV (buf
) - 1;
288 else if (NILP (gl_state
.object
))
290 gl_state
.b_property
= 1;
291 gl_state
.e_property
= ZV
- BEGV
+ 1;
292 gl_state
.offset
= BEGV
- 1;
294 else if (EQ (gl_state
.object
, Qt
))
296 gl_state
.b_property
= 0;
297 gl_state
.e_property
= PTRDIFF_MAX
;
302 gl_state
.b_property
= 0;
303 gl_state
.e_property
= 1 + SCHARS (gl_state
.object
);
306 if (parse_sexp_lookup_properties
)
307 update_syntax_table (from
+ gl_state
.offset
- (count
<= 0),
308 count
, 1, gl_state
.object
);
311 /* Update gl_state to an appropriate interval which contains CHARPOS. The
312 sign of COUNT give the relative position of CHARPOS wrt the previously
313 valid interval. If INIT, only [be]_property fields of gl_state are
314 valid at start, the rest is filled basing on OBJECT.
316 `gl_state.*_i' are the intervals, and CHARPOS is further in the search
317 direction than the intervals - or in an interval. We update the
318 current syntax-table basing on the property of this interval, and
319 update the interval to start further than CHARPOS - or be
320 NULL. We also update lim_property to be the next value of
321 charpos to call this subroutine again - or be before/after the
322 start/end of OBJECT. */
325 update_syntax_table (ptrdiff_t charpos
, EMACS_INT count
, bool init
,
328 Lisp_Object tmp_table
;
330 bool invalidate
= true;
335 gl_state
.old_prop
= Qnil
;
336 gl_state
.start
= gl_state
.b_property
;
337 gl_state
.stop
= gl_state
.e_property
;
338 i
= interval_of (charpos
, object
);
339 gl_state
.backward_i
= gl_state
.forward_i
= i
;
343 /* interval_of updates only ->position of the return value, so
344 update the parents manually to speed up update_interval. */
345 while (!NULL_PARENT (i
))
347 if (AM_RIGHT_CHILD (i
))
348 INTERVAL_PARENT (i
)->position
= i
->position
349 - LEFT_TOTAL_LENGTH (i
) + TOTAL_LENGTH (i
) /* right end */
350 - TOTAL_LENGTH (INTERVAL_PARENT (i
))
351 + LEFT_TOTAL_LENGTH (INTERVAL_PARENT (i
));
353 INTERVAL_PARENT (i
)->position
= i
->position
- LEFT_TOTAL_LENGTH (i
)
355 i
= INTERVAL_PARENT (i
);
357 i
= gl_state
.forward_i
;
358 gl_state
.b_property
= i
->position
- gl_state
.offset
;
359 gl_state
.e_property
= INTERVAL_LAST_POS (i
) - gl_state
.offset
;
362 i
= count
> 0 ? gl_state
.forward_i
: gl_state
.backward_i
;
364 /* We are guaranteed to be called with CHARPOS either in i,
367 error ("Error in syntax_table logic for to-the-end intervals");
368 else if (charpos
< i
->position
) /* Move left. */
371 error ("Error in syntax_table logic for intervals <-");
372 /* Update the interval. */
373 i
= update_interval (i
, charpos
);
374 if (INTERVAL_LAST_POS (i
) != gl_state
.b_property
)
377 gl_state
.forward_i
= i
;
378 gl_state
.e_property
= INTERVAL_LAST_POS (i
) - gl_state
.offset
;
381 else if (charpos
>= INTERVAL_LAST_POS (i
)) /* Move right. */
384 error ("Error in syntax_table logic for intervals ->");
385 /* Update the interval. */
386 i
= update_interval (i
, charpos
);
387 if (i
->position
!= gl_state
.e_property
)
390 gl_state
.backward_i
= i
;
391 gl_state
.b_property
= i
->position
- gl_state
.offset
;
396 tmp_table
= textget (i
->plist
, Qsyntax_table
);
399 invalidate
= !EQ (tmp_table
, gl_state
.old_prop
); /* Need to invalidate? */
401 if (invalidate
) /* Did not get to adjacent interval. */
402 { /* with the same table => */
403 /* invalidate the old range. */
406 gl_state
.backward_i
= i
;
407 gl_state
.b_property
= i
->position
- gl_state
.offset
;
411 gl_state
.forward_i
= i
;
412 gl_state
.e_property
= INTERVAL_LAST_POS (i
) - gl_state
.offset
;
416 if (!EQ (tmp_table
, gl_state
.old_prop
))
418 gl_state
.current_syntax_table
= tmp_table
;
419 gl_state
.old_prop
= tmp_table
;
420 if (EQ (Fsyntax_table_p (tmp_table
), Qt
))
422 gl_state
.use_global
= 0;
424 else if (CONSP (tmp_table
))
426 gl_state
.use_global
= 1;
427 gl_state
.global_code
= tmp_table
;
431 gl_state
.use_global
= 0;
432 gl_state
.current_syntax_table
= BVAR (current_buffer
, syntax_table
);
438 if (cnt
&& !EQ (tmp_table
, textget (i
->plist
, Qsyntax_table
)))
442 gl_state
.e_property
= i
->position
- gl_state
.offset
;
443 gl_state
.forward_i
= i
;
448 = i
->position
+ LENGTH (i
) - gl_state
.offset
;
449 gl_state
.backward_i
= i
;
453 else if (cnt
== INTERVALS_AT_ONCE
)
458 = i
->position
+ LENGTH (i
) - gl_state
.offset
459 /* e_property at EOB is not set to ZV but to ZV+1, so that
460 we can do INC(from);UPDATE_SYNTAX_TABLE_FORWARD without
461 having to check eob between the two. */
462 + (next_interval (i
) ? 0 : 1);
463 gl_state
.forward_i
= i
;
467 gl_state
.b_property
= i
->position
- gl_state
.offset
;
468 gl_state
.backward_i
= i
;
473 i
= count
> 0 ? next_interval (i
) : previous_interval (i
);
475 eassert (i
== NULL
); /* This property goes to the end. */
478 gl_state
.e_property
= gl_state
.stop
;
479 gl_state
.forward_i
= i
;
482 gl_state
.b_property
= gl_state
.start
;
486 parse_sexp_propertize (ptrdiff_t charpos
)
489 if (syntax_propertize__done
<= charpos
490 && syntax_propertize__done
< zv
)
492 EMACS_INT modiffs
= CHARS_MODIFF
;
493 safe_call1 (Qinternal__syntax_propertize
,
494 make_number (min (zv
, 1 + charpos
)));
495 if (modiffs
!= CHARS_MODIFF
)
496 error ("parse-sexp-propertize-function modified the buffer!");
497 if (syntax_propertize__done
<= charpos
498 && syntax_propertize__done
< zv
)
499 error ("parse-sexp-propertize-function did not move"
500 " syntax-propertize--done");
501 SETUP_SYNTAX_TABLE (charpos
, 1);
503 else if (gl_state
.e_property
> syntax_propertize__done
)
505 gl_state
.e_property
= syntax_propertize__done
;
506 gl_state
.e_property_truncated
= true;
508 else if (gl_state
.e_property_truncated
509 && gl_state
.e_property
< syntax_propertize__done
)
510 { /* When moving backward, e_property might be set without resetting
511 e_property_truncated, so the e_property_truncated flag may
512 occasionally be left raised spuriously. This should be rare. */
513 gl_state
.e_property_truncated
= false;
514 update_syntax_table_forward (charpos
, false, Qnil
);
519 update_syntax_table_forward (ptrdiff_t charpos
, bool init
,
522 if (gl_state
.e_property_truncated
)
524 eassert (NILP (object
));
525 eassert (charpos
>= gl_state
.e_property
);
526 parse_sexp_propertize (charpos
);
530 update_syntax_table (charpos
, 1, init
, object
);
531 if (NILP (object
) && gl_state
.e_property
> syntax_propertize__done
)
532 parse_sexp_propertize (charpos
);
536 /* Returns true if char at CHARPOS is quoted.
537 Global syntax-table data should be set up already to be good at CHARPOS
538 or after. On return global syntax data is good for lookup at CHARPOS. */
541 char_quoted (ptrdiff_t charpos
, ptrdiff_t bytepos
)
543 enum syntaxcode code
;
544 ptrdiff_t beg
= BEGV
;
546 ptrdiff_t orig
= charpos
;
548 while (charpos
> beg
)
551 DEC_BOTH (charpos
, bytepos
);
553 UPDATE_SYNTAX_TABLE_BACKWARD (charpos
);
554 c
= FETCH_CHAR_AS_MULTIBYTE (bytepos
);
556 if (! (code
== Scharquote
|| code
== Sescape
))
562 UPDATE_SYNTAX_TABLE (orig
);
566 /* Return the bytepos one character before BYTEPOS.
567 We assume that BYTEPOS is not at the start of the buffer. */
570 dec_bytepos (ptrdiff_t bytepos
)
572 if (NILP (BVAR (current_buffer
, enable_multibyte_characters
)))
579 /* Return the SYNTAX_COMEND_FIRST of the character before POS, POS_BYTE. */
582 prev_char_comend_first (ptrdiff_t pos
, ptrdiff_t pos_byte
)
587 DEC_BOTH (pos
, pos_byte
);
588 UPDATE_SYNTAX_TABLE_BACKWARD (pos
);
589 c
= FETCH_CHAR (pos_byte
);
590 val
= SYNTAX_COMEND_FIRST (c
);
591 UPDATE_SYNTAX_TABLE_FORWARD (pos
+ 1);
595 /* `literal-cache' text properties
596 -------------------------------
597 These are applied to all text between BOB and `literal-cache-hwm'
598 which is in literals. They record what type of literal the current
601 On a buffer change (when `inhibit-modification-hooks' is nil), any
602 buffer change (including changing text-properties) will reduce
603 `literal-cache-hwm' to the change position, if it is higher. When
604 `inhibit-modification-hooks' is non-nil, only changes to the
605 `syntax-table' text property (possibly via a `category' text property)
606 which affect the scanning of literals cause the setting of
609 The `literal-cache' text property for a literal is applied on the text
610 between just after its opening delimiter and just after its closing
613 The value of the `literal-cache' text property is a cons. For a
614 string, its car is the symbol `string' and its cdr is the expected
615 closing delimiter (or ST_STRING_STYLE in the case of a string fence
616 string). For a comment, the car is -1 for a non-nestable comment, or
617 the current nesting depth for a nestable comment. When not in a
618 literal, no `literal-cache' text property exists at that place. These
619 values match the internal values used in `scan_sexps_forward. */
621 DEFUN ("trim-literal-cache", Ftrim_literal_cache
, Strim_literal_cache
, 0, 1, 0,
622 doc
: /* Mark the selected buffer's "comment cache" as invalid from POS.
623 By default, POS is the beginning of the buffer (position 1). If the cache is
624 already invalid from an earlier position than POS, this function has no
625 effect. The return value is the new bound. */)
628 ptrdiff_t position
, cache_limit
;
633 position
= max (XINT (pos
), 1);
637 cache_limit
= XINT (BVAR (current_buffer
, literal_cache_hwm
));
638 BVAR (current_buffer
, literal_cache_hwm
)
639 = make_number (min (cache_limit
, position
));
640 return BVAR (current_buffer
, literal_cache_hwm
);
643 /* Empty the literal-cache of every buffer whose syntax table is
644 currently set to SYNTAB. */
646 empty_syntax_tables_buffers_literal_caches (Lisp_Object syntab
)
648 Lisp_Object buf
, buf_list
;
649 Lisp_Object one
= make_number (1);
652 buf_list
= Fbuffer_list (Qnil
);
653 while (!NILP (buf_list
))
655 buf
= XCAR (buf_list
);
657 if (EQ (BVAR (b
, syntax_table
), syntab
))
658 BVAR (b
, literal_cache_hwm
) = one
;
659 buf_list
= XCDR (buf_list
);
663 #define LITERAL_MASK ((1 << Sstring) \
665 | (1 << Scharquote) \
667 | (1 << Sendcomment) \
668 | (1 << Scomment_fence) \
669 | (1 << Sstring_fence))
671 /* The following returns true if ELT (which will be a raw syntax
672 descriptor (see page "Syntax Table Internals" in the Elisp manual)
673 or nil) represents a syntax which is (potentially) relevant to
674 strings or comments. */
676 SYNTAB_LITERAL (Lisp_Object elt
)
681 ielt
= XINT (XCAR (elt
));
682 return (ielt
& 0xF0000) /* a comment flag is set */
683 || ((1 << (ielt
& 0xFF)) & LITERAL_MASK
); /* One of Sstring, .... */
687 bool syntax_table_value_is_interesting_for_literals (Lisp_Object val
)
689 ptrdiff_t syntax
, code
;
691 || !INTEGERP (XCAR (val
)))
693 return SYNTAB_LITERAL (XCAR (val
));
696 /* The text property PROP is having its value VAL at position POS in buffer BUF
697 either set or cleared. If this value is relevant to the syntax of literals,
698 reduce the BUF's value of literal_cache_hwm to POS. */
700 check_literal_cache_hwm_for_prop (ptrdiff_t pos
, Lisp_Object prop
,
701 Lisp_Object val
, Lisp_Object buffer
)
707 if (!BUFFERP (buffer
))
709 b
= XBUFFER (buffer
);
710 hwm
= XINT (BVAR (b
, literal_cache_hwm
));
714 if (EQ (prop
, Qcategory
)
717 plist
= Fsymbol_plist (val
);
718 while (CONSP (plist
))
721 plist
= XCDR (plist
);
725 if (EQ (prop
, Qsyntax_table
))
727 plist
= XCDR (plist
);
730 if (EQ (prop
, Qsyntax_table
)
731 && syntax_table_value_is_interesting_for_literals (val
))
732 BVAR (b
, literal_cache_hwm
) = make_number (pos
);
735 /* Scan forward over the innards of a containing comment, marking
736 nested comments. FROM/FROM_BYTE, TO delimit the region to be marked.
737 LITERAL_CACHE_VALUE is the value of the `literal-cache' property that
738 was applied to the containing comment. */
740 scan_nested_comments_forward (ptrdiff_t from
, ptrdiff_t from_byte
,
742 Lisp_Object literal_cache_value
)
745 int comstyle
= XINT (XCDR (literal_cache_value
));
746 struct lisp_parse_state state
;
748 /* Increment the nesting depth. */
749 literal_cache_value
=
750 Fcons (make_number (XINT (XCAR (literal_cache_value
)) + 1),
751 XCDR (literal_cache_value
));
752 /* Make sure our text property value is `eq' to other values which
754 tem
= Fmember (literal_cache_value
, Vliteral_cache_values
);
756 literal_cache_value
= XCAR (tem
);
758 Vliteral_cache_values
= Fcons (literal_cache_value
,
759 Vliteral_cache_values
);
761 UPDATE_SYNTAX_TABLE_BACKWARD (from
);
762 internalize_parse_state (Qnil
, &state
);
766 scan_sexps_forward (&state
, from
, from_byte
, to
,
767 TYPE_MINIMUM (EMACS_INT
), false,
768 -1); /* Stop after literal boundary. */
769 from
= state
.location
;
770 from_byte
= state
.location_byte
;
772 if (state
.instring
!= -1)
773 state
.instring
= -1; /* Ignore string delim we've passed. */
774 else if (state
.incomment
<= 0
775 || state
.comstyle
!= comstyle
)
776 state
.incomment
= 0; /* Ignore a wrong type comment opener
780 /* We're at the start of the innards of a nested comment
781 of the right type. We know the next scan will stop at
782 the end of this comment. */
783 scan_sexps_forward (&state
, from
, from_byte
, to
,
784 TYPE_MINIMUM (EMACS_INT
), false,
786 Fput_text_property (make_number (from
),
787 make_number (state
.location
),
789 literal_cache_value
, Qnil
);
790 scan_nested_comments_forward (from
, from_byte
,
792 literal_cache_value
);
793 from
= state
.location
;
794 from_byte
= state
.location_byte
;
799 /* Scan forward over all text between literal-cache-hwm and TO,
800 marking literals (strings and comments) with the `literal-cache'
801 text property. `literal-cache-hwm' is updated to TO. */
803 scan_comments_forward_to (ptrdiff_t to
, ptrdiff_t to_byte
)
805 ptrdiff_t count
= SPECPDL_INDEX ();
806 ptrdiff_t hwm
, hwm_byte
;
807 struct lisp_parse_state state
;
808 ptrdiff_t orig_begv
= BEGV
, orig_begv_byte
= BEGV_BYTE
;
809 ptrdiff_t tmp
, tmp_byte
;
811 enum syntaxcode code
;
813 Lisp_Object literal_cache_value
;
816 hwm
= XINT (BVAR (current_buffer
, literal_cache_hwm
));
820 record_unwind_protect (save_restriction_restore
,
821 save_restriction_save ());
822 BEGV
= BEG
; BEGV_BYTE
= BEG_BYTE
;
824 hwm_byte
= CHAR_TO_BYTE (hwm
);
825 /* We mustn't start scanning just after the first half of a
826 double character comment starter or ender. */
829 tmp
= hwm
; tmp_byte
= hwm_byte
;
832 DEC_BOTH (tmp
, tmp_byte
);
833 UPDATE_SYNTAX_TABLE_BACKWARD (tmp
);
834 c
= FETCH_CHAR_AS_MULTIBYTE (tmp_byte
);
835 syntax
= SYNTAX_WITH_FLAGS (c
);
840 || (syntax
& 0xF0000))); /* Flags `1', `2', `3', `4'. */
842 INC_BOTH (tmp
, tmp_byte
);
843 hwm
= tmp
; hwm_byte
= tmp_byte
;
846 internalize_parse_state (Qnil
, &state
);
848 /* Initialize STATE with the current value of the
849 `literal-cache' text property. */
851 depth
= Fget_text_property (make_number (hwm
- 1),
852 Qliteral_cache
, Qnil
);
855 if (EQ (Fcar (depth
), Qstring
))
857 state
.instring
= XINT (Fcdr (depth
));
863 state
.incomment
= XINT (Fcar (depth
));
864 state
.comstyle
= XINT (Fcdr (depth
));
870 /* Setup the buffer to write text properties discreetly. */
871 Lisp_Object modified
= Fbuffer_modified_p (Qnil
);
872 ptrdiff_t count1
= SPECPDL_INDEX ();
874 specbind (Qinhibit_modification_hooks
, Qt
);
875 specbind (intern ("buffer-undo-list"), Qt
);
876 specbind (Qinhibit_read_only
, Qt
);
877 specbind (Qdeactivate_mark
, Qnil
);
879 record_unwind_protect
880 ((void (*) (Lisp_Object
))Frestore_buffer_modified_p
, Qnil
);
884 /* For each literal we scan, we apply the `literal-cache'
885 property on its innards and closing delimiter. Calculate
886 the value we will use first. */
887 literal_cache_value
= (state
.instring
!= -1)
888 ? Fcons (Qstring
, make_number (state
.instring
))
890 ? Fcons (make_number (state
.incomment
),
891 make_number (state
.comstyle
))
893 /* Ensure all `equal' values of literal-cache-value are also `eq'. */
894 if (!NILP (literal_cache_value
))
896 tem
= Fmember (literal_cache_value
, Vliteral_cache_values
);
898 literal_cache_value
= XCAR (tem
);
900 Vliteral_cache_values
= Fcons (literal_cache_value
,
901 Vliteral_cache_values
);
904 scan_sexps_forward (&state
, hwm
, hwm_byte
, to
,
905 TYPE_MINIMUM (EMACS_INT
), false,
906 -1); /* stop after literal boundary */
908 if (!NILP (literal_cache_value
))
909 Fput_text_property (make_number (hwm
),
910 make_number (state
.location
),
912 literal_cache_value
, Qnil
);
914 Fremove_list_of_text_properties
916 make_number (state
.location
),
917 Fcons (Qliteral_cache
, Qnil
), Qnil
);
919 if (!NILP (literal_cache_value
)
920 && NUMBERP (XCAR (literal_cache_value
))
921 && XINT (XCAR (literal_cache_value
)) > 0)
922 scan_nested_comments_forward
923 (hwm
, hwm_byte
, state
.location
, literal_cache_value
);
925 hwm
= state
.location
;
926 hwm_byte
= state
.location_byte
;
928 unbind_to (count1
, Qnil
);
930 /* Frestore_buffer_modified_p overwrites gl_state, hence: */
931 SETUP_SYNTAX_TABLE (to
, -1);
933 BVAR (current_buffer
, literal_cache_hwm
) = make_number (hwm
);
934 unbind_to (count
, Qnil
);
938 /* Check whether charpos FROM is at the end of a comment.
939 FROM_BYTE is the bytepos corresponding to FROM.
940 Do not move back before STOP.
942 Return true if we find a comment ending at FROM/FROM_BYTE.
944 If successful, store the charpos of the comment's beginning
945 into *CHARPOS_PTR, and the bytepos into *BYTEPOS_PTR.
947 Global syntax data remains valid for backward search starting at
948 the returned value (or at FROM, if the search was not successful). */
950 back_comment (ptrdiff_t from
, ptrdiff_t from_byte
, ptrdiff_t stop
,
951 bool comnested
, int comstyle
, ptrdiff_t *charpos_ptr
,
952 ptrdiff_t *bytepos_ptr
)
955 ptrdiff_t literal_cache
, target_depth
, comment_style
;
960 scan_comments_forward_to (from
, from_byte
);
963 depth
= Fget_text_property (make_number (from
- 1), Qliteral_cache
, Qnil
);
964 if (!CONSP (depth
) /* nil, not in a literal. */
965 || !INTEGERP (XCAR (depth
))) /* A string. */
967 literal_cache
= XINT (XCAR (depth
));
968 comment_style
= XINT (XCDR (depth
));
969 if (comment_style
!= comstyle
) /* Wrong sort of comment. This
970 can happen with "*|" at the
971 end of a "||" line comment. */
974 /* literal_cache: -1 is a non-nested comment, otherwise it's
975 the depth of nesting of nested comments. */
976 target_depth
= literal_cache
< 0 ? 0 : literal_cache
- 1;
979 temp
= Fprevious_single_property_change (make_number (from
),
980 Qliteral_cache
, Qnil
, Qnil
);
986 && (depth
= Fget_text_property (make_number (from
- 1),
987 Qliteral_cache
, Qnil
),
989 && XINT (XCAR (depth
)) > target_depth
);
992 from_byte
= CHAR_TO_BYTE (from
);
994 /* Having passed back over the body of the comment, we should now find a
996 DEC_BOTH (from
, from_byte
);
997 UPDATE_SYNTAX_TABLE_BACKWARD (from
);
999 c
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
1000 syntax
= SYNTAX_WITH_FLAGS (c
);
1002 if (code
!= Scomment
&& code
!= Scomment_fence
)
1006 if (!SYNTAX_FLAGS_COMSTART_SECOND (syntax
))
1008 DEC_BOTH (from
, from_byte
);
1009 UPDATE_SYNTAX_TABLE_BACKWARD (from
);
1010 c
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
1011 syntax
= SYNTAX_WITH_FLAGS (c
);
1012 if (!SYNTAX_FLAGS_COMSTART_FIRST (syntax
))
1015 *charpos_ptr
= from
;
1016 *bytepos_ptr
= from_byte
;
1020 /* If the two syntax entries OLD_SYN and NEW_SYN would parse strings
1021 or comments differently return true, otherwise return nil. */
1023 literally_different (Lisp_Object old_syn
, Lisp_Object new_syn
)
1025 bool old_literality
= SYNTAB_LITERAL (old_syn
),
1026 new_literality
= SYNTAB_LITERAL (new_syn
);
1027 return (old_literality
!= new_literality
)
1029 && (!EQ (XCAR (old_syn
), XCAR (new_syn
))));
1032 /* If there is a character position in the range [START, END] for
1033 whose syntaxes in syntax tables OLD and NEW strings or comments
1034 might be parsed differently, return the lowest character for which
1035 this holds. Otherwise, return -1. */
1037 syntax_table_ranges_differ_literally_p (Lisp_Object old
, Lisp_Object
new,
1040 int old_from
, new_from
, old_to
, new_to
;
1041 Lisp_Object old_syn
, new_syn
;
1042 bool old_literality
, new_literality
;
1044 new_from
= old_from
= start
;
1045 new_to
= old_to
= -1;
1047 while ((old_from
< end
) && (new_from
< end
))
1049 if (old_from
== new_from
)
1051 old_syn
= char_table_ref_and_range_with_parents (old
, old_from
,
1052 &old_from
, &old_to
);
1053 new_syn
= char_table_ref_and_range_with_parents (new, new_from
,
1054 &new_from
, &new_to
);
1055 if (literally_different (old_syn
, new_syn
))
1057 old_from
= old_to
+ 1;
1058 new_from
= new_to
+ 1;
1062 else if (old_from
< new_from
)
1064 old_syn
= char_table_ref_and_range_with_parents (old
, old_from
,
1065 &old_from
, &old_to
);
1066 if (literally_different (old_syn
, new_syn
))
1068 old_from
= old_to
+ 1;
1073 new_syn
= char_table_ref_and_range_with_parents (new, new_from
,
1074 &new_from
, &new_to
);
1075 if (literally_different (old_syn
, new_syn
))
1077 new_from
= new_to
+ 1;
1084 DEFUN ("least-literal-difference-between-syntax-tables",
1085 Fleast_literal_difference_between_syntax_tables
,
1086 Sleast_literal_difference_between_syntax_tables
,
1088 doc
: /* Lowest char whose different syntaxes in OLD and NEW parse literals differently.
1089 OLD and NEW are syntax tables. */)
1090 (Lisp_Object old
, Lisp_Object
new)
1094 check_syntax_table (old
);
1095 check_syntax_table (new);
1096 c
= syntax_table_ranges_differ_literally_p (old
, new, 0, MAX_CHAR
+ 1);
1098 return make_number (c
);
1102 DEFUN ("syntax-tables-literally-different-p",
1103 Fsyntax_tables_literally_different_p
,
1104 Ssyntax_tables_literally_different_p
,
1106 doc
: /* Will syntax tables OLD and NEW parse literals differently?
1107 Return t when OLD and NEW might parse comments and strings differently,
1108 otherwise nil. (Use `least-literal-difference-between-syntax-tables'
1109 to locate a character position where the tables differ.) */)
1110 (Lisp_Object old
, Lisp_Object
new)
1114 check_syntax_table (old
);
1115 check_syntax_table (new);
1116 /* Check to see if there is a cached relationship between the tables. */
1117 if (Fmemq (new, XCHAR_TABLE (old
)->extras
[0]))
1119 if (Fmemq (new, XCHAR_TABLE (old
)->extras
[1]))
1121 /* the two tables have no known relationship, so we'll have
1122 laboriously to compare them. */
1123 if (syntax_table_ranges_differ_literally_p (old
, new, 0, MAX_CHAR
+ 1) >= 0)
1125 /* mark the "literally different" relationship between the OLD and
1126 NEW syntax tables. */
1127 extra
= Fcons (new, XCHAR_TABLE (old
)->extras
[1]);
1128 XCHAR_TABLE (old
)->extras
[1] = extra
;
1129 extra
= Fcons (old
, XCHAR_TABLE (new)->extras
[1]);
1130 XCHAR_TABLE (new)->extras
[1] = extra
;
1135 /* mark the "not literally different" relationship between the OLD
1136 and NEW syntax tables. */
1137 extra
= Fcons (new, XCHAR_TABLE (old
)->extras
[0]);
1138 XCHAR_TABLE (old
)->extras
[0] = extra
;
1139 extra
= Fcons (old
, XCHAR_TABLE (new)->extras
[0]);
1140 XCHAR_TABLE (new)->extras
[0] = extra
;
1145 /* If any character in the range [START, END) has an entry in syntax
1146 table SYNTAB which is relevant to literal parsing, return true,
1147 else return false. */
1149 syntax_table_value_range_is_interesting_for_literals (Lisp_Object syntab
,
1159 syn
= char_table_ref_and_range_with_parents (syntab
, from
, &from
, &to
);
1160 if (SYNTAB_LITERAL (syn
))
1169 /* In the syntax table SYNTAB, in the 0th and 1st extra slots are
1170 lists of other syntax tables which are known to be "literally the
1171 same" and "literally different" respectively. Those other tables
1172 will each contain SYNTAB in their extra slots. Remove all these
1173 syntax tables from all these extra slots; this will leave both of
1174 the slots on SYNTAB nil. */
1176 break_off_syntax_tables_literal_relations (Lisp_Object syntab
)
1178 struct Lisp_Char_Table
*c
= XCHAR_TABLE (syntab
);
1179 Lisp_Object remote_tab
;
1180 struct Lisp_Char_Table
*r
;
1181 Lisp_Object syntab_extra
, remote_extra
;
1183 syntab_extra
= c
->extras
[0];
1184 while (!NILP (syntab_extra
))
1186 remote_tab
= XCAR (syntab_extra
);
1187 r
= XCHAR_TABLE (remote_tab
);
1188 remote_extra
= r
->extras
[0];
1189 r
->extras
[0] = Fdelq (syntab
, remote_extra
);
1190 syntab_extra
= XCDR (syntab_extra
);
1192 c
->extras
[0] = Qnil
;
1194 syntab_extra
= c
->extras
[1];
1195 while (!NILP (syntab_extra
))
1197 remote_tab
= XCAR (syntab_extra
);
1198 r
= XCHAR_TABLE (remote_tab
);
1199 remote_extra
= r
->extras
[1];
1200 r
->extras
[1] = Fdelq (syntab
, remote_extra
);
1201 syntab_extra
= XCDR (syntab_extra
);
1203 c
->extras
[1] = Qnil
;
1207 DEFUN ("syntax-table-p", Fsyntax_table_p
, Ssyntax_table_p
, 1, 1, 0,
1208 doc
: /* Return t if OBJECT is a syntax table.
1209 Currently, any char-table counts as a syntax table. */)
1210 (Lisp_Object object
)
1212 if (CHAR_TABLE_P (object
)
1213 && EQ (XCHAR_TABLE (object
)->purpose
, Qsyntax_table
))
1219 check_syntax_table (Lisp_Object obj
)
1221 CHECK_TYPE (CHAR_TABLE_P (obj
) && EQ (XCHAR_TABLE (obj
)->purpose
, Qsyntax_table
),
1222 Qsyntax_table_p
, obj
);
1225 DEFUN ("syntax-table", Fsyntax_table
, Ssyntax_table
, 0, 0, 0,
1226 doc
: /* Return the current syntax table.
1227 This is the one specified by the current buffer. */)
1230 return BVAR (current_buffer
, syntax_table
);
1233 DEFUN ("standard-syntax-table", Fstandard_syntax_table
,
1234 Sstandard_syntax_table
, 0, 0, 0,
1235 doc
: /* Return the standard syntax table.
1236 This is the one used for new buffers. */)
1239 return Vstandard_syntax_table
;
1242 DEFUN ("copy-syntax-table", Fcopy_syntax_table
, Scopy_syntax_table
, 0, 1, 0,
1243 doc
: /* Construct a new syntax table and return it.
1244 It is a copy of the TABLE, which defaults to the standard syntax table. */)
1250 check_syntax_table (table
);
1252 table
= Vstandard_syntax_table
;
1254 copy
= Fcopy_sequence (table
);
1256 /* Only the standard syntax table should have a default element.
1257 Other syntax tables should inherit from parents instead. */
1258 set_char_table_defalt (copy
, Qnil
);
1260 /* Copied syntax tables should all have parents.
1261 If we copied one with no parent, such as the standard syntax table,
1262 use the standard syntax table as the copy's parent. */
1263 if (NILP (XCHAR_TABLE (copy
)->parent
))
1264 Fset_char_table_parent (copy
, Vstandard_syntax_table
);
1268 DEFUN ("set-syntax-table", Fset_syntax_table
, Sset_syntax_table
, 1, 1, 0,
1269 doc
: /* Select a new syntax table for the current buffer.
1270 One argument, a syntax table. */)
1274 check_syntax_table (table
);
1275 if (Fsyntax_table_p (BVAR (current_buffer
, syntax_table
))
1276 && !NILP (Fsyntax_tables_literally_different_p
1277 (BVAR (current_buffer
, syntax_table
), table
)))
1278 Ftrim_literal_cache (Qnil
);
1279 bset_syntax_table (current_buffer
, table
);
1280 /* Indicate that this buffer now has a specified syntax table. */
1281 idx
= PER_BUFFER_VAR_IDX (syntax_table
);
1282 SET_PER_BUFFER_VALUE_P (current_buffer
, idx
, 1);
1286 /* Convert a letter which signifies a syntax code
1287 into the code it signifies.
1288 This is used by modify-syntax-entry, and other things. */
1290 unsigned char const syntax_spec_code
[0400] =
1291 { 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1292 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1293 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1294 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1295 Swhitespace
, Scomment_fence
, Sstring
, 0377, Smath
, 0377, 0377, Squote
,
1296 Sopen
, Sclose
, 0377, 0377, 0377, Swhitespace
, Spunct
, Scharquote
,
1297 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1298 0377, 0377, 0377, 0377, Scomment
, 0377, Sendcomment
, 0377,
1299 Sinherit
, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* @, A ... */
1300 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1301 0377, 0377, 0377, 0377, 0377, 0377, 0377, Sword
,
1302 0377, 0377, 0377, 0377, Sescape
, 0377, 0377, Ssymbol
,
1303 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* `, a, ... */
1304 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1305 0377, 0377, 0377, 0377, 0377, 0377, 0377, Sword
,
1306 0377, 0377, 0377, 0377, Sstring_fence
, 0377, 0377, 0377
1309 /* Indexed by syntax code, give the letter that describes it. */
1311 char const syntax_code_spec
[16] =
1313 ' ', '.', 'w', '_', '(', ')', '\'', '\"', '$', '\\', '/', '<', '>', '@',
1317 /* Indexed by syntax code, give the object (cons of syntax code and
1318 nil) to be stored in syntax table. Since these objects can be
1319 shared among syntax tables, we generate them in advance. By
1320 sharing objects, the function `describe-syntax' can give a more
1322 static Lisp_Object Vsyntax_code_object
;
1325 DEFUN ("char-syntax", Fchar_syntax
, Schar_syntax
, 1, 1, 0,
1326 doc
: /* Return the syntax code of CHARACTER, described by a character.
1327 For example, if CHARACTER is a word constituent, the
1328 character `w' (119) is returned.
1329 The characters that correspond to various syntax codes
1330 are listed in the documentation of `modify-syntax-entry'. */)
1331 (Lisp_Object character
)
1334 CHECK_CHARACTER (character
);
1335 char_int
= XINT (character
);
1336 SETUP_BUFFER_SYNTAX_TABLE ();
1337 return make_number (syntax_code_spec
[SYNTAX (char_int
)]);
1340 DEFUN ("matching-paren", Fmatching_paren
, Smatching_paren
, 1, 1, 0,
1341 doc
: /* Return the matching parenthesis of CHARACTER, or nil if none. */)
1342 (Lisp_Object character
)
1345 enum syntaxcode code
;
1346 CHECK_CHARACTER (character
);
1347 char_int
= XINT (character
);
1348 SETUP_BUFFER_SYNTAX_TABLE ();
1349 code
= SYNTAX (char_int
);
1350 if (code
== Sopen
|| code
== Sclose
)
1351 return SYNTAX_MATCH (char_int
);
1355 DEFUN ("string-to-syntax", Fstring_to_syntax
, Sstring_to_syntax
, 1, 1, 0,
1356 doc
: /* Convert a syntax descriptor STRING into a raw syntax descriptor.
1357 STRING should be a string of the form allowed as argument of
1358 `modify-syntax-entry'. The return value is a raw syntax descriptor: a
1359 cons cell (CODE . MATCHING-CHAR) which can be used, for example, as
1360 the value of a `syntax-table' text property. */)
1361 (Lisp_Object string
)
1363 const unsigned char *p
;
1367 CHECK_STRING (string
);
1370 val
= syntax_spec_code
[*p
++];
1372 error ("Invalid syntax description letter: %c", p
[-1]);
1374 if (val
== Sinherit
)
1380 int character
= STRING_CHAR_AND_LENGTH (p
, len
);
1381 XSETINT (match
, character
);
1382 if (XFASTINT (match
) == ' ')
1425 if (val
< ASIZE (Vsyntax_code_object
) && NILP (match
))
1426 return AREF (Vsyntax_code_object
, val
);
1428 /* Since we can't use a shared object, let's make a new one. */
1429 return Fcons (make_number (val
), match
);
1432 /* I really don't know why this is interactive
1433 help-form should at least be made useful whilst reading the second arg. */
1434 DEFUN ("modify-syntax-entry", Fmodify_syntax_entry
, Smodify_syntax_entry
, 2, 3,
1435 "cSet syntax for character: \nsSet syntax for %s to: ",
1436 doc
: /* Set syntax for character CHAR according to string NEWENTRY.
1437 The syntax is changed only for table SYNTAX-TABLE, which defaults to
1438 the current buffer's syntax table.
1439 CHAR may be a cons (MIN . MAX), in which case, syntaxes of all characters
1440 in the range MIN to MAX are changed.
1441 The first character of NEWENTRY should be one of the following:
1442 Space or - whitespace syntax. w word constituent.
1443 _ symbol constituent. . punctuation.
1444 ( open-parenthesis. ) close-parenthesis.
1445 " string quote. \\ escape.
1446 $ paired delimiter. \\=' expression quote or prefix operator.
1447 < comment starter. > comment ender.
1448 / character-quote. @ inherit from parent table.
1449 | generic string fence. ! generic comment fence.
1451 Only single-character comment start and end sequences are represented thus.
1452 Two-character sequences are represented as described below.
1453 The second character of NEWENTRY is the matching parenthesis,
1454 used only if the first character is `(' or `)'.
1455 Any additional characters are flags.
1456 Defined flags are the characters 1, 2, 3, 4, b, p, and n.
1457 1 means CHAR is the start of a two-char comment start sequence.
1458 2 means CHAR is the second character of such a sequence.
1459 3 means CHAR is the start of a two-char comment end sequence.
1460 4 means CHAR is the second character of such a sequence.
1462 There can be several orthogonal comment sequences. This is to support
1463 language modes such as C++. By default, all comment sequences are of style
1464 a, but you can set the comment sequence style to b (on the second character
1465 of a comment-start, and the first character of a comment-end sequence) and/or
1466 c (on any of its chars) using this flag:
1467 b means CHAR is part of comment sequence b.
1468 c means CHAR is part of comment sequence c.
1469 n means CHAR is part of a nestable comment sequence.
1471 p means CHAR is a prefix character for `backward-prefix-chars';
1472 such characters are treated as whitespace when they occur
1473 between expressions.
1474 usage: (modify-syntax-entry CHAR NEWENTRY &optional SYNTAX-TABLE) */)
1475 (Lisp_Object c
, Lisp_Object newentry
, Lisp_Object syntax_table
)
1479 CHECK_CHARACTER_CAR (c
);
1480 CHECK_CHARACTER_CDR (c
);
1483 CHECK_CHARACTER (c
);
1485 if (NILP (syntax_table
))
1486 syntax_table
= BVAR (current_buffer
, syntax_table
);
1488 check_syntax_table (syntax_table
);
1490 newentry
= Fstring_to_syntax (newentry
);
1491 if (SYNTAB_LITERAL (newentry
)
1493 ? syntax_table_value_range_is_interesting_for_literals
1494 (syntax_table
, XINT (XCAR(c
)), XINT (XCDR (c
)))
1495 : (SYNTAB_LITERAL (c
))))
1497 empty_syntax_tables_buffers_literal_caches (syntax_table
);
1498 break_off_syntax_tables_literal_relations (syntax_table
);
1502 SET_RAW_SYNTAX_ENTRY_RANGE (syntax_table
, c
, newentry
);
1504 SET_RAW_SYNTAX_ENTRY (syntax_table
, XINT (c
), newentry
);
1506 /* We clear the regexp cache, since character classes can now have
1507 different values from those in the compiled regexps.*/
1508 clear_regexp_cache ();
1514 /* Dump syntax table to buffer in human-readable format */
1516 DEFUN ("internal-describe-syntax-value", Finternal_describe_syntax_value
,
1517 Sinternal_describe_syntax_value
, 1, 1, 0,
1518 doc
: /* Insert a description of the internal syntax description SYNTAX at point. */)
1519 (Lisp_Object syntax
)
1521 int code
, syntax_code
;
1522 bool start1
, start2
, end1
, end2
, prefix
, comstyleb
, comstylec
, comnested
;
1524 Lisp_Object first
, match_lisp
, value
= syntax
;
1528 insert_string ("default");
1532 if (CHAR_TABLE_P (value
))
1534 insert_string ("deeper char-table ...");
1540 insert_string ("invalid");
1544 first
= XCAR (value
);
1545 match_lisp
= XCDR (value
);
1547 if (!INTEGERP (first
) || !(NILP (match_lisp
) || CHARACTERP (match_lisp
)))
1549 insert_string ("invalid");
1553 syntax_code
= XINT (first
) & INT_MAX
;
1554 code
= syntax_code
& 0377;
1555 start1
= SYNTAX_FLAGS_COMSTART_FIRST (syntax_code
);
1556 start2
= SYNTAX_FLAGS_COMSTART_SECOND (syntax_code
);
1557 end1
= SYNTAX_FLAGS_COMEND_FIRST (syntax_code
);
1558 end2
= SYNTAX_FLAGS_COMEND_SECOND (syntax_code
);
1559 prefix
= SYNTAX_FLAGS_PREFIX (syntax_code
);
1560 comstyleb
= SYNTAX_FLAGS_COMMENT_STYLEB (syntax_code
);
1561 comstylec
= SYNTAX_FLAGS_COMMENT_STYLEC (syntax_code
);
1562 comnested
= SYNTAX_FLAGS_COMMENT_NESTED (syntax_code
);
1566 insert_string ("invalid");
1570 str
[0] = syntax_code_spec
[code
], str
[1] = 0;
1573 if (NILP (match_lisp
))
1576 insert_char (XINT (match_lisp
));
1597 insert_string ("\twhich means: ");
1602 insert_string ("whitespace"); break;
1604 insert_string ("punctuation"); break;
1606 insert_string ("word"); break;
1608 insert_string ("symbol"); break;
1610 insert_string ("open"); break;
1612 insert_string ("close"); break;
1614 insert_string ("prefix"); break;
1616 insert_string ("string"); break;
1618 insert_string ("math"); break;
1620 insert_string ("escape"); break;
1622 insert_string ("charquote"); break;
1624 insert_string ("comment"); break;
1626 insert_string ("endcomment"); break;
1628 insert_string ("inherit"); break;
1629 case Scomment_fence
:
1630 insert_string ("comment fence"); break;
1632 insert_string ("string fence"); break;
1634 insert_string ("invalid");
1638 if (!NILP (match_lisp
))
1640 insert_string (", matches ");
1641 insert_char (XINT (match_lisp
));
1645 insert_string (",\n\t is the first character of a comment-start sequence");
1647 insert_string (",\n\t is the second character of a comment-start sequence");
1650 insert_string (",\n\t is the first character of a comment-end sequence");
1652 insert_string (",\n\t is the second character of a comment-end sequence");
1654 insert_string (" (comment style b)");
1656 insert_string (" (comment style c)");
1658 insert_string (" (nestable)");
1662 AUTO_STRING (prefixdoc
,
1663 ",\n\t is a prefix character for `backward-prefix-chars'");
1664 insert1 (Fsubstitute_command_keys (prefixdoc
));
1670 /* Return the position across COUNT words from FROM.
1671 If that many words cannot be found before the end of the buffer, return 0.
1672 COUNT negative means scan backward and stop at word beginning. */
1675 scan_words (ptrdiff_t from
, EMACS_INT count
)
1677 ptrdiff_t beg
= BEGV
;
1679 ptrdiff_t from_byte
= CHAR_TO_BYTE (from
);
1680 enum syntaxcode code
;
1682 Lisp_Object func
, pos
;
1684 SETUP_SYNTAX_TABLE (from
, count
);
1692 UPDATE_SYNTAX_TABLE_FORWARD (from
);
1693 ch0
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
1694 code
= SYNTAX (ch0
);
1695 INC_BOTH (from
, from_byte
);
1696 if (words_include_escapes
1697 && (code
== Sescape
|| code
== Scharquote
))
1703 /* Now CH0 is a character which begins a word and FROM is the
1704 position of the next character. */
1705 func
= CHAR_TABLE_REF (Vfind_word_boundary_function_table
, ch0
);
1706 if (! NILP (Ffboundp (func
)))
1708 pos
= call2 (func
, make_number (from
- 1), make_number (end
));
1709 if (INTEGERP (pos
) && from
< XINT (pos
) && XINT (pos
) <= ZV
)
1712 from_byte
= CHAR_TO_BYTE (from
);
1719 if (from
== end
) break;
1720 UPDATE_SYNTAX_TABLE_FORWARD (from
);
1721 ch1
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
1722 code
= SYNTAX (ch1
);
1724 && (! words_include_escapes
1725 || (code
!= Sescape
&& code
!= Scharquote
)))
1726 || word_boundary_p (ch0
, ch1
))
1728 INC_BOTH (from
, from_byte
);
1741 DEC_BOTH (from
, from_byte
);
1742 UPDATE_SYNTAX_TABLE_BACKWARD (from
);
1743 ch1
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
1744 code
= SYNTAX (ch1
);
1745 if (words_include_escapes
1746 && (code
== Sescape
|| code
== Scharquote
))
1752 /* Now CH1 is a character which ends a word and FROM is the
1754 func
= CHAR_TABLE_REF (Vfind_word_boundary_function_table
, ch1
);
1755 if (! NILP (Ffboundp (func
)))
1757 pos
= call2 (func
, make_number (from
), make_number (beg
));
1758 if (INTEGERP (pos
) && BEGV
<= XINT (pos
) && XINT (pos
) < from
)
1761 from_byte
= CHAR_TO_BYTE (from
);
1770 DEC_BOTH (from
, from_byte
);
1771 UPDATE_SYNTAX_TABLE_BACKWARD (from
);
1772 ch0
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
1773 code
= SYNTAX (ch0
);
1775 && (! words_include_escapes
1776 || (code
!= Sescape
&& code
!= Scharquote
)))
1777 || word_boundary_p (ch0
, ch1
))
1779 INC_BOTH (from
, from_byte
);
1792 DEFUN ("forward-word", Fforward_word
, Sforward_word
, 0, 1, "^p",
1793 doc
: /* Move point forward ARG words (backward if ARG is negative).
1794 If ARG is omitted or nil, move point forward one word.
1796 If an edge of the buffer or a field boundary is reached, point is
1797 left there and the function returns nil. Field boundaries are not
1798 noticed if `inhibit-field-text-motion' is non-nil.
1800 The word boundaries are normally determined by the buffer's syntax
1801 table, but `find-word-boundary-function-table', such as set up
1802 by `subword-mode', can change that. If a Lisp program needs to
1803 move by words determined strictly by the syntax table, it should
1804 use `forward-word-strictly' instead. */)
1808 ptrdiff_t orig_val
, val
;
1811 XSETFASTINT (arg
, 1);
1815 val
= orig_val
= scan_words (PT
, XINT (arg
));
1817 val
= XINT (arg
) > 0 ? ZV
: BEGV
;
1819 /* Avoid jumping out of an input field. */
1820 tmp
= Fconstrain_to_field (make_number (val
), make_number (PT
),
1822 val
= XFASTINT (tmp
);
1825 return val
== orig_val
? Qt
: Qnil
;
1828 DEFUN ("skip-chars-forward", Fskip_chars_forward
, Sskip_chars_forward
, 1, 2, 0,
1829 doc
: /* Move point forward, stopping before a char not in STRING, or at pos LIM.
1830 STRING is like the inside of a `[...]' in a regular expression
1831 except that `]' is never special and `\\' quotes `^', `-' or `\\'
1832 (but not at the end of a range; quoting is never needed there).
1833 Thus, with arg "a-zA-Z", this skips letters stopping before first nonletter.
1834 With arg "^a-zA-Z", skips nonletters stopping before first letter.
1835 Char classes, e.g. `[:alpha:]', are supported.
1837 Returns the distance traveled, either zero or positive. */)
1838 (Lisp_Object string
, Lisp_Object lim
)
1840 return skip_chars (1, string
, lim
, 1);
1843 DEFUN ("skip-chars-backward", Fskip_chars_backward
, Sskip_chars_backward
, 1, 2, 0,
1844 doc
: /* Move point backward, stopping after a char not in STRING, or at pos LIM.
1845 See `skip-chars-forward' for details.
1846 Returns the distance traveled, either zero or negative. */)
1847 (Lisp_Object string
, Lisp_Object lim
)
1849 return skip_chars (0, string
, lim
, 1);
1852 DEFUN ("skip-syntax-forward", Fskip_syntax_forward
, Sskip_syntax_forward
, 1, 2, 0,
1853 doc
: /* Move point forward across chars in specified syntax classes.
1854 SYNTAX is a string of syntax code characters.
1855 Stop before a char whose syntax is not in SYNTAX, or at position LIM.
1856 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1857 This function returns the distance traveled, either zero or positive. */)
1858 (Lisp_Object syntax
, Lisp_Object lim
)
1860 return skip_syntaxes (1, syntax
, lim
);
1863 DEFUN ("skip-syntax-backward", Fskip_syntax_backward
, Sskip_syntax_backward
, 1, 2, 0,
1864 doc
: /* Move point backward across chars in specified syntax classes.
1865 SYNTAX is a string of syntax code characters.
1866 Stop on reaching a char whose syntax is not in SYNTAX, or at position LIM.
1867 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1868 This function returns either zero or a negative number, and the absolute value
1869 of this is the distance traveled. */)
1870 (Lisp_Object syntax
, Lisp_Object lim
)
1872 return skip_syntaxes (0, syntax
, lim
);
1876 skip_chars (bool forwardp
, Lisp_Object string
, Lisp_Object lim
,
1877 bool handle_iso_classes
)
1881 /* Store the ranges of non-ASCII characters. */
1882 int *char_ranges UNINIT
;
1883 int n_char_ranges
= 0;
1885 ptrdiff_t i
, i_byte
;
1886 /* True if the current buffer is multibyte and the region contains
1889 /* True if STRING is multibyte and it contains non-ASCII chars. */
1890 bool string_multibyte
;
1891 ptrdiff_t size_byte
;
1892 const unsigned char *str
;
1894 Lisp_Object iso_classes
;
1897 CHECK_STRING (string
);
1901 XSETINT (lim
, forwardp
? ZV
: BEGV
);
1903 CHECK_NUMBER_COERCE_MARKER (lim
);
1905 /* In any case, don't allow scan outside bounds of buffer. */
1906 if (XINT (lim
) > ZV
)
1907 XSETFASTINT (lim
, ZV
);
1908 if (XINT (lim
) < BEGV
)
1909 XSETFASTINT (lim
, BEGV
);
1911 multibyte
= (!NILP (BVAR (current_buffer
, enable_multibyte_characters
))
1912 && (XINT (lim
) - PT
!= CHAR_TO_BYTE (XINT (lim
)) - PT_BYTE
));
1913 string_multibyte
= SBYTES (string
) > SCHARS (string
);
1915 memset (fastmap
, 0, sizeof fastmap
);
1917 str
= SDATA (string
);
1918 size_byte
= SBYTES (string
);
1921 if (i_byte
< size_byte
1922 && SREF (string
, 0) == '^')
1924 negate
= 1; i_byte
++;
1927 /* Find the characters specified and set their elements of fastmap.
1928 Handle backslashes and ranges specially.
1930 If STRING contains non-ASCII characters, setup char_ranges for
1931 them and use fastmap only for their leading codes. */
1933 if (! string_multibyte
)
1935 bool string_has_eight_bit
= 0;
1937 /* At first setup fastmap. */
1938 while (i_byte
< size_byte
)
1940 if (handle_iso_classes
)
1942 const unsigned char *ch
= str
+ i_byte
;
1943 re_wctype_t cc
= re_wctype_parse (&ch
, size_byte
- i_byte
);
1945 error ("Invalid ISO C character class");
1948 iso_classes
= Fcons (make_number (cc
), iso_classes
);
1958 if (i_byte
== size_byte
)
1963 /* Treat `-' as range character only if another character
1965 if (i_byte
+ 1 < size_byte
1966 && str
[i_byte
] == '-')
1970 /* Skip over the dash. */
1973 /* Get the end of the range. */
1976 && i_byte
< size_byte
)
1984 if (! ASCII_CHAR_P (c2
))
1985 string_has_eight_bit
= 1;
1991 if (! ASCII_CHAR_P (c
))
1992 string_has_eight_bit
= 1;
1996 /* If the current range is multibyte and STRING contains
1997 eight-bit chars, arrange fastmap and setup char_ranges for
1998 the corresponding multibyte chars. */
1999 if (multibyte
&& string_has_eight_bit
)
2002 char himap
[0200 + 1];
2003 memcpy (himap
, fastmap
+ 0200, 0200);
2005 memset (fastmap
+ 0200, 0, 0200);
2006 SAFE_NALLOCA (char_ranges
, 2, 128);
2009 while ((p1
= memchr (himap
+ i
, 1, 0200 - i
)))
2011 /* Deduce the next range C..C2 from the next clump of 1s
2012 in HIMAP starting with &HIMAP[I]. HIMAP is the high
2013 order half of the old FASTMAP. */
2014 int c2
, leading_code
;
2016 c
= BYTE8_TO_CHAR (i
+ 0200);
2018 c2
= BYTE8_TO_CHAR (i
+ 0200 - 1);
2020 char_ranges
[n_char_ranges
++] = c
;
2021 char_ranges
[n_char_ranges
++] = c2
;
2022 leading_code
= CHAR_LEADING_CODE (c
);
2023 memset (fastmap
+ leading_code
, 1,
2024 CHAR_LEADING_CODE (c2
) - leading_code
+ 1);
2028 else /* STRING is multibyte */
2030 SAFE_NALLOCA (char_ranges
, 2, SCHARS (string
));
2032 while (i_byte
< size_byte
)
2034 int leading_code
= str
[i_byte
];
2036 if (handle_iso_classes
)
2038 const unsigned char *ch
= str
+ i_byte
;
2039 re_wctype_t cc
= re_wctype_parse (&ch
, size_byte
- i_byte
);
2041 error ("Invalid ISO C character class");
2044 iso_classes
= Fcons (make_number (cc
), iso_classes
);
2050 if (leading_code
== '\\')
2052 if (++i_byte
== size_byte
)
2055 leading_code
= str
[i_byte
];
2057 c
= STRING_CHAR_AND_LENGTH (str
+ i_byte
, len
);
2061 /* Treat `-' as range character only if another character
2063 if (i_byte
+ 1 < size_byte
2064 && str
[i_byte
] == '-')
2066 int c2
, leading_code2
;
2068 /* Skip over the dash. */
2071 /* Get the end of the range. */
2072 leading_code2
= str
[i_byte
];
2073 c2
= STRING_CHAR_AND_LENGTH (str
+ i_byte
, len
);
2077 && i_byte
< size_byte
)
2079 leading_code2
= str
[i_byte
];
2080 c2
= STRING_CHAR_AND_LENGTH (str
+ i_byte
, len
);
2086 if (ASCII_CHAR_P (c
))
2088 while (c
<= c2
&& c
< 0x80)
2090 leading_code
= CHAR_LEADING_CODE (c
);
2092 if (! ASCII_CHAR_P (c
))
2094 int lim2
= leading_code2
+ 1;
2095 while (leading_code
< lim2
)
2096 fastmap
[leading_code
++] = 1;
2099 char_ranges
[n_char_ranges
++] = c
;
2100 char_ranges
[n_char_ranges
++] = c2
;
2106 if (ASCII_CHAR_P (c
))
2110 fastmap
[leading_code
] = 1;
2111 char_ranges
[n_char_ranges
++] = c
;
2112 char_ranges
[n_char_ranges
++] = c
;
2117 /* If the current range is unibyte and STRING contains non-ASCII
2118 chars, arrange fastmap for the corresponding unibyte
2121 if (! multibyte
&& n_char_ranges
> 0)
2123 memset (fastmap
+ 0200, 0, 0200);
2124 for (i
= 0; i
< n_char_ranges
; i
+= 2)
2126 int c1
= char_ranges
[i
];
2127 int lim2
= char_ranges
[i
+ 1] + 1;
2129 for (; c1
< lim2
; c1
++)
2131 int b
= CHAR_TO_BYTE_SAFE (c1
);
2139 /* If ^ was the first character, complement the fastmap. */
2143 for (i
= 0; i
< sizeof fastmap
; i
++)
2147 for (i
= 0; i
< 0200; i
++)
2149 /* All non-ASCII chars possibly match. */
2150 for (; i
< sizeof fastmap
; i
++)
2156 ptrdiff_t start_point
= PT
;
2158 ptrdiff_t pos_byte
= PT_BYTE
;
2159 unsigned char *p
= PT_ADDR
, *endp
, *stop
;
2163 endp
= (XINT (lim
) == GPT
) ? GPT_ADDR
: CHAR_POS_ADDR (XINT (lim
));
2164 stop
= (pos
< GPT
&& GPT
< XINT (lim
)) ? GPT_ADDR
: endp
;
2168 endp
= CHAR_POS_ADDR (XINT (lim
));
2169 stop
= (pos
>= GPT
&& GPT
> XINT (lim
)) ? GAP_END_ADDR
: endp
;
2172 /* This code may look up syntax tables using functions that rely on the
2173 gl_state object. To make sure this object is not out of date,
2174 let's initialize it manually.
2175 We ignore syntax-table text-properties for now, since that's
2176 what we've done in the past. */
2177 SETUP_BUFFER_SYNTAX_TABLE ();
2192 c
= STRING_CHAR_AND_LENGTH (p
, nbytes
);
2193 if (! NILP (iso_classes
) && in_classes (c
, iso_classes
))
2203 if (! ASCII_CHAR_P (c
))
2205 /* As we are looking at a multibyte character, we
2206 must look up the character in the table
2207 CHAR_RANGES. If there's no data in the table,
2208 that character is not what we want to skip. */
2210 /* The following code do the right thing even if
2211 n_char_ranges is zero (i.e. no data in
2213 for (i
= 0; i
< n_char_ranges
; i
+= 2)
2214 if (c
>= char_ranges
[i
] && c
<= char_ranges
[i
+ 1])
2216 if (!(negate
^ (i
< n_char_ranges
)))
2220 p
+= nbytes
, pos
++, pos_byte
+= nbytes
;
2234 if (!NILP (iso_classes
) && in_classes (*p
, iso_classes
))
2239 goto fwd_unibyte_ok
;
2245 p
++, pos
++, pos_byte
++;
2261 unsigned char *prev_p
= p
;
2264 while (stop
<= p
&& ! CHAR_HEAD_P (*p
));
2266 c
= STRING_CHAR (p
);
2268 if (! NILP (iso_classes
) && in_classes (c
, iso_classes
))
2278 if (! ASCII_CHAR_P (c
))
2280 /* See the comment in the previous similar code. */
2281 for (i
= 0; i
< n_char_ranges
; i
+= 2)
2282 if (c
>= char_ranges
[i
] && c
<= char_ranges
[i
+ 1])
2284 if (!(negate
^ (i
< n_char_ranges
)))
2288 pos
--, pos_byte
-= prev_p
- p
;
2302 if (! NILP (iso_classes
) && in_classes (p
[-1], iso_classes
))
2307 goto back_unibyte_ok
;
2310 if (!fastmap
[p
[-1]])
2313 p
--, pos
--, pos_byte
--;
2318 SET_PT_BOTH (pos
, pos_byte
);
2321 return make_number (PT
- start_point
);
2327 skip_syntaxes (bool forwardp
, Lisp_Object string
, Lisp_Object lim
)
2330 unsigned char fastmap
[0400];
2332 ptrdiff_t i
, i_byte
;
2334 ptrdiff_t size_byte
;
2337 CHECK_STRING (string
);
2340 XSETINT (lim
, forwardp
? ZV
: BEGV
);
2342 CHECK_NUMBER_COERCE_MARKER (lim
);
2344 /* In any case, don't allow scan outside bounds of buffer. */
2345 if (XINT (lim
) > ZV
)
2346 XSETFASTINT (lim
, ZV
);
2347 if (XINT (lim
) < BEGV
)
2348 XSETFASTINT (lim
, BEGV
);
2350 if (forwardp
? (PT
>= XFASTINT (lim
)) : (PT
<= XFASTINT (lim
)))
2351 return make_number (0);
2353 multibyte
= (!NILP (BVAR (current_buffer
, enable_multibyte_characters
))
2354 && (XINT (lim
) - PT
!= CHAR_TO_BYTE (XINT (lim
)) - PT_BYTE
));
2356 memset (fastmap
, 0, sizeof fastmap
);
2358 if (SBYTES (string
) > SCHARS (string
))
2359 /* As this is very rare case (syntax spec is ASCII only), don't
2360 consider efficiency. */
2361 string
= string_make_unibyte (string
);
2363 str
= SDATA (string
);
2364 size_byte
= SBYTES (string
);
2367 if (i_byte
< size_byte
2368 && SREF (string
, 0) == '^')
2370 negate
= 1; i_byte
++;
2373 /* Find the syntaxes specified and set their elements of fastmap. */
2375 while (i_byte
< size_byte
)
2378 fastmap
[syntax_spec_code
[c
]] = 1;
2381 /* If ^ was the first character, complement the fastmap. */
2383 for (i
= 0; i
< sizeof fastmap
; i
++)
2387 ptrdiff_t start_point
= PT
;
2389 ptrdiff_t pos_byte
= PT_BYTE
;
2390 unsigned char *p
, *endp
, *stop
;
2392 SETUP_SYNTAX_TABLE (pos
, forwardp
? 1 : -1);
2398 p
= BYTE_POS_ADDR (pos_byte
);
2399 endp
= XINT (lim
) == GPT
? GPT_ADDR
: CHAR_POS_ADDR (XINT (lim
));
2400 stop
= pos
< GPT
&& GPT
< XINT (lim
) ? GPT_ADDR
: endp
;
2414 c
= STRING_CHAR_AND_LENGTH (p
, nbytes
);
2417 if (! fastmap
[SYNTAX (c
)])
2419 p
+= nbytes
, pos
++, pos_byte
+= nbytes
;
2422 while (!parse_sexp_lookup_properties
2423 || pos
< gl_state
.e_property
);
2425 update_syntax_table_forward (pos
+ gl_state
.offset
,
2426 false, gl_state
.object
);
2431 p
= BYTE_POS_ADDR (pos_byte
);
2432 endp
= CHAR_POS_ADDR (XINT (lim
));
2433 stop
= pos
>= GPT
&& GPT
> XINT (lim
) ? GAP_END_ADDR
: endp
;
2446 UPDATE_SYNTAX_TABLE_BACKWARD (pos
- 1);
2448 unsigned char *prev_p
= p
;
2451 while (stop
<= p
&& ! CHAR_HEAD_P (*p
));
2453 c
= STRING_CHAR (p
);
2454 if (! fastmap
[SYNTAX (c
)])
2456 pos
--, pos_byte
-= prev_p
- p
;
2471 UPDATE_SYNTAX_TABLE_BACKWARD (pos
- 1);
2472 if (! fastmap
[SYNTAX (p
[-1])])
2474 p
--, pos
--, pos_byte
--;
2481 SET_PT_BOTH (pos
, pos_byte
);
2483 return make_number (PT
- start_point
);
2487 /* Return true if character C belongs to one of the ISO classes
2488 in the list ISO_CLASSES. Each class is represented by an
2489 integer which is its type according to re_wctype. */
2492 in_classes (int c
, Lisp_Object iso_classes
)
2494 bool fits_class
= 0;
2496 while (CONSP (iso_classes
))
2499 elt
= XCAR (iso_classes
);
2500 iso_classes
= XCDR (iso_classes
);
2502 if (re_iswctype (c
, XFASTINT (elt
)))
2509 /* Jump over a comment, assuming we are at the beginning of one.
2510 FROM is the current position.
2511 FROM_BYTE is the bytepos corresponding to FROM.
2512 Do not move past STOP (a charpos).
2513 The comment over which we have to jump is of style STYLE
2514 (either SYNTAX_FLAGS_COMMENT_STYLE (foo) or ST_COMMENT_STYLE).
2515 NESTING should be positive to indicate the nesting at the beginning
2516 for nested comments and should be zero or negative else.
2517 ST_COMMENT_STYLE cannot be nested.
2518 PREV_SYNTAX is the SYNTAX_WITH_FLAGS of the previous character
2519 (or 0 If the search cannot start in the middle of a two-character).
2521 If successful, return true and store the charpos of the comment's
2522 end into *CHARPOS_PTR and the corresponding bytepos into
2523 *BYTEPOS_PTR. Else, return false and store the charpos STOP into
2524 *CHARPOS_PTR, the corresponding bytepos into *BYTEPOS_PTR and the
2525 current nesting (as defined for state->incomment) in
2526 *INCOMMENT_PTR. Should the last character scanned in an incomplete
2527 comment be a possible first character of a two character construct,
2528 we store its SYNTAX_WITH_FLAGS into *last_syntax_ptr. Otherwise,
2529 we store Smax into *last_syntax_ptr.
2531 The comment end is the last character of the comment rather than the
2532 character just after the comment.
2534 Global syntax data is assumed to initially be valid for FROM and
2535 remains valid for forward search starting at the returned position. */
2538 forw_comment (ptrdiff_t from
, ptrdiff_t from_byte
, ptrdiff_t stop
,
2539 EMACS_INT nesting
, int style
, int prev_syntax
,
2540 ptrdiff_t *charpos_ptr
, ptrdiff_t *bytepos_ptr
,
2541 EMACS_INT
*incomment_ptr
, int *last_syntax_ptr
)
2543 unsigned short int quit_count
= 0;
2545 enum syntaxcode code
;
2546 int syntax
, other_syntax
;
2548 if (nesting
<= 0) nesting
= -1;
2550 /* Enter the loop in the middle so that we find
2551 a 2-char comment ender if we start in the middle of it. */
2552 syntax
= prev_syntax
;
2553 code
= syntax
& 0xff;
2554 if (syntax
!= 0 && from
< stop
) goto forw_incomment
;
2560 *incomment_ptr
= nesting
;
2561 *charpos_ptr
= from
;
2562 *bytepos_ptr
= from_byte
;
2564 (code
== Sescape
|| code
== Scharquote
2565 || SYNTAX_FLAGS_COMEND_FIRST (syntax
)
2567 && SYNTAX_FLAGS_COMSTART_FIRST (syntax
)))
2571 c
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
2572 syntax
= SYNTAX_WITH_FLAGS (c
);
2573 code
= syntax
& 0xff;
2574 if (code
== Sendcomment
2575 && SYNTAX_FLAGS_COMMENT_STYLE (syntax
, 0) == style
2576 && (SYNTAX_FLAGS_COMMENT_NESTED (syntax
) ?
2577 (nesting
> 0 && --nesting
== 0) : nesting
< 0)
2578 && !(Vcomment_end_can_be_escaped
&& char_quoted (from
, from_byte
)))
2579 /* We have encountered a comment end of the same style
2580 as the comment sequence which began this comment
2583 if (code
== Scomment_fence
2584 && style
== ST_COMMENT_STYLE
)
2585 /* We have encountered a comment end of the same style
2586 as the comment sequence which began this comment
2591 && SYNTAX_FLAGS_COMMENT_NESTED (syntax
)
2592 && SYNTAX_FLAGS_COMMENT_STYLE (syntax
, 0) == style
)
2593 /* We have encountered a nested comment of the same style
2594 as the comment sequence which began this comment section. */
2596 INC_BOTH (from
, from_byte
);
2597 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2600 if (from
< stop
&& SYNTAX_FLAGS_COMEND_FIRST (syntax
)
2601 && (c1
= FETCH_CHAR_AS_MULTIBYTE (from_byte
),
2602 other_syntax
= SYNTAX_WITH_FLAGS (c1
),
2603 SYNTAX_FLAGS_COMEND_SECOND (other_syntax
))
2604 && SYNTAX_FLAGS_COMMENT_STYLE (syntax
, other_syntax
) == style
2605 && ((SYNTAX_FLAGS_COMMENT_NESTED (syntax
) ||
2606 SYNTAX_FLAGS_COMMENT_NESTED (other_syntax
))
2607 ? nesting
> 0 : nesting
< 0))
2609 syntax
= Smax
; /* So that "|#" (lisp) can not return
2610 the syntax of "#" in *last_syntax_ptr. */
2612 /* We have encountered a comment end of the same style
2613 as the comment sequence which began this comment section. */
2617 INC_BOTH (from
, from_byte
);
2618 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2623 && SYNTAX_FLAGS_COMSTART_FIRST (syntax
)
2624 && (c1
= FETCH_CHAR_AS_MULTIBYTE (from_byte
),
2625 other_syntax
= SYNTAX_WITH_FLAGS (c1
),
2626 SYNTAX_FLAGS_COMMENT_STYLE (other_syntax
, syntax
) == style
2627 && SYNTAX_FLAGS_COMSTART_SECOND (other_syntax
))
2628 && (SYNTAX_FLAGS_COMMENT_NESTED (syntax
) ||
2629 SYNTAX_FLAGS_COMMENT_NESTED (other_syntax
)))
2630 /* We have encountered a nested comment of the same style
2631 as the comment sequence which began this comment section. */
2633 syntax
= Smax
; /* So that "#|#" isn't also a comment ender. */
2634 INC_BOTH (from
, from_byte
);
2635 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2639 rarely_quit (++quit_count
);
2641 *charpos_ptr
= from
;
2642 *bytepos_ptr
= from_byte
;
2643 *last_syntax_ptr
= Smax
; /* Any syntactic power the last byte had is
2648 DEFUN ("forward-comment", Fforward_comment
, Sforward_comment
, 1, 1, 0,
2650 Move forward across up to COUNT comments. If COUNT is negative, move backward.
2651 Stop scanning if we find something other than a comment or whitespace.
2652 Set point to where scanning stops.
2653 If COUNT comments are found as expected, with nothing except whitespace
2654 between them, return t; otherwise return nil. */)
2657 ptrdiff_t from
, from_byte
, stop
;
2659 enum syntaxcode code
;
2660 int comstyle
= 0; /* style of comment encountered */
2661 bool comnested
= 0; /* whether the comment is nestable or not */
2664 ptrdiff_t out_charpos
, out_bytepos
;
2667 unsigned short int quit_count
= 0;
2669 CHECK_NUMBER (count
);
2670 count1
= XINT (count
);
2671 stop
= count1
> 0 ? ZV
: BEGV
;
2674 from_byte
= PT_BYTE
;
2676 SETUP_SYNTAX_TABLE (from
, count1
);
2681 bool comstart_first
;
2682 int syntax
, other_syntax
;
2686 SET_PT_BOTH (from
, from_byte
);
2689 c
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
2690 syntax
= SYNTAX_WITH_FLAGS (c
);
2692 comstart_first
= SYNTAX_FLAGS_COMSTART_FIRST (syntax
);
2693 comnested
= SYNTAX_FLAGS_COMMENT_NESTED (syntax
);
2694 comstyle
= SYNTAX_FLAGS_COMMENT_STYLE (syntax
, 0);
2695 INC_BOTH (from
, from_byte
);
2696 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2697 if (from
< stop
&& comstart_first
2698 && (c1
= FETCH_CHAR_AS_MULTIBYTE (from_byte
),
2699 other_syntax
= SYNTAX_WITH_FLAGS (c1
),
2700 SYNTAX_FLAGS_COMSTART_SECOND (other_syntax
)))
2702 /* We have encountered a comment start sequence and we
2703 are ignoring all text inside comments. We must record
2704 the comment style this sequence begins so that later,
2705 only a comment end of the same style actually ends
2706 the comment section. */
2708 comstyle
= SYNTAX_FLAGS_COMMENT_STYLE (other_syntax
, syntax
);
2709 comnested
|= SYNTAX_FLAGS_COMMENT_NESTED (other_syntax
);
2710 INC_BOTH (from
, from_byte
);
2711 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2713 rarely_quit (++quit_count
);
2715 while (code
== Swhitespace
|| (code
== Sendcomment
&& c
== '\n'));
2717 if (code
== Scomment_fence
)
2718 comstyle
= ST_COMMENT_STYLE
;
2719 else if (code
!= Scomment
)
2721 DEC_BOTH (from
, from_byte
);
2722 SET_PT_BOTH (from
, from_byte
);
2725 /* We're at the start of a comment. */
2726 found
= forw_comment (from
, from_byte
, stop
, comnested
, comstyle
, 0,
2727 &out_charpos
, &out_bytepos
, &dummy
, &dummy2
);
2728 from
= out_charpos
; from_byte
= out_bytepos
;
2731 SET_PT_BOTH (from
, from_byte
);
2734 INC_BOTH (from
, from_byte
);
2735 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2736 /* We have skipped one comment. */
2746 SET_PT_BOTH (BEGV
, BEGV_BYTE
);
2750 DEC_BOTH (from
, from_byte
);
2751 /* char_quoted does UPDATE_SYNTAX_TABLE_BACKWARD (from). */
2752 bool quoted
= char_quoted (from
, from_byte
);
2753 c
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
2754 int syntax
= SYNTAX_WITH_FLAGS (c
);
2757 comnested
= SYNTAX_FLAGS_COMMENT_NESTED (syntax
);
2758 if (code
== Sendcomment
)
2759 comstyle
= SYNTAX_FLAGS_COMMENT_STYLE (syntax
, 0);
2760 if (from
> stop
&& SYNTAX_FLAGS_COMEND_SECOND (syntax
)
2761 && prev_char_comend_first (from
, from_byte
)
2762 && !char_quoted (from
- 1, dec_bytepos (from_byte
)))
2765 /* We must record the comment style encountered so that
2766 later, we can match only the proper comment begin
2767 sequence of the same style. */
2768 DEC_BOTH (from
, from_byte
);
2770 /* Calling char_quoted, above, set up global syntax position
2771 at the new value of FROM. */
2772 c1
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
2773 other_syntax
= SYNTAX_WITH_FLAGS (c1
);
2774 comstyle
= SYNTAX_FLAGS_COMMENT_STYLE (other_syntax
, syntax
);
2775 comnested
|= SYNTAX_FLAGS_COMMENT_NESTED (other_syntax
);
2778 if (code
== Scomment_fence
)
2780 /* Skip until first preceding unquoted comment_fence. */
2781 bool fence_found
= 0;
2782 ptrdiff_t ini
= from
, ini_byte
= from_byte
;
2786 DEC_BOTH (from
, from_byte
);
2787 UPDATE_SYNTAX_TABLE_BACKWARD (from
);
2788 c
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
2789 if (SYNTAX (c
) == Scomment_fence
2790 && !char_quoted (from
, from_byte
))
2795 else if (from
== stop
)
2797 rarely_quit (++quit_count
);
2799 if (fence_found
== 0)
2801 from
= ini
; /* Set point to ini + 1. */
2802 from_byte
= ini_byte
;
2806 /* We have skipped one comment. */
2809 else if (code
== Sendcomment
)
2811 found
= back_comment (from
, from_byte
, stop
, comnested
, comstyle
,
2812 &out_charpos
, &out_bytepos
);
2816 /* This end-of-line is not an end-of-comment.
2817 Treat it like a whitespace.
2818 CC-mode (and maybe others) relies on this behavior. */
2822 /* Failure: we should go back to the end of this
2823 not-quite-endcomment. */
2824 if (SYNTAX (c
) != code
)
2825 /* It was a two-char Sendcomment. */
2826 INC_BOTH (from
, from_byte
);
2832 /* We have skipped one comment. */
2833 from
= out_charpos
, from_byte
= out_bytepos
;
2837 else if (code
!= Swhitespace
|| quoted
)
2840 INC_BOTH (from
, from_byte
);
2841 SET_PT_BOTH (from
, from_byte
);
2845 rarely_quit (++quit_count
);
2851 SET_PT_BOTH (from
, from_byte
);
2855 /* Return syntax code of character C if C is an ASCII character
2856 or if MULTIBYTE_SYMBOL_P is false. Otherwise, return Ssymbol. */
2858 static enum syntaxcode
2859 syntax_multibyte (int c
, bool multibyte_symbol_p
)
2861 return ASCII_CHAR_P (c
) || !multibyte_symbol_p
? SYNTAX (c
) : Ssymbol
;
2865 scan_lists (EMACS_INT from
, EMACS_INT count
, EMACS_INT depth
, bool sexpflag
)
2868 ptrdiff_t stop
= count
> 0 ? ZV
: BEGV
;
2873 enum syntaxcode code
;
2874 EMACS_INT min_depth
= depth
; /* Err out if depth gets less than this. */
2875 int comstyle
= 0; /* Style of comment encountered. */
2876 bool comnested
= 0; /* Whether the comment is nestable or not. */
2878 EMACS_INT last_good
= from
;
2880 ptrdiff_t from_byte
;
2881 ptrdiff_t out_bytepos
, out_charpos
;
2884 bool multibyte_symbol_p
= sexpflag
&& multibyte_syntax_as_symbol
;
2885 unsigned short int quit_count
= 0;
2887 if (depth
> 0) min_depth
= 0;
2889 if (from
> ZV
) from
= ZV
;
2890 if (from
< BEGV
) from
= BEGV
;
2892 from_byte
= CHAR_TO_BYTE (from
);
2896 SETUP_SYNTAX_TABLE (from
, count
);
2901 rarely_quit (++quit_count
);
2902 bool comstart_first
, prefix
;
2903 int syntax
, other_syntax
;
2904 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2905 c
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
2906 syntax
= SYNTAX_WITH_FLAGS (c
);
2907 code
= syntax_multibyte (c
, multibyte_symbol_p
);
2908 comstart_first
= SYNTAX_FLAGS_COMSTART_FIRST (syntax
);
2909 comnested
= SYNTAX_FLAGS_COMMENT_NESTED (syntax
);
2910 comstyle
= SYNTAX_FLAGS_COMMENT_STYLE (syntax
, 0);
2911 prefix
= SYNTAX_FLAGS_PREFIX (syntax
);
2912 if (depth
== min_depth
)
2914 INC_BOTH (from
, from_byte
);
2915 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2916 if (from
< stop
&& comstart_first
2917 && (c
= FETCH_CHAR_AS_MULTIBYTE (from_byte
),
2918 other_syntax
= SYNTAX_WITH_FLAGS (c
),
2919 SYNTAX_FLAGS_COMSTART_SECOND (other_syntax
))
2920 && parse_sexp_ignore_comments
)
2922 /* We have encountered a comment start sequence and we
2923 are ignoring all text inside comments. We must record
2924 the comment style this sequence begins so that later,
2925 only a comment end of the same style actually ends
2926 the comment section. */
2928 comstyle
= SYNTAX_FLAGS_COMMENT_STYLE (other_syntax
, syntax
);
2929 comnested
|= SYNTAX_FLAGS_COMMENT_NESTED (other_syntax
);
2930 INC_BOTH (from
, from_byte
);
2931 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2943 INC_BOTH (from
, from_byte
);
2944 /* Treat following character as a word constituent. */
2947 if (depth
|| !sexpflag
) break;
2948 /* This word counts as a sexp; return at end of it. */
2951 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2953 c
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
2954 switch (syntax_multibyte (c
, multibyte_symbol_p
))
2958 INC_BOTH (from
, from_byte
);
2969 INC_BOTH (from
, from_byte
);
2970 rarely_quit (++quit_count
);
2974 case Scomment_fence
:
2975 comstyle
= ST_COMMENT_STYLE
;
2978 if (!parse_sexp_ignore_comments
) break;
2979 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2980 found
= forw_comment (from
, from_byte
, stop
,
2981 comnested
, comstyle
, 0,
2982 &out_charpos
, &out_bytepos
, &dummy
,
2984 from
= out_charpos
, from_byte
= out_bytepos
;
2991 INC_BOTH (from
, from_byte
);
2992 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2998 if (from
!= stop
&& c
== FETCH_CHAR_AS_MULTIBYTE (from_byte
))
3000 INC_BOTH (from
, from_byte
);
3010 if (!++depth
) goto done
;
3015 if (!--depth
) goto done
;
3016 if (depth
< min_depth
)
3017 xsignal3 (Qscan_error
,
3018 build_string ("Containing expression ends prematurely"),
3019 make_number (last_good
), make_number (from
));
3024 temp_pos
= dec_bytepos (from_byte
);
3025 stringterm
= FETCH_CHAR_AS_MULTIBYTE (temp_pos
);
3028 enum syntaxcode c_code
;
3031 UPDATE_SYNTAX_TABLE_FORWARD (from
);
3032 c
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
3033 c_code
= syntax_multibyte (c
, multibyte_symbol_p
);
3035 ? c
== stringterm
&& c_code
== Sstring
3036 : c_code
== Sstring_fence
)
3039 if (c_code
== Scharquote
|| c_code
== Sescape
)
3040 INC_BOTH (from
, from_byte
);
3041 INC_BOTH (from
, from_byte
);
3042 rarely_quit (++quit_count
);
3044 INC_BOTH (from
, from_byte
);
3045 if (!depth
&& sexpflag
) goto done
;
3048 /* Ignore whitespace, punctuation, quote, endcomment. */
3053 /* Reached end of buffer. Error if within object, return nil if between */
3059 /* End of object reached */
3068 rarely_quit (++quit_count
);
3069 DEC_BOTH (from
, from_byte
);
3070 UPDATE_SYNTAX_TABLE_BACKWARD (from
);
3071 c
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
3072 int syntax
= SYNTAX_WITH_FLAGS (c
);
3073 code
= syntax_multibyte (c
, multibyte_symbol_p
);
3074 if (depth
== min_depth
)
3077 comnested
= SYNTAX_FLAGS_COMMENT_NESTED (syntax
);
3078 if (code
== Sendcomment
)
3079 comstyle
= SYNTAX_FLAGS_COMMENT_STYLE (syntax
, 0);
3080 if (from
> stop
&& SYNTAX_FLAGS_COMEND_SECOND (syntax
)
3081 && prev_char_comend_first (from
, from_byte
)
3082 && parse_sexp_ignore_comments
)
3084 /* We must record the comment style encountered so that
3085 later, we can match only the proper comment begin
3086 sequence of the same style. */
3087 int c2
, other_syntax
;
3088 DEC_BOTH (from
, from_byte
);
3089 UPDATE_SYNTAX_TABLE_BACKWARD (from
);
3091 c2
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
3092 other_syntax
= SYNTAX_WITH_FLAGS (c2
);
3093 comstyle
= SYNTAX_FLAGS_COMMENT_STYLE (other_syntax
, syntax
);
3094 comnested
|= SYNTAX_FLAGS_COMMENT_NESTED (other_syntax
);
3097 /* Quoting turns anything except a comment-ender
3098 into a word character. Note that this cannot be true
3099 if we decremented FROM in the if-statement above. */
3100 if (code
!= Sendcomment
&& char_quoted (from
, from_byte
))
3102 DEC_BOTH (from
, from_byte
);
3105 else if (SYNTAX_FLAGS_PREFIX (syntax
))
3114 if (depth
|| !sexpflag
) break;
3115 /* This word counts as a sexp; count object finished
3116 after passing it. */
3119 temp_pos
= from_byte
;
3120 if (! NILP (BVAR (current_buffer
, enable_multibyte_characters
)))
3124 UPDATE_SYNTAX_TABLE_BACKWARD (from
- 1);
3125 c1
= FETCH_CHAR_AS_MULTIBYTE (temp_pos
);
3126 /* Don't allow comment-end to be quoted. */
3127 if (syntax_multibyte (c1
, multibyte_symbol_p
) == Sendcomment
)
3129 quoted
= char_quoted (from
- 1, temp_pos
);
3132 DEC_BOTH (from
, from_byte
);
3133 temp_pos
= dec_bytepos (temp_pos
);
3134 UPDATE_SYNTAX_TABLE_BACKWARD (from
- 1);
3136 c1
= FETCH_CHAR_AS_MULTIBYTE (temp_pos
);
3138 switch (syntax_multibyte (c1
, multibyte_symbol_p
))
3140 case Sword
: case Ssymbol
: case Squote
: break;
3141 default: goto done2
;
3143 DEC_BOTH (from
, from_byte
);
3144 rarely_quit (++quit_count
);
3153 temp_pos
= dec_bytepos (from_byte
);
3154 UPDATE_SYNTAX_TABLE_BACKWARD (from
- 1);
3155 if (from
!= stop
&& c
== FETCH_CHAR_AS_MULTIBYTE (temp_pos
))
3156 DEC_BOTH (from
, from_byte
);
3166 if (!++depth
) goto done2
;
3171 if (!--depth
) goto done2
;
3172 if (depth
< min_depth
)
3173 xsignal3 (Qscan_error
,
3174 build_string ("Containing expression ends prematurely"),
3175 make_number (last_good
), make_number (from
));
3179 if (!parse_sexp_ignore_comments
)
3181 found
= back_comment (from
, from_byte
, stop
, comnested
, comstyle
,
3182 &out_charpos
, &out_bytepos
);
3183 /* FIXME: if !found, it really wasn't a comment-end.
3184 For single-char Sendcomment, we can't do much about it apart
3185 from skipping the char.
3186 For 2-char endcomments, we could try again, taking both
3187 chars as separate entities, but it's a lot of trouble
3188 for very little gain, so we don't bother either. -sm */
3190 from
= out_charpos
, from_byte
= out_bytepos
;
3193 case Scomment_fence
:
3199 DEC_BOTH (from
, from_byte
);
3200 UPDATE_SYNTAX_TABLE_BACKWARD (from
);
3201 if (!char_quoted (from
, from_byte
))
3203 c
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
3204 if (syntax_multibyte (c
, multibyte_symbol_p
) == code
)
3207 rarely_quit (++quit_count
);
3209 if (code
== Sstring_fence
&& !depth
&& sexpflag
) goto done2
;
3213 stringterm
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
3218 DEC_BOTH (from
, from_byte
);
3219 UPDATE_SYNTAX_TABLE_BACKWARD (from
);
3220 if (!char_quoted (from
, from_byte
))
3222 c
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
3224 && (syntax_multibyte (c
, multibyte_symbol_p
)
3228 rarely_quit (++quit_count
);
3230 if (!depth
&& sexpflag
) goto done2
;
3233 /* Ignore whitespace, punctuation, quote, endcomment. */
3238 /* Reached start of buffer. Error if within object, return nil if between */
3249 XSETFASTINT (val
, from
);
3253 xsignal3 (Qscan_error
,
3254 build_string ("Unbalanced parentheses"),
3255 make_number (last_good
), make_number (from
));
3258 DEFUN ("scan-lists", Fscan_lists
, Sscan_lists
, 3, 3, 0,
3259 doc
: /* Scan from character number FROM by COUNT lists.
3260 Scan forward if COUNT is positive, backward if COUNT is negative.
3261 Return the character number of the position thus found.
3263 A \"list", in this context, refers to a balanced parenthetical
3264 grouping, as determined by the syntax table.
3266 If DEPTH is nonzero, treat that as the nesting depth of the starting
3267 point (i.e. the starting point is DEPTH parentheses deep). This
3268 function scans over parentheses until the depth goes to zero COUNT
3269 times. Hence, positive DEPTH moves out that number of levels of
3270 parentheses, while negative DEPTH moves to a deeper level.
3272 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
3274 If we reach the beginning or end of the accessible part of the buffer
3275 before we have scanned over COUNT lists, return nil if the depth at
3276 that point is zero, and signal a error if the depth is nonzero. */)
3277 (Lisp_Object from
, Lisp_Object count
, Lisp_Object depth
)
3279 CHECK_NUMBER (from
);
3280 CHECK_NUMBER (count
);
3281 CHECK_NUMBER (depth
);
3283 return scan_lists (XINT (from
), XINT (count
), XINT (depth
), 0);
3286 DEFUN ("scan-sexps", Fscan_sexps
, Sscan_sexps
, 2, 2, 0,
3287 doc
: /* Scan from character number FROM by COUNT balanced expressions.
3288 If COUNT is negative, scan backwards.
3289 Returns the character number of the position thus found.
3291 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
3293 If the beginning or end of (the accessible part of) the buffer is reached
3294 in the middle of a parenthetical grouping, an error is signaled.
3295 If the beginning or end is reached between groupings
3296 but before count is used up, nil is returned. */)
3297 (Lisp_Object from
, Lisp_Object count
)
3299 CHECK_NUMBER (from
);
3300 CHECK_NUMBER (count
);
3302 return scan_lists (XINT (from
), XINT (count
), 0, 1);
3305 DEFUN ("backward-prefix-chars", Fbackward_prefix_chars
, Sbackward_prefix_chars
,
3307 doc
: /* Move point backward over any number of chars with prefix syntax.
3308 This includes chars with expression prefix syntax class (\\=') and those with
3309 the prefix syntax flag (p). */)
3312 ptrdiff_t beg
= BEGV
;
3313 ptrdiff_t opoint
= PT
;
3314 ptrdiff_t opoint_byte
= PT_BYTE
;
3316 ptrdiff_t pos_byte
= PT_BYTE
;
3321 SET_PT_BOTH (opoint
, opoint_byte
);
3326 SETUP_SYNTAX_TABLE (pos
, -1);
3328 DEC_BOTH (pos
, pos_byte
);
3330 while (!char_quoted (pos
, pos_byte
)
3331 /* Previous statement updates syntax table. */
3332 && ((c
= FETCH_CHAR_AS_MULTIBYTE (pos_byte
), SYNTAX (c
) == Squote
)
3333 || syntax_prefix_flag_p (c
)))
3336 opoint_byte
= pos_byte
;
3340 DEC_BOTH (pos
, pos_byte
);
3344 SET_PT_BOTH (opoint
, opoint_byte
);
3350 /* If the character at FROM_BYTE is the second part of a 2-character
3351 comment opener based on PREV_FROM_SYNTAX, update STATE and return
3354 in_2char_comment_start (struct lisp_parse_state
*state
,
3355 int prev_from_syntax
,
3356 ptrdiff_t prev_from
,
3357 ptrdiff_t from_byte
)
3360 if (SYNTAX_FLAGS_COMSTART_FIRST (prev_from_syntax
)
3361 && (c1
= FETCH_CHAR_AS_MULTIBYTE (from_byte
),
3362 syntax
= SYNTAX_WITH_FLAGS (c1
),
3363 SYNTAX_FLAGS_COMSTART_SECOND (syntax
)))
3365 /* Record the comment style we have entered so that only
3366 the comment-end sequence of the same style actually
3367 terminates the comment section. */
3369 = SYNTAX_FLAGS_COMMENT_STYLE (syntax
, prev_from_syntax
);
3370 bool comnested
= (SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax
)
3371 | SYNTAX_FLAGS_COMMENT_NESTED (syntax
));
3372 state
->incomment
= comnested
? 1 : -1;
3373 state
->comstr_start
= prev_from
;
3379 /* Parse forward from FROM / FROM_BYTE to END,
3380 assuming that FROM has state STATE,
3381 and return a description of the state of the parse at END.
3382 If STOPBEFORE, stop at the start of an atom.
3383 If COMMENTSTOP is 1, stop at the start of a comment.
3384 If COMMENTSTOP is -1, stop at the start or end of a comment,
3385 after the beginning of a string, or after the end of a string. */
3388 scan_sexps_forward (struct lisp_parse_state
*state
,
3389 ptrdiff_t from
, ptrdiff_t from_byte
, ptrdiff_t end
,
3390 EMACS_INT targetdepth
, bool stopbefore
,
3393 enum syntaxcode code
;
3394 struct level
{ ptrdiff_t last
, prev
; };
3395 struct level levelstart
[100];
3396 struct level
*curlevel
= levelstart
;
3397 struct level
*endlevel
= levelstart
+ 100;
3398 EMACS_INT depth
; /* Paren depth of current scanning location.
3399 level - levelstart equals this except
3400 when the depth becomes negative. */
3401 EMACS_INT mindepth
; /* Lowest DEPTH value seen. */
3402 bool start_quoted
= 0; /* True means starting after a char quote. */
3404 ptrdiff_t prev_from
; /* Keep one character before FROM. */
3405 ptrdiff_t prev_from_byte
;
3406 int prev_from_syntax
, prev_prev_from_syntax
;
3407 bool boundary_stop
= commentstop
== -1;
3410 ptrdiff_t out_bytepos
, out_charpos
;
3412 unsigned short int quit_count
= 0;
3415 prev_from_byte
= from_byte
;
3417 DEC_BOTH (prev_from
, prev_from_byte
);
3419 /* Use this macro instead of `from++'. */
3421 do { prev_from = from; \
3422 prev_from_byte = from_byte; \
3423 temp = FETCH_CHAR_AS_MULTIBYTE (prev_from_byte); \
3424 prev_prev_from_syntax = prev_from_syntax; \
3425 prev_from_syntax = SYNTAX_WITH_FLAGS (temp); \
3426 INC_BOTH (from, from_byte); \
3428 UPDATE_SYNTAX_TABLE_FORWARD (from); \
3433 depth
= state
->depth
;
3434 start_quoted
= state
->quoted
;
3435 prev_prev_from_syntax
= Smax
;
3436 prev_from_syntax
= state
->prev_syntax
;
3438 tem
= state
->levelstarts
;
3439 while (!NILP (tem
)) /* >= second enclosing sexps. */
3441 Lisp_Object temhd
= Fcar (tem
);
3442 if (RANGED_INTEGERP (PTRDIFF_MIN
, temhd
, PTRDIFF_MAX
))
3443 curlevel
->last
= XINT (temhd
);
3444 if (++curlevel
== endlevel
)
3445 curlevel
--; /* error ("Nesting too deep for parser"); */
3446 curlevel
->prev
= -1;
3447 curlevel
->last
= -1;
3450 curlevel
->prev
= -1;
3451 curlevel
->last
= -1;
3456 SETUP_SYNTAX_TABLE (from
, 1);
3458 /* Enter the loop at a place appropriate for initial state. */
3460 if (state
->incomment
)
3461 goto startincomment
;
3462 if (state
->instring
>= 0)
3464 nofence
= state
->instring
!= ST_STRING_STYLE
;
3466 goto startquotedinstring
;
3469 else if (start_quoted
)
3471 else if ((from
< end
)
3472 && (in_2char_comment_start (state
, prev_from_syntax
,
3473 prev_from
, from_byte
)))
3476 prev_from_syntax
= Smax
; /* the syntax has already been "used up". */
3482 rarely_quit (++quit_count
);
3486 && (in_2char_comment_start (state
, prev_from_syntax
,
3487 prev_from
, from_byte
)))
3490 prev_from_syntax
= Smax
; /* the syntax has already been "used up". */
3494 if (SYNTAX_FLAGS_PREFIX (prev_from_syntax
))
3496 code
= prev_from_syntax
& 0xff;
3501 if (stopbefore
) goto stop
; /* this arg means stop at sexp start */
3502 curlevel
->last
= prev_from
;
3504 if (from
== end
) goto endquoted
;
3507 /* treat following character as a word constituent */
3510 if (stopbefore
) goto stop
; /* this arg means stop at sexp start */
3511 curlevel
->last
= prev_from
;
3515 if (in_2char_comment_start (state
, prev_from_syntax
,
3516 prev_from
, from_byte
))
3519 prev_from_syntax
= Smax
; /* the syntax has already been "used up". */
3523 int symchar
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
3524 switch (SYNTAX (symchar
))
3529 if (from
== end
) goto endquoted
;
3539 rarely_quit (++quit_count
);
3542 curlevel
->prev
= curlevel
->last
;
3545 case Scomment_fence
:
3546 /* Record the comment style we have entered so that only
3547 the comment-end sequence of the same style actually
3548 terminates the comment section. */
3549 state
->comstyle
= ST_COMMENT_STYLE
;
3550 state
->incomment
= -1;
3551 state
->comstr_start
= prev_from
;
3554 state
->comstyle
= SYNTAX_FLAGS_COMMENT_STYLE (prev_from_syntax
, 0);
3555 state
->incomment
= (SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax
) ?
3557 state
->comstr_start
= prev_from
;
3559 if (commentstop
|| boundary_stop
) goto done
;
3561 /* The (from == BEGV) test was to enter the loop in the middle so
3562 that we find a 2-char comment ender even if we start in the
3563 middle of it. We don't want to do that if we're just at the
3564 beginning of the comment (think of (*) ... (*)). */
3565 found
= forw_comment (from
, from_byte
, end
,
3566 state
->incomment
, state
->comstyle
,
3567 from
== BEGV
? 0 : prev_from_syntax
,
3568 &out_charpos
, &out_bytepos
, &state
->incomment
,
3570 from
= out_charpos
; from_byte
= out_bytepos
;
3571 /* Beware! prev_from and friends (except prev_from_syntax)
3572 are invalid now. Luckily, the `done' doesn't use them
3573 and the INC_FROM sets them to a sane value without
3575 if (!found
) goto done
;
3577 state
->incomment
= 0;
3578 state
->comstyle
= 0; /* reset the comment style */
3579 prev_from_syntax
= Smax
; /* For the comment closer */
3580 if (boundary_stop
) goto done
;
3584 if (stopbefore
) goto stop
; /* this arg means stop at sexp start */
3586 /* curlevel++->last ran into compiler bug on Apollo */
3587 curlevel
->last
= prev_from
;
3588 if (++curlevel
== endlevel
)
3589 curlevel
--; /* error ("Nesting too deep for parser"); */
3590 curlevel
->prev
= -1;
3591 curlevel
->last
= -1;
3592 if (targetdepth
== depth
) goto done
;
3597 if (depth
< mindepth
)
3599 if (curlevel
!= levelstart
)
3601 curlevel
->prev
= curlevel
->last
;
3602 if (targetdepth
== depth
) goto done
;
3607 state
->comstr_start
= from
- 1;
3608 if (stopbefore
) goto stop
; /* this arg means stop at sexp start */
3609 curlevel
->last
= prev_from
;
3610 state
->instring
= (code
== Sstring
3611 ? (FETCH_CHAR_AS_MULTIBYTE (prev_from_byte
))
3613 if (boundary_stop
) goto done
;
3616 nofence
= state
->instring
!= ST_STRING_STYLE
;
3621 enum syntaxcode c_code
;
3623 if (from
>= end
) goto done
;
3624 c
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
3625 c_code
= SYNTAX (c
);
3627 /* Check C_CODE here so that if the char has
3628 a syntax-table property which says it is NOT
3629 a string character, it does not end the string. */
3630 if (nofence
&& c
== state
->instring
&& c_code
== Sstring
)
3636 if (!nofence
) goto string_end
;
3642 startquotedinstring
:
3643 if (from
>= end
) goto endquoted
;
3650 rarely_quit (++quit_count
);
3654 state
->instring
= -1;
3655 curlevel
->prev
= curlevel
->last
;
3657 if (boundary_stop
) goto done
;
3661 /* FIXME: We should do something with it. */
3664 /* Ignore whitespace, punctuation, quote, endcomment. */
3670 stop
: /* Here if stopping before start of sexp. */
3671 from
= prev_from
; /* We have just fetched the char that starts it; */
3672 from_byte
= prev_from_byte
;
3673 prev_from_syntax
= prev_prev_from_syntax
;
3674 goto done
; /* but return the position before it. */
3679 state
->depth
= depth
;
3680 state
->mindepth
= mindepth
;
3681 state
->thislevelstart
= curlevel
->prev
;
3682 state
->prevlevelstart
3683 = (curlevel
== levelstart
) ? -1 : (curlevel
- 1)->last
;
3684 state
->location
= from
;
3685 state
->location_byte
= from_byte
;
3686 state
->levelstarts
= Qnil
;
3687 while (curlevel
> levelstart
)
3688 state
->levelstarts
= Fcons (make_number ((--curlevel
)->last
),
3689 state
->levelstarts
);
3690 state
->prev_syntax
= (SYNTAX_FLAGS_COMSTARTEND_FIRST (prev_from_syntax
)
3691 || state
->quoted
) ? prev_from_syntax
: Smax
;
3694 /* Convert a (lisp) parse state to the internal form used in
3695 scan_sexps_forward. */
3697 internalize_parse_state (Lisp_Object external
, struct lisp_parse_state
*state
)
3701 if (NILP (external
))
3704 state
->instring
= -1;
3705 state
->incomment
= 0;
3707 state
->comstyle
= 0; /* comment style a by default. */
3708 state
->comstr_start
= -1; /* no comment/string seen. */
3709 state
->levelstarts
= Qnil
;
3710 state
->prev_syntax
= Smax
;
3714 tem
= Fcar (external
);
3716 state
->depth
= XINT (tem
);
3720 external
= Fcdr (external
);
3721 external
= Fcdr (external
);
3722 external
= Fcdr (external
);
3723 tem
= Fcar (external
);
3724 /* Check whether we are inside string_fence-style string: */
3725 state
->instring
= (!NILP (tem
)
3726 ? (CHARACTERP (tem
) ? XFASTINT (tem
) : ST_STRING_STYLE
)
3729 external
= Fcdr (external
);
3730 tem
= Fcar (external
);
3731 state
->incomment
= (!NILP (tem
)
3732 ? (INTEGERP (tem
) ? XINT (tem
) : -1)
3735 external
= Fcdr (external
);
3736 tem
= Fcar (external
);
3737 state
->quoted
= !NILP (tem
);
3739 /* if the eighth element of the list is nil, we are in comment
3740 style a. If it is non-nil, we are in comment style b */
3741 external
= Fcdr (external
);
3742 external
= Fcdr (external
);
3743 tem
= Fcar (external
);
3744 state
->comstyle
= (NILP (tem
)
3746 : (RANGED_INTEGERP (0, tem
, ST_COMMENT_STYLE
)
3748 : ST_COMMENT_STYLE
));
3750 external
= Fcdr (external
);
3751 tem
= Fcar (external
);
3752 state
->comstr_start
=
3753 RANGED_INTEGERP (PTRDIFF_MIN
, tem
, PTRDIFF_MAX
) ? XINT (tem
) : -1;
3754 external
= Fcdr (external
);
3755 tem
= Fcar (external
);
3756 state
->levelstarts
= tem
;
3758 external
= Fcdr (external
);
3759 tem
= Fcar (external
);
3760 state
->prev_syntax
= NILP (tem
) ? Smax
: XINT (tem
);
3764 DEFUN ("parse-partial-sexp", Fparse_partial_sexp
, Sparse_partial_sexp
, 2, 6, 0,
3765 doc
: /* Parse Lisp syntax starting at FROM until TO; return status of parse at TO.
3766 Parsing stops at TO or when certain criteria are met;
3767 point is set to where parsing stops.
3768 If fifth arg OLDSTATE is omitted or nil,
3769 parsing assumes that FROM is the beginning of a function.
3771 Value is a list of elements describing final state of parsing:
3773 1. character address of start of innermost containing list; nil if none.
3774 2. character address of start of last complete sexp terminated.
3775 3. non-nil if inside a string.
3776 (it is the character that will terminate the string,
3777 or t if the string should be terminated by a generic string delimiter.)
3778 4. nil if outside a comment, t if inside a non-nestable comment,
3779 else an integer (the current comment nesting).
3780 5. t if following a quote character.
3781 6. the minimum paren-depth encountered during this scan.
3782 7. style of comment, if any.
3783 8. character address of start of comment or string; nil if not in one.
3784 9. List of positions of currently open parens, outermost first.
3785 10. When the last position scanned holds the first character of a
3786 (potential) two character construct, the syntax of that position,
3787 otherwise nil. That construct can be a two character comment
3788 delimiter or an Escaped or Char-quoted character.
3789 11..... Possible further internal information used by `parse-partial-sexp'.
3791 If third arg TARGETDEPTH is non-nil, parsing stops if the depth
3792 in parentheses becomes equal to TARGETDEPTH.
3793 Fourth arg STOPBEFORE non-nil means stop when we come to
3794 any character that starts a sexp.
3795 Fifth arg OLDSTATE is a list like what this function returns.
3796 It is used to initialize the state of the parse. Elements number 1, 2, 6
3798 Sixth arg COMMENTSTOP non-nil means stop after the start of a comment.
3799 If it is the symbol `syntax-table', stop after the start of a comment or a
3800 string, or after end of a comment or a string. */)
3801 (Lisp_Object from
, Lisp_Object to
, Lisp_Object targetdepth
,
3802 Lisp_Object stopbefore
, Lisp_Object oldstate
, Lisp_Object commentstop
)
3804 struct lisp_parse_state state
;
3807 if (!NILP (targetdepth
))
3809 CHECK_NUMBER (targetdepth
);
3810 target
= XINT (targetdepth
);
3813 target
= TYPE_MINIMUM (EMACS_INT
); /* We won't reach this depth. */
3815 validate_region (&from
, &to
);
3816 internalize_parse_state (oldstate
, &state
);
3817 scan_sexps_forward (&state
, XINT (from
), CHAR_TO_BYTE (XINT (from
)),
3819 target
, !NILP (stopbefore
),
3821 ? 0 : (EQ (commentstop
, Qsyntax_table
) ? -1 : 1)));
3823 SET_PT_BOTH (state
.location
, state
.location_byte
);
3826 Fcons (make_number (state
.depth
),
3827 Fcons (state
.prevlevelstart
< 0
3828 ? Qnil
: make_number (state
.prevlevelstart
),
3829 Fcons (state
.thislevelstart
< 0
3830 ? Qnil
: make_number (state
.thislevelstart
),
3831 Fcons (state
.instring
>= 0
3832 ? (state
.instring
== ST_STRING_STYLE
3833 ? Qt
: make_number (state
.instring
)) : Qnil
,
3834 Fcons (state
.incomment
< 0 ? Qt
:
3835 (state
.incomment
== 0 ? Qnil
:
3836 make_number (state
.incomment
)),
3837 Fcons (state
.quoted
? Qt
: Qnil
,
3838 Fcons (make_number (state
.mindepth
),
3839 Fcons ((state
.comstyle
3840 ? (state
.comstyle
== ST_COMMENT_STYLE
3842 : make_number (state
.comstyle
))
3844 Fcons (((state
.incomment
3845 || (state
.instring
>= 0))
3846 ? make_number (state
.comstr_start
)
3848 Fcons (state
.levelstarts
,
3849 Fcons (state
.prev_syntax
== Smax
3851 : make_number (state
.prev_syntax
),
3856 init_syntax_once (void)
3861 /* This has to be done here, before we call Fmake_char_table. */
3862 DEFSYM (Qsyntax_table
, "syntax-table");
3863 Fput (Qsyntax_table
, Qchar_table_extra_slots
, make_number (2));
3865 /* Create objects which can be shared among syntax tables. */
3866 Vsyntax_code_object
= make_uninit_vector (Smax
);
3867 for (i
= 0; i
< Smax
; i
++)
3868 ASET (Vsyntax_code_object
, i
, Fcons (make_number (i
), Qnil
));
3870 /* Now we are ready to set up this property, so we can
3871 create syntax tables. */
3872 /* Fput (Qsyntax_table, Qchar_table_extra_slots, make_number (0)); */
3874 temp
= AREF (Vsyntax_code_object
, Swhitespace
);
3876 Vstandard_syntax_table
= Fmake_char_table (Qsyntax_table
, temp
);
3878 /* Control characters should not be whitespace. */
3879 temp
= AREF (Vsyntax_code_object
, Spunct
);
3880 for (i
= 0; i
<= ' ' - 1; i
++)
3881 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, i
, temp
);
3882 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, 0177, temp
);
3884 /* Except that a few really are whitespace. */
3885 temp
= AREF (Vsyntax_code_object
, Swhitespace
);
3886 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, ' ', temp
);
3887 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '\t', temp
);
3888 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '\n', temp
);
3889 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, 015, temp
);
3890 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, 014, temp
);
3892 temp
= AREF (Vsyntax_code_object
, Sword
);
3893 for (i
= 'a'; i
<= 'z'; i
++)
3894 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, i
, temp
);
3895 for (i
= 'A'; i
<= 'Z'; i
++)
3896 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, i
, temp
);
3897 for (i
= '0'; i
<= '9'; i
++)
3898 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, i
, temp
);
3900 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '$', temp
);
3901 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '%', temp
);
3903 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '(',
3904 Fcons (make_number (Sopen
), make_number (')')));
3905 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, ')',
3906 Fcons (make_number (Sclose
), make_number ('(')));
3907 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '[',
3908 Fcons (make_number (Sopen
), make_number (']')));
3909 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, ']',
3910 Fcons (make_number (Sclose
), make_number ('[')));
3911 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '{',
3912 Fcons (make_number (Sopen
), make_number ('}')));
3913 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '}',
3914 Fcons (make_number (Sclose
), make_number ('{')));
3915 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '"',
3916 Fcons (make_number (Sstring
), Qnil
));
3917 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '\\',
3918 Fcons (make_number (Sescape
), Qnil
));
3920 temp
= AREF (Vsyntax_code_object
, Ssymbol
);
3921 for (i
= 0; i
< 10; i
++)
3923 c
= "_-+*/&|<>="[i
];
3924 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, c
, temp
);
3927 temp
= AREF (Vsyntax_code_object
, Spunct
);
3928 for (i
= 0; i
< 12; i
++)
3930 c
= ".,;:?!#@~^'`"[i
];
3931 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, c
, temp
);
3934 /* All multibyte characters have syntax `word' by default. */
3935 temp
= AREF (Vsyntax_code_object
, Sword
);
3936 char_table_set_range (Vstandard_syntax_table
, 0x80, MAX_CHAR
, temp
);
3940 syms_of_syntax (void)
3942 DEFSYM (Qsyntax_table_p
, "syntax-table-p");
3944 staticpro (&Vsyntax_code_object
);
3946 staticpro (&gl_state
.object
);
3947 staticpro (&gl_state
.global_code
);
3948 staticpro (&gl_state
.current_syntax_table
);
3949 staticpro (&gl_state
.old_prop
);
3951 /* Defined in regex.c. */
3952 staticpro (&re_match_object
);
3954 DEFSYM (Qscan_error
, "scan-error");
3955 Fput (Qscan_error
, Qerror_conditions
,
3956 listn (CONSTYPE_PURE
, 2, Qscan_error
, Qerror
));
3957 Fput (Qscan_error
, Qerror_message
,
3958 build_pure_c_string ("Scan error"));
3960 DEFSYM (Qliteral_cache
, "literal-cache");
3961 DEFVAR_LISP ("literal-cache-values", Vliteral_cache_values
,
3962 doc
: /* A list of values which the text property `literal-cache' can assume.
3963 This is to ensure that any values which are `equal' are also `eq', as required by the text
3964 property functions. The list starts off empty, and any time a new value is needed, it is
3965 pushed onto the list. The second time a value is needed, it is found by `member', and the
3966 canonical equivalent used. */);
3967 Vliteral_cache_values
= Qnil
;
3969 DEFVAR_BOOL ("parse-sexp-ignore-comments", parse_sexp_ignore_comments
,
3970 doc
: /* Non-nil means `forward-sexp', etc., should treat comments as whitespace. */);
3972 DEFVAR_BOOL ("parse-sexp-lookup-properties", parse_sexp_lookup_properties
,
3973 doc
: /* Non-nil means `forward-sexp', etc., obey `syntax-table' property.
3974 Otherwise, that text property is simply ignored.
3975 See the info node `(elisp)Syntax Properties' for a description of the
3976 `syntax-table' property. */);
3978 DEFVAR_INT ("syntax-propertize--done", syntax_propertize__done
,
3979 doc
: /* Position up to which syntax-table properties have been set. */);
3980 syntax_propertize__done
= -1;
3981 DEFSYM (Qinternal__syntax_propertize
, "internal--syntax-propertize");
3982 Fmake_variable_buffer_local (intern ("syntax-propertize--done"));
3984 words_include_escapes
= 0;
3985 DEFVAR_BOOL ("words-include-escapes", words_include_escapes
,
3986 doc
: /* Non-nil means `forward-word', etc., should treat escape chars part of words. */);
3988 DEFVAR_BOOL ("multibyte-syntax-as-symbol", multibyte_syntax_as_symbol
,
3989 doc
: /* Non-nil means `scan-sexps' treats all multibyte characters as symbol. */);
3990 multibyte_syntax_as_symbol
= 0;
3992 DEFVAR_BOOL ("open-paren-in-column-0-is-defun-start",
3993 open_paren_in_column_0_is_defun_start
,
3994 doc
: /* Non-nil means an open paren in column 0 denotes the start of a defun. */);
3995 open_paren_in_column_0_is_defun_start
= 1;
3998 DEFVAR_LISP ("find-word-boundary-function-table",
3999 Vfind_word_boundary_function_table
,
4001 Char table of functions to search for the word boundary.
4002 Each function is called with two arguments; POS and LIMIT.
4003 POS and LIMIT are character positions in the current buffer.
4005 If POS is less than LIMIT, POS is at the first character of a word,
4006 and the return value of a function should be a position after the
4007 last character of that word.
4009 If POS is not less than LIMIT, POS is at the last character of a word,
4010 and the return value of a function should be a position at the first
4011 character of that word.
4013 In both cases, LIMIT bounds the search. */);
4014 Vfind_word_boundary_function_table
= Fmake_char_table (Qnil
, Qnil
);
4016 DEFVAR_BOOL ("comment-end-can-be-escaped", Vcomment_end_can_be_escaped
,
4017 doc
: /* Non-nil means an escaped ender inside a comment doesn't end the comment. */);
4018 Vcomment_end_can_be_escaped
= 0;
4019 DEFSYM (Qcomment_end_can_be_escaped
, "comment-end-can-be-escaped");
4020 Fmake_variable_buffer_local (Qcomment_end_can_be_escaped
);
4022 defsubr (&Strim_literal_cache
);
4023 defsubr (&Sleast_literal_difference_between_syntax_tables
);
4024 defsubr (&Ssyntax_tables_literally_different_p
);
4025 defsubr (&Ssyntax_table_p
);
4026 defsubr (&Ssyntax_table
);
4027 defsubr (&Sstandard_syntax_table
);
4028 defsubr (&Scopy_syntax_table
);
4029 defsubr (&Sset_syntax_table
);
4030 defsubr (&Schar_syntax
);
4031 defsubr (&Smatching_paren
);
4032 defsubr (&Sstring_to_syntax
);
4033 defsubr (&Smodify_syntax_entry
);
4034 defsubr (&Sinternal_describe_syntax_value
);
4036 defsubr (&Sforward_word
);
4038 defsubr (&Sskip_chars_forward
);
4039 defsubr (&Sskip_chars_backward
);
4040 defsubr (&Sskip_syntax_forward
);
4041 defsubr (&Sskip_syntax_backward
);
4043 defsubr (&Sforward_comment
);
4044 defsubr (&Sscan_lists
);
4045 defsubr (&Sscan_sexps
);
4046 defsubr (&Sbackward_prefix_chars
);
4047 defsubr (&Sparse_partial_sexp
);