Merge branch 'master' into comment-cache
[emacs.git] / src / syntax.c
blob34a9e632b3cb4cbdadd78732d3b1490a5158166c
1 /* GNU Emacs routines to deal with syntax tables; also word and list parsing.
2 Copyright (C) 1985, 1987, 1993-1995, 1997-1999, 2001-2017 Free
3 Software Foundation, Inc.
5 This file is part of GNU Emacs.
7 GNU Emacs is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or (at
10 your option) any later version.
12 GNU Emacs is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
21 #include <config.h>
23 #include "lisp.h"
24 #include "character.h"
25 #include "buffer.h"
26 #include "regex.h"
27 #include "syntax.h"
28 #include "intervals.h"
29 #include "category.h"
31 /* Make syntax table lookup grant data in gl_state. */
32 #define SYNTAX(c) syntax_property (c, 1)
33 #define SYNTAX_ENTRY(c) syntax_property_entry (c, 1)
34 #define SYNTAX_WITH_FLAGS(c) syntax_property_with_flags (c, 1)
36 /* Eight single-bit flags have the following meanings:
37 1. This character is the first of a two-character comment-start sequence.
38 2. This character is the second of a two-character comment-start sequence.
39 3. This character is the first of a two-character comment-end sequence.
40 4. This character is the second of a two-character comment-end sequence.
41 5. This character is a prefix, for backward-prefix-chars.
42 6. The char is part of a delimiter for comments of style "b".
43 7. This character is part of a nestable comment sequence.
44 8. The char is part of a delimiter for comments of style "c".
45 Note that any two-character sequence whose first character has flag 1
46 and whose second character has flag 2 will be interpreted as a comment start.
48 Bits 6 and 8 discriminate among different comment styles.
49 Languages such as C++ allow two orthogonal syntax start/end pairs
50 and bit 6 determines whether a comment-end or Scommentend
51 ends style a or b. Comment markers can start style a, b, c, or bc.
52 Style a is always the default.
53 For 2-char comment markers, the style b flag is looked up only on the second
54 char of the comment marker and on the first char of the comment ender.
55 For style c (like the nested flag), the flag can be placed on any of
56 the chars. */
58 /* These functions extract specific flags from an integer
59 that holds the syntax code and the flags. */
61 static bool
62 SYNTAX_FLAGS_COMSTART_FIRST (int flags)
64 return (flags >> 16) & 1;
66 static bool
67 SYNTAX_FLAGS_COMSTART_SECOND (int flags)
69 return (flags >> 17) & 1;
71 static bool
72 SYNTAX_FLAGS_COMEND_FIRST (int flags)
74 return (flags >> 18) & 1;
76 static bool
77 SYNTAX_FLAGS_COMEND_SECOND (int flags)
79 return (flags >> 19) & 1;
81 static bool
82 SYNTAX_FLAGS_COMSTARTEND_FIRST (int flags)
84 return (flags & 0x50000) != 0;
86 static bool
87 SYNTAX_FLAGS_PREFIX (int flags)
89 return (flags >> 20) & 1;
91 static bool
92 SYNTAX_FLAGS_COMMENT_STYLEB (int flags)
94 return (flags >> 21) & 1;
96 static bool
97 SYNTAX_FLAGS_COMMENT_STYLEC (int flags)
99 return (flags >> 23) & 1;
101 static int
102 SYNTAX_FLAGS_COMMENT_STYLEC2 (int flags)
104 return (flags >> 22) & 2; /* SYNTAX_FLAGS_COMMENT_STYLEC (flags) * 2 */
106 static bool
107 SYNTAX_FLAGS_COMMENT_NESTED (int flags)
109 return (flags >> 22) & 1;
112 /* FLAGS should be the flags of the main char of the comment marker, e.g.
113 the second for comstart and the first for comend. */
114 static int
115 SYNTAX_FLAGS_COMMENT_STYLE (int flags, int other_flags)
117 return (SYNTAX_FLAGS_COMMENT_STYLEB (flags)
118 | SYNTAX_FLAGS_COMMENT_STYLEC2 (flags)
119 | SYNTAX_FLAGS_COMMENT_STYLEC2 (other_flags));
122 /* Extract a particular flag for a given character. */
124 static bool
125 SYNTAX_COMEND_FIRST (int c)
127 return SYNTAX_FLAGS_COMEND_FIRST (SYNTAX_WITH_FLAGS (c));
130 /* We use these constants in place for comment-style and
131 string-ender-char to distinguish comments/strings started by
132 comment_fence and string_fence codes. */
134 enum
136 ST_COMMENT_STYLE = 256 + 1,
137 ST_STRING_STYLE = 256 + 2
140 /* This is the internal form of the parse state used in parse-partial-sexp. */
142 struct lisp_parse_state
144 EMACS_INT depth; /* Depth at end of parsing. */
145 int instring; /* -1 if not within string, else desired terminator. */
146 EMACS_INT incomment; /* -1 if in unnestable comment else comment nesting */
147 int comstyle; /* comment style a=0, or b=1, or ST_COMMENT_STYLE. */
148 bool quoted; /* True if just after an escape char at end of parsing. */
149 EMACS_INT mindepth; /* Minimum depth seen while scanning. */
150 /* Char number of most recent start-of-expression at current level */
151 ptrdiff_t thislevelstart;
152 /* Char number of start of containing expression */
153 ptrdiff_t prevlevelstart;
154 ptrdiff_t location; /* Char number at which parsing stopped. */
155 ptrdiff_t location_byte; /* Corresponding byte position. */
156 ptrdiff_t comstr_start; /* Position of last comment/string starter. */
157 Lisp_Object levelstarts; /* Char numbers of starts-of-expression
158 of levels (starting from outermost). */
159 int prev_syntax; /* Syntax of previous position scanned, when
160 that position (potentially) holds the first char
161 of a 2-char construct, i.e. comment delimiter
162 or Sescape, etc. Smax otherwise. */
165 /* These variables are a cache for finding the start of a defun.
166 find_start_pos is the place for which the defun start was found.
167 find_start_value is the defun start position found for it.
168 find_start_value_byte is the corresponding byte position.
169 find_start_buffer is the buffer it was found in.
170 find_start_begv is the BEGV value when it was found.
171 find_start_modiff is the value of MODIFF when it was found. */
173 static ptrdiff_t find_start_pos;
174 static ptrdiff_t find_start_value;
175 static ptrdiff_t find_start_value_byte;
176 static struct buffer *find_start_buffer;
177 static ptrdiff_t find_start_begv;
178 static EMACS_INT find_start_modiff;
181 static Lisp_Object skip_chars (bool, Lisp_Object, Lisp_Object, bool);
182 static Lisp_Object skip_syntaxes (bool, Lisp_Object, Lisp_Object);
183 static Lisp_Object scan_lists (EMACS_INT, EMACS_INT, EMACS_INT, bool);
184 static void scan_sexps_forward (struct lisp_parse_state *,
185 ptrdiff_t, ptrdiff_t, ptrdiff_t, EMACS_INT,
186 bool, int);
187 static void internalize_parse_state (Lisp_Object, struct lisp_parse_state *);
188 static bool in_classes (int, Lisp_Object);
189 static void parse_sexp_propertize (ptrdiff_t charpos);
190 static void check_syntax_table (Lisp_Object obj);
192 /* This setter is used only in this file, so it can be private. */
193 static void
194 bset_syntax_table (struct buffer *b, Lisp_Object val)
196 b->syntax_table_ = val;
199 /* Whether the syntax of the character C has the prefix flag set. */
200 bool
201 syntax_prefix_flag_p (int c)
203 return SYNTAX_FLAGS_PREFIX (SYNTAX_WITH_FLAGS (c));
206 struct gl_state_s gl_state; /* Global state of syntax parser. */
208 enum { INTERVALS_AT_ONCE = 10 }; /* 1 + max-number of intervals
209 to scan to property-change. */
211 /* Set the syntax entry VAL for char C in table TABLE. */
213 static void
214 SET_RAW_SYNTAX_ENTRY (Lisp_Object table, int c, Lisp_Object val)
216 CHAR_TABLE_SET (table, c, val);
219 /* Set the syntax entry VAL for char-range RANGE in table TABLE.
220 RANGE is a cons (FROM . TO) specifying the range of characters. */
222 static void
223 SET_RAW_SYNTAX_ENTRY_RANGE (Lisp_Object table, Lisp_Object range,
224 Lisp_Object val)
226 Fset_char_table_range (table, range, val);
229 /* Extract the information from the entry for character C
230 in the current syntax table. */
232 static Lisp_Object
233 SYNTAX_MATCH (int c)
235 Lisp_Object ent = SYNTAX_ENTRY (c);
236 return CONSP (ent) ? XCDR (ent) : Qnil;
239 /* This should be called with FROM at the start of forward
240 search, or after the last position of the backward search. It
241 makes sure that the first char is picked up with correct table, so
242 one does not need to call UPDATE_SYNTAX_TABLE immediately after the
243 call.
244 Sign of COUNT gives the direction of the search.
247 static void
248 SETUP_SYNTAX_TABLE (ptrdiff_t from, ptrdiff_t count)
250 SETUP_BUFFER_SYNTAX_TABLE ();
251 gl_state.b_property = BEGV;
252 gl_state.e_property = ZV + 1;
253 gl_state.object = Qnil;
254 gl_state.offset = 0;
255 if (parse_sexp_lookup_properties)
257 if (count > 0)
258 update_syntax_table_forward (from, true, Qnil);
259 else if (from > BEGV)
261 update_syntax_table (from - 1, count, true, Qnil);
262 parse_sexp_propertize (from - 1);
267 /* Same as above, but in OBJECT. If OBJECT is nil, use current buffer.
268 If it is t (which is only used in fast_c_string_match_ignore_case),
269 ignore properties altogether.
271 This is meant for regex.c to use. For buffers, regex.c passes arguments
272 to the UPDATE_SYNTAX_TABLE functions which are relative to BEGV.
273 So if it is a buffer, we set the offset field to BEGV. */
275 void
276 SETUP_SYNTAX_TABLE_FOR_OBJECT (Lisp_Object object,
277 ptrdiff_t from, ptrdiff_t count)
279 SETUP_BUFFER_SYNTAX_TABLE ();
280 gl_state.object = object;
281 if (BUFFERP (gl_state.object))
283 struct buffer *buf = XBUFFER (gl_state.object);
284 gl_state.b_property = 1;
285 gl_state.e_property = BUF_ZV (buf) - BUF_BEGV (buf) + 1;
286 gl_state.offset = BUF_BEGV (buf) - 1;
288 else if (NILP (gl_state.object))
290 gl_state.b_property = 1;
291 gl_state.e_property = ZV - BEGV + 1;
292 gl_state.offset = BEGV - 1;
294 else if (EQ (gl_state.object, Qt))
296 gl_state.b_property = 0;
297 gl_state.e_property = PTRDIFF_MAX;
298 gl_state.offset = 0;
300 else
302 gl_state.b_property = 0;
303 gl_state.e_property = 1 + SCHARS (gl_state.object);
304 gl_state.offset = 0;
306 if (parse_sexp_lookup_properties)
307 update_syntax_table (from + gl_state.offset - (count <= 0),
308 count, 1, gl_state.object);
311 /* Update gl_state to an appropriate interval which contains CHARPOS. The
312 sign of COUNT give the relative position of CHARPOS wrt the previously
313 valid interval. If INIT, only [be]_property fields of gl_state are
314 valid at start, the rest is filled basing on OBJECT.
316 `gl_state.*_i' are the intervals, and CHARPOS is further in the search
317 direction than the intervals - or in an interval. We update the
318 current syntax-table basing on the property of this interval, and
319 update the interval to start further than CHARPOS - or be
320 NULL. We also update lim_property to be the next value of
321 charpos to call this subroutine again - or be before/after the
322 start/end of OBJECT. */
324 void
325 update_syntax_table (ptrdiff_t charpos, EMACS_INT count, bool init,
326 Lisp_Object object)
328 Lisp_Object tmp_table;
329 int cnt = 0;
330 bool invalidate = true;
331 INTERVAL i;
333 if (init)
335 gl_state.old_prop = Qnil;
336 gl_state.start = gl_state.b_property;
337 gl_state.stop = gl_state.e_property;
338 i = interval_of (charpos, object);
339 gl_state.backward_i = gl_state.forward_i = i;
340 invalidate = false;
341 if (!i)
342 return;
343 /* interval_of updates only ->position of the return value, so
344 update the parents manually to speed up update_interval. */
345 while (!NULL_PARENT (i))
347 if (AM_RIGHT_CHILD (i))
348 INTERVAL_PARENT (i)->position = i->position
349 - LEFT_TOTAL_LENGTH (i) + TOTAL_LENGTH (i) /* right end */
350 - TOTAL_LENGTH (INTERVAL_PARENT (i))
351 + LEFT_TOTAL_LENGTH (INTERVAL_PARENT (i));
352 else
353 INTERVAL_PARENT (i)->position = i->position - LEFT_TOTAL_LENGTH (i)
354 + TOTAL_LENGTH (i);
355 i = INTERVAL_PARENT (i);
357 i = gl_state.forward_i;
358 gl_state.b_property = i->position - gl_state.offset;
359 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
360 goto update;
362 i = count > 0 ? gl_state.forward_i : gl_state.backward_i;
364 /* We are guaranteed to be called with CHARPOS either in i,
365 or further off. */
366 if (!i)
367 error ("Error in syntax_table logic for to-the-end intervals");
368 else if (charpos < i->position) /* Move left. */
370 if (count > 0)
371 error ("Error in syntax_table logic for intervals <-");
372 /* Update the interval. */
373 i = update_interval (i, charpos);
374 if (INTERVAL_LAST_POS (i) != gl_state.b_property)
376 invalidate = false;
377 gl_state.forward_i = i;
378 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
381 else if (charpos >= INTERVAL_LAST_POS (i)) /* Move right. */
383 if (count < 0)
384 error ("Error in syntax_table logic for intervals ->");
385 /* Update the interval. */
386 i = update_interval (i, charpos);
387 if (i->position != gl_state.e_property)
389 invalidate = false;
390 gl_state.backward_i = i;
391 gl_state.b_property = i->position - gl_state.offset;
395 update:
396 tmp_table = textget (i->plist, Qsyntax_table);
398 if (invalidate)
399 invalidate = !EQ (tmp_table, gl_state.old_prop); /* Need to invalidate? */
401 if (invalidate) /* Did not get to adjacent interval. */
402 { /* with the same table => */
403 /* invalidate the old range. */
404 if (count > 0)
406 gl_state.backward_i = i;
407 gl_state.b_property = i->position - gl_state.offset;
409 else
411 gl_state.forward_i = i;
412 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
416 if (!EQ (tmp_table, gl_state.old_prop))
418 gl_state.current_syntax_table = tmp_table;
419 gl_state.old_prop = tmp_table;
420 if (EQ (Fsyntax_table_p (tmp_table), Qt))
422 gl_state.use_global = 0;
424 else if (CONSP (tmp_table))
426 gl_state.use_global = 1;
427 gl_state.global_code = tmp_table;
429 else
431 gl_state.use_global = 0;
432 gl_state.current_syntax_table = BVAR (current_buffer, syntax_table);
436 while (i)
438 if (cnt && !EQ (tmp_table, textget (i->plist, Qsyntax_table)))
440 if (count > 0)
442 gl_state.e_property = i->position - gl_state.offset;
443 gl_state.forward_i = i;
445 else
447 gl_state.b_property
448 = i->position + LENGTH (i) - gl_state.offset;
449 gl_state.backward_i = i;
451 return;
453 else if (cnt == INTERVALS_AT_ONCE)
455 if (count > 0)
457 gl_state.e_property
458 = i->position + LENGTH (i) - gl_state.offset
459 /* e_property at EOB is not set to ZV but to ZV+1, so that
460 we can do INC(from);UPDATE_SYNTAX_TABLE_FORWARD without
461 having to check eob between the two. */
462 + (next_interval (i) ? 0 : 1);
463 gl_state.forward_i = i;
465 else
467 gl_state.b_property = i->position - gl_state.offset;
468 gl_state.backward_i = i;
470 return;
472 cnt++;
473 i = count > 0 ? next_interval (i) : previous_interval (i);
475 eassert (i == NULL); /* This property goes to the end. */
476 if (count > 0)
478 gl_state.e_property = gl_state.stop;
479 gl_state.forward_i = i;
481 else
482 gl_state.b_property = gl_state.start;
485 static void
486 parse_sexp_propertize (ptrdiff_t charpos)
488 EMACS_INT zv = ZV;
489 if (syntax_propertize__done <= charpos
490 && syntax_propertize__done < zv)
492 EMACS_INT modiffs = CHARS_MODIFF;
493 safe_call1 (Qinternal__syntax_propertize,
494 make_number (min (zv, 1 + charpos)));
495 if (modiffs != CHARS_MODIFF)
496 error ("parse-sexp-propertize-function modified the buffer!");
497 if (syntax_propertize__done <= charpos
498 && syntax_propertize__done < zv)
499 error ("parse-sexp-propertize-function did not move"
500 " syntax-propertize--done");
501 SETUP_SYNTAX_TABLE (charpos, 1);
503 else if (gl_state.e_property > syntax_propertize__done)
505 gl_state.e_property = syntax_propertize__done;
506 gl_state.e_property_truncated = true;
508 else if (gl_state.e_property_truncated
509 && gl_state.e_property < syntax_propertize__done)
510 { /* When moving backward, e_property might be set without resetting
511 e_property_truncated, so the e_property_truncated flag may
512 occasionally be left raised spuriously. This should be rare. */
513 gl_state.e_property_truncated = false;
514 update_syntax_table_forward (charpos, false, Qnil);
518 void
519 update_syntax_table_forward (ptrdiff_t charpos, bool init,
520 Lisp_Object object)
522 if (gl_state.e_property_truncated)
524 eassert (NILP (object));
525 eassert (charpos >= gl_state.e_property);
526 parse_sexp_propertize (charpos);
528 else
530 update_syntax_table (charpos, 1, init, object);
531 if (NILP (object) && gl_state.e_property > syntax_propertize__done)
532 parse_sexp_propertize (charpos);
536 /* Returns true if char at CHARPOS is quoted.
537 Global syntax-table data should be set up already to be good at CHARPOS
538 or after. On return global syntax data is good for lookup at CHARPOS. */
540 static bool
541 char_quoted (ptrdiff_t charpos, ptrdiff_t bytepos)
543 enum syntaxcode code;
544 ptrdiff_t beg = BEGV;
545 bool quoted = 0;
546 ptrdiff_t orig = charpos;
548 while (charpos > beg)
550 int c;
551 DEC_BOTH (charpos, bytepos);
553 UPDATE_SYNTAX_TABLE_BACKWARD (charpos);
554 c = FETCH_CHAR_AS_MULTIBYTE (bytepos);
555 code = SYNTAX (c);
556 if (! (code == Scharquote || code == Sescape))
557 break;
559 quoted = !quoted;
562 UPDATE_SYNTAX_TABLE (orig);
563 return quoted;
566 /* Return the bytepos one character before BYTEPOS.
567 We assume that BYTEPOS is not at the start of the buffer. */
569 static ptrdiff_t
570 dec_bytepos (ptrdiff_t bytepos)
572 if (NILP (BVAR (current_buffer, enable_multibyte_characters)))
573 return bytepos - 1;
575 DEC_POS (bytepos);
576 return bytepos;
579 /* Return the SYNTAX_COMEND_FIRST of the character before POS, POS_BYTE. */
581 static bool
582 prev_char_comend_first (ptrdiff_t pos, ptrdiff_t pos_byte)
584 int c;
585 bool val;
587 DEC_BOTH (pos, pos_byte);
588 UPDATE_SYNTAX_TABLE_BACKWARD (pos);
589 c = FETCH_CHAR (pos_byte);
590 val = SYNTAX_COMEND_FIRST (c);
591 UPDATE_SYNTAX_TABLE_FORWARD (pos + 1);
592 return val;
595 /* `literal-cache' text properties
596 -------------------------------
597 These are applied to all text between BOB and `literal-cache-hwm'
598 which is in literals. They record what type of literal the current
599 character is in.
601 On a buffer change (when `inhibit-modification-hooks' is nil), any
602 buffer change (including changing text-properties) will reduce
603 `literal-cache-hwm' to the change position, if it is higher. When
604 `inhibit-modification-hooks' is non-nil, only changes to the
605 `syntax-table' text property (possibly via a `category' text property)
606 which affect the scanning of literals cause the setting of
607 `literal-cache-hwm'.
609 The `literal-cache' text property for a literal is applied on the text
610 between just after its opening delimiter and just after its closing
611 delimiter.
613 The value of the `literal-cache' text property is a cons. For a
614 string, its car is the symbol `string' and its cdr is the expected
615 closing delimiter (or ST_STRING_STYLE in the case of a string fence
616 string). For a comment, the car is -1 for a non-nestable comment, or
617 the current nesting depth for a nestable comment. When not in a
618 literal, no `literal-cache' text property exists at that place. These
619 values match the internal values used in `scan_sexps_forward. */
621 DEFUN ("trim-literal-cache", Ftrim_literal_cache, Strim_literal_cache, 0, 1, 0,
622 doc: /* Mark the selected buffer's "comment cache" as invalid from POS.
623 By default, POS is the beginning of the buffer (position 1). If the cache is
624 already invalid from an earlier position than POS, this function has no
625 effect. The return value is the new bound. */)
626 (Lisp_Object pos)
628 ptrdiff_t position, cache_limit;
630 if (!NILP (pos))
632 CHECK_NUMBER (pos);
633 position = max (XINT (pos), 1);
635 else
636 position = 1;
637 cache_limit = XINT (BVAR (current_buffer, literal_cache_hwm));
638 BVAR (current_buffer, literal_cache_hwm)
639 = make_number (min (cache_limit, position));
640 return BVAR (current_buffer, literal_cache_hwm);
643 /* Empty the literal-cache of every buffer whose syntax table is
644 currently set to SYNTAB. */
645 void
646 empty_syntax_tables_buffers_literal_caches (Lisp_Object syntab)
648 Lisp_Object buf, buf_list;
649 Lisp_Object one = make_number (1);
650 struct buffer *b;
652 buf_list = Fbuffer_list (Qnil);
653 while (!NILP (buf_list))
655 buf = XCAR (buf_list);
656 b = XBUFFER (buf);
657 if (EQ (BVAR (b, syntax_table), syntab))
658 BVAR (b, literal_cache_hwm) = one;
659 buf_list = XCDR (buf_list);
663 #define LITERAL_MASK ((1 << Sstring) \
664 | (1 << Sescape) \
665 | (1 << Scharquote) \
666 | (1 << Scomment) \
667 | (1 << Sendcomment) \
668 | (1 << Scomment_fence) \
669 | (1 << Sstring_fence))
671 /* The following returns true if ELT (which will be a raw syntax
672 descriptor (see page "Syntax Table Internals" in the Elisp manual)
673 or nil) represents a syntax which is (potentially) relevant to
674 strings or comments. */
675 INLINE bool
676 SYNTAB_LITERAL (Lisp_Object elt)
678 int ielt;
679 if (!CONSP (elt))
680 return false;
681 ielt = XINT (XCAR (elt));
682 return (ielt & 0xF0000) /* a comment flag is set */
683 || ((1 << (ielt & 0xFF)) & LITERAL_MASK); /* One of Sstring, .... */
686 static
687 bool syntax_table_value_is_interesting_for_literals (Lisp_Object val)
689 ptrdiff_t syntax, code;
690 if (!CONSP (val)
691 || !INTEGERP (XCAR (val)))
692 return false;
693 return SYNTAB_LITERAL (XCAR (val));
696 /* The text property PROP is having its value VAL at position POS in buffer BUF
697 either set or cleared. If this value is relevant to the syntax of literals,
698 reduce the BUF's value of literal_cache_hwm to POS. */
699 void
700 check_literal_cache_hwm_for_prop (ptrdiff_t pos, Lisp_Object prop,
701 Lisp_Object val, Lisp_Object buffer)
703 struct buffer *b;
704 ptrdiff_t hwm;
705 Lisp_Object plist;
707 if (!BUFFERP (buffer))
708 return;
709 b = XBUFFER (buffer);
710 hwm = XINT (BVAR (b, literal_cache_hwm));
711 if (pos >= hwm)
712 return;
714 if (EQ (prop, Qcategory)
715 && SYMBOLP (val))
717 plist = Fsymbol_plist (val);
718 while (CONSP (plist))
720 prop = XCAR (plist);
721 plist = XCDR (plist);
722 if (!CONSP (plist))
723 return;
724 val = XCAR (plist);
725 if (EQ (prop, Qsyntax_table))
726 break;
727 plist = XCDR (plist);
730 if (EQ (prop, Qsyntax_table)
731 && syntax_table_value_is_interesting_for_literals (val))
732 BVAR (b, literal_cache_hwm) = make_number (pos);
735 /* Scan forward over the innards of a containing comment, marking
736 nested comments. FROM/FROM_BYTE, TO delimit the region to be marked.
737 LITERAL_CACHE_VALUE is the value of the `literal-cache' property that
738 was applied to the containing comment. */
739 static void
740 scan_nested_comments_forward (ptrdiff_t from, ptrdiff_t from_byte,
741 ptrdiff_t to,
742 Lisp_Object literal_cache_value)
744 Lisp_Object tem;
745 int comstyle = XINT (XCDR (literal_cache_value));
746 struct lisp_parse_state state;
748 /* Increment the nesting depth. */
749 literal_cache_value =
750 Fcons (make_number (XINT (XCAR (literal_cache_value)) + 1),
751 XCDR (literal_cache_value));
752 /* Make sure our text property value is `eq' to other values which
753 are `equal'. */
754 tem = Fmember (literal_cache_value, Vliteral_cache_values);
755 if (CONSP (tem))
756 literal_cache_value = XCAR (tem);
757 else
758 Vliteral_cache_values = Fcons (literal_cache_value,
759 Vliteral_cache_values);
761 UPDATE_SYNTAX_TABLE_BACKWARD (from);
762 internalize_parse_state (Qnil, &state);
764 while (from < to)
766 scan_sexps_forward (&state, from, from_byte, to,
767 TYPE_MINIMUM (EMACS_INT), false,
768 -1); /* Stop after literal boundary. */
769 from = state.location;
770 from_byte = state.location_byte;
772 if (state.instring != -1)
773 state.instring = -1; /* Ignore string delim we've passed. */
774 else if (state.incomment <= 0
775 || state.comstyle != comstyle)
776 state.incomment = 0; /* Ignore a wrong type comment opener
777 we've passed. */
778 else if (from < to)
780 /* We're at the start of the innards of a nested comment
781 of the right type. We know the next scan will stop at
782 the end of this comment. */
783 scan_sexps_forward (&state, from, from_byte, to,
784 TYPE_MINIMUM (EMACS_INT), false,
785 -1);
786 Fput_text_property (make_number (from),
787 make_number (state.location),
788 Qliteral_cache,
789 literal_cache_value, Qnil);
790 scan_nested_comments_forward (from, from_byte,
791 state.location,
792 literal_cache_value);
793 from = state.location;
794 from_byte = state.location_byte;
799 /* Scan forward over all text between literal-cache-hwm and TO,
800 marking literals (strings and comments) with the `literal-cache'
801 text property. `literal-cache-hwm' is updated to TO. */
802 static void
803 scan_comments_forward_to (ptrdiff_t to, ptrdiff_t to_byte)
805 ptrdiff_t count = SPECPDL_INDEX ();
806 ptrdiff_t hwm, hwm_byte;
807 struct lisp_parse_state state;
808 ptrdiff_t orig_begv = BEGV, orig_begv_byte = BEGV_BYTE;
809 ptrdiff_t tmp, tmp_byte;
810 int c, syntax;
811 enum syntaxcode code;
812 Lisp_Object depth;
813 Lisp_Object literal_cache_value;
814 Lisp_Object tem;
816 hwm = XINT (BVAR (current_buffer, literal_cache_hwm));
818 if (hwm < to)
820 record_unwind_protect (save_restriction_restore,
821 save_restriction_save ());
822 BEGV = BEG; BEGV_BYTE = BEG_BYTE;
824 hwm_byte = CHAR_TO_BYTE (hwm);
825 /* We mustn't start scanning just after the first half of a
826 double character comment starter or ender. */
827 if (hwm > BEG)
829 tmp = hwm; tmp_byte = hwm_byte;
832 DEC_BOTH (tmp, tmp_byte);
833 UPDATE_SYNTAX_TABLE_BACKWARD (tmp);
834 c = FETCH_CHAR_AS_MULTIBYTE (tmp_byte);
835 syntax = SYNTAX_WITH_FLAGS (c);
836 code = SYNTAX (c);
838 while (tmp > BEG
839 && (code == Sescape
840 || (syntax & 0xF0000))); /* Flags `1', `2', `3', `4'. */
841 if (tmp > BEG)
842 INC_BOTH (tmp, tmp_byte);
843 hwm = tmp; hwm_byte = tmp_byte;
846 internalize_parse_state (Qnil, &state);
847 if (hwm > BEG)
848 /* Initialize STATE with the current value of the
849 `literal-cache' text property. */
851 depth = Fget_text_property (make_number (hwm - 1),
852 Qliteral_cache, Qnil);
853 if (CONSP (depth))
855 if (EQ (Fcar (depth), Qstring))
857 state.instring = XINT (Fcdr (depth));
858 state.incomment = 0;
860 else
862 state.instring = -1;
863 state.incomment = XINT (Fcar (depth));
864 state.comstyle = XINT (Fcdr (depth));
870 /* Setup the buffer to write text properties discreetly. */
871 Lisp_Object modified = Fbuffer_modified_p (Qnil);
872 ptrdiff_t count1 = SPECPDL_INDEX ();
874 specbind (Qinhibit_modification_hooks, Qt);
875 specbind (intern ("buffer-undo-list"), Qt);
876 specbind (Qinhibit_read_only, Qt);
877 specbind (Qdeactivate_mark, Qnil);
878 if (NILP (modified))
879 record_unwind_protect
880 ((void (*) (Lisp_Object))Frestore_buffer_modified_p, Qnil);
882 while (hwm < to)
884 /* For each literal we scan, we apply the `literal-cache'
885 property on its innards and closing delimiter. Calculate
886 the value we will use first. */
887 literal_cache_value = (state.instring != -1)
888 ? Fcons (Qstring, make_number (state.instring))
889 : (state.incomment
890 ? Fcons (make_number (state.incomment),
891 make_number (state.comstyle))
892 : Qnil);
893 /* Ensure all `equal' values of literal-cache-value are also `eq'. */
894 if (!NILP (literal_cache_value))
896 tem = Fmember (literal_cache_value, Vliteral_cache_values);
897 if (CONSP (tem))
898 literal_cache_value = XCAR (tem);
899 else
900 Vliteral_cache_values = Fcons (literal_cache_value,
901 Vliteral_cache_values);
904 scan_sexps_forward (&state, hwm, hwm_byte, to,
905 TYPE_MINIMUM (EMACS_INT), false,
906 -1); /* stop after literal boundary */
908 if (!NILP (literal_cache_value))
909 Fput_text_property (make_number (hwm),
910 make_number (state.location),
911 Qliteral_cache,
912 literal_cache_value, Qnil);
913 else
914 Fremove_list_of_text_properties
915 (make_number (hwm),
916 make_number (state.location),
917 Fcons (Qliteral_cache, Qnil), Qnil);
919 if (!NILP (literal_cache_value)
920 && NUMBERP (XCAR (literal_cache_value))
921 && XINT (XCAR (literal_cache_value)) > 0)
922 scan_nested_comments_forward
923 (hwm, hwm_byte, state.location, literal_cache_value);
925 hwm = state.location;
926 hwm_byte = state.location_byte;
928 unbind_to (count1, Qnil);
929 if (NILP (modified))
930 /* Frestore_buffer_modified_p overwrites gl_state, hence: */
931 SETUP_SYNTAX_TABLE (to, -1);
933 BVAR (current_buffer, literal_cache_hwm) = make_number (hwm);
934 unbind_to (count, Qnil);
938 /* Check whether charpos FROM is at the end of a comment.
939 FROM_BYTE is the bytepos corresponding to FROM.
940 Do not move back before STOP.
942 Return true if we find a comment ending at FROM/FROM_BYTE.
944 If successful, store the charpos of the comment's beginning
945 into *CHARPOS_PTR, and the bytepos into *BYTEPOS_PTR.
947 Global syntax data remains valid for backward search starting at
948 the returned value (or at FROM, if the search was not successful). */
949 static bool
950 back_comment (ptrdiff_t from, ptrdiff_t from_byte, ptrdiff_t stop,
951 bool comnested, int comstyle, ptrdiff_t *charpos_ptr,
952 ptrdiff_t *bytepos_ptr)
954 Lisp_Object depth;
955 ptrdiff_t literal_cache, target_depth, comment_style;
956 Lisp_Object temp;
957 int c;
958 int syntax, code;
960 scan_comments_forward_to (from, from_byte);
961 if (from <= stop)
962 return false;
963 depth = Fget_text_property (make_number (from - 1), Qliteral_cache, Qnil);
964 if (!CONSP (depth) /* nil, not in a literal. */
965 || !INTEGERP (XCAR (depth))) /* A string. */
966 return false;
967 literal_cache = XINT (XCAR (depth));
968 comment_style = XINT (XCDR (depth));
969 if (comment_style != comstyle) /* Wrong sort of comment. This
970 can happen with "*|" at the
971 end of a "||" line comment. */
972 return false;
974 /* literal_cache: -1 is a non-nested comment, otherwise it's
975 the depth of nesting of nested comments. */
976 target_depth = literal_cache < 0 ? 0 : literal_cache - 1;
979 temp = Fprevious_single_property_change (make_number (from),
980 Qliteral_cache, Qnil, Qnil);
981 if (NILP (temp))
982 return false;
983 from = XINT (temp);
985 while (from > stop
986 && (depth = Fget_text_property (make_number (from - 1),
987 Qliteral_cache, Qnil),
988 !NILP (depth))
989 && XINT (XCAR (depth)) > target_depth);
990 if (from <= stop)
991 return false;
992 from_byte = CHAR_TO_BYTE (from);
994 /* Having passed back over the body of the comment, we should now find a
995 comment opener. */
996 DEC_BOTH (from, from_byte);
997 UPDATE_SYNTAX_TABLE_BACKWARD (from);
999 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
1000 syntax = SYNTAX_WITH_FLAGS (c);
1001 code = SYNTAX (c);
1002 if (code != Scomment && code != Scomment_fence)
1004 if (from <= stop)
1005 return false;
1006 if (!SYNTAX_FLAGS_COMSTART_SECOND (syntax))
1007 return false;
1008 DEC_BOTH (from, from_byte);
1009 UPDATE_SYNTAX_TABLE_BACKWARD (from);
1010 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
1011 syntax = SYNTAX_WITH_FLAGS (c);
1012 if (!SYNTAX_FLAGS_COMSTART_FIRST (syntax))
1013 return false;
1015 *charpos_ptr = from;
1016 *bytepos_ptr = from_byte;
1017 return true;
1020 /* If the two syntax entries OLD_SYN and NEW_SYN would parse strings
1021 or comments differently return true, otherwise return nil. */
1022 INLINE bool
1023 literally_different (Lisp_Object old_syn, Lisp_Object new_syn)
1025 bool old_literality = SYNTAB_LITERAL (old_syn),
1026 new_literality = SYNTAB_LITERAL (new_syn);
1027 return (old_literality != new_literality)
1028 || (old_literality
1029 && (!EQ (XCAR (old_syn), XCAR (new_syn))));
1032 /* If there is a character position in the range [START, END] for
1033 whose syntaxes in syntax tables OLD and NEW strings or comments
1034 might be parsed differently, return the lowest character for which
1035 this holds. Otherwise, return -1. */
1037 syntax_table_ranges_differ_literally_p (Lisp_Object old, Lisp_Object new,
1038 int start, int end)
1040 int old_from, new_from, old_to, new_to;
1041 Lisp_Object old_syn, new_syn;
1042 bool old_literality, new_literality;
1044 new_from = old_from = start;
1045 new_to = old_to = -1;
1047 while ((old_from < end) && (new_from < end))
1049 if (old_from == new_from)
1051 old_syn = char_table_ref_and_range_with_parents (old, old_from,
1052 &old_from, &old_to);
1053 new_syn = char_table_ref_and_range_with_parents (new, new_from,
1054 &new_from, &new_to);
1055 if (literally_different (old_syn, new_syn))
1056 return old_from;
1057 old_from = old_to + 1;
1058 new_from = new_to + 1;
1059 old_to = -1;
1060 new_to = -1;
1062 else if (old_from < new_from)
1064 old_syn = char_table_ref_and_range_with_parents (old, old_from,
1065 &old_from, &old_to);
1066 if (literally_different (old_syn, new_syn))
1067 return old_from;
1068 old_from = old_to + 1;
1069 old_to = -1;
1071 else
1073 new_syn = char_table_ref_and_range_with_parents (new, new_from,
1074 &new_from, &new_to);
1075 if (literally_different (old_syn, new_syn))
1076 return new_from;
1077 new_from = new_to + 1;
1078 new_to = -1;
1081 return -1;
1084 DEFUN ("least-literal-difference-between-syntax-tables",
1085 Fleast_literal_difference_between_syntax_tables,
1086 Sleast_literal_difference_between_syntax_tables,
1087 2, 2, 0,
1088 doc: /* Lowest char whose different syntaxes in OLD and NEW parse literals differently.
1089 OLD and NEW are syntax tables. */)
1090 (Lisp_Object old, Lisp_Object new)
1092 int c;
1094 check_syntax_table (old);
1095 check_syntax_table (new);
1096 c = syntax_table_ranges_differ_literally_p (old, new, 0, MAX_CHAR + 1);
1097 if (c >= 0)
1098 return make_number (c);
1099 return Qnil;
1102 DEFUN ("syntax-tables-literally-different-p",
1103 Fsyntax_tables_literally_different_p,
1104 Ssyntax_tables_literally_different_p,
1105 2, 2, 0,
1106 doc: /* Will syntax tables OLD and NEW parse literals differently?
1107 Return t when OLD and NEW might parse comments and strings differently,
1108 otherwise nil. (Use `least-literal-difference-between-syntax-tables'
1109 to locate a character position where the tables differ.) */)
1110 (Lisp_Object old, Lisp_Object new)
1112 Lisp_Object extra;
1114 check_syntax_table (old);
1115 check_syntax_table (new);
1116 /* Check to see if there is a cached relationship between the tables. */
1117 if (Fmemq (new, XCHAR_TABLE (old)->extras[0]))
1118 return Qnil;
1119 if (Fmemq (new, XCHAR_TABLE (old)->extras[1]))
1120 return Qt;
1121 /* the two tables have no known relationship, so we'll have
1122 laboriously to compare them. */
1123 if (syntax_table_ranges_differ_literally_p (old, new, 0, MAX_CHAR + 1) >= 0)
1125 /* mark the "literally different" relationship between the OLD and
1126 NEW syntax tables. */
1127 extra = Fcons (new, XCHAR_TABLE (old)->extras[1]);
1128 XCHAR_TABLE (old)->extras[1] = extra;
1129 extra = Fcons (old, XCHAR_TABLE (new)->extras[1]);
1130 XCHAR_TABLE (new)->extras[1] = extra;
1131 return Qt;
1133 else
1135 /* mark the "not literally different" relationship between the OLD
1136 and NEW syntax tables. */
1137 extra = Fcons (new, XCHAR_TABLE (old)->extras[0]);
1138 XCHAR_TABLE (old)->extras[0] = extra;
1139 extra = Fcons (old, XCHAR_TABLE (new)->extras[0]);
1140 XCHAR_TABLE (new)->extras[0] = extra;
1141 return Qnil;
1145 /* If any character in the range [START, END) has an entry in syntax
1146 table SYNTAB which is relevant to literal parsing, return true,
1147 else return false. */
1148 bool
1149 syntax_table_value_range_is_interesting_for_literals (Lisp_Object syntab,
1150 int start, int end)
1152 int from, to;
1153 Lisp_Object syn;
1155 from = start;
1156 to = end;
1157 while (from < to)
1159 syn = char_table_ref_and_range_with_parents (syntab, from, &from, &to);
1160 if (SYNTAB_LITERAL (syn))
1161 return true;
1162 from = to + 1;
1163 to = end;
1165 return false;
1169 /* In the syntax table SYNTAB, in the 0th and 1st extra slots are
1170 lists of other syntax tables which are known to be "literally the
1171 same" and "literally different" respectively. Those other tables
1172 will each contain SYNTAB in their extra slots. Remove all these
1173 syntax tables from all these extra slots; this will leave both of
1174 the slots on SYNTAB nil. */
1175 void
1176 break_off_syntax_tables_literal_relations (Lisp_Object syntab)
1178 struct Lisp_Char_Table *c = XCHAR_TABLE (syntab);
1179 Lisp_Object remote_tab;
1180 struct Lisp_Char_Table *r;
1181 Lisp_Object syntab_extra, remote_extra;
1183 syntab_extra = c->extras[0];
1184 while (!NILP (syntab_extra))
1186 remote_tab = XCAR (syntab_extra);
1187 r = XCHAR_TABLE (remote_tab);
1188 remote_extra = r->extras[0];
1189 r->extras[0] = Fdelq (syntab, remote_extra);
1190 syntab_extra = XCDR (syntab_extra);
1192 c->extras[0] = Qnil;
1194 syntab_extra = c->extras[1];
1195 while (!NILP (syntab_extra))
1197 remote_tab = XCAR (syntab_extra);
1198 r = XCHAR_TABLE (remote_tab);
1199 remote_extra = r->extras[1];
1200 r->extras[1] = Fdelq (syntab, remote_extra);
1201 syntab_extra = XCDR (syntab_extra);
1203 c->extras[1] = Qnil;
1207 DEFUN ("syntax-table-p", Fsyntax_table_p, Ssyntax_table_p, 1, 1, 0,
1208 doc: /* Return t if OBJECT is a syntax table.
1209 Currently, any char-table counts as a syntax table. */)
1210 (Lisp_Object object)
1212 if (CHAR_TABLE_P (object)
1213 && EQ (XCHAR_TABLE (object)->purpose, Qsyntax_table))
1214 return Qt;
1215 return Qnil;
1218 static void
1219 check_syntax_table (Lisp_Object obj)
1221 CHECK_TYPE (CHAR_TABLE_P (obj) && EQ (XCHAR_TABLE (obj)->purpose, Qsyntax_table),
1222 Qsyntax_table_p, obj);
1225 DEFUN ("syntax-table", Fsyntax_table, Ssyntax_table, 0, 0, 0,
1226 doc: /* Return the current syntax table.
1227 This is the one specified by the current buffer. */)
1228 (void)
1230 return BVAR (current_buffer, syntax_table);
1233 DEFUN ("standard-syntax-table", Fstandard_syntax_table,
1234 Sstandard_syntax_table, 0, 0, 0,
1235 doc: /* Return the standard syntax table.
1236 This is the one used for new buffers. */)
1237 (void)
1239 return Vstandard_syntax_table;
1242 DEFUN ("copy-syntax-table", Fcopy_syntax_table, Scopy_syntax_table, 0, 1, 0,
1243 doc: /* Construct a new syntax table and return it.
1244 It is a copy of the TABLE, which defaults to the standard syntax table. */)
1245 (Lisp_Object table)
1247 Lisp_Object copy;
1249 if (!NILP (table))
1250 check_syntax_table (table);
1251 else
1252 table = Vstandard_syntax_table;
1254 copy = Fcopy_sequence (table);
1256 /* Only the standard syntax table should have a default element.
1257 Other syntax tables should inherit from parents instead. */
1258 set_char_table_defalt (copy, Qnil);
1260 /* Copied syntax tables should all have parents.
1261 If we copied one with no parent, such as the standard syntax table,
1262 use the standard syntax table as the copy's parent. */
1263 if (NILP (XCHAR_TABLE (copy)->parent))
1264 Fset_char_table_parent (copy, Vstandard_syntax_table);
1265 return copy;
1268 DEFUN ("set-syntax-table", Fset_syntax_table, Sset_syntax_table, 1, 1, 0,
1269 doc: /* Select a new syntax table for the current buffer.
1270 One argument, a syntax table. */)
1271 (Lisp_Object table)
1273 int idx;
1274 check_syntax_table (table);
1275 if (Fsyntax_table_p (BVAR (current_buffer, syntax_table))
1276 && !NILP (Fsyntax_tables_literally_different_p
1277 (BVAR (current_buffer, syntax_table), table)))
1278 Ftrim_literal_cache (Qnil);
1279 bset_syntax_table (current_buffer, table);
1280 /* Indicate that this buffer now has a specified syntax table. */
1281 idx = PER_BUFFER_VAR_IDX (syntax_table);
1282 SET_PER_BUFFER_VALUE_P (current_buffer, idx, 1);
1283 return table;
1286 /* Convert a letter which signifies a syntax code
1287 into the code it signifies.
1288 This is used by modify-syntax-entry, and other things. */
1290 unsigned char const syntax_spec_code[0400] =
1291 { 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1292 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1293 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1294 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1295 Swhitespace, Scomment_fence, Sstring, 0377, Smath, 0377, 0377, Squote,
1296 Sopen, Sclose, 0377, 0377, 0377, Swhitespace, Spunct, Scharquote,
1297 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1298 0377, 0377, 0377, 0377, Scomment, 0377, Sendcomment, 0377,
1299 Sinherit, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* @, A ... */
1300 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1301 0377, 0377, 0377, 0377, 0377, 0377, 0377, Sword,
1302 0377, 0377, 0377, 0377, Sescape, 0377, 0377, Ssymbol,
1303 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* `, a, ... */
1304 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1305 0377, 0377, 0377, 0377, 0377, 0377, 0377, Sword,
1306 0377, 0377, 0377, 0377, Sstring_fence, 0377, 0377, 0377
1309 /* Indexed by syntax code, give the letter that describes it. */
1311 char const syntax_code_spec[16] =
1313 ' ', '.', 'w', '_', '(', ')', '\'', '\"', '$', '\\', '/', '<', '>', '@',
1314 '!', '|'
1317 /* Indexed by syntax code, give the object (cons of syntax code and
1318 nil) to be stored in syntax table. Since these objects can be
1319 shared among syntax tables, we generate them in advance. By
1320 sharing objects, the function `describe-syntax' can give a more
1321 compact listing. */
1322 static Lisp_Object Vsyntax_code_object;
1325 DEFUN ("char-syntax", Fchar_syntax, Schar_syntax, 1, 1, 0,
1326 doc: /* Return the syntax code of CHARACTER, described by a character.
1327 For example, if CHARACTER is a word constituent, the
1328 character `w' (119) is returned.
1329 The characters that correspond to various syntax codes
1330 are listed in the documentation of `modify-syntax-entry'. */)
1331 (Lisp_Object character)
1333 int char_int;
1334 CHECK_CHARACTER (character);
1335 char_int = XINT (character);
1336 SETUP_BUFFER_SYNTAX_TABLE ();
1337 return make_number (syntax_code_spec[SYNTAX (char_int)]);
1340 DEFUN ("matching-paren", Fmatching_paren, Smatching_paren, 1, 1, 0,
1341 doc: /* Return the matching parenthesis of CHARACTER, or nil if none. */)
1342 (Lisp_Object character)
1344 int char_int;
1345 enum syntaxcode code;
1346 CHECK_CHARACTER (character);
1347 char_int = XINT (character);
1348 SETUP_BUFFER_SYNTAX_TABLE ();
1349 code = SYNTAX (char_int);
1350 if (code == Sopen || code == Sclose)
1351 return SYNTAX_MATCH (char_int);
1352 return Qnil;
1355 DEFUN ("string-to-syntax", Fstring_to_syntax, Sstring_to_syntax, 1, 1, 0,
1356 doc: /* Convert a syntax descriptor STRING into a raw syntax descriptor.
1357 STRING should be a string of the form allowed as argument of
1358 `modify-syntax-entry'. The return value is a raw syntax descriptor: a
1359 cons cell (CODE . MATCHING-CHAR) which can be used, for example, as
1360 the value of a `syntax-table' text property. */)
1361 (Lisp_Object string)
1363 const unsigned char *p;
1364 int val;
1365 Lisp_Object match;
1367 CHECK_STRING (string);
1369 p = SDATA (string);
1370 val = syntax_spec_code[*p++];
1371 if (val == 0377)
1372 error ("Invalid syntax description letter: %c", p[-1]);
1374 if (val == Sinherit)
1375 return Qnil;
1377 if (*p)
1379 int len;
1380 int character = STRING_CHAR_AND_LENGTH (p, len);
1381 XSETINT (match, character);
1382 if (XFASTINT (match) == ' ')
1383 match = Qnil;
1384 p += len;
1386 else
1387 match = Qnil;
1389 while (*p)
1390 switch (*p++)
1392 case '1':
1393 val |= 1 << 16;
1394 break;
1396 case '2':
1397 val |= 1 << 17;
1398 break;
1400 case '3':
1401 val |= 1 << 18;
1402 break;
1404 case '4':
1405 val |= 1 << 19;
1406 break;
1408 case 'p':
1409 val |= 1 << 20;
1410 break;
1412 case 'b':
1413 val |= 1 << 21;
1414 break;
1416 case 'n':
1417 val |= 1 << 22;
1418 break;
1420 case 'c':
1421 val |= 1 << 23;
1422 break;
1425 if (val < ASIZE (Vsyntax_code_object) && NILP (match))
1426 return AREF (Vsyntax_code_object, val);
1427 else
1428 /* Since we can't use a shared object, let's make a new one. */
1429 return Fcons (make_number (val), match);
1432 /* I really don't know why this is interactive
1433 help-form should at least be made useful whilst reading the second arg. */
1434 DEFUN ("modify-syntax-entry", Fmodify_syntax_entry, Smodify_syntax_entry, 2, 3,
1435 "cSet syntax for character: \nsSet syntax for %s to: ",
1436 doc: /* Set syntax for character CHAR according to string NEWENTRY.
1437 The syntax is changed only for table SYNTAX-TABLE, which defaults to
1438 the current buffer's syntax table.
1439 CHAR may be a cons (MIN . MAX), in which case, syntaxes of all characters
1440 in the range MIN to MAX are changed.
1441 The first character of NEWENTRY should be one of the following:
1442 Space or - whitespace syntax. w word constituent.
1443 _ symbol constituent. . punctuation.
1444 ( open-parenthesis. ) close-parenthesis.
1445 " string quote. \\ escape.
1446 $ paired delimiter. \\=' expression quote or prefix operator.
1447 < comment starter. > comment ender.
1448 / character-quote. @ inherit from parent table.
1449 | generic string fence. ! generic comment fence.
1451 Only single-character comment start and end sequences are represented thus.
1452 Two-character sequences are represented as described below.
1453 The second character of NEWENTRY is the matching parenthesis,
1454 used only if the first character is `(' or `)'.
1455 Any additional characters are flags.
1456 Defined flags are the characters 1, 2, 3, 4, b, p, and n.
1457 1 means CHAR is the start of a two-char comment start sequence.
1458 2 means CHAR is the second character of such a sequence.
1459 3 means CHAR is the start of a two-char comment end sequence.
1460 4 means CHAR is the second character of such a sequence.
1462 There can be several orthogonal comment sequences. This is to support
1463 language modes such as C++. By default, all comment sequences are of style
1464 a, but you can set the comment sequence style to b (on the second character
1465 of a comment-start, and the first character of a comment-end sequence) and/or
1466 c (on any of its chars) using this flag:
1467 b means CHAR is part of comment sequence b.
1468 c means CHAR is part of comment sequence c.
1469 n means CHAR is part of a nestable comment sequence.
1471 p means CHAR is a prefix character for `backward-prefix-chars';
1472 such characters are treated as whitespace when they occur
1473 between expressions.
1474 usage: (modify-syntax-entry CHAR NEWENTRY &optional SYNTAX-TABLE) */)
1475 (Lisp_Object c, Lisp_Object newentry, Lisp_Object syntax_table)
1477 if (CONSP (c))
1479 CHECK_CHARACTER_CAR (c);
1480 CHECK_CHARACTER_CDR (c);
1482 else
1483 CHECK_CHARACTER (c);
1485 if (NILP (syntax_table))
1486 syntax_table = BVAR (current_buffer, syntax_table);
1487 else
1488 check_syntax_table (syntax_table);
1490 newentry = Fstring_to_syntax (newentry);
1491 if (SYNTAB_LITERAL (newentry)
1492 || (CONSP (c)
1493 ? syntax_table_value_range_is_interesting_for_literals
1494 (syntax_table, XINT (XCAR(c)), XINT (XCDR (c)))
1495 : (SYNTAB_LITERAL (c))))
1497 empty_syntax_tables_buffers_literal_caches (syntax_table);
1498 break_off_syntax_tables_literal_relations (syntax_table);
1501 if (CONSP (c))
1502 SET_RAW_SYNTAX_ENTRY_RANGE (syntax_table, c, newentry);
1503 else
1504 SET_RAW_SYNTAX_ENTRY (syntax_table, XINT (c), newentry);
1506 /* We clear the regexp cache, since character classes can now have
1507 different values from those in the compiled regexps.*/
1508 clear_regexp_cache ();
1510 return Qnil;
1514 /* Dump syntax table to buffer in human-readable format */
1516 DEFUN ("internal-describe-syntax-value", Finternal_describe_syntax_value,
1517 Sinternal_describe_syntax_value, 1, 1, 0,
1518 doc: /* Insert a description of the internal syntax description SYNTAX at point. */)
1519 (Lisp_Object syntax)
1521 int code, syntax_code;
1522 bool start1, start2, end1, end2, prefix, comstyleb, comstylec, comnested;
1523 char str[2];
1524 Lisp_Object first, match_lisp, value = syntax;
1526 if (NILP (value))
1528 insert_string ("default");
1529 return syntax;
1532 if (CHAR_TABLE_P (value))
1534 insert_string ("deeper char-table ...");
1535 return syntax;
1538 if (!CONSP (value))
1540 insert_string ("invalid");
1541 return syntax;
1544 first = XCAR (value);
1545 match_lisp = XCDR (value);
1547 if (!INTEGERP (first) || !(NILP (match_lisp) || CHARACTERP (match_lisp)))
1549 insert_string ("invalid");
1550 return syntax;
1553 syntax_code = XINT (first) & INT_MAX;
1554 code = syntax_code & 0377;
1555 start1 = SYNTAX_FLAGS_COMSTART_FIRST (syntax_code);
1556 start2 = SYNTAX_FLAGS_COMSTART_SECOND (syntax_code);
1557 end1 = SYNTAX_FLAGS_COMEND_FIRST (syntax_code);
1558 end2 = SYNTAX_FLAGS_COMEND_SECOND (syntax_code);
1559 prefix = SYNTAX_FLAGS_PREFIX (syntax_code);
1560 comstyleb = SYNTAX_FLAGS_COMMENT_STYLEB (syntax_code);
1561 comstylec = SYNTAX_FLAGS_COMMENT_STYLEC (syntax_code);
1562 comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax_code);
1564 if (Smax <= code)
1566 insert_string ("invalid");
1567 return syntax;
1570 str[0] = syntax_code_spec[code], str[1] = 0;
1571 insert (str, 1);
1573 if (NILP (match_lisp))
1574 insert (" ", 1);
1575 else
1576 insert_char (XINT (match_lisp));
1578 if (start1)
1579 insert ("1", 1);
1580 if (start2)
1581 insert ("2", 1);
1583 if (end1)
1584 insert ("3", 1);
1585 if (end2)
1586 insert ("4", 1);
1588 if (prefix)
1589 insert ("p", 1);
1590 if (comstyleb)
1591 insert ("b", 1);
1592 if (comstylec)
1593 insert ("c", 1);
1594 if (comnested)
1595 insert ("n", 1);
1597 insert_string ("\twhich means: ");
1599 switch (code)
1601 case Swhitespace:
1602 insert_string ("whitespace"); break;
1603 case Spunct:
1604 insert_string ("punctuation"); break;
1605 case Sword:
1606 insert_string ("word"); break;
1607 case Ssymbol:
1608 insert_string ("symbol"); break;
1609 case Sopen:
1610 insert_string ("open"); break;
1611 case Sclose:
1612 insert_string ("close"); break;
1613 case Squote:
1614 insert_string ("prefix"); break;
1615 case Sstring:
1616 insert_string ("string"); break;
1617 case Smath:
1618 insert_string ("math"); break;
1619 case Sescape:
1620 insert_string ("escape"); break;
1621 case Scharquote:
1622 insert_string ("charquote"); break;
1623 case Scomment:
1624 insert_string ("comment"); break;
1625 case Sendcomment:
1626 insert_string ("endcomment"); break;
1627 case Sinherit:
1628 insert_string ("inherit"); break;
1629 case Scomment_fence:
1630 insert_string ("comment fence"); break;
1631 case Sstring_fence:
1632 insert_string ("string fence"); break;
1633 default:
1634 insert_string ("invalid");
1635 return syntax;
1638 if (!NILP (match_lisp))
1640 insert_string (", matches ");
1641 insert_char (XINT (match_lisp));
1644 if (start1)
1645 insert_string (",\n\t is the first character of a comment-start sequence");
1646 if (start2)
1647 insert_string (",\n\t is the second character of a comment-start sequence");
1649 if (end1)
1650 insert_string (",\n\t is the first character of a comment-end sequence");
1651 if (end2)
1652 insert_string (",\n\t is the second character of a comment-end sequence");
1653 if (comstyleb)
1654 insert_string (" (comment style b)");
1655 if (comstylec)
1656 insert_string (" (comment style c)");
1657 if (comnested)
1658 insert_string (" (nestable)");
1660 if (prefix)
1662 AUTO_STRING (prefixdoc,
1663 ",\n\t is a prefix character for `backward-prefix-chars'");
1664 insert1 (Fsubstitute_command_keys (prefixdoc));
1667 return syntax;
1670 /* Return the position across COUNT words from FROM.
1671 If that many words cannot be found before the end of the buffer, return 0.
1672 COUNT negative means scan backward and stop at word beginning. */
1674 ptrdiff_t
1675 scan_words (ptrdiff_t from, EMACS_INT count)
1677 ptrdiff_t beg = BEGV;
1678 ptrdiff_t end = ZV;
1679 ptrdiff_t from_byte = CHAR_TO_BYTE (from);
1680 enum syntaxcode code;
1681 int ch0, ch1;
1682 Lisp_Object func, pos;
1684 SETUP_SYNTAX_TABLE (from, count);
1686 while (count > 0)
1688 while (true)
1690 if (from == end)
1691 return 0;
1692 UPDATE_SYNTAX_TABLE_FORWARD (from);
1693 ch0 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
1694 code = SYNTAX (ch0);
1695 INC_BOTH (from, from_byte);
1696 if (words_include_escapes
1697 && (code == Sescape || code == Scharquote))
1698 break;
1699 if (code == Sword)
1700 break;
1701 rarely_quit (from);
1703 /* Now CH0 is a character which begins a word and FROM is the
1704 position of the next character. */
1705 func = CHAR_TABLE_REF (Vfind_word_boundary_function_table, ch0);
1706 if (! NILP (Ffboundp (func)))
1708 pos = call2 (func, make_number (from - 1), make_number (end));
1709 if (INTEGERP (pos) && from < XINT (pos) && XINT (pos) <= ZV)
1711 from = XINT (pos);
1712 from_byte = CHAR_TO_BYTE (from);
1715 else
1717 while (1)
1719 if (from == end) break;
1720 UPDATE_SYNTAX_TABLE_FORWARD (from);
1721 ch1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
1722 code = SYNTAX (ch1);
1723 if ((code != Sword
1724 && (! words_include_escapes
1725 || (code != Sescape && code != Scharquote)))
1726 || word_boundary_p (ch0, ch1))
1727 break;
1728 INC_BOTH (from, from_byte);
1729 ch0 = ch1;
1730 rarely_quit (from);
1733 count--;
1735 while (count < 0)
1737 while (true)
1739 if (from == beg)
1740 return 0;
1741 DEC_BOTH (from, from_byte);
1742 UPDATE_SYNTAX_TABLE_BACKWARD (from);
1743 ch1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
1744 code = SYNTAX (ch1);
1745 if (words_include_escapes
1746 && (code == Sescape || code == Scharquote))
1747 break;
1748 if (code == Sword)
1749 break;
1750 rarely_quit (from);
1752 /* Now CH1 is a character which ends a word and FROM is the
1753 position of it. */
1754 func = CHAR_TABLE_REF (Vfind_word_boundary_function_table, ch1);
1755 if (! NILP (Ffboundp (func)))
1757 pos = call2 (func, make_number (from), make_number (beg));
1758 if (INTEGERP (pos) && BEGV <= XINT (pos) && XINT (pos) < from)
1760 from = XINT (pos);
1761 from_byte = CHAR_TO_BYTE (from);
1764 else
1766 while (1)
1768 if (from == beg)
1769 break;
1770 DEC_BOTH (from, from_byte);
1771 UPDATE_SYNTAX_TABLE_BACKWARD (from);
1772 ch0 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
1773 code = SYNTAX (ch0);
1774 if ((code != Sword
1775 && (! words_include_escapes
1776 || (code != Sescape && code != Scharquote)))
1777 || word_boundary_p (ch0, ch1))
1779 INC_BOTH (from, from_byte);
1780 break;
1782 ch1 = ch0;
1783 rarely_quit (from);
1786 count++;
1789 return from;
1792 DEFUN ("forward-word", Fforward_word, Sforward_word, 0, 1, "^p",
1793 doc: /* Move point forward ARG words (backward if ARG is negative).
1794 If ARG is omitted or nil, move point forward one word.
1795 Normally returns t.
1796 If an edge of the buffer or a field boundary is reached, point is
1797 left there and the function returns nil. Field boundaries are not
1798 noticed if `inhibit-field-text-motion' is non-nil.
1800 The word boundaries are normally determined by the buffer's syntax
1801 table, but `find-word-boundary-function-table', such as set up
1802 by `subword-mode', can change that. If a Lisp program needs to
1803 move by words determined strictly by the syntax table, it should
1804 use `forward-word-strictly' instead. */)
1805 (Lisp_Object arg)
1807 Lisp_Object tmp;
1808 ptrdiff_t orig_val, val;
1810 if (NILP (arg))
1811 XSETFASTINT (arg, 1);
1812 else
1813 CHECK_NUMBER (arg);
1815 val = orig_val = scan_words (PT, XINT (arg));
1816 if (! orig_val)
1817 val = XINT (arg) > 0 ? ZV : BEGV;
1819 /* Avoid jumping out of an input field. */
1820 tmp = Fconstrain_to_field (make_number (val), make_number (PT),
1821 Qnil, Qnil, Qnil);
1822 val = XFASTINT (tmp);
1824 SET_PT (val);
1825 return val == orig_val ? Qt : Qnil;
1828 DEFUN ("skip-chars-forward", Fskip_chars_forward, Sskip_chars_forward, 1, 2, 0,
1829 doc: /* Move point forward, stopping before a char not in STRING, or at pos LIM.
1830 STRING is like the inside of a `[...]' in a regular expression
1831 except that `]' is never special and `\\' quotes `^', `-' or `\\'
1832 (but not at the end of a range; quoting is never needed there).
1833 Thus, with arg "a-zA-Z", this skips letters stopping before first nonletter.
1834 With arg "^a-zA-Z", skips nonletters stopping before first letter.
1835 Char classes, e.g. `[:alpha:]', are supported.
1837 Returns the distance traveled, either zero or positive. */)
1838 (Lisp_Object string, Lisp_Object lim)
1840 return skip_chars (1, string, lim, 1);
1843 DEFUN ("skip-chars-backward", Fskip_chars_backward, Sskip_chars_backward, 1, 2, 0,
1844 doc: /* Move point backward, stopping after a char not in STRING, or at pos LIM.
1845 See `skip-chars-forward' for details.
1846 Returns the distance traveled, either zero or negative. */)
1847 (Lisp_Object string, Lisp_Object lim)
1849 return skip_chars (0, string, lim, 1);
1852 DEFUN ("skip-syntax-forward", Fskip_syntax_forward, Sskip_syntax_forward, 1, 2, 0,
1853 doc: /* Move point forward across chars in specified syntax classes.
1854 SYNTAX is a string of syntax code characters.
1855 Stop before a char whose syntax is not in SYNTAX, or at position LIM.
1856 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1857 This function returns the distance traveled, either zero or positive. */)
1858 (Lisp_Object syntax, Lisp_Object lim)
1860 return skip_syntaxes (1, syntax, lim);
1863 DEFUN ("skip-syntax-backward", Fskip_syntax_backward, Sskip_syntax_backward, 1, 2, 0,
1864 doc: /* Move point backward across chars in specified syntax classes.
1865 SYNTAX is a string of syntax code characters.
1866 Stop on reaching a char whose syntax is not in SYNTAX, or at position LIM.
1867 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1868 This function returns either zero or a negative number, and the absolute value
1869 of this is the distance traveled. */)
1870 (Lisp_Object syntax, Lisp_Object lim)
1872 return skip_syntaxes (0, syntax, lim);
1875 static Lisp_Object
1876 skip_chars (bool forwardp, Lisp_Object string, Lisp_Object lim,
1877 bool handle_iso_classes)
1879 int c;
1880 char fastmap[0400];
1881 /* Store the ranges of non-ASCII characters. */
1882 int *char_ranges UNINIT;
1883 int n_char_ranges = 0;
1884 bool negate = 0;
1885 ptrdiff_t i, i_byte;
1886 /* True if the current buffer is multibyte and the region contains
1887 non-ASCII chars. */
1888 bool multibyte;
1889 /* True if STRING is multibyte and it contains non-ASCII chars. */
1890 bool string_multibyte;
1891 ptrdiff_t size_byte;
1892 const unsigned char *str;
1893 int len;
1894 Lisp_Object iso_classes;
1895 USE_SAFE_ALLOCA;
1897 CHECK_STRING (string);
1898 iso_classes = Qnil;
1900 if (NILP (lim))
1901 XSETINT (lim, forwardp ? ZV : BEGV);
1902 else
1903 CHECK_NUMBER_COERCE_MARKER (lim);
1905 /* In any case, don't allow scan outside bounds of buffer. */
1906 if (XINT (lim) > ZV)
1907 XSETFASTINT (lim, ZV);
1908 if (XINT (lim) < BEGV)
1909 XSETFASTINT (lim, BEGV);
1911 multibyte = (!NILP (BVAR (current_buffer, enable_multibyte_characters))
1912 && (XINT (lim) - PT != CHAR_TO_BYTE (XINT (lim)) - PT_BYTE));
1913 string_multibyte = SBYTES (string) > SCHARS (string);
1915 memset (fastmap, 0, sizeof fastmap);
1917 str = SDATA (string);
1918 size_byte = SBYTES (string);
1920 i_byte = 0;
1921 if (i_byte < size_byte
1922 && SREF (string, 0) == '^')
1924 negate = 1; i_byte++;
1927 /* Find the characters specified and set their elements of fastmap.
1928 Handle backslashes and ranges specially.
1930 If STRING contains non-ASCII characters, setup char_ranges for
1931 them and use fastmap only for their leading codes. */
1933 if (! string_multibyte)
1935 bool string_has_eight_bit = 0;
1937 /* At first setup fastmap. */
1938 while (i_byte < size_byte)
1940 if (handle_iso_classes)
1942 const unsigned char *ch = str + i_byte;
1943 re_wctype_t cc = re_wctype_parse (&ch, size_byte - i_byte);
1944 if (cc == 0)
1945 error ("Invalid ISO C character class");
1946 if (cc != -1)
1948 iso_classes = Fcons (make_number (cc), iso_classes);
1949 i_byte = ch - str;
1950 continue;
1954 c = str[i_byte++];
1956 if (c == '\\')
1958 if (i_byte == size_byte)
1959 break;
1961 c = str[i_byte++];
1963 /* Treat `-' as range character only if another character
1964 follows. */
1965 if (i_byte + 1 < size_byte
1966 && str[i_byte] == '-')
1968 int c2;
1970 /* Skip over the dash. */
1971 i_byte++;
1973 /* Get the end of the range. */
1974 c2 = str[i_byte++];
1975 if (c2 == '\\'
1976 && i_byte < size_byte)
1977 c2 = str[i_byte++];
1979 if (c <= c2)
1981 int lim2 = c2 + 1;
1982 while (c < lim2)
1983 fastmap[c++] = 1;
1984 if (! ASCII_CHAR_P (c2))
1985 string_has_eight_bit = 1;
1988 else
1990 fastmap[c] = 1;
1991 if (! ASCII_CHAR_P (c))
1992 string_has_eight_bit = 1;
1996 /* If the current range is multibyte and STRING contains
1997 eight-bit chars, arrange fastmap and setup char_ranges for
1998 the corresponding multibyte chars. */
1999 if (multibyte && string_has_eight_bit)
2001 char *p1;
2002 char himap[0200 + 1];
2003 memcpy (himap, fastmap + 0200, 0200);
2004 himap[0200] = 0;
2005 memset (fastmap + 0200, 0, 0200);
2006 SAFE_NALLOCA (char_ranges, 2, 128);
2007 i = 0;
2009 while ((p1 = memchr (himap + i, 1, 0200 - i)))
2011 /* Deduce the next range C..C2 from the next clump of 1s
2012 in HIMAP starting with &HIMAP[I]. HIMAP is the high
2013 order half of the old FASTMAP. */
2014 int c2, leading_code;
2015 i = p1 - himap;
2016 c = BYTE8_TO_CHAR (i + 0200);
2017 i += strlen (p1);
2018 c2 = BYTE8_TO_CHAR (i + 0200 - 1);
2020 char_ranges[n_char_ranges++] = c;
2021 char_ranges[n_char_ranges++] = c2;
2022 leading_code = CHAR_LEADING_CODE (c);
2023 memset (fastmap + leading_code, 1,
2024 CHAR_LEADING_CODE (c2) - leading_code + 1);
2028 else /* STRING is multibyte */
2030 SAFE_NALLOCA (char_ranges, 2, SCHARS (string));
2032 while (i_byte < size_byte)
2034 int leading_code = str[i_byte];
2036 if (handle_iso_classes)
2038 const unsigned char *ch = str + i_byte;
2039 re_wctype_t cc = re_wctype_parse (&ch, size_byte - i_byte);
2040 if (cc == 0)
2041 error ("Invalid ISO C character class");
2042 if (cc != -1)
2044 iso_classes = Fcons (make_number (cc), iso_classes);
2045 i_byte = ch - str;
2046 continue;
2050 if (leading_code== '\\')
2052 if (++i_byte == size_byte)
2053 break;
2055 leading_code = str[i_byte];
2057 c = STRING_CHAR_AND_LENGTH (str + i_byte, len);
2058 i_byte += len;
2061 /* Treat `-' as range character only if another character
2062 follows. */
2063 if (i_byte + 1 < size_byte
2064 && str[i_byte] == '-')
2066 int c2, leading_code2;
2068 /* Skip over the dash. */
2069 i_byte++;
2071 /* Get the end of the range. */
2072 leading_code2 = str[i_byte];
2073 c2 = STRING_CHAR_AND_LENGTH (str + i_byte, len);
2074 i_byte += len;
2076 if (c2 == '\\'
2077 && i_byte < size_byte)
2079 leading_code2 = str[i_byte];
2080 c2 = STRING_CHAR_AND_LENGTH (str + i_byte, len);
2081 i_byte += len;
2084 if (c > c2)
2085 continue;
2086 if (ASCII_CHAR_P (c))
2088 while (c <= c2 && c < 0x80)
2089 fastmap[c++] = 1;
2090 leading_code = CHAR_LEADING_CODE (c);
2092 if (! ASCII_CHAR_P (c))
2094 int lim2 = leading_code2 + 1;
2095 while (leading_code < lim2)
2096 fastmap[leading_code++] = 1;
2097 if (c <= c2)
2099 char_ranges[n_char_ranges++] = c;
2100 char_ranges[n_char_ranges++] = c2;
2104 else
2106 if (ASCII_CHAR_P (c))
2107 fastmap[c] = 1;
2108 else
2110 fastmap[leading_code] = 1;
2111 char_ranges[n_char_ranges++] = c;
2112 char_ranges[n_char_ranges++] = c;
2117 /* If the current range is unibyte and STRING contains non-ASCII
2118 chars, arrange fastmap for the corresponding unibyte
2119 chars. */
2121 if (! multibyte && n_char_ranges > 0)
2123 memset (fastmap + 0200, 0, 0200);
2124 for (i = 0; i < n_char_ranges; i += 2)
2126 int c1 = char_ranges[i];
2127 int lim2 = char_ranges[i + 1] + 1;
2129 for (; c1 < lim2; c1++)
2131 int b = CHAR_TO_BYTE_SAFE (c1);
2132 if (b >= 0)
2133 fastmap[b] = 1;
2139 /* If ^ was the first character, complement the fastmap. */
2140 if (negate)
2142 if (! multibyte)
2143 for (i = 0; i < sizeof fastmap; i++)
2144 fastmap[i] ^= 1;
2145 else
2147 for (i = 0; i < 0200; i++)
2148 fastmap[i] ^= 1;
2149 /* All non-ASCII chars possibly match. */
2150 for (; i < sizeof fastmap; i++)
2151 fastmap[i] = 1;
2156 ptrdiff_t start_point = PT;
2157 ptrdiff_t pos = PT;
2158 ptrdiff_t pos_byte = PT_BYTE;
2159 unsigned char *p = PT_ADDR, *endp, *stop;
2161 if (forwardp)
2163 endp = (XINT (lim) == GPT) ? GPT_ADDR : CHAR_POS_ADDR (XINT (lim));
2164 stop = (pos < GPT && GPT < XINT (lim)) ? GPT_ADDR : endp;
2166 else
2168 endp = CHAR_POS_ADDR (XINT (lim));
2169 stop = (pos >= GPT && GPT > XINT (lim)) ? GAP_END_ADDR : endp;
2172 /* This code may look up syntax tables using functions that rely on the
2173 gl_state object. To make sure this object is not out of date,
2174 let's initialize it manually.
2175 We ignore syntax-table text-properties for now, since that's
2176 what we've done in the past. */
2177 SETUP_BUFFER_SYNTAX_TABLE ();
2178 if (forwardp)
2180 if (multibyte)
2181 while (1)
2183 int nbytes;
2185 if (p >= stop)
2187 if (p >= endp)
2188 break;
2189 p = GAP_END_ADDR;
2190 stop = endp;
2192 c = STRING_CHAR_AND_LENGTH (p, nbytes);
2193 if (! NILP (iso_classes) && in_classes (c, iso_classes))
2195 if (negate)
2196 break;
2197 else
2198 goto fwd_ok;
2201 if (! fastmap[*p])
2202 break;
2203 if (! ASCII_CHAR_P (c))
2205 /* As we are looking at a multibyte character, we
2206 must look up the character in the table
2207 CHAR_RANGES. If there's no data in the table,
2208 that character is not what we want to skip. */
2210 /* The following code do the right thing even if
2211 n_char_ranges is zero (i.e. no data in
2212 CHAR_RANGES). */
2213 for (i = 0; i < n_char_ranges; i += 2)
2214 if (c >= char_ranges[i] && c <= char_ranges[i + 1])
2215 break;
2216 if (!(negate ^ (i < n_char_ranges)))
2217 break;
2219 fwd_ok:
2220 p += nbytes, pos++, pos_byte += nbytes;
2221 rarely_quit (pos);
2223 else
2224 while (true)
2226 if (p >= stop)
2228 if (p >= endp)
2229 break;
2230 p = GAP_END_ADDR;
2231 stop = endp;
2234 if (!NILP (iso_classes) && in_classes (*p, iso_classes))
2236 if (negate)
2237 break;
2238 else
2239 goto fwd_unibyte_ok;
2242 if (!fastmap[*p])
2243 break;
2244 fwd_unibyte_ok:
2245 p++, pos++, pos_byte++;
2246 rarely_quit (pos);
2249 else
2251 if (multibyte)
2252 while (true)
2254 if (p <= stop)
2256 if (p <= endp)
2257 break;
2258 p = GPT_ADDR;
2259 stop = endp;
2261 unsigned char *prev_p = p;
2263 p--;
2264 while (stop <= p && ! CHAR_HEAD_P (*p));
2266 c = STRING_CHAR (p);
2268 if (! NILP (iso_classes) && in_classes (c, iso_classes))
2270 if (negate)
2271 break;
2272 else
2273 goto back_ok;
2276 if (! fastmap[*p])
2277 break;
2278 if (! ASCII_CHAR_P (c))
2280 /* See the comment in the previous similar code. */
2281 for (i = 0; i < n_char_ranges; i += 2)
2282 if (c >= char_ranges[i] && c <= char_ranges[i + 1])
2283 break;
2284 if (!(negate ^ (i < n_char_ranges)))
2285 break;
2287 back_ok:
2288 pos--, pos_byte -= prev_p - p;
2289 rarely_quit (pos);
2291 else
2292 while (true)
2294 if (p <= stop)
2296 if (p <= endp)
2297 break;
2298 p = GPT_ADDR;
2299 stop = endp;
2302 if (! NILP (iso_classes) && in_classes (p[-1], iso_classes))
2304 if (negate)
2305 break;
2306 else
2307 goto back_unibyte_ok;
2310 if (!fastmap[p[-1]])
2311 break;
2312 back_unibyte_ok:
2313 p--, pos--, pos_byte--;
2314 rarely_quit (pos);
2318 SET_PT_BOTH (pos, pos_byte);
2320 SAFE_FREE ();
2321 return make_number (PT - start_point);
2326 static Lisp_Object
2327 skip_syntaxes (bool forwardp, Lisp_Object string, Lisp_Object lim)
2329 int c;
2330 unsigned char fastmap[0400];
2331 bool negate = 0;
2332 ptrdiff_t i, i_byte;
2333 bool multibyte;
2334 ptrdiff_t size_byte;
2335 unsigned char *str;
2337 CHECK_STRING (string);
2339 if (NILP (lim))
2340 XSETINT (lim, forwardp ? ZV : BEGV);
2341 else
2342 CHECK_NUMBER_COERCE_MARKER (lim);
2344 /* In any case, don't allow scan outside bounds of buffer. */
2345 if (XINT (lim) > ZV)
2346 XSETFASTINT (lim, ZV);
2347 if (XINT (lim) < BEGV)
2348 XSETFASTINT (lim, BEGV);
2350 if (forwardp ? (PT >= XFASTINT (lim)) : (PT <= XFASTINT (lim)))
2351 return make_number (0);
2353 multibyte = (!NILP (BVAR (current_buffer, enable_multibyte_characters))
2354 && (XINT (lim) - PT != CHAR_TO_BYTE (XINT (lim)) - PT_BYTE));
2356 memset (fastmap, 0, sizeof fastmap);
2358 if (SBYTES (string) > SCHARS (string))
2359 /* As this is very rare case (syntax spec is ASCII only), don't
2360 consider efficiency. */
2361 string = string_make_unibyte (string);
2363 str = SDATA (string);
2364 size_byte = SBYTES (string);
2366 i_byte = 0;
2367 if (i_byte < size_byte
2368 && SREF (string, 0) == '^')
2370 negate = 1; i_byte++;
2373 /* Find the syntaxes specified and set their elements of fastmap. */
2375 while (i_byte < size_byte)
2377 c = str[i_byte++];
2378 fastmap[syntax_spec_code[c]] = 1;
2381 /* If ^ was the first character, complement the fastmap. */
2382 if (negate)
2383 for (i = 0; i < sizeof fastmap; i++)
2384 fastmap[i] ^= 1;
2387 ptrdiff_t start_point = PT;
2388 ptrdiff_t pos = PT;
2389 ptrdiff_t pos_byte = PT_BYTE;
2390 unsigned char *p, *endp, *stop;
2392 SETUP_SYNTAX_TABLE (pos, forwardp ? 1 : -1);
2394 if (forwardp)
2396 while (true)
2398 p = BYTE_POS_ADDR (pos_byte);
2399 endp = XINT (lim) == GPT ? GPT_ADDR : CHAR_POS_ADDR (XINT (lim));
2400 stop = pos < GPT && GPT < XINT (lim) ? GPT_ADDR : endp;
2404 int nbytes;
2406 if (p >= stop)
2408 if (p >= endp)
2409 goto done;
2410 p = GAP_END_ADDR;
2411 stop = endp;
2413 if (multibyte)
2414 c = STRING_CHAR_AND_LENGTH (p, nbytes);
2415 else
2416 c = *p, nbytes = 1;
2417 if (! fastmap[SYNTAX (c)])
2418 goto done;
2419 p += nbytes, pos++, pos_byte += nbytes;
2420 rarely_quit (pos);
2422 while (!parse_sexp_lookup_properties
2423 || pos < gl_state.e_property);
2425 update_syntax_table_forward (pos + gl_state.offset,
2426 false, gl_state.object);
2429 else
2431 p = BYTE_POS_ADDR (pos_byte);
2432 endp = CHAR_POS_ADDR (XINT (lim));
2433 stop = pos >= GPT && GPT > XINT (lim) ? GAP_END_ADDR : endp;
2435 if (multibyte)
2437 while (true)
2439 if (p <= stop)
2441 if (p <= endp)
2442 break;
2443 p = GPT_ADDR;
2444 stop = endp;
2446 UPDATE_SYNTAX_TABLE_BACKWARD (pos - 1);
2448 unsigned char *prev_p = p;
2450 p--;
2451 while (stop <= p && ! CHAR_HEAD_P (*p));
2453 c = STRING_CHAR (p);
2454 if (! fastmap[SYNTAX (c)])
2455 break;
2456 pos--, pos_byte -= prev_p - p;
2457 rarely_quit (pos);
2460 else
2462 while (true)
2464 if (p <= stop)
2466 if (p <= endp)
2467 break;
2468 p = GPT_ADDR;
2469 stop = endp;
2471 UPDATE_SYNTAX_TABLE_BACKWARD (pos - 1);
2472 if (! fastmap[SYNTAX (p[-1])])
2473 break;
2474 p--, pos--, pos_byte--;
2475 rarely_quit (pos);
2480 done:
2481 SET_PT_BOTH (pos, pos_byte);
2483 return make_number (PT - start_point);
2487 /* Return true if character C belongs to one of the ISO classes
2488 in the list ISO_CLASSES. Each class is represented by an
2489 integer which is its type according to re_wctype. */
2491 static bool
2492 in_classes (int c, Lisp_Object iso_classes)
2494 bool fits_class = 0;
2496 while (CONSP (iso_classes))
2498 Lisp_Object elt;
2499 elt = XCAR (iso_classes);
2500 iso_classes = XCDR (iso_classes);
2502 if (re_iswctype (c, XFASTINT (elt)))
2503 fits_class = 1;
2506 return fits_class;
2509 /* Jump over a comment, assuming we are at the beginning of one.
2510 FROM is the current position.
2511 FROM_BYTE is the bytepos corresponding to FROM.
2512 Do not move past STOP (a charpos).
2513 The comment over which we have to jump is of style STYLE
2514 (either SYNTAX_FLAGS_COMMENT_STYLE (foo) or ST_COMMENT_STYLE).
2515 NESTING should be positive to indicate the nesting at the beginning
2516 for nested comments and should be zero or negative else.
2517 ST_COMMENT_STYLE cannot be nested.
2518 PREV_SYNTAX is the SYNTAX_WITH_FLAGS of the previous character
2519 (or 0 If the search cannot start in the middle of a two-character).
2521 If successful, return true and store the charpos of the comment's
2522 end into *CHARPOS_PTR and the corresponding bytepos into
2523 *BYTEPOS_PTR. Else, return false and store the charpos STOP into
2524 *CHARPOS_PTR, the corresponding bytepos into *BYTEPOS_PTR and the
2525 current nesting (as defined for state->incomment) in
2526 *INCOMMENT_PTR. Should the last character scanned in an incomplete
2527 comment be a possible first character of a two character construct,
2528 we store its SYNTAX_WITH_FLAGS into *last_syntax_ptr. Otherwise,
2529 we store Smax into *last_syntax_ptr.
2531 The comment end is the last character of the comment rather than the
2532 character just after the comment.
2534 Global syntax data is assumed to initially be valid for FROM and
2535 remains valid for forward search starting at the returned position. */
2537 static bool
2538 forw_comment (ptrdiff_t from, ptrdiff_t from_byte, ptrdiff_t stop,
2539 EMACS_INT nesting, int style, int prev_syntax,
2540 ptrdiff_t *charpos_ptr, ptrdiff_t *bytepos_ptr,
2541 EMACS_INT *incomment_ptr, int *last_syntax_ptr)
2543 unsigned short int quit_count = 0;
2544 int c, c1;
2545 enum syntaxcode code;
2546 int syntax, other_syntax;
2548 if (nesting <= 0) nesting = -1;
2550 /* Enter the loop in the middle so that we find
2551 a 2-char comment ender if we start in the middle of it. */
2552 syntax = prev_syntax;
2553 code = syntax & 0xff;
2554 if (syntax != 0 && from < stop) goto forw_incomment;
2556 while (1)
2558 if (from == stop)
2560 *incomment_ptr = nesting;
2561 *charpos_ptr = from;
2562 *bytepos_ptr = from_byte;
2563 *last_syntax_ptr =
2564 (code == Sescape || code == Scharquote
2565 || SYNTAX_FLAGS_COMEND_FIRST (syntax)
2566 || (nesting > 0
2567 && SYNTAX_FLAGS_COMSTART_FIRST (syntax)))
2568 ? syntax : Smax ;
2569 return 0;
2571 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2572 syntax = SYNTAX_WITH_FLAGS (c);
2573 code = syntax & 0xff;
2574 if (code == Sendcomment
2575 && SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0) == style
2576 && (SYNTAX_FLAGS_COMMENT_NESTED (syntax) ?
2577 (nesting > 0 && --nesting == 0) : nesting < 0)
2578 && !(Vcomment_end_can_be_escaped && char_quoted (from, from_byte)))
2579 /* We have encountered a comment end of the same style
2580 as the comment sequence which began this comment
2581 section. */
2582 break;
2583 if (code == Scomment_fence
2584 && style == ST_COMMENT_STYLE)
2585 /* We have encountered a comment end of the same style
2586 as the comment sequence which began this comment
2587 section. */
2588 break;
2589 if (nesting > 0
2590 && code == Scomment
2591 && SYNTAX_FLAGS_COMMENT_NESTED (syntax)
2592 && SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0) == style)
2593 /* We have encountered a nested comment of the same style
2594 as the comment sequence which began this comment section. */
2595 nesting++;
2596 INC_BOTH (from, from_byte);
2597 UPDATE_SYNTAX_TABLE_FORWARD (from);
2599 forw_incomment:
2600 if (from < stop && SYNTAX_FLAGS_COMEND_FIRST (syntax)
2601 && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
2602 other_syntax = SYNTAX_WITH_FLAGS (c1),
2603 SYNTAX_FLAGS_COMEND_SECOND (other_syntax))
2604 && SYNTAX_FLAGS_COMMENT_STYLE (syntax, other_syntax) == style
2605 && ((SYNTAX_FLAGS_COMMENT_NESTED (syntax) ||
2606 SYNTAX_FLAGS_COMMENT_NESTED (other_syntax))
2607 ? nesting > 0 : nesting < 0))
2609 syntax = Smax; /* So that "|#" (lisp) can not return
2610 the syntax of "#" in *last_syntax_ptr. */
2611 if (--nesting <= 0)
2612 /* We have encountered a comment end of the same style
2613 as the comment sequence which began this comment section. */
2614 break;
2615 else
2617 INC_BOTH (from, from_byte);
2618 UPDATE_SYNTAX_TABLE_FORWARD (from);
2621 if (nesting > 0
2622 && from < stop
2623 && SYNTAX_FLAGS_COMSTART_FIRST (syntax)
2624 && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
2625 other_syntax = SYNTAX_WITH_FLAGS (c1),
2626 SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax) == style
2627 && SYNTAX_FLAGS_COMSTART_SECOND (other_syntax))
2628 && (SYNTAX_FLAGS_COMMENT_NESTED (syntax) ||
2629 SYNTAX_FLAGS_COMMENT_NESTED (other_syntax)))
2630 /* We have encountered a nested comment of the same style
2631 as the comment sequence which began this comment section. */
2633 syntax = Smax; /* So that "#|#" isn't also a comment ender. */
2634 INC_BOTH (from, from_byte);
2635 UPDATE_SYNTAX_TABLE_FORWARD (from);
2636 nesting++;
2639 rarely_quit (++quit_count);
2641 *charpos_ptr = from;
2642 *bytepos_ptr = from_byte;
2643 *last_syntax_ptr = Smax; /* Any syntactic power the last byte had is
2644 used up. */
2645 return 1;
2648 DEFUN ("forward-comment", Fforward_comment, Sforward_comment, 1, 1, 0,
2649 doc: /*
2650 Move forward across up to COUNT comments. If COUNT is negative, move backward.
2651 Stop scanning if we find something other than a comment or whitespace.
2652 Set point to where scanning stops.
2653 If COUNT comments are found as expected, with nothing except whitespace
2654 between them, return t; otherwise return nil. */)
2655 (Lisp_Object count)
2657 ptrdiff_t from, from_byte, stop;
2658 int c, c1;
2659 enum syntaxcode code;
2660 int comstyle = 0; /* style of comment encountered */
2661 bool comnested = 0; /* whether the comment is nestable or not */
2662 bool found;
2663 EMACS_INT count1;
2664 ptrdiff_t out_charpos, out_bytepos;
2665 EMACS_INT dummy;
2666 int dummy2;
2667 unsigned short int quit_count = 0;
2669 CHECK_NUMBER (count);
2670 count1 = XINT (count);
2671 stop = count1 > 0 ? ZV : BEGV;
2673 from = PT;
2674 from_byte = PT_BYTE;
2676 SETUP_SYNTAX_TABLE (from, count1);
2677 while (count1 > 0)
2681 bool comstart_first;
2682 int syntax, other_syntax;
2684 if (from == stop)
2686 SET_PT_BOTH (from, from_byte);
2687 return Qnil;
2689 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2690 syntax = SYNTAX_WITH_FLAGS (c);
2691 code = SYNTAX (c);
2692 comstart_first = SYNTAX_FLAGS_COMSTART_FIRST (syntax);
2693 comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax);
2694 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0);
2695 INC_BOTH (from, from_byte);
2696 UPDATE_SYNTAX_TABLE_FORWARD (from);
2697 if (from < stop && comstart_first
2698 && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
2699 other_syntax = SYNTAX_WITH_FLAGS (c1),
2700 SYNTAX_FLAGS_COMSTART_SECOND (other_syntax)))
2702 /* We have encountered a comment start sequence and we
2703 are ignoring all text inside comments. We must record
2704 the comment style this sequence begins so that later,
2705 only a comment end of the same style actually ends
2706 the comment section. */
2707 code = Scomment;
2708 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax);
2709 comnested |= SYNTAX_FLAGS_COMMENT_NESTED (other_syntax);
2710 INC_BOTH (from, from_byte);
2711 UPDATE_SYNTAX_TABLE_FORWARD (from);
2713 rarely_quit (++quit_count);
2715 while (code == Swhitespace || (code == Sendcomment && c == '\n'));
2717 if (code == Scomment_fence)
2718 comstyle = ST_COMMENT_STYLE;
2719 else if (code != Scomment)
2721 DEC_BOTH (from, from_byte);
2722 SET_PT_BOTH (from, from_byte);
2723 return Qnil;
2725 /* We're at the start of a comment. */
2726 found = forw_comment (from, from_byte, stop, comnested, comstyle, 0,
2727 &out_charpos, &out_bytepos, &dummy, &dummy2);
2728 from = out_charpos; from_byte = out_bytepos;
2729 if (!found)
2731 SET_PT_BOTH (from, from_byte);
2732 return Qnil;
2734 INC_BOTH (from, from_byte);
2735 UPDATE_SYNTAX_TABLE_FORWARD (from);
2736 /* We have skipped one comment. */
2737 count1--;
2740 while (count1 < 0)
2742 while (true)
2744 if (from <= stop)
2746 SET_PT_BOTH (BEGV, BEGV_BYTE);
2747 return Qnil;
2750 DEC_BOTH (from, from_byte);
2751 /* char_quoted does UPDATE_SYNTAX_TABLE_BACKWARD (from). */
2752 bool quoted = char_quoted (from, from_byte);
2753 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2754 int syntax = SYNTAX_WITH_FLAGS (c);
2755 code = SYNTAX (c);
2756 comstyle = 0;
2757 comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax);
2758 if (code == Sendcomment)
2759 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0);
2760 if (from > stop && SYNTAX_FLAGS_COMEND_SECOND (syntax)
2761 && prev_char_comend_first (from, from_byte)
2762 && !char_quoted (from - 1, dec_bytepos (from_byte)))
2764 int other_syntax;
2765 /* We must record the comment style encountered so that
2766 later, we can match only the proper comment begin
2767 sequence of the same style. */
2768 DEC_BOTH (from, from_byte);
2769 code = Sendcomment;
2770 /* Calling char_quoted, above, set up global syntax position
2771 at the new value of FROM. */
2772 c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2773 other_syntax = SYNTAX_WITH_FLAGS (c1);
2774 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax);
2775 comnested |= SYNTAX_FLAGS_COMMENT_NESTED (other_syntax);
2778 if (code == Scomment_fence)
2780 /* Skip until first preceding unquoted comment_fence. */
2781 bool fence_found = 0;
2782 ptrdiff_t ini = from, ini_byte = from_byte;
2784 while (1)
2786 DEC_BOTH (from, from_byte);
2787 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2788 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2789 if (SYNTAX (c) == Scomment_fence
2790 && !char_quoted (from, from_byte))
2792 fence_found = 1;
2793 break;
2795 else if (from == stop)
2796 break;
2797 rarely_quit (++quit_count);
2799 if (fence_found == 0)
2801 from = ini; /* Set point to ini + 1. */
2802 from_byte = ini_byte;
2803 goto leave;
2805 else
2806 /* We have skipped one comment. */
2807 break;
2809 else if (code == Sendcomment)
2811 found = back_comment (from, from_byte, stop, comnested, comstyle,
2812 &out_charpos, &out_bytepos);
2813 if (!found)
2815 if (c == '\n')
2816 /* This end-of-line is not an end-of-comment.
2817 Treat it like a whitespace.
2818 CC-mode (and maybe others) relies on this behavior. */
2820 else
2822 /* Failure: we should go back to the end of this
2823 not-quite-endcomment. */
2824 if (SYNTAX (c) != code)
2825 /* It was a two-char Sendcomment. */
2826 INC_BOTH (from, from_byte);
2827 goto leave;
2830 else
2832 /* We have skipped one comment. */
2833 from = out_charpos, from_byte = out_bytepos;
2834 break;
2837 else if (code != Swhitespace || quoted)
2839 leave:
2840 INC_BOTH (from, from_byte);
2841 SET_PT_BOTH (from, from_byte);
2842 return Qnil;
2845 rarely_quit (++quit_count);
2848 count1++;
2851 SET_PT_BOTH (from, from_byte);
2852 return Qt;
2855 /* Return syntax code of character C if C is an ASCII character
2856 or if MULTIBYTE_SYMBOL_P is false. Otherwise, return Ssymbol. */
2858 static enum syntaxcode
2859 syntax_multibyte (int c, bool multibyte_symbol_p)
2861 return ASCII_CHAR_P (c) || !multibyte_symbol_p ? SYNTAX (c) : Ssymbol;
2864 static Lisp_Object
2865 scan_lists (EMACS_INT from, EMACS_INT count, EMACS_INT depth, bool sexpflag)
2867 Lisp_Object val;
2868 ptrdiff_t stop = count > 0 ? ZV : BEGV;
2869 int c, c1;
2870 int stringterm;
2871 bool quoted;
2872 bool mathexit = 0;
2873 enum syntaxcode code;
2874 EMACS_INT min_depth = depth; /* Err out if depth gets less than this. */
2875 int comstyle = 0; /* Style of comment encountered. */
2876 bool comnested = 0; /* Whether the comment is nestable or not. */
2877 ptrdiff_t temp_pos;
2878 EMACS_INT last_good = from;
2879 bool found;
2880 ptrdiff_t from_byte;
2881 ptrdiff_t out_bytepos, out_charpos;
2882 EMACS_INT dummy;
2883 int dummy2;
2884 bool multibyte_symbol_p = sexpflag && multibyte_syntax_as_symbol;
2885 unsigned short int quit_count = 0;
2887 if (depth > 0) min_depth = 0;
2889 if (from > ZV) from = ZV;
2890 if (from < BEGV) from = BEGV;
2892 from_byte = CHAR_TO_BYTE (from);
2894 maybe_quit ();
2896 SETUP_SYNTAX_TABLE (from, count);
2897 while (count > 0)
2899 while (from < stop)
2901 rarely_quit (++quit_count);
2902 bool comstart_first, prefix;
2903 int syntax, other_syntax;
2904 UPDATE_SYNTAX_TABLE_FORWARD (from);
2905 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2906 syntax = SYNTAX_WITH_FLAGS (c);
2907 code = syntax_multibyte (c, multibyte_symbol_p);
2908 comstart_first = SYNTAX_FLAGS_COMSTART_FIRST (syntax);
2909 comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax);
2910 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0);
2911 prefix = SYNTAX_FLAGS_PREFIX (syntax);
2912 if (depth == min_depth)
2913 last_good = from;
2914 INC_BOTH (from, from_byte);
2915 UPDATE_SYNTAX_TABLE_FORWARD (from);
2916 if (from < stop && comstart_first
2917 && (c = FETCH_CHAR_AS_MULTIBYTE (from_byte),
2918 other_syntax = SYNTAX_WITH_FLAGS (c),
2919 SYNTAX_FLAGS_COMSTART_SECOND (other_syntax))
2920 && parse_sexp_ignore_comments)
2922 /* We have encountered a comment start sequence and we
2923 are ignoring all text inside comments. We must record
2924 the comment style this sequence begins so that later,
2925 only a comment end of the same style actually ends
2926 the comment section. */
2927 code = Scomment;
2928 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax);
2929 comnested |= SYNTAX_FLAGS_COMMENT_NESTED (other_syntax);
2930 INC_BOTH (from, from_byte);
2931 UPDATE_SYNTAX_TABLE_FORWARD (from);
2934 if (prefix)
2935 continue;
2937 switch (code)
2939 case Sescape:
2940 case Scharquote:
2941 if (from == stop)
2942 goto lose;
2943 INC_BOTH (from, from_byte);
2944 /* Treat following character as a word constituent. */
2945 case Sword:
2946 case Ssymbol:
2947 if (depth || !sexpflag) break;
2948 /* This word counts as a sexp; return at end of it. */
2949 while (from < stop)
2951 UPDATE_SYNTAX_TABLE_FORWARD (from);
2953 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2954 switch (syntax_multibyte (c, multibyte_symbol_p))
2956 case Scharquote:
2957 case Sescape:
2958 INC_BOTH (from, from_byte);
2959 if (from == stop)
2960 goto lose;
2961 break;
2962 case Sword:
2963 case Ssymbol:
2964 case Squote:
2965 break;
2966 default:
2967 goto done;
2969 INC_BOTH (from, from_byte);
2970 rarely_quit (++quit_count);
2972 goto done;
2974 case Scomment_fence:
2975 comstyle = ST_COMMENT_STYLE;
2976 /* FALLTHROUGH */
2977 case Scomment:
2978 if (!parse_sexp_ignore_comments) break;
2979 UPDATE_SYNTAX_TABLE_FORWARD (from);
2980 found = forw_comment (from, from_byte, stop,
2981 comnested, comstyle, 0,
2982 &out_charpos, &out_bytepos, &dummy,
2983 &dummy2);
2984 from = out_charpos, from_byte = out_bytepos;
2985 if (!found)
2987 if (depth == 0)
2988 goto done;
2989 goto lose;
2991 INC_BOTH (from, from_byte);
2992 UPDATE_SYNTAX_TABLE_FORWARD (from);
2993 break;
2995 case Smath:
2996 if (!sexpflag)
2997 break;
2998 if (from != stop && c == FETCH_CHAR_AS_MULTIBYTE (from_byte))
3000 INC_BOTH (from, from_byte);
3002 if (mathexit)
3004 mathexit = 0;
3005 goto close1;
3007 mathexit = 1;
3009 case Sopen:
3010 if (!++depth) goto done;
3011 break;
3013 case Sclose:
3014 close1:
3015 if (!--depth) goto done;
3016 if (depth < min_depth)
3017 xsignal3 (Qscan_error,
3018 build_string ("Containing expression ends prematurely"),
3019 make_number (last_good), make_number (from));
3020 break;
3022 case Sstring:
3023 case Sstring_fence:
3024 temp_pos = dec_bytepos (from_byte);
3025 stringterm = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
3026 while (1)
3028 enum syntaxcode c_code;
3029 if (from >= stop)
3030 goto lose;
3031 UPDATE_SYNTAX_TABLE_FORWARD (from);
3032 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
3033 c_code = syntax_multibyte (c, multibyte_symbol_p);
3034 if (code == Sstring
3035 ? c == stringterm && c_code == Sstring
3036 : c_code == Sstring_fence)
3037 break;
3039 if (c_code == Scharquote || c_code == Sescape)
3040 INC_BOTH (from, from_byte);
3041 INC_BOTH (from, from_byte);
3042 rarely_quit (++quit_count);
3044 INC_BOTH (from, from_byte);
3045 if (!depth && sexpflag) goto done;
3046 break;
3047 default:
3048 /* Ignore whitespace, punctuation, quote, endcomment. */
3049 break;
3053 /* Reached end of buffer. Error if within object, return nil if between */
3054 if (depth)
3055 goto lose;
3057 return Qnil;
3059 /* End of object reached */
3060 done:
3061 count--;
3064 while (count < 0)
3066 while (from > stop)
3068 rarely_quit (++quit_count);
3069 DEC_BOTH (from, from_byte);
3070 UPDATE_SYNTAX_TABLE_BACKWARD (from);
3071 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
3072 int syntax = SYNTAX_WITH_FLAGS (c);
3073 code = syntax_multibyte (c, multibyte_symbol_p);
3074 if (depth == min_depth)
3075 last_good = from;
3076 comstyle = 0;
3077 comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax);
3078 if (code == Sendcomment)
3079 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0);
3080 if (from > stop && SYNTAX_FLAGS_COMEND_SECOND (syntax)
3081 && prev_char_comend_first (from, from_byte)
3082 && parse_sexp_ignore_comments)
3084 /* We must record the comment style encountered so that
3085 later, we can match only the proper comment begin
3086 sequence of the same style. */
3087 int c2, other_syntax;
3088 DEC_BOTH (from, from_byte);
3089 UPDATE_SYNTAX_TABLE_BACKWARD (from);
3090 code = Sendcomment;
3091 c2 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
3092 other_syntax = SYNTAX_WITH_FLAGS (c2);
3093 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax);
3094 comnested |= SYNTAX_FLAGS_COMMENT_NESTED (other_syntax);
3097 /* Quoting turns anything except a comment-ender
3098 into a word character. Note that this cannot be true
3099 if we decremented FROM in the if-statement above. */
3100 if (code != Sendcomment && char_quoted (from, from_byte))
3102 DEC_BOTH (from, from_byte);
3103 code = Sword;
3105 else if (SYNTAX_FLAGS_PREFIX (syntax))
3106 continue;
3108 switch (code)
3110 case Sword:
3111 case Ssymbol:
3112 case Sescape:
3113 case Scharquote:
3114 if (depth || !sexpflag) break;
3115 /* This word counts as a sexp; count object finished
3116 after passing it. */
3117 while (from > stop)
3119 temp_pos = from_byte;
3120 if (! NILP (BVAR (current_buffer, enable_multibyte_characters)))
3121 DEC_POS (temp_pos);
3122 else
3123 temp_pos--;
3124 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
3125 c1 = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
3126 /* Don't allow comment-end to be quoted. */
3127 if (syntax_multibyte (c1, multibyte_symbol_p) == Sendcomment)
3128 goto done2;
3129 quoted = char_quoted (from - 1, temp_pos);
3130 if (quoted)
3132 DEC_BOTH (from, from_byte);
3133 temp_pos = dec_bytepos (temp_pos);
3134 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
3136 c1 = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
3137 if (! quoted)
3138 switch (syntax_multibyte (c1, multibyte_symbol_p))
3140 case Sword: case Ssymbol: case Squote: break;
3141 default: goto done2;
3143 DEC_BOTH (from, from_byte);
3144 rarely_quit (++quit_count);
3146 goto done2;
3148 case Smath:
3149 if (!sexpflag)
3150 break;
3151 if (from > BEGV)
3153 temp_pos = dec_bytepos (from_byte);
3154 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
3155 if (from != stop && c == FETCH_CHAR_AS_MULTIBYTE (temp_pos))
3156 DEC_BOTH (from, from_byte);
3158 if (mathexit)
3160 mathexit = 0;
3161 goto open2;
3163 mathexit = 1;
3165 case Sclose:
3166 if (!++depth) goto done2;
3167 break;
3169 case Sopen:
3170 open2:
3171 if (!--depth) goto done2;
3172 if (depth < min_depth)
3173 xsignal3 (Qscan_error,
3174 build_string ("Containing expression ends prematurely"),
3175 make_number (last_good), make_number (from));
3176 break;
3178 case Sendcomment:
3179 if (!parse_sexp_ignore_comments)
3180 break;
3181 found = back_comment (from, from_byte, stop, comnested, comstyle,
3182 &out_charpos, &out_bytepos);
3183 /* FIXME: if !found, it really wasn't a comment-end.
3184 For single-char Sendcomment, we can't do much about it apart
3185 from skipping the char.
3186 For 2-char endcomments, we could try again, taking both
3187 chars as separate entities, but it's a lot of trouble
3188 for very little gain, so we don't bother either. -sm */
3189 if (found)
3190 from = out_charpos, from_byte = out_bytepos;
3191 break;
3193 case Scomment_fence:
3194 case Sstring_fence:
3195 while (1)
3197 if (from == stop)
3198 goto lose;
3199 DEC_BOTH (from, from_byte);
3200 UPDATE_SYNTAX_TABLE_BACKWARD (from);
3201 if (!char_quoted (from, from_byte))
3203 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
3204 if (syntax_multibyte (c, multibyte_symbol_p) == code)
3205 break;
3207 rarely_quit (++quit_count);
3209 if (code == Sstring_fence && !depth && sexpflag) goto done2;
3210 break;
3212 case Sstring:
3213 stringterm = FETCH_CHAR_AS_MULTIBYTE (from_byte);
3214 while (true)
3216 if (from == stop)
3217 goto lose;
3218 DEC_BOTH (from, from_byte);
3219 UPDATE_SYNTAX_TABLE_BACKWARD (from);
3220 if (!char_quoted (from, from_byte))
3222 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
3223 if (c == stringterm
3224 && (syntax_multibyte (c, multibyte_symbol_p)
3225 == Sstring))
3226 break;
3228 rarely_quit (++quit_count);
3230 if (!depth && sexpflag) goto done2;
3231 break;
3232 default:
3233 /* Ignore whitespace, punctuation, quote, endcomment. */
3234 break;
3238 /* Reached start of buffer. Error if within object, return nil if between */
3239 if (depth)
3240 goto lose;
3242 return Qnil;
3244 done2:
3245 count++;
3249 XSETFASTINT (val, from);
3250 return val;
3252 lose:
3253 xsignal3 (Qscan_error,
3254 build_string ("Unbalanced parentheses"),
3255 make_number (last_good), make_number (from));
3258 DEFUN ("scan-lists", Fscan_lists, Sscan_lists, 3, 3, 0,
3259 doc: /* Scan from character number FROM by COUNT lists.
3260 Scan forward if COUNT is positive, backward if COUNT is negative.
3261 Return the character number of the position thus found.
3263 A \"list", in this context, refers to a balanced parenthetical
3264 grouping, as determined by the syntax table.
3266 If DEPTH is nonzero, treat that as the nesting depth of the starting
3267 point (i.e. the starting point is DEPTH parentheses deep). This
3268 function scans over parentheses until the depth goes to zero COUNT
3269 times. Hence, positive DEPTH moves out that number of levels of
3270 parentheses, while negative DEPTH moves to a deeper level.
3272 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
3274 If we reach the beginning or end of the accessible part of the buffer
3275 before we have scanned over COUNT lists, return nil if the depth at
3276 that point is zero, and signal a error if the depth is nonzero. */)
3277 (Lisp_Object from, Lisp_Object count, Lisp_Object depth)
3279 CHECK_NUMBER (from);
3280 CHECK_NUMBER (count);
3281 CHECK_NUMBER (depth);
3283 return scan_lists (XINT (from), XINT (count), XINT (depth), 0);
3286 DEFUN ("scan-sexps", Fscan_sexps, Sscan_sexps, 2, 2, 0,
3287 doc: /* Scan from character number FROM by COUNT balanced expressions.
3288 If COUNT is negative, scan backwards.
3289 Returns the character number of the position thus found.
3291 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
3293 If the beginning or end of (the accessible part of) the buffer is reached
3294 in the middle of a parenthetical grouping, an error is signaled.
3295 If the beginning or end is reached between groupings
3296 but before count is used up, nil is returned. */)
3297 (Lisp_Object from, Lisp_Object count)
3299 CHECK_NUMBER (from);
3300 CHECK_NUMBER (count);
3302 return scan_lists (XINT (from), XINT (count), 0, 1);
3305 DEFUN ("backward-prefix-chars", Fbackward_prefix_chars, Sbackward_prefix_chars,
3306 0, 0, 0,
3307 doc: /* Move point backward over any number of chars with prefix syntax.
3308 This includes chars with expression prefix syntax class (\\=') and those with
3309 the prefix syntax flag (p). */)
3310 (void)
3312 ptrdiff_t beg = BEGV;
3313 ptrdiff_t opoint = PT;
3314 ptrdiff_t opoint_byte = PT_BYTE;
3315 ptrdiff_t pos = PT;
3316 ptrdiff_t pos_byte = PT_BYTE;
3317 int c;
3319 if (pos <= beg)
3321 SET_PT_BOTH (opoint, opoint_byte);
3323 return Qnil;
3326 SETUP_SYNTAX_TABLE (pos, -1);
3328 DEC_BOTH (pos, pos_byte);
3330 while (!char_quoted (pos, pos_byte)
3331 /* Previous statement updates syntax table. */
3332 && ((c = FETCH_CHAR_AS_MULTIBYTE (pos_byte), SYNTAX (c) == Squote)
3333 || syntax_prefix_flag_p (c)))
3335 opoint = pos;
3336 opoint_byte = pos_byte;
3338 if (pos <= beg)
3339 break;
3340 DEC_BOTH (pos, pos_byte);
3341 rarely_quit (pos);
3344 SET_PT_BOTH (opoint, opoint_byte);
3346 return Qnil;
3350 /* If the character at FROM_BYTE is the second part of a 2-character
3351 comment opener based on PREV_FROM_SYNTAX, update STATE and return
3352 true. */
3353 static bool
3354 in_2char_comment_start (struct lisp_parse_state *state,
3355 int prev_from_syntax,
3356 ptrdiff_t prev_from,
3357 ptrdiff_t from_byte)
3359 int c1, syntax;
3360 if (SYNTAX_FLAGS_COMSTART_FIRST (prev_from_syntax)
3361 && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
3362 syntax = SYNTAX_WITH_FLAGS (c1),
3363 SYNTAX_FLAGS_COMSTART_SECOND (syntax)))
3365 /* Record the comment style we have entered so that only
3366 the comment-end sequence of the same style actually
3367 terminates the comment section. */
3368 state->comstyle
3369 = SYNTAX_FLAGS_COMMENT_STYLE (syntax, prev_from_syntax);
3370 bool comnested = (SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax)
3371 | SYNTAX_FLAGS_COMMENT_NESTED (syntax));
3372 state->incomment = comnested ? 1 : -1;
3373 state->comstr_start = prev_from;
3374 return true;
3376 return false;
3379 /* Parse forward from FROM / FROM_BYTE to END,
3380 assuming that FROM has state STATE,
3381 and return a description of the state of the parse at END.
3382 If STOPBEFORE, stop at the start of an atom.
3383 If COMMENTSTOP is 1, stop at the start of a comment.
3384 If COMMENTSTOP is -1, stop at the start or end of a comment,
3385 after the beginning of a string, or after the end of a string. */
3387 static void
3388 scan_sexps_forward (struct lisp_parse_state *state,
3389 ptrdiff_t from, ptrdiff_t from_byte, ptrdiff_t end,
3390 EMACS_INT targetdepth, bool stopbefore,
3391 int commentstop)
3393 enum syntaxcode code;
3394 struct level { ptrdiff_t last, prev; };
3395 struct level levelstart[100];
3396 struct level *curlevel = levelstart;
3397 struct level *endlevel = levelstart + 100;
3398 EMACS_INT depth; /* Paren depth of current scanning location.
3399 level - levelstart equals this except
3400 when the depth becomes negative. */
3401 EMACS_INT mindepth; /* Lowest DEPTH value seen. */
3402 bool start_quoted = 0; /* True means starting after a char quote. */
3403 Lisp_Object tem;
3404 ptrdiff_t prev_from; /* Keep one character before FROM. */
3405 ptrdiff_t prev_from_byte;
3406 int prev_from_syntax, prev_prev_from_syntax;
3407 bool boundary_stop = commentstop == -1;
3408 bool nofence;
3409 bool found;
3410 ptrdiff_t out_bytepos, out_charpos;
3411 int temp;
3412 unsigned short int quit_count = 0;
3414 prev_from = from;
3415 prev_from_byte = from_byte;
3416 if (from != BEGV)
3417 DEC_BOTH (prev_from, prev_from_byte);
3419 /* Use this macro instead of `from++'. */
3420 #define INC_FROM \
3421 do { prev_from = from; \
3422 prev_from_byte = from_byte; \
3423 temp = FETCH_CHAR_AS_MULTIBYTE (prev_from_byte); \
3424 prev_prev_from_syntax = prev_from_syntax; \
3425 prev_from_syntax = SYNTAX_WITH_FLAGS (temp); \
3426 INC_BOTH (from, from_byte); \
3427 if (from < end) \
3428 UPDATE_SYNTAX_TABLE_FORWARD (from); \
3429 } while (0)
3431 maybe_quit ();
3433 depth = state->depth;
3434 start_quoted = state->quoted;
3435 prev_prev_from_syntax = Smax;
3436 prev_from_syntax = state->prev_syntax;
3438 tem = state->levelstarts;
3439 while (!NILP (tem)) /* >= second enclosing sexps. */
3441 Lisp_Object temhd = Fcar (tem);
3442 if (RANGED_INTEGERP (PTRDIFF_MIN, temhd, PTRDIFF_MAX))
3443 curlevel->last = XINT (temhd);
3444 if (++curlevel == endlevel)
3445 curlevel--; /* error ("Nesting too deep for parser"); */
3446 curlevel->prev = -1;
3447 curlevel->last = -1;
3448 tem = Fcdr (tem);
3450 curlevel->prev = -1;
3451 curlevel->last = -1;
3453 state->quoted = 0;
3454 mindepth = depth;
3456 SETUP_SYNTAX_TABLE (from, 1);
3458 /* Enter the loop at a place appropriate for initial state. */
3460 if (state->incomment)
3461 goto startincomment;
3462 if (state->instring >= 0)
3464 nofence = state->instring != ST_STRING_STYLE;
3465 if (start_quoted)
3466 goto startquotedinstring;
3467 goto startinstring;
3469 else if (start_quoted)
3470 goto startquoted;
3471 else if ((from < end)
3472 && (in_2char_comment_start (state, prev_from_syntax,
3473 prev_from, from_byte)))
3475 INC_FROM;
3476 prev_from_syntax = Smax; /* the syntax has already been "used up". */
3477 goto atcomment;
3480 while (from < end)
3482 rarely_quit (++quit_count);
3483 INC_FROM;
3485 if ((from < end)
3486 && (in_2char_comment_start (state, prev_from_syntax,
3487 prev_from, from_byte)))
3489 INC_FROM;
3490 prev_from_syntax = Smax; /* the syntax has already been "used up". */
3491 goto atcomment;
3494 if (SYNTAX_FLAGS_PREFIX (prev_from_syntax))
3495 continue;
3496 code = prev_from_syntax & 0xff;
3497 switch (code)
3499 case Sescape:
3500 case Scharquote:
3501 if (stopbefore) goto stop; /* this arg means stop at sexp start */
3502 curlevel->last = prev_from;
3503 startquoted:
3504 if (from == end) goto endquoted;
3505 INC_FROM;
3506 goto symstarted;
3507 /* treat following character as a word constituent */
3508 case Sword:
3509 case Ssymbol:
3510 if (stopbefore) goto stop; /* this arg means stop at sexp start */
3511 curlevel->last = prev_from;
3512 symstarted:
3513 while (from < end)
3515 if (in_2char_comment_start (state, prev_from_syntax,
3516 prev_from, from_byte))
3518 INC_FROM;
3519 prev_from_syntax = Smax; /* the syntax has already been "used up". */
3520 goto atcomment;
3523 int symchar = FETCH_CHAR_AS_MULTIBYTE (from_byte);
3524 switch (SYNTAX (symchar))
3526 case Scharquote:
3527 case Sescape:
3528 INC_FROM;
3529 if (from == end) goto endquoted;
3530 break;
3531 case Sword:
3532 case Ssymbol:
3533 case Squote:
3534 break;
3535 default:
3536 goto symdone;
3538 INC_FROM;
3539 rarely_quit (++quit_count);
3541 symdone:
3542 curlevel->prev = curlevel->last;
3543 break;
3545 case Scomment_fence:
3546 /* Record the comment style we have entered so that only
3547 the comment-end sequence of the same style actually
3548 terminates the comment section. */
3549 state->comstyle = ST_COMMENT_STYLE;
3550 state->incomment = -1;
3551 state->comstr_start = prev_from;
3552 goto atcomment;
3553 case Scomment:
3554 state->comstyle = SYNTAX_FLAGS_COMMENT_STYLE (prev_from_syntax, 0);
3555 state->incomment = (SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax) ?
3556 1 : -1);
3557 state->comstr_start = prev_from;
3558 atcomment:
3559 if (commentstop || boundary_stop) goto done;
3560 startincomment:
3561 /* The (from == BEGV) test was to enter the loop in the middle so
3562 that we find a 2-char comment ender even if we start in the
3563 middle of it. We don't want to do that if we're just at the
3564 beginning of the comment (think of (*) ... (*)). */
3565 found = forw_comment (from, from_byte, end,
3566 state->incomment, state->comstyle,
3567 from == BEGV ? 0 : prev_from_syntax,
3568 &out_charpos, &out_bytepos, &state->incomment,
3569 &prev_from_syntax);
3570 from = out_charpos; from_byte = out_bytepos;
3571 /* Beware! prev_from and friends (except prev_from_syntax)
3572 are invalid now. Luckily, the `done' doesn't use them
3573 and the INC_FROM sets them to a sane value without
3574 looking at them. */
3575 if (!found) goto done;
3576 INC_FROM;
3577 state->incomment = 0;
3578 state->comstyle = 0; /* reset the comment style */
3579 prev_from_syntax = Smax; /* For the comment closer */
3580 if (boundary_stop) goto done;
3581 break;
3583 case Sopen:
3584 if (stopbefore) goto stop; /* this arg means stop at sexp start */
3585 depth++;
3586 /* curlevel++->last ran into compiler bug on Apollo */
3587 curlevel->last = prev_from;
3588 if (++curlevel == endlevel)
3589 curlevel--; /* error ("Nesting too deep for parser"); */
3590 curlevel->prev = -1;
3591 curlevel->last = -1;
3592 if (targetdepth == depth) goto done;
3593 break;
3595 case Sclose:
3596 depth--;
3597 if (depth < mindepth)
3598 mindepth = depth;
3599 if (curlevel != levelstart)
3600 curlevel--;
3601 curlevel->prev = curlevel->last;
3602 if (targetdepth == depth) goto done;
3603 break;
3605 case Sstring:
3606 case Sstring_fence:
3607 state->comstr_start = from - 1;
3608 if (stopbefore) goto stop; /* this arg means stop at sexp start */
3609 curlevel->last = prev_from;
3610 state->instring = (code == Sstring
3611 ? (FETCH_CHAR_AS_MULTIBYTE (prev_from_byte))
3612 : ST_STRING_STYLE);
3613 if (boundary_stop) goto done;
3614 startinstring:
3616 nofence = state->instring != ST_STRING_STYLE;
3618 while (1)
3620 int c;
3621 enum syntaxcode c_code;
3623 if (from >= end) goto done;
3624 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
3625 c_code = SYNTAX (c);
3627 /* Check C_CODE here so that if the char has
3628 a syntax-table property which says it is NOT
3629 a string character, it does not end the string. */
3630 if (nofence && c == state->instring && c_code == Sstring)
3631 break;
3633 switch (c_code)
3635 case Sstring_fence:
3636 if (!nofence) goto string_end;
3637 break;
3639 case Scharquote:
3640 case Sescape:
3641 INC_FROM;
3642 startquotedinstring:
3643 if (from >= end) goto endquoted;
3644 break;
3646 default:
3647 break;
3649 INC_FROM;
3650 rarely_quit (++quit_count);
3653 string_end:
3654 state->instring = -1;
3655 curlevel->prev = curlevel->last;
3656 INC_FROM;
3657 if (boundary_stop) goto done;
3658 break;
3660 case Smath:
3661 /* FIXME: We should do something with it. */
3662 break;
3663 default:
3664 /* Ignore whitespace, punctuation, quote, endcomment. */
3665 break;
3668 goto done;
3670 stop: /* Here if stopping before start of sexp. */
3671 from = prev_from; /* We have just fetched the char that starts it; */
3672 from_byte = prev_from_byte;
3673 prev_from_syntax = prev_prev_from_syntax;
3674 goto done; /* but return the position before it. */
3676 endquoted:
3677 state->quoted = 1;
3678 done:
3679 state->depth = depth;
3680 state->mindepth = mindepth;
3681 state->thislevelstart = curlevel->prev;
3682 state->prevlevelstart
3683 = (curlevel == levelstart) ? -1 : (curlevel - 1)->last;
3684 state->location = from;
3685 state->location_byte = from_byte;
3686 state->levelstarts = Qnil;
3687 while (curlevel > levelstart)
3688 state->levelstarts = Fcons (make_number ((--curlevel)->last),
3689 state->levelstarts);
3690 state->prev_syntax = (SYNTAX_FLAGS_COMSTARTEND_FIRST (prev_from_syntax)
3691 || state->quoted) ? prev_from_syntax : Smax;
3694 /* Convert a (lisp) parse state to the internal form used in
3695 scan_sexps_forward. */
3696 static void
3697 internalize_parse_state (Lisp_Object external, struct lisp_parse_state *state)
3699 Lisp_Object tem;
3701 if (NILP (external))
3703 state->depth = 0;
3704 state->instring = -1;
3705 state->incomment = 0;
3706 state->quoted = 0;
3707 state->comstyle = 0; /* comment style a by default. */
3708 state->comstr_start = -1; /* no comment/string seen. */
3709 state->levelstarts = Qnil;
3710 state->prev_syntax = Smax;
3712 else
3714 tem = Fcar (external);
3715 if (!NILP (tem))
3716 state->depth = XINT (tem);
3717 else
3718 state->depth = 0;
3720 external = Fcdr (external);
3721 external = Fcdr (external);
3722 external = Fcdr (external);
3723 tem = Fcar (external);
3724 /* Check whether we are inside string_fence-style string: */
3725 state->instring = (!NILP (tem)
3726 ? (CHARACTERP (tem) ? XFASTINT (tem) : ST_STRING_STYLE)
3727 : -1);
3729 external = Fcdr (external);
3730 tem = Fcar (external);
3731 state->incomment = (!NILP (tem)
3732 ? (INTEGERP (tem) ? XINT (tem) : -1)
3733 : 0);
3735 external = Fcdr (external);
3736 tem = Fcar (external);
3737 state->quoted = !NILP (tem);
3739 /* if the eighth element of the list is nil, we are in comment
3740 style a. If it is non-nil, we are in comment style b */
3741 external = Fcdr (external);
3742 external = Fcdr (external);
3743 tem = Fcar (external);
3744 state->comstyle = (NILP (tem)
3746 : (RANGED_INTEGERP (0, tem, ST_COMMENT_STYLE)
3747 ? XINT (tem)
3748 : ST_COMMENT_STYLE));
3750 external = Fcdr (external);
3751 tem = Fcar (external);
3752 state->comstr_start =
3753 RANGED_INTEGERP (PTRDIFF_MIN, tem, PTRDIFF_MAX) ? XINT (tem) : -1;
3754 external = Fcdr (external);
3755 tem = Fcar (external);
3756 state->levelstarts = tem;
3758 external = Fcdr (external);
3759 tem = Fcar (external);
3760 state->prev_syntax = NILP (tem) ? Smax : XINT (tem);
3764 DEFUN ("parse-partial-sexp", Fparse_partial_sexp, Sparse_partial_sexp, 2, 6, 0,
3765 doc: /* Parse Lisp syntax starting at FROM until TO; return status of parse at TO.
3766 Parsing stops at TO or when certain criteria are met;
3767 point is set to where parsing stops.
3768 If fifth arg OLDSTATE is omitted or nil,
3769 parsing assumes that FROM is the beginning of a function.
3771 Value is a list of elements describing final state of parsing:
3772 0. depth in parens.
3773 1. character address of start of innermost containing list; nil if none.
3774 2. character address of start of last complete sexp terminated.
3775 3. non-nil if inside a string.
3776 (it is the character that will terminate the string,
3777 or t if the string should be terminated by a generic string delimiter.)
3778 4. nil if outside a comment, t if inside a non-nestable comment,
3779 else an integer (the current comment nesting).
3780 5. t if following a quote character.
3781 6. the minimum paren-depth encountered during this scan.
3782 7. style of comment, if any.
3783 8. character address of start of comment or string; nil if not in one.
3784 9. List of positions of currently open parens, outermost first.
3785 10. When the last position scanned holds the first character of a
3786 (potential) two character construct, the syntax of that position,
3787 otherwise nil. That construct can be a two character comment
3788 delimiter or an Escaped or Char-quoted character.
3789 11..... Possible further internal information used by `parse-partial-sexp'.
3791 If third arg TARGETDEPTH is non-nil, parsing stops if the depth
3792 in parentheses becomes equal to TARGETDEPTH.
3793 Fourth arg STOPBEFORE non-nil means stop when we come to
3794 any character that starts a sexp.
3795 Fifth arg OLDSTATE is a list like what this function returns.
3796 It is used to initialize the state of the parse. Elements number 1, 2, 6
3797 are ignored.
3798 Sixth arg COMMENTSTOP non-nil means stop after the start of a comment.
3799 If it is the symbol `syntax-table', stop after the start of a comment or a
3800 string, or after end of a comment or a string. */)
3801 (Lisp_Object from, Lisp_Object to, Lisp_Object targetdepth,
3802 Lisp_Object stopbefore, Lisp_Object oldstate, Lisp_Object commentstop)
3804 struct lisp_parse_state state;
3805 EMACS_INT target;
3807 if (!NILP (targetdepth))
3809 CHECK_NUMBER (targetdepth);
3810 target = XINT (targetdepth);
3812 else
3813 target = TYPE_MINIMUM (EMACS_INT); /* We won't reach this depth. */
3815 validate_region (&from, &to);
3816 internalize_parse_state (oldstate, &state);
3817 scan_sexps_forward (&state, XINT (from), CHAR_TO_BYTE (XINT (from)),
3818 XINT (to),
3819 target, !NILP (stopbefore),
3820 (NILP (commentstop)
3821 ? 0 : (EQ (commentstop, Qsyntax_table) ? -1 : 1)));
3823 SET_PT_BOTH (state.location, state.location_byte);
3825 return
3826 Fcons (make_number (state.depth),
3827 Fcons (state.prevlevelstart < 0
3828 ? Qnil : make_number (state.prevlevelstart),
3829 Fcons (state.thislevelstart < 0
3830 ? Qnil : make_number (state.thislevelstart),
3831 Fcons (state.instring >= 0
3832 ? (state.instring == ST_STRING_STYLE
3833 ? Qt : make_number (state.instring)) : Qnil,
3834 Fcons (state.incomment < 0 ? Qt :
3835 (state.incomment == 0 ? Qnil :
3836 make_number (state.incomment)),
3837 Fcons (state.quoted ? Qt : Qnil,
3838 Fcons (make_number (state.mindepth),
3839 Fcons ((state.comstyle
3840 ? (state.comstyle == ST_COMMENT_STYLE
3841 ? Qsyntax_table
3842 : make_number (state.comstyle))
3843 : Qnil),
3844 Fcons (((state.incomment
3845 || (state.instring >= 0))
3846 ? make_number (state.comstr_start)
3847 : Qnil),
3848 Fcons (state.levelstarts,
3849 Fcons (state.prev_syntax == Smax
3850 ? Qnil
3851 : make_number (state.prev_syntax),
3852 Qnil)))))))))));
3855 void
3856 init_syntax_once (void)
3858 register int i, c;
3859 Lisp_Object temp;
3861 /* This has to be done here, before we call Fmake_char_table. */
3862 DEFSYM (Qsyntax_table, "syntax-table");
3863 Fput (Qsyntax_table, Qchar_table_extra_slots, make_number (2));
3865 /* Create objects which can be shared among syntax tables. */
3866 Vsyntax_code_object = make_uninit_vector (Smax);
3867 for (i = 0; i < Smax; i++)
3868 ASET (Vsyntax_code_object, i, Fcons (make_number (i), Qnil));
3870 /* Now we are ready to set up this property, so we can
3871 create syntax tables. */
3872 /* Fput (Qsyntax_table, Qchar_table_extra_slots, make_number (0)); */
3874 temp = AREF (Vsyntax_code_object, Swhitespace);
3876 Vstandard_syntax_table = Fmake_char_table (Qsyntax_table, temp);
3878 /* Control characters should not be whitespace. */
3879 temp = AREF (Vsyntax_code_object, Spunct);
3880 for (i = 0; i <= ' ' - 1; i++)
3881 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3882 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, 0177, temp);
3884 /* Except that a few really are whitespace. */
3885 temp = AREF (Vsyntax_code_object, Swhitespace);
3886 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ' ', temp);
3887 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\t', temp);
3888 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\n', temp);
3889 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, 015, temp);
3890 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, 014, temp);
3892 temp = AREF (Vsyntax_code_object, Sword);
3893 for (i = 'a'; i <= 'z'; i++)
3894 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3895 for (i = 'A'; i <= 'Z'; i++)
3896 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3897 for (i = '0'; i <= '9'; i++)
3898 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3900 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '$', temp);
3901 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '%', temp);
3903 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '(',
3904 Fcons (make_number (Sopen), make_number (')')));
3905 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ')',
3906 Fcons (make_number (Sclose), make_number ('(')));
3907 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '[',
3908 Fcons (make_number (Sopen), make_number (']')));
3909 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ']',
3910 Fcons (make_number (Sclose), make_number ('[')));
3911 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '{',
3912 Fcons (make_number (Sopen), make_number ('}')));
3913 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '}',
3914 Fcons (make_number (Sclose), make_number ('{')));
3915 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '"',
3916 Fcons (make_number (Sstring), Qnil));
3917 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\\',
3918 Fcons (make_number (Sescape), Qnil));
3920 temp = AREF (Vsyntax_code_object, Ssymbol);
3921 for (i = 0; i < 10; i++)
3923 c = "_-+*/&|<>="[i];
3924 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
3927 temp = AREF (Vsyntax_code_object, Spunct);
3928 for (i = 0; i < 12; i++)
3930 c = ".,;:?!#@~^'`"[i];
3931 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
3934 /* All multibyte characters have syntax `word' by default. */
3935 temp = AREF (Vsyntax_code_object, Sword);
3936 char_table_set_range (Vstandard_syntax_table, 0x80, MAX_CHAR, temp);
3939 void
3940 syms_of_syntax (void)
3942 DEFSYM (Qsyntax_table_p, "syntax-table-p");
3944 staticpro (&Vsyntax_code_object);
3946 staticpro (&gl_state.object);
3947 staticpro (&gl_state.global_code);
3948 staticpro (&gl_state.current_syntax_table);
3949 staticpro (&gl_state.old_prop);
3951 /* Defined in regex.c. */
3952 staticpro (&re_match_object);
3954 DEFSYM (Qscan_error, "scan-error");
3955 Fput (Qscan_error, Qerror_conditions,
3956 listn (CONSTYPE_PURE, 2, Qscan_error, Qerror));
3957 Fput (Qscan_error, Qerror_message,
3958 build_pure_c_string ("Scan error"));
3960 DEFSYM (Qliteral_cache, "literal-cache");
3961 DEFVAR_LISP ("literal-cache-values", Vliteral_cache_values,
3962 doc: /* A list of values which the text property `literal-cache' can assume.
3963 This is to ensure that any values which are `equal' are also `eq', as required by the text
3964 property functions. The list starts off empty, and any time a new value is needed, it is
3965 pushed onto the list. The second time a value is needed, it is found by `member', and the
3966 canonical equivalent used. */);
3967 Vliteral_cache_values = Qnil;
3969 DEFVAR_BOOL ("parse-sexp-ignore-comments", parse_sexp_ignore_comments,
3970 doc: /* Non-nil means `forward-sexp', etc., should treat comments as whitespace. */);
3972 DEFVAR_BOOL ("parse-sexp-lookup-properties", parse_sexp_lookup_properties,
3973 doc: /* Non-nil means `forward-sexp', etc., obey `syntax-table' property.
3974 Otherwise, that text property is simply ignored.
3975 See the info node `(elisp)Syntax Properties' for a description of the
3976 `syntax-table' property. */);
3978 DEFVAR_INT ("syntax-propertize--done", syntax_propertize__done,
3979 doc: /* Position up to which syntax-table properties have been set. */);
3980 syntax_propertize__done = -1;
3981 DEFSYM (Qinternal__syntax_propertize, "internal--syntax-propertize");
3982 Fmake_variable_buffer_local (intern ("syntax-propertize--done"));
3984 words_include_escapes = 0;
3985 DEFVAR_BOOL ("words-include-escapes", words_include_escapes,
3986 doc: /* Non-nil means `forward-word', etc., should treat escape chars part of words. */);
3988 DEFVAR_BOOL ("multibyte-syntax-as-symbol", multibyte_syntax_as_symbol,
3989 doc: /* Non-nil means `scan-sexps' treats all multibyte characters as symbol. */);
3990 multibyte_syntax_as_symbol = 0;
3992 DEFVAR_BOOL ("open-paren-in-column-0-is-defun-start",
3993 open_paren_in_column_0_is_defun_start,
3994 doc: /* Non-nil means an open paren in column 0 denotes the start of a defun. */);
3995 open_paren_in_column_0_is_defun_start = 1;
3998 DEFVAR_LISP ("find-word-boundary-function-table",
3999 Vfind_word_boundary_function_table,
4000 doc: /*
4001 Char table of functions to search for the word boundary.
4002 Each function is called with two arguments; POS and LIMIT.
4003 POS and LIMIT are character positions in the current buffer.
4005 If POS is less than LIMIT, POS is at the first character of a word,
4006 and the return value of a function should be a position after the
4007 last character of that word.
4009 If POS is not less than LIMIT, POS is at the last character of a word,
4010 and the return value of a function should be a position at the first
4011 character of that word.
4013 In both cases, LIMIT bounds the search. */);
4014 Vfind_word_boundary_function_table = Fmake_char_table (Qnil, Qnil);
4016 DEFVAR_BOOL ("comment-end-can-be-escaped", Vcomment_end_can_be_escaped,
4017 doc: /* Non-nil means an escaped ender inside a comment doesn't end the comment. */);
4018 Vcomment_end_can_be_escaped = 0;
4019 DEFSYM (Qcomment_end_can_be_escaped, "comment-end-can-be-escaped");
4020 Fmake_variable_buffer_local (Qcomment_end_can_be_escaped);
4022 defsubr (&Strim_literal_cache);
4023 defsubr (&Sleast_literal_difference_between_syntax_tables);
4024 defsubr (&Ssyntax_tables_literally_different_p);
4025 defsubr (&Ssyntax_table_p);
4026 defsubr (&Ssyntax_table);
4027 defsubr (&Sstandard_syntax_table);
4028 defsubr (&Scopy_syntax_table);
4029 defsubr (&Sset_syntax_table);
4030 defsubr (&Schar_syntax);
4031 defsubr (&Smatching_paren);
4032 defsubr (&Sstring_to_syntax);
4033 defsubr (&Smodify_syntax_entry);
4034 defsubr (&Sinternal_describe_syntax_value);
4036 defsubr (&Sforward_word);
4038 defsubr (&Sskip_chars_forward);
4039 defsubr (&Sskip_chars_backward);
4040 defsubr (&Sskip_syntax_forward);
4041 defsubr (&Sskip_syntax_backward);
4043 defsubr (&Sforward_comment);
4044 defsubr (&Sscan_lists);
4045 defsubr (&Sscan_sexps);
4046 defsubr (&Sbackward_prefix_chars);
4047 defsubr (&Sparse_partial_sexp);