* src/syntax.c (char-syntax): Warn about ignoring text properties (Bug#22765).
[emacs.git] / src / syntax.c
blobe6a21e5433eec6ab085832de7190c9646bc017d7
1 /* GNU Emacs routines to deal with syntax tables; also word and list parsing.
2 Copyright (C) 1985, 1987, 1993-1995, 1997-1999, 2001-2018 Free
3 Software Foundation, Inc.
5 This file is part of GNU Emacs.
7 GNU Emacs is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or (at
10 your option) any later version.
12 GNU Emacs is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */
21 #include <config.h>
23 #include "lisp.h"
24 #include "character.h"
25 #include "buffer.h"
26 #include "regex.h"
27 #include "syntax.h"
28 #include "intervals.h"
29 #include "category.h"
31 /* Make syntax table lookup grant data in gl_state. */
32 #define SYNTAX(c) syntax_property (c, 1)
33 #define SYNTAX_ENTRY(c) syntax_property_entry (c, 1)
34 #define SYNTAX_WITH_FLAGS(c) syntax_property_with_flags (c, 1)
36 /* Eight single-bit flags have the following meanings:
37 1. This character is the first of a two-character comment-start sequence.
38 2. This character is the second of a two-character comment-start sequence.
39 3. This character is the first of a two-character comment-end sequence.
40 4. This character is the second of a two-character comment-end sequence.
41 5. This character is a prefix, for backward-prefix-chars.
42 6. The char is part of a delimiter for comments of style "b".
43 7. This character is part of a nestable comment sequence.
44 8. The char is part of a delimiter for comments of style "c".
45 Note that any two-character sequence whose first character has flag 1
46 and whose second character has flag 2 will be interpreted as a comment start.
48 Bits 6 and 8 discriminate among different comment styles.
49 Languages such as C++ allow two orthogonal syntax start/end pairs
50 and bit 6 determines whether a comment-end or Scommentend
51 ends style a or b. Comment markers can start style a, b, c, or bc.
52 Style a is always the default.
53 For 2-char comment markers, the style b flag is looked up only on the second
54 char of the comment marker and on the first char of the comment ender.
55 For style c (like the nested flag), the flag can be placed on any of
56 the chars. */
58 /* These functions extract specific flags from an integer
59 that holds the syntax code and the flags. */
61 static bool
62 SYNTAX_FLAGS_COMSTART_FIRST (int flags)
64 return (flags >> 16) & 1;
66 static bool
67 SYNTAX_FLAGS_COMSTART_SECOND (int flags)
69 return (flags >> 17) & 1;
71 static bool
72 SYNTAX_FLAGS_COMEND_FIRST (int flags)
74 return (flags >> 18) & 1;
76 static bool
77 SYNTAX_FLAGS_COMEND_SECOND (int flags)
79 return (flags >> 19) & 1;
81 static bool
82 SYNTAX_FLAGS_COMSTARTEND_FIRST (int flags)
84 return (flags & 0x50000) != 0;
86 static bool
87 SYNTAX_FLAGS_PREFIX (int flags)
89 return (flags >> 20) & 1;
91 static bool
92 SYNTAX_FLAGS_COMMENT_STYLEB (int flags)
94 return (flags >> 21) & 1;
96 static bool
97 SYNTAX_FLAGS_COMMENT_STYLEC (int flags)
99 return (flags >> 23) & 1;
101 static int
102 SYNTAX_FLAGS_COMMENT_STYLEC2 (int flags)
104 return (flags >> 22) & 2; /* SYNTAX_FLAGS_COMMENT_STYLEC (flags) * 2 */
106 static bool
107 SYNTAX_FLAGS_COMMENT_NESTED (int flags)
109 return (flags >> 22) & 1;
112 /* FLAGS should be the flags of the main char of the comment marker, e.g.
113 the second for comstart and the first for comend. */
114 static int
115 SYNTAX_FLAGS_COMMENT_STYLE (int flags, int other_flags)
117 return (SYNTAX_FLAGS_COMMENT_STYLEB (flags)
118 | SYNTAX_FLAGS_COMMENT_STYLEC2 (flags)
119 | SYNTAX_FLAGS_COMMENT_STYLEC2 (other_flags));
122 /* Extract a particular flag for a given character. */
124 static bool
125 SYNTAX_COMEND_FIRST (int c)
127 return SYNTAX_FLAGS_COMEND_FIRST (SYNTAX_WITH_FLAGS (c));
130 /* We use these constants in place for comment-style and
131 string-ender-char to distinguish comments/strings started by
132 comment_fence and string_fence codes. */
134 enum
136 ST_COMMENT_STYLE = 256 + 1,
137 ST_STRING_STYLE = 256 + 2
140 /* This is the internal form of the parse state used in parse-partial-sexp. */
142 struct lisp_parse_state
144 EMACS_INT depth; /* Depth at end of parsing. */
145 int instring; /* -1 if not within string, else desired terminator. */
146 EMACS_INT incomment; /* -1 if in unnestable comment else comment nesting */
147 int comstyle; /* comment style a=0, or b=1, or ST_COMMENT_STYLE. */
148 bool quoted; /* True if just after an escape char at end of parsing. */
149 EMACS_INT mindepth; /* Minimum depth seen while scanning. */
150 /* Char number of most recent start-of-expression at current level */
151 ptrdiff_t thislevelstart;
152 /* Char number of start of containing expression */
153 ptrdiff_t prevlevelstart;
154 ptrdiff_t location; /* Char number at which parsing stopped. */
155 ptrdiff_t location_byte; /* Corresponding byte position. */
156 ptrdiff_t comstr_start; /* Position of last comment/string starter. */
157 Lisp_Object levelstarts; /* Char numbers of starts-of-expression
158 of levels (starting from outermost). */
159 int prev_syntax; /* Syntax of previous position scanned, when
160 that position (potentially) holds the first char
161 of a 2-char construct, i.e. comment delimiter
162 or Sescape, etc. Smax otherwise. */
165 /* These variables are a cache for finding the start of a defun.
166 find_start_pos is the place for which the defun start was found.
167 find_start_value is the defun start position found for it.
168 find_start_value_byte is the corresponding byte position.
169 find_start_buffer is the buffer it was found in.
170 find_start_begv is the BEGV value when it was found.
171 find_start_modiff is the value of MODIFF when it was found. */
173 static ptrdiff_t find_start_pos;
174 static ptrdiff_t find_start_value;
175 static ptrdiff_t find_start_value_byte;
176 static struct buffer *find_start_buffer;
177 static ptrdiff_t find_start_begv;
178 static EMACS_INT find_start_modiff;
181 static Lisp_Object skip_chars (bool, Lisp_Object, Lisp_Object, bool);
182 static Lisp_Object skip_syntaxes (bool, Lisp_Object, Lisp_Object);
183 static Lisp_Object scan_lists (EMACS_INT, EMACS_INT, EMACS_INT, bool);
184 static void scan_sexps_forward (struct lisp_parse_state *,
185 ptrdiff_t, ptrdiff_t, ptrdiff_t, EMACS_INT,
186 bool, int);
187 static void internalize_parse_state (Lisp_Object, struct lisp_parse_state *);
188 static bool in_classes (int, Lisp_Object);
189 static void parse_sexp_propertize (ptrdiff_t charpos);
191 /* This setter is used only in this file, so it can be private. */
192 static void
193 bset_syntax_table (struct buffer *b, Lisp_Object val)
195 b->syntax_table_ = val;
198 /* Whether the syntax of the character C has the prefix flag set. */
199 bool
200 syntax_prefix_flag_p (int c)
202 return SYNTAX_FLAGS_PREFIX (SYNTAX_WITH_FLAGS (c));
205 struct gl_state_s gl_state; /* Global state of syntax parser. */
207 enum { INTERVALS_AT_ONCE = 10 }; /* 1 + max-number of intervals
208 to scan to property-change. */
210 /* Set the syntax entry VAL for char C in table TABLE. */
212 static void
213 SET_RAW_SYNTAX_ENTRY (Lisp_Object table, int c, Lisp_Object val)
215 CHAR_TABLE_SET (table, c, val);
218 /* Set the syntax entry VAL for char-range RANGE in table TABLE.
219 RANGE is a cons (FROM . TO) specifying the range of characters. */
221 static void
222 SET_RAW_SYNTAX_ENTRY_RANGE (Lisp_Object table, Lisp_Object range,
223 Lisp_Object val)
225 Fset_char_table_range (table, range, val);
228 /* Extract the information from the entry for character C
229 in the current syntax table. */
231 static Lisp_Object
232 SYNTAX_MATCH (int c)
234 Lisp_Object ent = SYNTAX_ENTRY (c);
235 return CONSP (ent) ? XCDR (ent) : Qnil;
238 /* This should be called with FROM at the start of forward
239 search, or after the last position of the backward search. It
240 makes sure that the first char is picked up with correct table, so
241 one does not need to call UPDATE_SYNTAX_TABLE immediately after the
242 call.
243 Sign of COUNT gives the direction of the search.
246 static void
247 SETUP_SYNTAX_TABLE (ptrdiff_t from, ptrdiff_t count)
249 SETUP_BUFFER_SYNTAX_TABLE ();
250 gl_state.b_property = BEGV;
251 gl_state.e_property = ZV + 1;
252 gl_state.object = Qnil;
253 gl_state.offset = 0;
254 if (parse_sexp_lookup_properties)
256 if (count > 0)
257 update_syntax_table_forward (from, true, Qnil);
258 else if (from > BEGV)
260 update_syntax_table (from - 1, count, true, Qnil);
261 parse_sexp_propertize (from - 1);
266 /* Same as above, but in OBJECT. If OBJECT is nil, use current buffer.
267 If it is t (which is only used in fast_c_string_match_ignore_case),
268 ignore properties altogether.
270 This is meant for regex.c to use. For buffers, regex.c passes arguments
271 to the UPDATE_SYNTAX_TABLE functions which are relative to BEGV.
272 So if it is a buffer, we set the offset field to BEGV. */
274 void
275 SETUP_SYNTAX_TABLE_FOR_OBJECT (Lisp_Object object,
276 ptrdiff_t from, ptrdiff_t count)
278 SETUP_BUFFER_SYNTAX_TABLE ();
279 gl_state.object = object;
280 if (BUFFERP (gl_state.object))
282 struct buffer *buf = XBUFFER (gl_state.object);
283 gl_state.b_property = 1;
284 gl_state.e_property = BUF_ZV (buf) - BUF_BEGV (buf) + 1;
285 gl_state.offset = BUF_BEGV (buf) - 1;
287 else if (NILP (gl_state.object))
289 gl_state.b_property = 1;
290 gl_state.e_property = ZV - BEGV + 1;
291 gl_state.offset = BEGV - 1;
293 else if (EQ (gl_state.object, Qt))
295 gl_state.b_property = 0;
296 gl_state.e_property = PTRDIFF_MAX;
297 gl_state.offset = 0;
299 else
301 gl_state.b_property = 0;
302 gl_state.e_property = 1 + SCHARS (gl_state.object);
303 gl_state.offset = 0;
305 if (parse_sexp_lookup_properties)
306 update_syntax_table (from + gl_state.offset - (count <= 0),
307 count, 1, gl_state.object);
310 /* Update gl_state to an appropriate interval which contains CHARPOS. The
311 sign of COUNT give the relative position of CHARPOS wrt the previously
312 valid interval. If INIT, only [be]_property fields of gl_state are
313 valid at start, the rest is filled basing on OBJECT.
315 `gl_state.*_i' are the intervals, and CHARPOS is further in the search
316 direction than the intervals - or in an interval. We update the
317 current syntax-table basing on the property of this interval, and
318 update the interval to start further than CHARPOS - or be
319 NULL. We also update lim_property to be the next value of
320 charpos to call this subroutine again - or be before/after the
321 start/end of OBJECT. */
323 void
324 update_syntax_table (ptrdiff_t charpos, EMACS_INT count, bool init,
325 Lisp_Object object)
327 Lisp_Object tmp_table;
328 int cnt = 0;
329 bool invalidate = true;
330 INTERVAL i;
332 if (init)
334 gl_state.old_prop = Qnil;
335 gl_state.start = gl_state.b_property;
336 gl_state.stop = gl_state.e_property;
337 i = interval_of (charpos, object);
338 gl_state.backward_i = gl_state.forward_i = i;
339 invalidate = false;
340 if (!i)
341 return;
342 /* interval_of updates only ->position of the return value, so
343 update the parents manually to speed up update_interval. */
344 while (!NULL_PARENT (i))
346 if (AM_RIGHT_CHILD (i))
347 INTERVAL_PARENT (i)->position = i->position
348 - LEFT_TOTAL_LENGTH (i) + TOTAL_LENGTH (i) /* right end */
349 - TOTAL_LENGTH (INTERVAL_PARENT (i))
350 + LEFT_TOTAL_LENGTH (INTERVAL_PARENT (i));
351 else
352 INTERVAL_PARENT (i)->position = i->position - LEFT_TOTAL_LENGTH (i)
353 + TOTAL_LENGTH (i);
354 i = INTERVAL_PARENT (i);
356 i = gl_state.forward_i;
357 gl_state.b_property = i->position - gl_state.offset;
358 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
359 goto update;
361 i = count > 0 ? gl_state.forward_i : gl_state.backward_i;
363 /* We are guaranteed to be called with CHARPOS either in i,
364 or further off. */
365 if (!i)
366 error ("Error in syntax_table logic for to-the-end intervals");
367 else if (charpos < i->position) /* Move left. */
369 if (count > 0)
370 error ("Error in syntax_table logic for intervals <-");
371 /* Update the interval. */
372 i = update_interval (i, charpos);
373 if (INTERVAL_LAST_POS (i) != gl_state.b_property)
375 invalidate = false;
376 gl_state.forward_i = i;
377 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
380 else if (charpos >= INTERVAL_LAST_POS (i)) /* Move right. */
382 if (count < 0)
383 error ("Error in syntax_table logic for intervals ->");
384 /* Update the interval. */
385 i = update_interval (i, charpos);
386 if (i->position != gl_state.e_property)
388 invalidate = false;
389 gl_state.backward_i = i;
390 gl_state.b_property = i->position - gl_state.offset;
394 update:
395 tmp_table = textget (i->plist, Qsyntax_table);
397 if (invalidate)
398 invalidate = !EQ (tmp_table, gl_state.old_prop); /* Need to invalidate? */
400 if (invalidate) /* Did not get to adjacent interval. */
401 { /* with the same table => */
402 /* invalidate the old range. */
403 if (count > 0)
405 gl_state.backward_i = i;
406 gl_state.b_property = i->position - gl_state.offset;
408 else
410 gl_state.forward_i = i;
411 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
415 if (!EQ (tmp_table, gl_state.old_prop))
417 gl_state.current_syntax_table = tmp_table;
418 gl_state.old_prop = tmp_table;
419 if (EQ (Fsyntax_table_p (tmp_table), Qt))
421 gl_state.use_global = 0;
423 else if (CONSP (tmp_table))
425 gl_state.use_global = 1;
426 gl_state.global_code = tmp_table;
428 else
430 gl_state.use_global = 0;
431 gl_state.current_syntax_table = BVAR (current_buffer, syntax_table);
435 while (i)
437 if (cnt && !EQ (tmp_table, textget (i->plist, Qsyntax_table)))
439 if (count > 0)
441 gl_state.e_property = i->position - gl_state.offset;
442 gl_state.forward_i = i;
444 else
446 gl_state.b_property
447 = i->position + LENGTH (i) - gl_state.offset;
448 gl_state.backward_i = i;
450 return;
452 else if (cnt == INTERVALS_AT_ONCE)
454 if (count > 0)
456 gl_state.e_property
457 = i->position + LENGTH (i) - gl_state.offset
458 /* e_property at EOB is not set to ZV but to ZV+1, so that
459 we can do INC(from);UPDATE_SYNTAX_TABLE_FORWARD without
460 having to check eob between the two. */
461 + (next_interval (i) ? 0 : 1);
462 gl_state.forward_i = i;
464 else
466 gl_state.b_property = i->position - gl_state.offset;
467 gl_state.backward_i = i;
469 return;
471 cnt++;
472 i = count > 0 ? next_interval (i) : previous_interval (i);
474 eassert (i == NULL); /* This property goes to the end. */
475 if (count > 0)
477 gl_state.e_property = gl_state.stop;
478 gl_state.forward_i = i;
480 else
481 gl_state.b_property = gl_state.start;
484 static void
485 parse_sexp_propertize (ptrdiff_t charpos)
487 EMACS_INT zv = ZV;
488 if (syntax_propertize__done <= charpos
489 && syntax_propertize__done < zv)
491 EMACS_INT modiffs = CHARS_MODIFF;
492 safe_call1 (Qinternal__syntax_propertize,
493 make_number (min (zv, 1 + charpos)));
494 if (modiffs != CHARS_MODIFF)
495 error ("parse-sexp-propertize-function modified the buffer!");
496 if (syntax_propertize__done <= charpos
497 && syntax_propertize__done < zv)
498 error ("parse-sexp-propertize-function did not move"
499 " syntax-propertize--done");
500 SETUP_SYNTAX_TABLE (charpos, 1);
502 else if (gl_state.e_property > syntax_propertize__done)
504 gl_state.e_property = syntax_propertize__done;
505 gl_state.e_property_truncated = true;
507 else if (gl_state.e_property_truncated
508 && gl_state.e_property < syntax_propertize__done)
509 { /* When moving backward, e_property might be set without resetting
510 e_property_truncated, so the e_property_truncated flag may
511 occasionally be left raised spuriously. This should be rare. */
512 gl_state.e_property_truncated = false;
513 update_syntax_table_forward (charpos, false, Qnil);
517 void
518 update_syntax_table_forward (ptrdiff_t charpos, bool init,
519 Lisp_Object object)
521 if (gl_state.e_property_truncated)
523 eassert (NILP (object));
524 eassert (charpos >= gl_state.e_property);
525 parse_sexp_propertize (charpos);
527 else
529 update_syntax_table (charpos, 1, init, object);
530 if (NILP (object) && gl_state.e_property > syntax_propertize__done)
531 parse_sexp_propertize (charpos);
535 /* Returns true if char at CHARPOS is quoted.
536 Global syntax-table data should be set up already to be good at CHARPOS
537 or after. On return global syntax data is good for lookup at CHARPOS. */
539 static bool
540 char_quoted (ptrdiff_t charpos, ptrdiff_t bytepos)
542 enum syntaxcode code;
543 ptrdiff_t beg = BEGV;
544 bool quoted = 0;
545 ptrdiff_t orig = charpos;
547 while (charpos > beg)
549 int c;
550 DEC_BOTH (charpos, bytepos);
552 UPDATE_SYNTAX_TABLE_BACKWARD (charpos);
553 c = FETCH_CHAR_AS_MULTIBYTE (bytepos);
554 code = SYNTAX (c);
555 if (! (code == Scharquote || code == Sescape))
556 break;
558 quoted = !quoted;
561 UPDATE_SYNTAX_TABLE (orig);
562 return quoted;
565 /* Return the bytepos one character before BYTEPOS.
566 We assume that BYTEPOS is not at the start of the buffer. */
568 static ptrdiff_t
569 dec_bytepos (ptrdiff_t bytepos)
571 if (NILP (BVAR (current_buffer, enable_multibyte_characters)))
572 return bytepos - 1;
574 DEC_POS (bytepos);
575 return bytepos;
578 /* Return a defun-start position before POS and not too far before.
579 It should be the last one before POS, or nearly the last.
581 When open_paren_in_column_0_is_defun_start is nonzero,
582 only the beginning of the buffer is treated as a defun-start.
584 We record the information about where the scan started
585 and what its result was, so that another call in the same area
586 can return the same value very quickly.
588 There is no promise at which position the global syntax data is
589 valid on return from the subroutine, so the caller should explicitly
590 update the global data. */
592 static ptrdiff_t
593 find_defun_start (ptrdiff_t pos, ptrdiff_t pos_byte)
595 ptrdiff_t opoint = PT, opoint_byte = PT_BYTE;
597 /* Use previous finding, if it's valid and applies to this inquiry. */
598 if (current_buffer == find_start_buffer
599 /* Reuse the defun-start even if POS is a little farther on.
600 POS might be in the next defun, but that's ok.
601 Our value may not be the best possible, but will still be usable. */
602 && pos <= find_start_pos + 1000
603 && pos >= find_start_value
604 && BEGV == find_start_begv
605 && MODIFF == find_start_modiff)
606 return find_start_value;
608 if (!open_paren_in_column_0_is_defun_start)
610 find_start_value = BEGV;
611 find_start_value_byte = BEGV_BYTE;
612 goto found;
615 /* Back up to start of line. */
616 scan_newline (pos, pos_byte, BEGV, BEGV_BYTE, -1, 1);
618 /* We optimize syntax-table lookup for rare updates. Thus we accept
619 only those `^\s(' which are good in global _and_ text-property
620 syntax-tables. */
621 SETUP_BUFFER_SYNTAX_TABLE ();
622 while (PT > BEGV)
624 /* Open-paren at start of line means we may have found our
625 defun-start. */
626 int c = FETCH_CHAR_AS_MULTIBYTE (PT_BYTE);
627 if (SYNTAX (c) == Sopen)
629 SETUP_SYNTAX_TABLE (PT + 1, -1); /* Try again... */
630 c = FETCH_CHAR_AS_MULTIBYTE (PT_BYTE);
631 if (SYNTAX (c) == Sopen)
632 break;
633 /* Now fallback to the default value. */
634 SETUP_BUFFER_SYNTAX_TABLE ();
636 /* Move to beg of previous line. */
637 scan_newline (PT, PT_BYTE, BEGV, BEGV_BYTE, -2, 1);
640 /* Record what we found, for the next try. */
641 find_start_value = PT;
642 find_start_value_byte = PT_BYTE;
643 TEMP_SET_PT_BOTH (opoint, opoint_byte);
645 found:
646 find_start_buffer = current_buffer;
647 find_start_modiff = MODIFF;
648 find_start_begv = BEGV;
649 find_start_pos = pos;
651 return find_start_value;
654 /* Return the SYNTAX_COMEND_FIRST of the character before POS, POS_BYTE. */
656 static bool
657 prev_char_comend_first (ptrdiff_t pos, ptrdiff_t pos_byte)
659 int c;
660 bool val;
662 DEC_BOTH (pos, pos_byte);
663 UPDATE_SYNTAX_TABLE_BACKWARD (pos);
664 c = FETCH_CHAR (pos_byte);
665 val = SYNTAX_COMEND_FIRST (c);
666 UPDATE_SYNTAX_TABLE_FORWARD (pos + 1);
667 return val;
670 /* Check whether charpos FROM is at the end of a comment.
671 FROM_BYTE is the bytepos corresponding to FROM.
672 Do not move back before STOP.
674 Return true if we find a comment ending at FROM/FROM_BYTE.
676 If successful, store the charpos of the comment's beginning
677 into *CHARPOS_PTR, and the bytepos into *BYTEPOS_PTR.
679 Global syntax data remains valid for backward search starting at
680 the returned value (or at FROM, if the search was not successful). */
682 static bool
683 back_comment (ptrdiff_t from, ptrdiff_t from_byte, ptrdiff_t stop,
684 bool comnested, int comstyle, ptrdiff_t *charpos_ptr,
685 ptrdiff_t *bytepos_ptr)
687 /* Look back, counting the parity of string-quotes,
688 and recording the comment-starters seen.
689 When we reach a safe place, assume that's not in a string;
690 then step the main scan to the earliest comment-starter seen
691 an even number of string quotes away from the safe place.
693 OFROM[I] is position of the earliest comment-starter seen
694 which is I+2X quotes from the comment-end.
695 PARITY is current parity of quotes from the comment end. */
696 int string_style = -1; /* Presumed outside of any string. */
697 bool string_lossage = 0;
698 /* Not a real lossage: indicates that we have passed a matching comment
699 starter plus a non-matching comment-ender, meaning that any matching
700 comment-starter we might see later could be a false positive (hidden
701 inside another comment).
702 Test case: { a (* b } c (* d *) */
703 bool comment_lossage = 0;
704 ptrdiff_t comment_end = from;
705 ptrdiff_t comment_end_byte = from_byte;
706 ptrdiff_t comstart_pos = 0;
707 ptrdiff_t comstart_byte;
708 /* Place where the containing defun starts,
709 or 0 if we didn't come across it yet. */
710 ptrdiff_t defun_start = 0;
711 ptrdiff_t defun_start_byte = 0;
712 enum syntaxcode code;
713 ptrdiff_t nesting = 1; /* Current comment nesting. */
714 int c;
715 int syntax = 0;
716 unsigned short int quit_count = 0;
718 /* FIXME: A }} comment-ender style leads to incorrect behavior
719 in the case of {{ c }}} because we ignore the last two chars which are
720 assumed to be comment-enders although they aren't. */
722 /* At beginning of range to scan, we're outside of strings;
723 that determines quote parity to the comment-end. */
724 while (from != stop)
726 rarely_quit (++quit_count);
728 ptrdiff_t temp_byte;
729 int prev_syntax;
730 bool com2start, com2end, comstart;
732 /* Move back and examine a character. */
733 DEC_BOTH (from, from_byte);
734 UPDATE_SYNTAX_TABLE_BACKWARD (from);
736 prev_syntax = syntax;
737 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
738 syntax = SYNTAX_WITH_FLAGS (c);
739 code = SYNTAX (c);
741 /* Check for 2-char comment markers. */
742 com2start = (SYNTAX_FLAGS_COMSTART_FIRST (syntax)
743 && SYNTAX_FLAGS_COMSTART_SECOND (prev_syntax)
744 && (comstyle
745 == SYNTAX_FLAGS_COMMENT_STYLE (prev_syntax, syntax))
746 && (SYNTAX_FLAGS_COMMENT_NESTED (prev_syntax)
747 || SYNTAX_FLAGS_COMMENT_NESTED (syntax)) == comnested);
748 com2end = (SYNTAX_FLAGS_COMEND_FIRST (syntax)
749 && SYNTAX_FLAGS_COMEND_SECOND (prev_syntax));
750 comstart = (com2start || code == Scomment);
752 /* Nasty cases with overlapping 2-char comment markers:
753 - snmp-mode: -- c -- foo -- c --
754 --- c --
755 ------ c --
756 - c-mode: *||*
757 |* *|* *|
758 |*| |* |*|
759 /// */
761 /* If a 2-char comment sequence partly overlaps with another,
762 we don't try to be clever. E.g. |*| in C, or }% in modes that
763 have %..\n and %{..}%. */
764 if (from > stop && (com2end || comstart))
766 ptrdiff_t next = from, next_byte = from_byte;
767 int next_c, next_syntax;
768 DEC_BOTH (next, next_byte);
769 UPDATE_SYNTAX_TABLE_BACKWARD (next);
770 next_c = FETCH_CHAR_AS_MULTIBYTE (next_byte);
771 next_syntax = SYNTAX_WITH_FLAGS (next_c);
772 if (((comstart || comnested)
773 && SYNTAX_FLAGS_COMEND_SECOND (syntax)
774 && SYNTAX_FLAGS_COMEND_FIRST (next_syntax))
775 || ((com2end || comnested)
776 && SYNTAX_FLAGS_COMSTART_SECOND (syntax)
777 && (comstyle
778 == SYNTAX_FLAGS_COMMENT_STYLE (syntax, prev_syntax))
779 && SYNTAX_FLAGS_COMSTART_FIRST (next_syntax)))
780 goto lossage;
781 /* UPDATE_SYNTAX_TABLE_FORWARD (next + 1); */
784 if (com2start && comstart_pos == 0)
785 /* We're looking at a comment starter. But it might be a comment
786 ender as well (see snmp-mode). The first time we see one, we
787 need to consider it as a comment starter,
788 and the subsequent times as a comment ender. */
789 com2end = 0;
791 /* Turn a 2-char comment sequences into the appropriate syntax. */
792 if (com2end)
793 code = Sendcomment;
794 else if (com2start)
795 code = Scomment;
796 /* Ignore comment starters of a different style. */
797 else if (code == Scomment
798 && (comstyle != SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0)
799 || SYNTAX_FLAGS_COMMENT_NESTED (syntax) != comnested))
800 continue;
802 /* Ignore escaped characters, except comment-enders which cannot
803 be escaped. */
804 if ((Vcomment_end_can_be_escaped || code != Sendcomment)
805 && char_quoted (from, from_byte))
806 continue;
808 switch (code)
810 case Sstring_fence:
811 case Scomment_fence:
812 c = (code == Sstring_fence ? ST_STRING_STYLE : ST_COMMENT_STYLE);
813 FALLTHROUGH;
814 case Sstring:
815 /* Track parity of quotes. */
816 if (string_style == -1)
817 /* Entering a string. */
818 string_style = c;
819 else if (string_style == c)
820 /* Leaving the string. */
821 string_style = -1;
822 else
823 /* If we have two kinds of string delimiters.
824 There's no way to grok this scanning backwards. */
825 string_lossage = 1;
826 break;
828 case Scomment:
829 /* We've already checked that it is the relevant comstyle. */
830 if (string_style != -1 || comment_lossage || string_lossage)
831 /* There are odd string quotes involved, so let's be careful.
832 Test case in Pascal: " { " a { " } */
833 goto lossage;
835 if (!comnested)
837 /* Record best comment-starter so far. */
838 comstart_pos = from;
839 comstart_byte = from_byte;
841 else if (--nesting <= 0)
842 /* nested comments have to be balanced, so we don't need to
843 keep looking for earlier ones. We use here the same (slightly
844 incorrect) reasoning as below: since it is followed by uniform
845 paired string quotes, this comment-start has to be outside of
846 strings, else the comment-end itself would be inside a string. */
847 goto done;
848 break;
850 case Sendcomment:
851 if (SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0) == comstyle
852 && ((com2end && SYNTAX_FLAGS_COMMENT_NESTED (prev_syntax))
853 || SYNTAX_FLAGS_COMMENT_NESTED (syntax)) == comnested)
854 /* This is the same style of comment ender as ours. */
856 if (comnested)
857 nesting++;
858 else
859 /* Anything before that can't count because it would match
860 this comment-ender rather than ours. */
861 from = stop; /* Break out of the loop. */
863 else if (comstart_pos != 0 || c != '\n')
864 /* We're mixing comment styles here, so we'd better be careful.
865 The (comstart_pos != 0 || c != '\n') check is not quite correct
866 (we should just always set comment_lossage), but removing it
867 would imply that any multiline comment in C would go through
868 lossage, which seems overkill.
869 The failure should only happen in the rare cases such as
870 { (* } *) */
871 comment_lossage = 1;
872 break;
874 case Sopen:
875 /* Assume a defun-start point is outside of strings. */
876 if (open_paren_in_column_0_is_defun_start
877 && (from == stop
878 || (temp_byte = dec_bytepos (from_byte),
879 FETCH_CHAR (temp_byte) == '\n')))
881 defun_start = from;
882 defun_start_byte = from_byte;
883 from = stop; /* Break out of the loop. */
885 break;
887 default:
888 break;
892 if (comstart_pos == 0)
894 from = comment_end;
895 from_byte = comment_end_byte;
896 UPDATE_SYNTAX_TABLE_FORWARD (comment_end);
898 /* If comstart_pos is set and we get here (ie. didn't jump to `lossage'
899 or `done'), then we've found the beginning of the non-nested comment. */
900 else if (1) /* !comnested */
902 from = comstart_pos;
903 from_byte = comstart_byte;
904 UPDATE_SYNTAX_TABLE_FORWARD (from - 1);
906 else lossage:
908 struct lisp_parse_state state;
909 bool adjusted = true;
910 /* We had two kinds of string delimiters mixed up
911 together. Decode this going forwards.
912 Scan fwd from a known safe place (beginning-of-defun)
913 to the one in question; this records where we
914 last passed a comment starter. */
915 /* If we did not already find the defun start, find it now. */
916 if (defun_start == 0)
918 defun_start = find_defun_start (comment_end, comment_end_byte);
919 defun_start_byte = find_start_value_byte;
920 adjusted = (defun_start > BEGV);
924 internalize_parse_state (Qnil, &state);
925 scan_sexps_forward (&state,
926 defun_start, defun_start_byte,
927 comment_end, TYPE_MINIMUM (EMACS_INT),
928 0, 0);
929 defun_start = comment_end;
930 if (!adjusted)
932 adjusted = true;
933 find_start_value
934 = CONSP (state.levelstarts) ? XINT (XCAR (state.levelstarts))
935 : state.thislevelstart >= 0 ? state.thislevelstart
936 : find_start_value;
937 find_start_value_byte = CHAR_TO_BYTE (find_start_value);
940 if (state.incomment == (comnested ? 1 : -1)
941 && state.comstyle == comstyle)
942 from = state.comstr_start;
943 else
945 from = comment_end;
946 if (state.incomment)
947 /* If comment_end is inside some other comment, maybe ours
948 is nested, so we need to try again from within the
949 surrounding comment. Example: { a (* " *) */
951 /* FIXME: We should advance by one or two chars. */
952 defun_start = state.comstr_start + 2;
953 defun_start_byte = CHAR_TO_BYTE (defun_start);
956 rarely_quit (++quit_count);
958 while (defun_start < comment_end);
960 from_byte = CHAR_TO_BYTE (from);
961 UPDATE_SYNTAX_TABLE_FORWARD (from - 1);
964 done:
965 *charpos_ptr = from;
966 *bytepos_ptr = from_byte;
968 return from != comment_end;
971 DEFUN ("syntax-table-p", Fsyntax_table_p, Ssyntax_table_p, 1, 1, 0,
972 doc: /* Return t if OBJECT is a syntax table.
973 Currently, any char-table counts as a syntax table. */)
974 (Lisp_Object object)
976 if (CHAR_TABLE_P (object)
977 && EQ (XCHAR_TABLE (object)->purpose, Qsyntax_table))
978 return Qt;
979 return Qnil;
982 static void
983 check_syntax_table (Lisp_Object obj)
985 CHECK_TYPE (CHAR_TABLE_P (obj) && EQ (XCHAR_TABLE (obj)->purpose, Qsyntax_table),
986 Qsyntax_table_p, obj);
989 DEFUN ("syntax-table", Fsyntax_table, Ssyntax_table, 0, 0, 0,
990 doc: /* Return the current syntax table.
991 This is the one specified by the current buffer. */)
992 (void)
994 return BVAR (current_buffer, syntax_table);
997 DEFUN ("standard-syntax-table", Fstandard_syntax_table,
998 Sstandard_syntax_table, 0, 0, 0,
999 doc: /* Return the standard syntax table.
1000 This is the one used for new buffers. */)
1001 (void)
1003 return Vstandard_syntax_table;
1006 DEFUN ("copy-syntax-table", Fcopy_syntax_table, Scopy_syntax_table, 0, 1, 0,
1007 doc: /* Construct a new syntax table and return it.
1008 It is a copy of the TABLE, which defaults to the standard syntax table. */)
1009 (Lisp_Object table)
1011 Lisp_Object copy;
1013 if (!NILP (table))
1014 check_syntax_table (table);
1015 else
1016 table = Vstandard_syntax_table;
1018 copy = Fcopy_sequence (table);
1020 /* Only the standard syntax table should have a default element.
1021 Other syntax tables should inherit from parents instead. */
1022 set_char_table_defalt (copy, Qnil);
1024 /* Copied syntax tables should all have parents.
1025 If we copied one with no parent, such as the standard syntax table,
1026 use the standard syntax table as the copy's parent. */
1027 if (NILP (XCHAR_TABLE (copy)->parent))
1028 Fset_char_table_parent (copy, Vstandard_syntax_table);
1029 return copy;
1032 DEFUN ("set-syntax-table", Fset_syntax_table, Sset_syntax_table, 1, 1, 0,
1033 doc: /* Select a new syntax table for the current buffer.
1034 One argument, a syntax table. */)
1035 (Lisp_Object table)
1037 int idx;
1038 check_syntax_table (table);
1039 bset_syntax_table (current_buffer, table);
1040 /* Indicate that this buffer now has a specified syntax table. */
1041 idx = PER_BUFFER_VAR_IDX (syntax_table);
1042 SET_PER_BUFFER_VALUE_P (current_buffer, idx, 1);
1043 return table;
1046 /* Convert a letter which signifies a syntax code
1047 into the code it signifies.
1048 This is used by modify-syntax-entry, and other things. */
1050 unsigned char const syntax_spec_code[0400] =
1051 { 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1052 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1053 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1054 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1055 Swhitespace, Scomment_fence, Sstring, 0377, Smath, 0377, 0377, Squote,
1056 Sopen, Sclose, 0377, 0377, 0377, Swhitespace, Spunct, Scharquote,
1057 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1058 0377, 0377, 0377, 0377, Scomment, 0377, Sendcomment, 0377,
1059 Sinherit, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* @, A ... */
1060 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1061 0377, 0377, 0377, 0377, 0377, 0377, 0377, Sword,
1062 0377, 0377, 0377, 0377, Sescape, 0377, 0377, Ssymbol,
1063 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* `, a, ... */
1064 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1065 0377, 0377, 0377, 0377, 0377, 0377, 0377, Sword,
1066 0377, 0377, 0377, 0377, Sstring_fence, 0377, 0377, 0377
1069 /* Indexed by syntax code, give the letter that describes it. */
1071 char const syntax_code_spec[16] =
1073 ' ', '.', 'w', '_', '(', ')', '\'', '\"', '$', '\\', '/', '<', '>', '@',
1074 '!', '|'
1077 /* Indexed by syntax code, give the object (cons of syntax code and
1078 nil) to be stored in syntax table. Since these objects can be
1079 shared among syntax tables, we generate them in advance. By
1080 sharing objects, the function `describe-syntax' can give a more
1081 compact listing. */
1082 static Lisp_Object Vsyntax_code_object;
1085 DEFUN ("char-syntax", Fchar_syntax, Schar_syntax, 1, 1, 0,
1086 doc: /* Return the syntax code of CHARACTER, described by a character.
1087 For example, if CHARACTER is a word constituent, the
1088 character `w' (119) is returned.
1089 The characters that correspond to various syntax codes
1090 are listed in the documentation of `modify-syntax-entry'.
1092 If you're trying to determine the syntax of characters in the buffer,
1093 this is probably the wrong function to use, because it can't take
1094 `syntax-table' text properties into account. Consider using
1095 `syntax-after' instead. */)
1096 (Lisp_Object character)
1098 int char_int;
1099 CHECK_CHARACTER (character);
1100 char_int = XINT (character);
1101 SETUP_BUFFER_SYNTAX_TABLE ();
1102 return make_number (syntax_code_spec[SYNTAX (char_int)]);
1105 DEFUN ("matching-paren", Fmatching_paren, Smatching_paren, 1, 1, 0,
1106 doc: /* Return the matching parenthesis of CHARACTER, or nil if none. */)
1107 (Lisp_Object character)
1109 int char_int;
1110 enum syntaxcode code;
1111 CHECK_CHARACTER (character);
1112 char_int = XINT (character);
1113 SETUP_BUFFER_SYNTAX_TABLE ();
1114 code = SYNTAX (char_int);
1115 if (code == Sopen || code == Sclose)
1116 return SYNTAX_MATCH (char_int);
1117 return Qnil;
1120 DEFUN ("string-to-syntax", Fstring_to_syntax, Sstring_to_syntax, 1, 1, 0,
1121 doc: /* Convert a syntax descriptor STRING into a raw syntax descriptor.
1122 STRING should be a string of the form allowed as argument of
1123 `modify-syntax-entry'. The return value is a raw syntax descriptor: a
1124 cons cell (CODE . MATCHING-CHAR) which can be used, for example, as
1125 the value of a `syntax-table' text property. */)
1126 (Lisp_Object string)
1128 const unsigned char *p;
1129 int val;
1130 Lisp_Object match;
1132 CHECK_STRING (string);
1134 p = SDATA (string);
1135 val = syntax_spec_code[*p++];
1136 if (val == 0377)
1137 error ("Invalid syntax description letter: %c", p[-1]);
1139 if (val == Sinherit)
1140 return Qnil;
1142 if (*p)
1144 int len;
1145 int character = STRING_CHAR_AND_LENGTH (p, len);
1146 XSETINT (match, character);
1147 if (XFASTINT (match) == ' ')
1148 match = Qnil;
1149 p += len;
1151 else
1152 match = Qnil;
1154 while (*p)
1155 switch (*p++)
1157 case '1':
1158 val |= 1 << 16;
1159 break;
1161 case '2':
1162 val |= 1 << 17;
1163 break;
1165 case '3':
1166 val |= 1 << 18;
1167 break;
1169 case '4':
1170 val |= 1 << 19;
1171 break;
1173 case 'p':
1174 val |= 1 << 20;
1175 break;
1177 case 'b':
1178 val |= 1 << 21;
1179 break;
1181 case 'n':
1182 val |= 1 << 22;
1183 break;
1185 case 'c':
1186 val |= 1 << 23;
1187 break;
1190 if (val < ASIZE (Vsyntax_code_object) && NILP (match))
1191 return AREF (Vsyntax_code_object, val);
1192 else
1193 /* Since we can't use a shared object, let's make a new one. */
1194 return Fcons (make_number (val), match);
1197 /* I really don't know why this is interactive
1198 help-form should at least be made useful whilst reading the second arg. */
1199 DEFUN ("modify-syntax-entry", Fmodify_syntax_entry, Smodify_syntax_entry, 2, 3,
1200 "cSet syntax for character: \nsSet syntax for %s to: ",
1201 doc: /* Set syntax for character CHAR according to string NEWENTRY.
1202 The syntax is changed only for table SYNTAX-TABLE, which defaults to
1203 the current buffer's syntax table.
1204 CHAR may be a cons (MIN . MAX), in which case, syntaxes of all characters
1205 in the range MIN to MAX are changed.
1206 The first character of NEWENTRY should be one of the following:
1207 Space or - whitespace syntax. w word constituent.
1208 _ symbol constituent. . punctuation.
1209 ( open-parenthesis. ) close-parenthesis.
1210 " string quote. \\ escape.
1211 $ paired delimiter. \\=' expression quote or prefix operator.
1212 < comment starter. > comment ender.
1213 / character-quote. @ inherit from parent table.
1214 | generic string fence. ! generic comment fence.
1216 Only single-character comment start and end sequences are represented thus.
1217 Two-character sequences are represented as described below.
1218 The second character of NEWENTRY is the matching parenthesis,
1219 used only if the first character is `(' or `)'.
1220 Any additional characters are flags.
1221 Defined flags are the characters 1, 2, 3, 4, b, p, and n.
1222 1 means CHAR is the start of a two-char comment start sequence.
1223 2 means CHAR is the second character of such a sequence.
1224 3 means CHAR is the start of a two-char comment end sequence.
1225 4 means CHAR is the second character of such a sequence.
1227 There can be several orthogonal comment sequences. This is to support
1228 language modes such as C++. By default, all comment sequences are of style
1229 a, but you can set the comment sequence style to b (on the second character
1230 of a comment-start, and the first character of a comment-end sequence) and/or
1231 c (on any of its chars) using this flag:
1232 b means CHAR is part of comment sequence b.
1233 c means CHAR is part of comment sequence c.
1234 n means CHAR is part of a nestable comment sequence.
1236 p means CHAR is a prefix character for `backward-prefix-chars';
1237 such characters are treated as whitespace when they occur
1238 between expressions.
1239 usage: (modify-syntax-entry CHAR NEWENTRY &optional SYNTAX-TABLE) */)
1240 (Lisp_Object c, Lisp_Object newentry, Lisp_Object syntax_table)
1242 if (CONSP (c))
1244 CHECK_CHARACTER_CAR (c);
1245 CHECK_CHARACTER_CDR (c);
1247 else
1248 CHECK_CHARACTER (c);
1250 if (NILP (syntax_table))
1251 syntax_table = BVAR (current_buffer, syntax_table);
1252 else
1253 check_syntax_table (syntax_table);
1255 newentry = Fstring_to_syntax (newentry);
1256 if (CONSP (c))
1257 SET_RAW_SYNTAX_ENTRY_RANGE (syntax_table, c, newentry);
1258 else
1259 SET_RAW_SYNTAX_ENTRY (syntax_table, XINT (c), newentry);
1261 /* We clear the regexp cache, since character classes can now have
1262 different values from those in the compiled regexps.*/
1263 clear_regexp_cache ();
1265 return Qnil;
1268 /* Dump syntax table to buffer in human-readable format */
1270 DEFUN ("internal-describe-syntax-value", Finternal_describe_syntax_value,
1271 Sinternal_describe_syntax_value, 1, 1, 0,
1272 doc: /* Insert a description of the internal syntax description SYNTAX at point. */)
1273 (Lisp_Object syntax)
1275 int code, syntax_code;
1276 bool start1, start2, end1, end2, prefix, comstyleb, comstylec, comnested;
1277 char str[2];
1278 Lisp_Object first, match_lisp, value = syntax;
1280 if (NILP (value))
1282 insert_string ("default");
1283 return syntax;
1286 if (CHAR_TABLE_P (value))
1288 insert_string ("deeper char-table ...");
1289 return syntax;
1292 if (!CONSP (value))
1294 insert_string ("invalid");
1295 return syntax;
1298 first = XCAR (value);
1299 match_lisp = XCDR (value);
1301 if (!INTEGERP (first) || !(NILP (match_lisp) || CHARACTERP (match_lisp)))
1303 insert_string ("invalid");
1304 return syntax;
1307 syntax_code = XINT (first) & INT_MAX;
1308 code = syntax_code & 0377;
1309 start1 = SYNTAX_FLAGS_COMSTART_FIRST (syntax_code);
1310 start2 = SYNTAX_FLAGS_COMSTART_SECOND (syntax_code);
1311 end1 = SYNTAX_FLAGS_COMEND_FIRST (syntax_code);
1312 end2 = SYNTAX_FLAGS_COMEND_SECOND (syntax_code);
1313 prefix = SYNTAX_FLAGS_PREFIX (syntax_code);
1314 comstyleb = SYNTAX_FLAGS_COMMENT_STYLEB (syntax_code);
1315 comstylec = SYNTAX_FLAGS_COMMENT_STYLEC (syntax_code);
1316 comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax_code);
1318 if (Smax <= code)
1320 insert_string ("invalid");
1321 return syntax;
1324 str[0] = syntax_code_spec[code], str[1] = 0;
1325 insert (str, 1);
1327 if (NILP (match_lisp))
1328 insert (" ", 1);
1329 else
1330 insert_char (XINT (match_lisp));
1332 if (start1)
1333 insert ("1", 1);
1334 if (start2)
1335 insert ("2", 1);
1337 if (end1)
1338 insert ("3", 1);
1339 if (end2)
1340 insert ("4", 1);
1342 if (prefix)
1343 insert ("p", 1);
1344 if (comstyleb)
1345 insert ("b", 1);
1346 if (comstylec)
1347 insert ("c", 1);
1348 if (comnested)
1349 insert ("n", 1);
1351 insert_string ("\twhich means: ");
1353 switch (code)
1355 case Swhitespace:
1356 insert_string ("whitespace"); break;
1357 case Spunct:
1358 insert_string ("punctuation"); break;
1359 case Sword:
1360 insert_string ("word"); break;
1361 case Ssymbol:
1362 insert_string ("symbol"); break;
1363 case Sopen:
1364 insert_string ("open"); break;
1365 case Sclose:
1366 insert_string ("close"); break;
1367 case Squote:
1368 insert_string ("prefix"); break;
1369 case Sstring:
1370 insert_string ("string"); break;
1371 case Smath:
1372 insert_string ("math"); break;
1373 case Sescape:
1374 insert_string ("escape"); break;
1375 case Scharquote:
1376 insert_string ("charquote"); break;
1377 case Scomment:
1378 insert_string ("comment"); break;
1379 case Sendcomment:
1380 insert_string ("endcomment"); break;
1381 case Sinherit:
1382 insert_string ("inherit"); break;
1383 case Scomment_fence:
1384 insert_string ("comment fence"); break;
1385 case Sstring_fence:
1386 insert_string ("string fence"); break;
1387 default:
1388 insert_string ("invalid");
1389 return syntax;
1392 if (!NILP (match_lisp))
1394 insert_string (", matches ");
1395 insert_char (XINT (match_lisp));
1398 if (start1)
1399 insert_string (",\n\t is the first character of a comment-start sequence");
1400 if (start2)
1401 insert_string (",\n\t is the second character of a comment-start sequence");
1403 if (end1)
1404 insert_string (",\n\t is the first character of a comment-end sequence");
1405 if (end2)
1406 insert_string (",\n\t is the second character of a comment-end sequence");
1407 if (comstyleb)
1408 insert_string (" (comment style b)");
1409 if (comstylec)
1410 insert_string (" (comment style c)");
1411 if (comnested)
1412 insert_string (" (nestable)");
1414 if (prefix)
1416 AUTO_STRING (prefixdoc,
1417 ",\n\t is a prefix character for `backward-prefix-chars'");
1418 insert1 (Fsubstitute_command_keys (prefixdoc));
1421 return syntax;
1424 /* Return the position across COUNT words from FROM.
1425 If that many words cannot be found before the end of the buffer, return 0.
1426 COUNT negative means scan backward and stop at word beginning. */
1428 ptrdiff_t
1429 scan_words (ptrdiff_t from, EMACS_INT count)
1431 ptrdiff_t beg = BEGV;
1432 ptrdiff_t end = ZV;
1433 ptrdiff_t from_byte = CHAR_TO_BYTE (from);
1434 enum syntaxcode code;
1435 int ch0, ch1;
1436 Lisp_Object func, pos;
1438 SETUP_SYNTAX_TABLE (from, count);
1440 while (count > 0)
1442 while (true)
1444 if (from == end)
1445 return 0;
1446 UPDATE_SYNTAX_TABLE_FORWARD (from);
1447 ch0 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
1448 code = SYNTAX (ch0);
1449 INC_BOTH (from, from_byte);
1450 if (words_include_escapes
1451 && (code == Sescape || code == Scharquote))
1452 break;
1453 if (code == Sword)
1454 break;
1455 rarely_quit (from);
1457 /* Now CH0 is a character which begins a word and FROM is the
1458 position of the next character. */
1459 func = CHAR_TABLE_REF (Vfind_word_boundary_function_table, ch0);
1460 if (! NILP (Ffboundp (func)))
1462 pos = call2 (func, make_number (from - 1), make_number (end));
1463 if (INTEGERP (pos) && from < XINT (pos) && XINT (pos) <= ZV)
1465 from = XINT (pos);
1466 from_byte = CHAR_TO_BYTE (from);
1469 else
1471 while (1)
1473 if (from == end) break;
1474 UPDATE_SYNTAX_TABLE_FORWARD (from);
1475 ch1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
1476 code = SYNTAX (ch1);
1477 if ((code != Sword
1478 && (! words_include_escapes
1479 || (code != Sescape && code != Scharquote)))
1480 || word_boundary_p (ch0, ch1))
1481 break;
1482 INC_BOTH (from, from_byte);
1483 ch0 = ch1;
1484 rarely_quit (from);
1487 count--;
1489 while (count < 0)
1491 while (true)
1493 if (from == beg)
1494 return 0;
1495 DEC_BOTH (from, from_byte);
1496 UPDATE_SYNTAX_TABLE_BACKWARD (from);
1497 ch1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
1498 code = SYNTAX (ch1);
1499 if (words_include_escapes
1500 && (code == Sescape || code == Scharquote))
1501 break;
1502 if (code == Sword)
1503 break;
1504 rarely_quit (from);
1506 /* Now CH1 is a character which ends a word and FROM is the
1507 position of it. */
1508 func = CHAR_TABLE_REF (Vfind_word_boundary_function_table, ch1);
1509 if (! NILP (Ffboundp (func)))
1511 pos = call2 (func, make_number (from), make_number (beg));
1512 if (INTEGERP (pos) && BEGV <= XINT (pos) && XINT (pos) < from)
1514 from = XINT (pos);
1515 from_byte = CHAR_TO_BYTE (from);
1518 else
1520 while (1)
1522 if (from == beg)
1523 break;
1524 DEC_BOTH (from, from_byte);
1525 UPDATE_SYNTAX_TABLE_BACKWARD (from);
1526 ch0 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
1527 code = SYNTAX (ch0);
1528 if ((code != Sword
1529 && (! words_include_escapes
1530 || (code != Sescape && code != Scharquote)))
1531 || word_boundary_p (ch0, ch1))
1533 INC_BOTH (from, from_byte);
1534 break;
1536 ch1 = ch0;
1537 rarely_quit (from);
1540 count++;
1543 return from;
1546 DEFUN ("forward-word", Fforward_word, Sforward_word, 0, 1, "^p",
1547 doc: /* Move point forward ARG words (backward if ARG is negative).
1548 If ARG is omitted or nil, move point forward one word.
1549 Normally returns t.
1550 If an edge of the buffer or a field boundary is reached, point is
1551 left there and the function returns nil. Field boundaries are not
1552 noticed if `inhibit-field-text-motion' is non-nil.
1554 The word boundaries are normally determined by the buffer's syntax
1555 table, but `find-word-boundary-function-table', such as set up
1556 by `subword-mode', can change that. If a Lisp program needs to
1557 move by words determined strictly by the syntax table, it should
1558 use `forward-word-strictly' instead. */)
1559 (Lisp_Object arg)
1561 Lisp_Object tmp;
1562 ptrdiff_t orig_val, val;
1564 if (NILP (arg))
1565 XSETFASTINT (arg, 1);
1566 else
1567 CHECK_NUMBER (arg);
1569 val = orig_val = scan_words (PT, XINT (arg));
1570 if (! orig_val)
1571 val = XINT (arg) > 0 ? ZV : BEGV;
1573 /* Avoid jumping out of an input field. */
1574 tmp = Fconstrain_to_field (make_number (val), make_number (PT),
1575 Qnil, Qnil, Qnil);
1576 val = XFASTINT (tmp);
1578 SET_PT (val);
1579 return val == orig_val ? Qt : Qnil;
1582 DEFUN ("skip-chars-forward", Fskip_chars_forward, Sskip_chars_forward, 1, 2, 0,
1583 doc: /* Move point forward, stopping before a char not in STRING, or at pos LIM.
1584 STRING is like the inside of a `[...]' in a regular expression
1585 except that `]' is never special and `\\' quotes `^', `-' or `\\'
1586 (but not at the end of a range; quoting is never needed there).
1587 Thus, with arg "a-zA-Z", this skips letters stopping before first nonletter.
1588 With arg "^a-zA-Z", skips nonletters stopping before first letter.
1589 Char classes, e.g. `[:alpha:]', are supported.
1591 Returns the distance traveled, either zero or positive. */)
1592 (Lisp_Object string, Lisp_Object lim)
1594 return skip_chars (1, string, lim, 1);
1597 DEFUN ("skip-chars-backward", Fskip_chars_backward, Sskip_chars_backward, 1, 2, 0,
1598 doc: /* Move point backward, stopping after a char not in STRING, or at pos LIM.
1599 See `skip-chars-forward' for details.
1600 Returns the distance traveled, either zero or negative. */)
1601 (Lisp_Object string, Lisp_Object lim)
1603 return skip_chars (0, string, lim, 1);
1606 DEFUN ("skip-syntax-forward", Fskip_syntax_forward, Sskip_syntax_forward, 1, 2, 0,
1607 doc: /* Move point forward across chars in specified syntax classes.
1608 SYNTAX is a string of syntax code characters.
1609 Stop before a char whose syntax is not in SYNTAX, or at position LIM.
1610 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1611 This function returns the distance traveled, either zero or positive. */)
1612 (Lisp_Object syntax, Lisp_Object lim)
1614 return skip_syntaxes (1, syntax, lim);
1617 DEFUN ("skip-syntax-backward", Fskip_syntax_backward, Sskip_syntax_backward, 1, 2, 0,
1618 doc: /* Move point backward across chars in specified syntax classes.
1619 SYNTAX is a string of syntax code characters.
1620 Stop on reaching a char whose syntax is not in SYNTAX, or at position LIM.
1621 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1622 This function returns either zero or a negative number, and the absolute value
1623 of this is the distance traveled. */)
1624 (Lisp_Object syntax, Lisp_Object lim)
1626 return skip_syntaxes (0, syntax, lim);
1629 static Lisp_Object
1630 skip_chars (bool forwardp, Lisp_Object string, Lisp_Object lim,
1631 bool handle_iso_classes)
1633 int c;
1634 char fastmap[0400];
1635 /* Store the ranges of non-ASCII characters. */
1636 int *char_ranges UNINIT;
1637 int n_char_ranges = 0;
1638 bool negate = 0;
1639 ptrdiff_t i, i_byte;
1640 /* True if the current buffer is multibyte and the region contains
1641 non-ASCII chars. */
1642 bool multibyte;
1643 /* True if STRING is multibyte and it contains non-ASCII chars. */
1644 bool string_multibyte;
1645 ptrdiff_t size_byte;
1646 const unsigned char *str;
1647 int len;
1648 Lisp_Object iso_classes;
1649 USE_SAFE_ALLOCA;
1651 CHECK_STRING (string);
1652 iso_classes = Qnil;
1654 if (NILP (lim))
1655 XSETINT (lim, forwardp ? ZV : BEGV);
1656 else
1657 CHECK_NUMBER_COERCE_MARKER (lim);
1659 /* In any case, don't allow scan outside bounds of buffer. */
1660 if (XINT (lim) > ZV)
1661 XSETFASTINT (lim, ZV);
1662 if (XINT (lim) < BEGV)
1663 XSETFASTINT (lim, BEGV);
1665 multibyte = (!NILP (BVAR (current_buffer, enable_multibyte_characters))
1666 && (XINT (lim) - PT != CHAR_TO_BYTE (XINT (lim)) - PT_BYTE));
1667 string_multibyte = SBYTES (string) > SCHARS (string);
1669 memset (fastmap, 0, sizeof fastmap);
1671 str = SDATA (string);
1672 size_byte = SBYTES (string);
1674 i_byte = 0;
1675 if (i_byte < size_byte
1676 && SREF (string, 0) == '^')
1678 negate = 1; i_byte++;
1681 /* Find the characters specified and set their elements of fastmap.
1682 Handle backslashes and ranges specially.
1684 If STRING contains non-ASCII characters, setup char_ranges for
1685 them and use fastmap only for their leading codes. */
1687 if (! string_multibyte)
1689 bool string_has_eight_bit = 0;
1691 /* At first setup fastmap. */
1692 while (i_byte < size_byte)
1694 if (handle_iso_classes)
1696 const unsigned char *ch = str + i_byte;
1697 re_wctype_t cc = re_wctype_parse (&ch, size_byte - i_byte);
1698 if (cc == 0)
1699 error ("Invalid ISO C character class");
1700 if (cc != -1)
1702 iso_classes = Fcons (make_number (cc), iso_classes);
1703 i_byte = ch - str;
1704 continue;
1708 c = str[i_byte++];
1710 if (c == '\\')
1712 if (i_byte == size_byte)
1713 break;
1715 c = str[i_byte++];
1717 /* Treat `-' as range character only if another character
1718 follows. */
1719 if (i_byte + 1 < size_byte
1720 && str[i_byte] == '-')
1722 int c2;
1724 /* Skip over the dash. */
1725 i_byte++;
1727 /* Get the end of the range. */
1728 c2 = str[i_byte++];
1729 if (c2 == '\\'
1730 && i_byte < size_byte)
1731 c2 = str[i_byte++];
1733 if (c <= c2)
1735 int lim2 = c2 + 1;
1736 while (c < lim2)
1737 fastmap[c++] = 1;
1738 if (! ASCII_CHAR_P (c2))
1739 string_has_eight_bit = 1;
1742 else
1744 fastmap[c] = 1;
1745 if (! ASCII_CHAR_P (c))
1746 string_has_eight_bit = 1;
1750 /* If the current range is multibyte and STRING contains
1751 eight-bit chars, arrange fastmap and setup char_ranges for
1752 the corresponding multibyte chars. */
1753 if (multibyte && string_has_eight_bit)
1755 char *p1;
1756 char himap[0200 + 1];
1757 memcpy (himap, fastmap + 0200, 0200);
1758 himap[0200] = 0;
1759 memset (fastmap + 0200, 0, 0200);
1760 SAFE_NALLOCA (char_ranges, 2, 128);
1761 i = 0;
1763 while ((p1 = memchr (himap + i, 1, 0200 - i)))
1765 /* Deduce the next range C..C2 from the next clump of 1s
1766 in HIMAP starting with &HIMAP[I]. HIMAP is the high
1767 order half of the old FASTMAP. */
1768 int c2, leading_code;
1769 i = p1 - himap;
1770 c = BYTE8_TO_CHAR (i + 0200);
1771 i += strlen (p1);
1772 c2 = BYTE8_TO_CHAR (i + 0200 - 1);
1774 char_ranges[n_char_ranges++] = c;
1775 char_ranges[n_char_ranges++] = c2;
1776 leading_code = CHAR_LEADING_CODE (c);
1777 memset (fastmap + leading_code, 1,
1778 CHAR_LEADING_CODE (c2) - leading_code + 1);
1782 else /* STRING is multibyte */
1784 SAFE_NALLOCA (char_ranges, 2, SCHARS (string));
1786 while (i_byte < size_byte)
1788 int leading_code = str[i_byte];
1790 if (handle_iso_classes)
1792 const unsigned char *ch = str + i_byte;
1793 re_wctype_t cc = re_wctype_parse (&ch, size_byte - i_byte);
1794 if (cc == 0)
1795 error ("Invalid ISO C character class");
1796 if (cc != -1)
1798 iso_classes = Fcons (make_number (cc), iso_classes);
1799 i_byte = ch - str;
1800 continue;
1804 if (leading_code== '\\')
1806 if (++i_byte == size_byte)
1807 break;
1809 leading_code = str[i_byte];
1811 c = STRING_CHAR_AND_LENGTH (str + i_byte, len);
1812 i_byte += len;
1815 /* Treat `-' as range character only if another character
1816 follows. */
1817 if (i_byte + 1 < size_byte
1818 && str[i_byte] == '-')
1820 int c2, leading_code2;
1822 /* Skip over the dash. */
1823 i_byte++;
1825 /* Get the end of the range. */
1826 leading_code2 = str[i_byte];
1827 c2 = STRING_CHAR_AND_LENGTH (str + i_byte, len);
1828 i_byte += len;
1830 if (c2 == '\\'
1831 && i_byte < size_byte)
1833 leading_code2 = str[i_byte];
1834 c2 = STRING_CHAR_AND_LENGTH (str + i_byte, len);
1835 i_byte += len;
1838 if (c > c2)
1839 continue;
1840 if (ASCII_CHAR_P (c))
1842 while (c <= c2 && c < 0x80)
1843 fastmap[c++] = 1;
1844 leading_code = CHAR_LEADING_CODE (c);
1846 if (! ASCII_CHAR_P (c))
1848 int lim2 = leading_code2 + 1;
1849 while (leading_code < lim2)
1850 fastmap[leading_code++] = 1;
1851 if (c <= c2)
1853 char_ranges[n_char_ranges++] = c;
1854 char_ranges[n_char_ranges++] = c2;
1858 else
1860 if (ASCII_CHAR_P (c))
1861 fastmap[c] = 1;
1862 else
1864 fastmap[leading_code] = 1;
1865 char_ranges[n_char_ranges++] = c;
1866 char_ranges[n_char_ranges++] = c;
1871 /* If the current range is unibyte and STRING contains non-ASCII
1872 chars, arrange fastmap for the corresponding unibyte
1873 chars. */
1875 if (! multibyte && n_char_ranges > 0)
1877 memset (fastmap + 0200, 0, 0200);
1878 for (i = 0; i < n_char_ranges; i += 2)
1880 int c1 = char_ranges[i];
1881 int lim2 = char_ranges[i + 1] + 1;
1883 for (; c1 < lim2; c1++)
1885 int b = CHAR_TO_BYTE_SAFE (c1);
1886 if (b >= 0)
1887 fastmap[b] = 1;
1893 /* If ^ was the first character, complement the fastmap. */
1894 if (negate)
1896 if (! multibyte)
1897 for (i = 0; i < sizeof fastmap; i++)
1898 fastmap[i] ^= 1;
1899 else
1901 for (i = 0; i < 0200; i++)
1902 fastmap[i] ^= 1;
1903 /* All non-ASCII chars possibly match. */
1904 for (; i < sizeof fastmap; i++)
1905 fastmap[i] = 1;
1910 ptrdiff_t start_point = PT;
1911 ptrdiff_t pos = PT;
1912 ptrdiff_t pos_byte = PT_BYTE;
1913 unsigned char *p = PT_ADDR, *endp, *stop;
1915 if (forwardp)
1917 endp = (XINT (lim) == GPT) ? GPT_ADDR : CHAR_POS_ADDR (XINT (lim));
1918 stop = (pos < GPT && GPT < XINT (lim)) ? GPT_ADDR : endp;
1920 else
1922 endp = CHAR_POS_ADDR (XINT (lim));
1923 stop = (pos >= GPT && GPT > XINT (lim)) ? GAP_END_ADDR : endp;
1926 /* This code may look up syntax tables using functions that rely on the
1927 gl_state object. To make sure this object is not out of date,
1928 let's initialize it manually.
1929 We ignore syntax-table text-properties for now, since that's
1930 what we've done in the past. */
1931 SETUP_BUFFER_SYNTAX_TABLE ();
1932 if (forwardp)
1934 if (multibyte)
1935 while (1)
1937 int nbytes;
1939 if (p >= stop)
1941 if (p >= endp)
1942 break;
1943 p = GAP_END_ADDR;
1944 stop = endp;
1946 c = STRING_CHAR_AND_LENGTH (p, nbytes);
1947 if (! NILP (iso_classes) && in_classes (c, iso_classes))
1949 if (negate)
1950 break;
1951 else
1952 goto fwd_ok;
1955 if (! fastmap[*p])
1956 break;
1957 if (! ASCII_CHAR_P (c))
1959 /* As we are looking at a multibyte character, we
1960 must look up the character in the table
1961 CHAR_RANGES. If there's no data in the table,
1962 that character is not what we want to skip. */
1964 /* The following code do the right thing even if
1965 n_char_ranges is zero (i.e. no data in
1966 CHAR_RANGES). */
1967 for (i = 0; i < n_char_ranges; i += 2)
1968 if (c >= char_ranges[i] && c <= char_ranges[i + 1])
1969 break;
1970 if (!(negate ^ (i < n_char_ranges)))
1971 break;
1973 fwd_ok:
1974 p += nbytes, pos++, pos_byte += nbytes;
1975 rarely_quit (pos);
1977 else
1978 while (true)
1980 if (p >= stop)
1982 if (p >= endp)
1983 break;
1984 p = GAP_END_ADDR;
1985 stop = endp;
1988 if (!NILP (iso_classes) && in_classes (*p, iso_classes))
1990 if (negate)
1991 break;
1992 else
1993 goto fwd_unibyte_ok;
1996 if (!fastmap[*p])
1997 break;
1998 fwd_unibyte_ok:
1999 p++, pos++, pos_byte++;
2000 rarely_quit (pos);
2003 else
2005 if (multibyte)
2006 while (true)
2008 if (p <= stop)
2010 if (p <= endp)
2011 break;
2012 p = GPT_ADDR;
2013 stop = endp;
2015 unsigned char *prev_p = p;
2017 p--;
2018 while (stop <= p && ! CHAR_HEAD_P (*p));
2020 c = STRING_CHAR (p);
2022 if (! NILP (iso_classes) && in_classes (c, iso_classes))
2024 if (negate)
2025 break;
2026 else
2027 goto back_ok;
2030 if (! fastmap[*p])
2031 break;
2032 if (! ASCII_CHAR_P (c))
2034 /* See the comment in the previous similar code. */
2035 for (i = 0; i < n_char_ranges; i += 2)
2036 if (c >= char_ranges[i] && c <= char_ranges[i + 1])
2037 break;
2038 if (!(negate ^ (i < n_char_ranges)))
2039 break;
2041 back_ok:
2042 pos--, pos_byte -= prev_p - p;
2043 rarely_quit (pos);
2045 else
2046 while (true)
2048 if (p <= stop)
2050 if (p <= endp)
2051 break;
2052 p = GPT_ADDR;
2053 stop = endp;
2056 if (! NILP (iso_classes) && in_classes (p[-1], iso_classes))
2058 if (negate)
2059 break;
2060 else
2061 goto back_unibyte_ok;
2064 if (!fastmap[p[-1]])
2065 break;
2066 back_unibyte_ok:
2067 p--, pos--, pos_byte--;
2068 rarely_quit (pos);
2072 SET_PT_BOTH (pos, pos_byte);
2074 SAFE_FREE ();
2075 return make_number (PT - start_point);
2080 static Lisp_Object
2081 skip_syntaxes (bool forwardp, Lisp_Object string, Lisp_Object lim)
2083 int c;
2084 unsigned char fastmap[0400];
2085 bool negate = 0;
2086 ptrdiff_t i, i_byte;
2087 bool multibyte;
2088 ptrdiff_t size_byte;
2089 unsigned char *str;
2091 CHECK_STRING (string);
2093 if (NILP (lim))
2094 XSETINT (lim, forwardp ? ZV : BEGV);
2095 else
2096 CHECK_NUMBER_COERCE_MARKER (lim);
2098 /* In any case, don't allow scan outside bounds of buffer. */
2099 if (XINT (lim) > ZV)
2100 XSETFASTINT (lim, ZV);
2101 if (XINT (lim) < BEGV)
2102 XSETFASTINT (lim, BEGV);
2104 if (forwardp ? (PT >= XFASTINT (lim)) : (PT <= XFASTINT (lim)))
2105 return make_number (0);
2107 multibyte = (!NILP (BVAR (current_buffer, enable_multibyte_characters))
2108 && (XINT (lim) - PT != CHAR_TO_BYTE (XINT (lim)) - PT_BYTE));
2110 memset (fastmap, 0, sizeof fastmap);
2112 if (SBYTES (string) > SCHARS (string))
2113 /* As this is very rare case (syntax spec is ASCII only), don't
2114 consider efficiency. */
2115 string = string_make_unibyte (string);
2117 str = SDATA (string);
2118 size_byte = SBYTES (string);
2120 i_byte = 0;
2121 if (i_byte < size_byte
2122 && SREF (string, 0) == '^')
2124 negate = 1; i_byte++;
2127 /* Find the syntaxes specified and set their elements of fastmap. */
2129 while (i_byte < size_byte)
2131 c = str[i_byte++];
2132 fastmap[syntax_spec_code[c]] = 1;
2135 /* If ^ was the first character, complement the fastmap. */
2136 if (negate)
2137 for (i = 0; i < sizeof fastmap; i++)
2138 fastmap[i] ^= 1;
2141 ptrdiff_t start_point = PT;
2142 ptrdiff_t pos = PT;
2143 ptrdiff_t pos_byte = PT_BYTE;
2144 unsigned char *p, *endp, *stop;
2146 SETUP_SYNTAX_TABLE (pos, forwardp ? 1 : -1);
2148 if (forwardp)
2150 while (true)
2152 p = BYTE_POS_ADDR (pos_byte);
2153 endp = XINT (lim) == GPT ? GPT_ADDR : CHAR_POS_ADDR (XINT (lim));
2154 stop = pos < GPT && GPT < XINT (lim) ? GPT_ADDR : endp;
2158 int nbytes;
2160 if (p >= stop)
2162 if (p >= endp)
2163 goto done;
2164 p = GAP_END_ADDR;
2165 stop = endp;
2167 if (multibyte)
2168 c = STRING_CHAR_AND_LENGTH (p, nbytes);
2169 else
2170 c = *p, nbytes = 1;
2171 if (! fastmap[SYNTAX (c)])
2172 goto done;
2173 p += nbytes, pos++, pos_byte += nbytes;
2174 rarely_quit (pos);
2176 while (!parse_sexp_lookup_properties
2177 || pos < gl_state.e_property);
2179 update_syntax_table_forward (pos + gl_state.offset,
2180 false, gl_state.object);
2183 else
2185 p = BYTE_POS_ADDR (pos_byte);
2186 endp = CHAR_POS_ADDR (XINT (lim));
2187 stop = pos >= GPT && GPT > XINT (lim) ? GAP_END_ADDR : endp;
2189 if (multibyte)
2191 while (true)
2193 if (p <= stop)
2195 if (p <= endp)
2196 break;
2197 p = GPT_ADDR;
2198 stop = endp;
2200 UPDATE_SYNTAX_TABLE_BACKWARD (pos - 1);
2202 unsigned char *prev_p = p;
2204 p--;
2205 while (stop <= p && ! CHAR_HEAD_P (*p));
2207 c = STRING_CHAR (p);
2208 if (! fastmap[SYNTAX (c)])
2209 break;
2210 pos--, pos_byte -= prev_p - p;
2211 rarely_quit (pos);
2214 else
2216 while (true)
2218 if (p <= stop)
2220 if (p <= endp)
2221 break;
2222 p = GPT_ADDR;
2223 stop = endp;
2225 UPDATE_SYNTAX_TABLE_BACKWARD (pos - 1);
2226 if (! fastmap[SYNTAX (p[-1])])
2227 break;
2228 p--, pos--, pos_byte--;
2229 rarely_quit (pos);
2234 done:
2235 SET_PT_BOTH (pos, pos_byte);
2237 return make_number (PT - start_point);
2241 /* Return true if character C belongs to one of the ISO classes
2242 in the list ISO_CLASSES. Each class is represented by an
2243 integer which is its type according to re_wctype. */
2245 static bool
2246 in_classes (int c, Lisp_Object iso_classes)
2248 bool fits_class = 0;
2250 while (CONSP (iso_classes))
2252 Lisp_Object elt;
2253 elt = XCAR (iso_classes);
2254 iso_classes = XCDR (iso_classes);
2256 if (re_iswctype (c, XFASTINT (elt)))
2257 fits_class = 1;
2260 return fits_class;
2263 /* Jump over a comment, assuming we are at the beginning of one.
2264 FROM is the current position.
2265 FROM_BYTE is the bytepos corresponding to FROM.
2266 Do not move past STOP (a charpos).
2267 The comment over which we have to jump is of style STYLE
2268 (either SYNTAX_FLAGS_COMMENT_STYLE (foo) or ST_COMMENT_STYLE).
2269 NESTING should be positive to indicate the nesting at the beginning
2270 for nested comments and should be zero or negative else.
2271 ST_COMMENT_STYLE cannot be nested.
2272 PREV_SYNTAX is the SYNTAX_WITH_FLAGS of the previous character
2273 (or 0 If the search cannot start in the middle of a two-character).
2275 If successful, return true and store the charpos of the comment's
2276 end into *CHARPOS_PTR and the corresponding bytepos into
2277 *BYTEPOS_PTR. Else, return false and store the charpos STOP into
2278 *CHARPOS_PTR, the corresponding bytepos into *BYTEPOS_PTR and the
2279 current nesting (as defined for state->incomment) in
2280 *INCOMMENT_PTR. Should the last character scanned in an incomplete
2281 comment be a possible first character of a two character construct,
2282 we store its SYNTAX_WITH_FLAGS into *last_syntax_ptr. Otherwise,
2283 we store Smax into *last_syntax_ptr.
2285 The comment end is the last character of the comment rather than the
2286 character just after the comment.
2288 Global syntax data is assumed to initially be valid for FROM and
2289 remains valid for forward search starting at the returned position. */
2291 static bool
2292 forw_comment (ptrdiff_t from, ptrdiff_t from_byte, ptrdiff_t stop,
2293 EMACS_INT nesting, int style, int prev_syntax,
2294 ptrdiff_t *charpos_ptr, ptrdiff_t *bytepos_ptr,
2295 EMACS_INT *incomment_ptr, int *last_syntax_ptr)
2297 unsigned short int quit_count = 0;
2298 int c, c1;
2299 enum syntaxcode code;
2300 int syntax, other_syntax;
2302 if (nesting <= 0) nesting = -1;
2304 /* Enter the loop in the middle so that we find
2305 a 2-char comment ender if we start in the middle of it. */
2306 syntax = prev_syntax;
2307 code = syntax & 0xff;
2308 if (syntax != 0 && from < stop) goto forw_incomment;
2310 while (1)
2312 if (from == stop)
2314 *incomment_ptr = nesting;
2315 *charpos_ptr = from;
2316 *bytepos_ptr = from_byte;
2317 *last_syntax_ptr =
2318 (code == Sescape || code == Scharquote
2319 || SYNTAX_FLAGS_COMEND_FIRST (syntax)
2320 || (nesting > 0
2321 && SYNTAX_FLAGS_COMSTART_FIRST (syntax)))
2322 ? syntax : Smax ;
2323 return 0;
2325 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2326 syntax = SYNTAX_WITH_FLAGS (c);
2327 code = syntax & 0xff;
2328 if (code == Sendcomment
2329 && SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0) == style
2330 && (SYNTAX_FLAGS_COMMENT_NESTED (syntax) ?
2331 (nesting > 0 && --nesting == 0) : nesting < 0)
2332 && !(Vcomment_end_can_be_escaped && char_quoted (from, from_byte)))
2333 /* We have encountered a comment end of the same style
2334 as the comment sequence which began this comment
2335 section. */
2336 break;
2337 if (code == Scomment_fence
2338 && style == ST_COMMENT_STYLE)
2339 /* We have encountered a comment end of the same style
2340 as the comment sequence which began this comment
2341 section. */
2342 break;
2343 if (nesting > 0
2344 && code == Scomment
2345 && SYNTAX_FLAGS_COMMENT_NESTED (syntax)
2346 && SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0) == style)
2347 /* We have encountered a nested comment of the same style
2348 as the comment sequence which began this comment section. */
2349 nesting++;
2350 INC_BOTH (from, from_byte);
2351 UPDATE_SYNTAX_TABLE_FORWARD (from);
2353 forw_incomment:
2354 if (from < stop && SYNTAX_FLAGS_COMEND_FIRST (syntax)
2355 && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
2356 other_syntax = SYNTAX_WITH_FLAGS (c1),
2357 SYNTAX_FLAGS_COMEND_SECOND (other_syntax))
2358 && SYNTAX_FLAGS_COMMENT_STYLE (syntax, other_syntax) == style
2359 && ((SYNTAX_FLAGS_COMMENT_NESTED (syntax) ||
2360 SYNTAX_FLAGS_COMMENT_NESTED (other_syntax))
2361 ? nesting > 0 : nesting < 0))
2363 syntax = Smax; /* So that "|#" (lisp) can not return
2364 the syntax of "#" in *last_syntax_ptr. */
2365 if (--nesting <= 0)
2366 /* We have encountered a comment end of the same style
2367 as the comment sequence which began this comment section. */
2368 break;
2369 else
2371 INC_BOTH (from, from_byte);
2372 UPDATE_SYNTAX_TABLE_FORWARD (from);
2375 if (nesting > 0
2376 && from < stop
2377 && SYNTAX_FLAGS_COMSTART_FIRST (syntax)
2378 && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
2379 other_syntax = SYNTAX_WITH_FLAGS (c1),
2380 SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax) == style
2381 && SYNTAX_FLAGS_COMSTART_SECOND (other_syntax))
2382 && (SYNTAX_FLAGS_COMMENT_NESTED (syntax) ||
2383 SYNTAX_FLAGS_COMMENT_NESTED (other_syntax)))
2384 /* We have encountered a nested comment of the same style
2385 as the comment sequence which began this comment section. */
2387 syntax = Smax; /* So that "#|#" isn't also a comment ender. */
2388 INC_BOTH (from, from_byte);
2389 UPDATE_SYNTAX_TABLE_FORWARD (from);
2390 nesting++;
2393 rarely_quit (++quit_count);
2395 *charpos_ptr = from;
2396 *bytepos_ptr = from_byte;
2397 *last_syntax_ptr = Smax; /* Any syntactic power the last byte had is
2398 used up. */
2399 return 1;
2402 DEFUN ("forward-comment", Fforward_comment, Sforward_comment, 1, 1, 0,
2403 doc: /*
2404 Move forward across up to COUNT comments. If COUNT is negative, move backward.
2405 Stop scanning if we find something other than a comment or whitespace.
2406 Set point to where scanning stops.
2407 If COUNT comments are found as expected, with nothing except whitespace
2408 between them, return t; otherwise return nil. */)
2409 (Lisp_Object count)
2411 ptrdiff_t from, from_byte, stop;
2412 int c, c1;
2413 enum syntaxcode code;
2414 int comstyle = 0; /* style of comment encountered */
2415 bool comnested = 0; /* whether the comment is nestable or not */
2416 bool found;
2417 EMACS_INT count1;
2418 ptrdiff_t out_charpos, out_bytepos;
2419 EMACS_INT dummy;
2420 int dummy2;
2421 unsigned short int quit_count = 0;
2423 CHECK_NUMBER (count);
2424 count1 = XINT (count);
2425 stop = count1 > 0 ? ZV : BEGV;
2427 from = PT;
2428 from_byte = PT_BYTE;
2430 SETUP_SYNTAX_TABLE (from, count1);
2431 while (count1 > 0)
2435 bool comstart_first;
2436 int syntax, other_syntax;
2438 if (from == stop)
2440 SET_PT_BOTH (from, from_byte);
2441 return Qnil;
2443 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2444 syntax = SYNTAX_WITH_FLAGS (c);
2445 code = SYNTAX (c);
2446 comstart_first = SYNTAX_FLAGS_COMSTART_FIRST (syntax);
2447 comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax);
2448 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0);
2449 INC_BOTH (from, from_byte);
2450 UPDATE_SYNTAX_TABLE_FORWARD (from);
2451 if (from < stop && comstart_first
2452 && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
2453 other_syntax = SYNTAX_WITH_FLAGS (c1),
2454 SYNTAX_FLAGS_COMSTART_SECOND (other_syntax)))
2456 /* We have encountered a comment start sequence and we
2457 are ignoring all text inside comments. We must record
2458 the comment style this sequence begins so that later,
2459 only a comment end of the same style actually ends
2460 the comment section. */
2461 code = Scomment;
2462 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax);
2463 comnested |= SYNTAX_FLAGS_COMMENT_NESTED (other_syntax);
2464 INC_BOTH (from, from_byte);
2465 UPDATE_SYNTAX_TABLE_FORWARD (from);
2467 rarely_quit (++quit_count);
2469 while (code == Swhitespace || (code == Sendcomment && c == '\n'));
2471 if (code == Scomment_fence)
2472 comstyle = ST_COMMENT_STYLE;
2473 else if (code != Scomment)
2475 DEC_BOTH (from, from_byte);
2476 SET_PT_BOTH (from, from_byte);
2477 return Qnil;
2479 /* We're at the start of a comment. */
2480 found = forw_comment (from, from_byte, stop, comnested, comstyle, 0,
2481 &out_charpos, &out_bytepos, &dummy, &dummy2);
2482 from = out_charpos; from_byte = out_bytepos;
2483 if (!found)
2485 SET_PT_BOTH (from, from_byte);
2486 return Qnil;
2488 INC_BOTH (from, from_byte);
2489 UPDATE_SYNTAX_TABLE_FORWARD (from);
2490 /* We have skipped one comment. */
2491 count1--;
2494 while (count1 < 0)
2496 while (true)
2498 if (from <= stop)
2500 SET_PT_BOTH (BEGV, BEGV_BYTE);
2501 return Qnil;
2504 DEC_BOTH (from, from_byte);
2505 /* char_quoted does UPDATE_SYNTAX_TABLE_BACKWARD (from). */
2506 bool quoted = char_quoted (from, from_byte);
2507 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2508 int syntax = SYNTAX_WITH_FLAGS (c);
2509 code = SYNTAX (c);
2510 comstyle = 0;
2511 comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax);
2512 if (code == Sendcomment)
2513 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0);
2514 if (from > stop && SYNTAX_FLAGS_COMEND_SECOND (syntax)
2515 && prev_char_comend_first (from, from_byte)
2516 && !char_quoted (from - 1, dec_bytepos (from_byte)))
2518 int other_syntax;
2519 /* We must record the comment style encountered so that
2520 later, we can match only the proper comment begin
2521 sequence of the same style. */
2522 DEC_BOTH (from, from_byte);
2523 code = Sendcomment;
2524 /* Calling char_quoted, above, set up global syntax position
2525 at the new value of FROM. */
2526 c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2527 other_syntax = SYNTAX_WITH_FLAGS (c1);
2528 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax);
2529 comnested |= SYNTAX_FLAGS_COMMENT_NESTED (other_syntax);
2532 if (code == Scomment_fence)
2534 /* Skip until first preceding unquoted comment_fence. */
2535 bool fence_found = 0;
2536 ptrdiff_t ini = from, ini_byte = from_byte;
2538 while (1)
2540 DEC_BOTH (from, from_byte);
2541 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2542 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2543 if (SYNTAX (c) == Scomment_fence
2544 && !char_quoted (from, from_byte))
2546 fence_found = 1;
2547 break;
2549 else if (from == stop)
2550 break;
2551 rarely_quit (++quit_count);
2553 if (fence_found == 0)
2555 from = ini; /* Set point to ini + 1. */
2556 from_byte = ini_byte;
2557 goto leave;
2559 else
2560 /* We have skipped one comment. */
2561 break;
2563 else if (code == Sendcomment)
2565 found = back_comment (from, from_byte, stop, comnested, comstyle,
2566 &out_charpos, &out_bytepos);
2567 if (!found)
2569 if (c == '\n')
2570 /* This end-of-line is not an end-of-comment.
2571 Treat it like a whitespace.
2572 CC-mode (and maybe others) relies on this behavior. */
2574 else
2576 /* Failure: we should go back to the end of this
2577 not-quite-endcomment. */
2578 if (SYNTAX (c) != code)
2579 /* It was a two-char Sendcomment. */
2580 INC_BOTH (from, from_byte);
2581 goto leave;
2584 else
2586 /* We have skipped one comment. */
2587 from = out_charpos, from_byte = out_bytepos;
2588 break;
2591 else if (code != Swhitespace || quoted)
2593 leave:
2594 INC_BOTH (from, from_byte);
2595 SET_PT_BOTH (from, from_byte);
2596 return Qnil;
2599 rarely_quit (++quit_count);
2602 count1++;
2605 SET_PT_BOTH (from, from_byte);
2606 return Qt;
2609 /* Return syntax code of character C if C is an ASCII character
2610 or if MULTIBYTE_SYMBOL_P is false. Otherwise, return Ssymbol. */
2612 static enum syntaxcode
2613 syntax_multibyte (int c, bool multibyte_symbol_p)
2615 return ASCII_CHAR_P (c) || !multibyte_symbol_p ? SYNTAX (c) : Ssymbol;
2618 static Lisp_Object
2619 scan_lists (EMACS_INT from, EMACS_INT count, EMACS_INT depth, bool sexpflag)
2621 Lisp_Object val;
2622 ptrdiff_t stop = count > 0 ? ZV : BEGV;
2623 int c, c1;
2624 int stringterm;
2625 bool quoted;
2626 bool mathexit = 0;
2627 enum syntaxcode code;
2628 EMACS_INT min_depth = depth; /* Err out if depth gets less than this. */
2629 int comstyle = 0; /* Style of comment encountered. */
2630 bool comnested = 0; /* Whether the comment is nestable or not. */
2631 ptrdiff_t temp_pos;
2632 EMACS_INT last_good = from;
2633 bool found;
2634 ptrdiff_t from_byte;
2635 ptrdiff_t out_bytepos, out_charpos;
2636 EMACS_INT dummy;
2637 int dummy2;
2638 bool multibyte_symbol_p = sexpflag && multibyte_syntax_as_symbol;
2639 unsigned short int quit_count = 0;
2641 if (depth > 0) min_depth = 0;
2643 if (from > ZV) from = ZV;
2644 if (from < BEGV) from = BEGV;
2646 from_byte = CHAR_TO_BYTE (from);
2648 maybe_quit ();
2650 SETUP_SYNTAX_TABLE (from, count);
2651 while (count > 0)
2653 while (from < stop)
2655 rarely_quit (++quit_count);
2656 bool comstart_first, prefix;
2657 int syntax, other_syntax;
2658 UPDATE_SYNTAX_TABLE_FORWARD (from);
2659 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2660 syntax = SYNTAX_WITH_FLAGS (c);
2661 code = syntax_multibyte (c, multibyte_symbol_p);
2662 comstart_first = SYNTAX_FLAGS_COMSTART_FIRST (syntax);
2663 comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax);
2664 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0);
2665 prefix = SYNTAX_FLAGS_PREFIX (syntax);
2666 if (depth == min_depth)
2667 last_good = from;
2668 INC_BOTH (from, from_byte);
2669 UPDATE_SYNTAX_TABLE_FORWARD (from);
2670 if (from < stop && comstart_first
2671 && (c = FETCH_CHAR_AS_MULTIBYTE (from_byte),
2672 other_syntax = SYNTAX_WITH_FLAGS (c),
2673 SYNTAX_FLAGS_COMSTART_SECOND (other_syntax))
2674 && parse_sexp_ignore_comments)
2676 /* We have encountered a comment start sequence and we
2677 are ignoring all text inside comments. We must record
2678 the comment style this sequence begins so that later,
2679 only a comment end of the same style actually ends
2680 the comment section. */
2681 code = Scomment;
2682 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax);
2683 comnested |= SYNTAX_FLAGS_COMMENT_NESTED (other_syntax);
2684 INC_BOTH (from, from_byte);
2685 UPDATE_SYNTAX_TABLE_FORWARD (from);
2688 if (prefix)
2689 continue;
2691 switch (code)
2693 case Sescape:
2694 case Scharquote:
2695 if (from == stop)
2696 goto lose;
2697 INC_BOTH (from, from_byte);
2698 /* Treat following character as a word constituent. */
2699 FALLTHROUGH;
2700 case Sword:
2701 case Ssymbol:
2702 if (depth || !sexpflag) break;
2703 /* This word counts as a sexp; return at end of it. */
2704 while (from < stop)
2706 UPDATE_SYNTAX_TABLE_FORWARD (from);
2708 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2709 switch (syntax_multibyte (c, multibyte_symbol_p))
2711 case Scharquote:
2712 case Sescape:
2713 INC_BOTH (from, from_byte);
2714 if (from == stop)
2715 goto lose;
2716 break;
2717 case Sword:
2718 case Ssymbol:
2719 case Squote:
2720 break;
2721 default:
2722 goto done;
2724 INC_BOTH (from, from_byte);
2725 rarely_quit (++quit_count);
2727 goto done;
2729 case Scomment_fence:
2730 comstyle = ST_COMMENT_STYLE;
2731 FALLTHROUGH;
2732 case Scomment:
2733 if (!parse_sexp_ignore_comments) break;
2734 UPDATE_SYNTAX_TABLE_FORWARD (from);
2735 found = forw_comment (from, from_byte, stop,
2736 comnested, comstyle, 0,
2737 &out_charpos, &out_bytepos, &dummy,
2738 &dummy2);
2739 from = out_charpos, from_byte = out_bytepos;
2740 if (!found)
2742 if (depth == 0)
2743 goto done;
2744 goto lose;
2746 INC_BOTH (from, from_byte);
2747 UPDATE_SYNTAX_TABLE_FORWARD (from);
2748 break;
2750 case Smath:
2751 if (!sexpflag)
2752 break;
2753 if (from != stop && c == FETCH_CHAR_AS_MULTIBYTE (from_byte))
2755 INC_BOTH (from, from_byte);
2757 if (mathexit)
2759 mathexit = 0;
2760 goto close1;
2762 mathexit = 1;
2763 FALLTHROUGH;
2764 case Sopen:
2765 if (!++depth) goto done;
2766 break;
2768 case Sclose:
2769 close1:
2770 if (!--depth) goto done;
2771 if (depth < min_depth)
2772 xsignal3 (Qscan_error,
2773 build_string ("Containing expression ends prematurely"),
2774 make_number (last_good), make_number (from));
2775 break;
2777 case Sstring:
2778 case Sstring_fence:
2779 temp_pos = dec_bytepos (from_byte);
2780 stringterm = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
2781 while (1)
2783 enum syntaxcode c_code;
2784 if (from >= stop)
2785 goto lose;
2786 UPDATE_SYNTAX_TABLE_FORWARD (from);
2787 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2788 c_code = syntax_multibyte (c, multibyte_symbol_p);
2789 if (code == Sstring
2790 ? c == stringterm && c_code == Sstring
2791 : c_code == Sstring_fence)
2792 break;
2794 if (c_code == Scharquote || c_code == Sescape)
2795 INC_BOTH (from, from_byte);
2796 INC_BOTH (from, from_byte);
2797 rarely_quit (++quit_count);
2799 INC_BOTH (from, from_byte);
2800 if (!depth && sexpflag) goto done;
2801 break;
2802 default:
2803 /* Ignore whitespace, punctuation, quote, endcomment. */
2804 break;
2808 /* Reached end of buffer. Error if within object, return nil if between */
2809 if (depth)
2810 goto lose;
2812 return Qnil;
2814 /* End of object reached */
2815 done:
2816 count--;
2819 while (count < 0)
2821 while (from > stop)
2823 rarely_quit (++quit_count);
2824 DEC_BOTH (from, from_byte);
2825 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2826 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2827 int syntax = SYNTAX_WITH_FLAGS (c);
2828 code = syntax_multibyte (c, multibyte_symbol_p);
2829 if (depth == min_depth)
2830 last_good = from;
2831 comstyle = 0;
2832 comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax);
2833 if (code == Sendcomment)
2834 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0);
2835 if (from > stop && SYNTAX_FLAGS_COMEND_SECOND (syntax)
2836 && prev_char_comend_first (from, from_byte)
2837 && parse_sexp_ignore_comments)
2839 /* We must record the comment style encountered so that
2840 later, we can match only the proper comment begin
2841 sequence of the same style. */
2842 int c2, other_syntax;
2843 DEC_BOTH (from, from_byte);
2844 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2845 code = Sendcomment;
2846 c2 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2847 other_syntax = SYNTAX_WITH_FLAGS (c2);
2848 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax);
2849 comnested |= SYNTAX_FLAGS_COMMENT_NESTED (other_syntax);
2852 /* Quoting turns anything except a comment-ender
2853 into a word character. Note that this cannot be true
2854 if we decremented FROM in the if-statement above. */
2855 if (code != Sendcomment && char_quoted (from, from_byte))
2857 DEC_BOTH (from, from_byte);
2858 code = Sword;
2860 else if (SYNTAX_FLAGS_PREFIX (syntax))
2861 continue;
2863 switch (code)
2865 case Sword:
2866 case Ssymbol:
2867 case Sescape:
2868 case Scharquote:
2869 if (depth || !sexpflag) break;
2870 /* This word counts as a sexp; count object finished
2871 after passing it. */
2872 while (from > stop)
2874 temp_pos = from_byte;
2875 if (! NILP (BVAR (current_buffer, enable_multibyte_characters)))
2876 DEC_POS (temp_pos);
2877 else
2878 temp_pos--;
2879 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
2880 c1 = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
2881 /* Don't allow comment-end to be quoted. */
2882 if (syntax_multibyte (c1, multibyte_symbol_p) == Sendcomment)
2883 goto done2;
2884 quoted = char_quoted (from - 1, temp_pos);
2885 if (quoted)
2887 DEC_BOTH (from, from_byte);
2888 temp_pos = dec_bytepos (temp_pos);
2889 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
2891 c1 = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
2892 if (! quoted)
2893 switch (syntax_multibyte (c1, multibyte_symbol_p))
2895 case Sword: case Ssymbol: case Squote: break;
2896 default: goto done2;
2898 DEC_BOTH (from, from_byte);
2899 rarely_quit (++quit_count);
2901 goto done2;
2903 case Smath:
2904 if (!sexpflag)
2905 break;
2906 if (from > BEGV)
2908 temp_pos = dec_bytepos (from_byte);
2909 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
2910 if (from != stop && c == FETCH_CHAR_AS_MULTIBYTE (temp_pos))
2911 DEC_BOTH (from, from_byte);
2913 if (mathexit)
2915 mathexit = 0;
2916 goto open2;
2918 mathexit = 1;
2919 FALLTHROUGH;
2920 case Sclose:
2921 if (!++depth) goto done2;
2922 break;
2924 case Sopen:
2925 open2:
2926 if (!--depth) goto done2;
2927 if (depth < min_depth)
2928 xsignal3 (Qscan_error,
2929 build_string ("Containing expression ends prematurely"),
2930 make_number (last_good), make_number (from));
2931 break;
2933 case Sendcomment:
2934 if (!parse_sexp_ignore_comments)
2935 break;
2936 found = back_comment (from, from_byte, stop, comnested, comstyle,
2937 &out_charpos, &out_bytepos);
2938 /* FIXME: if !found, it really wasn't a comment-end.
2939 For single-char Sendcomment, we can't do much about it apart
2940 from skipping the char.
2941 For 2-char endcomments, we could try again, taking both
2942 chars as separate entities, but it's a lot of trouble
2943 for very little gain, so we don't bother either. -sm */
2944 if (found)
2945 from = out_charpos, from_byte = out_bytepos;
2946 break;
2948 case Scomment_fence:
2949 case Sstring_fence:
2950 while (1)
2952 if (from == stop)
2953 goto lose;
2954 DEC_BOTH (from, from_byte);
2955 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2956 if (!char_quoted (from, from_byte))
2958 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2959 if (syntax_multibyte (c, multibyte_symbol_p) == code)
2960 break;
2962 rarely_quit (++quit_count);
2964 if (code == Sstring_fence && !depth && sexpflag) goto done2;
2965 break;
2967 case Sstring:
2968 stringterm = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2969 while (true)
2971 if (from == stop)
2972 goto lose;
2973 DEC_BOTH (from, from_byte);
2974 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2975 if (!char_quoted (from, from_byte))
2977 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2978 if (c == stringterm
2979 && (syntax_multibyte (c, multibyte_symbol_p)
2980 == Sstring))
2981 break;
2983 rarely_quit (++quit_count);
2985 if (!depth && sexpflag) goto done2;
2986 break;
2987 default:
2988 /* Ignore whitespace, punctuation, quote, endcomment. */
2989 break;
2993 /* Reached start of buffer. Error if within object, return nil if between */
2994 if (depth)
2995 goto lose;
2997 return Qnil;
2999 done2:
3000 count++;
3004 XSETFASTINT (val, from);
3005 return val;
3007 lose:
3008 xsignal3 (Qscan_error,
3009 build_string ("Unbalanced parentheses"),
3010 make_number (last_good), make_number (from));
3013 DEFUN ("scan-lists", Fscan_lists, Sscan_lists, 3, 3, 0,
3014 doc: /* Scan from character number FROM by COUNT lists.
3015 Scan forward if COUNT is positive, backward if COUNT is negative.
3016 Return the character number of the position thus found.
3018 A \"list", in this context, refers to a balanced parenthetical
3019 grouping, as determined by the syntax table.
3021 If DEPTH is nonzero, treat that as the nesting depth of the starting
3022 point (i.e. the starting point is DEPTH parentheses deep). This
3023 function scans over parentheses until the depth goes to zero COUNT
3024 times. Hence, positive DEPTH moves out that number of levels of
3025 parentheses, while negative DEPTH moves to a deeper level.
3027 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
3029 If we reach the beginning or end of the accessible part of the buffer
3030 before we have scanned over COUNT lists, return nil if the depth at
3031 that point is zero, and signal a error if the depth is nonzero. */)
3032 (Lisp_Object from, Lisp_Object count, Lisp_Object depth)
3034 CHECK_NUMBER (from);
3035 CHECK_NUMBER (count);
3036 CHECK_NUMBER (depth);
3038 return scan_lists (XINT (from), XINT (count), XINT (depth), 0);
3041 DEFUN ("scan-sexps", Fscan_sexps, Sscan_sexps, 2, 2, 0,
3042 doc: /* Scan from character number FROM by COUNT balanced expressions.
3043 If COUNT is negative, scan backwards.
3044 Returns the character number of the position thus found.
3046 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
3048 If the beginning or end of (the accessible part of) the buffer is reached
3049 in the middle of a parenthetical grouping, an error is signaled.
3050 If the beginning or end is reached between groupings
3051 but before count is used up, nil is returned. */)
3052 (Lisp_Object from, Lisp_Object count)
3054 CHECK_NUMBER (from);
3055 CHECK_NUMBER (count);
3057 return scan_lists (XINT (from), XINT (count), 0, 1);
3060 DEFUN ("backward-prefix-chars", Fbackward_prefix_chars, Sbackward_prefix_chars,
3061 0, 0, 0,
3062 doc: /* Move point backward over any number of chars with prefix syntax.
3063 This includes chars with expression prefix syntax class (\\=') and those with
3064 the prefix syntax flag (p). */)
3065 (void)
3067 ptrdiff_t beg = BEGV;
3068 ptrdiff_t opoint = PT;
3069 ptrdiff_t opoint_byte = PT_BYTE;
3070 ptrdiff_t pos = PT;
3071 ptrdiff_t pos_byte = PT_BYTE;
3072 int c;
3074 if (pos <= beg)
3076 SET_PT_BOTH (opoint, opoint_byte);
3078 return Qnil;
3081 SETUP_SYNTAX_TABLE (pos, -1);
3083 DEC_BOTH (pos, pos_byte);
3085 while (!char_quoted (pos, pos_byte)
3086 /* Previous statement updates syntax table. */
3087 && ((c = FETCH_CHAR_AS_MULTIBYTE (pos_byte), SYNTAX (c) == Squote)
3088 || syntax_prefix_flag_p (c)))
3090 opoint = pos;
3091 opoint_byte = pos_byte;
3093 if (pos <= beg)
3094 break;
3095 DEC_BOTH (pos, pos_byte);
3096 rarely_quit (pos);
3099 SET_PT_BOTH (opoint, opoint_byte);
3101 return Qnil;
3105 /* If the character at FROM_BYTE is the second part of a 2-character
3106 comment opener based on PREV_FROM_SYNTAX, update STATE and return
3107 true. */
3108 static bool
3109 in_2char_comment_start (struct lisp_parse_state *state,
3110 int prev_from_syntax,
3111 ptrdiff_t prev_from,
3112 ptrdiff_t from_byte)
3114 int c1, syntax;
3115 if (SYNTAX_FLAGS_COMSTART_FIRST (prev_from_syntax)
3116 && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
3117 syntax = SYNTAX_WITH_FLAGS (c1),
3118 SYNTAX_FLAGS_COMSTART_SECOND (syntax)))
3120 /* Record the comment style we have entered so that only
3121 the comment-end sequence of the same style actually
3122 terminates the comment section. */
3123 state->comstyle
3124 = SYNTAX_FLAGS_COMMENT_STYLE (syntax, prev_from_syntax);
3125 bool comnested = (SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax)
3126 | SYNTAX_FLAGS_COMMENT_NESTED (syntax));
3127 state->incomment = comnested ? 1 : -1;
3128 state->comstr_start = prev_from;
3129 return true;
3131 return false;
3134 /* Parse forward from FROM / FROM_BYTE to END,
3135 assuming that FROM has state STATE,
3136 and return a description of the state of the parse at END.
3137 If STOPBEFORE, stop at the start of an atom.
3138 If COMMENTSTOP is 1, stop at the start of a comment.
3139 If COMMENTSTOP is -1, stop at the start or end of a comment,
3140 after the beginning of a string, or after the end of a string. */
3142 static void
3143 scan_sexps_forward (struct lisp_parse_state *state,
3144 ptrdiff_t from, ptrdiff_t from_byte, ptrdiff_t end,
3145 EMACS_INT targetdepth, bool stopbefore,
3146 int commentstop)
3148 enum syntaxcode code;
3149 struct level { ptrdiff_t last, prev; };
3150 struct level levelstart[100];
3151 struct level *curlevel = levelstart;
3152 struct level *endlevel = levelstart + 100;
3153 EMACS_INT depth; /* Paren depth of current scanning location.
3154 level - levelstart equals this except
3155 when the depth becomes negative. */
3156 EMACS_INT mindepth; /* Lowest DEPTH value seen. */
3157 bool start_quoted = 0; /* True means starting after a char quote. */
3158 Lisp_Object tem;
3159 ptrdiff_t prev_from; /* Keep one character before FROM. */
3160 ptrdiff_t prev_from_byte;
3161 int prev_from_syntax, prev_prev_from_syntax;
3162 bool boundary_stop = commentstop == -1;
3163 bool nofence;
3164 bool found;
3165 ptrdiff_t out_bytepos, out_charpos;
3166 int temp;
3167 unsigned short int quit_count = 0;
3169 prev_from = from;
3170 prev_from_byte = from_byte;
3171 if (from != BEGV)
3172 DEC_BOTH (prev_from, prev_from_byte);
3174 /* Use this macro instead of `from++'. */
3175 #define INC_FROM \
3176 do { prev_from = from; \
3177 prev_from_byte = from_byte; \
3178 temp = FETCH_CHAR_AS_MULTIBYTE (prev_from_byte); \
3179 prev_prev_from_syntax = prev_from_syntax; \
3180 prev_from_syntax = SYNTAX_WITH_FLAGS (temp); \
3181 INC_BOTH (from, from_byte); \
3182 if (from < end) \
3183 UPDATE_SYNTAX_TABLE_FORWARD (from); \
3184 } while (0)
3186 maybe_quit ();
3188 depth = state->depth;
3189 start_quoted = state->quoted;
3190 prev_prev_from_syntax = Smax;
3191 prev_from_syntax = state->prev_syntax;
3193 tem = state->levelstarts;
3194 while (!NILP (tem)) /* >= second enclosing sexps. */
3196 Lisp_Object temhd = Fcar (tem);
3197 if (RANGED_INTEGERP (PTRDIFF_MIN, temhd, PTRDIFF_MAX))
3198 curlevel->last = XINT (temhd);
3199 if (++curlevel == endlevel)
3200 curlevel--; /* error ("Nesting too deep for parser"); */
3201 curlevel->prev = -1;
3202 curlevel->last = -1;
3203 tem = Fcdr (tem);
3205 curlevel->prev = -1;
3206 curlevel->last = -1;
3208 state->quoted = 0;
3209 mindepth = depth;
3211 SETUP_SYNTAX_TABLE (from, 1);
3213 /* Enter the loop at a place appropriate for initial state. */
3215 if (state->incomment)
3216 goto startincomment;
3217 if (state->instring >= 0)
3219 nofence = state->instring != ST_STRING_STYLE;
3220 if (start_quoted)
3221 goto startquotedinstring;
3222 goto startinstring;
3224 else if (start_quoted)
3225 goto startquoted;
3226 else if ((from < end)
3227 && (in_2char_comment_start (state, prev_from_syntax,
3228 prev_from, from_byte)))
3230 INC_FROM;
3231 prev_from_syntax = Smax; /* the syntax has already been "used up". */
3232 goto atcomment;
3235 while (from < end)
3237 rarely_quit (++quit_count);
3238 INC_FROM;
3240 if ((from < end)
3241 && (in_2char_comment_start (state, prev_from_syntax,
3242 prev_from, from_byte)))
3244 INC_FROM;
3245 prev_from_syntax = Smax; /* the syntax has already been "used up". */
3246 goto atcomment;
3249 if (SYNTAX_FLAGS_PREFIX (prev_from_syntax))
3250 continue;
3251 code = prev_from_syntax & 0xff;
3252 switch (code)
3254 case Sescape:
3255 case Scharquote:
3256 if (stopbefore) goto stop; /* this arg means stop at sexp start */
3257 curlevel->last = prev_from;
3258 startquoted:
3259 if (from == end) goto endquoted;
3260 INC_FROM;
3261 goto symstarted;
3262 /* treat following character as a word constituent */
3263 case Sword:
3264 case Ssymbol:
3265 if (stopbefore) goto stop; /* this arg means stop at sexp start */
3266 curlevel->last = prev_from;
3267 symstarted:
3268 while (from < end)
3270 if (in_2char_comment_start (state, prev_from_syntax,
3271 prev_from, from_byte))
3273 INC_FROM;
3274 prev_from_syntax = Smax; /* the syntax has already been "used up". */
3275 goto atcomment;
3278 int symchar = FETCH_CHAR_AS_MULTIBYTE (from_byte);
3279 switch (SYNTAX (symchar))
3281 case Scharquote:
3282 case Sescape:
3283 INC_FROM;
3284 if (from == end) goto endquoted;
3285 break;
3286 case Sword:
3287 case Ssymbol:
3288 case Squote:
3289 break;
3290 default:
3291 goto symdone;
3293 INC_FROM;
3294 rarely_quit (++quit_count);
3296 symdone:
3297 curlevel->prev = curlevel->last;
3298 break;
3300 case Scomment_fence:
3301 /* Record the comment style we have entered so that only
3302 the comment-end sequence of the same style actually
3303 terminates the comment section. */
3304 state->comstyle = ST_COMMENT_STYLE;
3305 state->incomment = -1;
3306 state->comstr_start = prev_from;
3307 goto atcomment;
3308 case Scomment:
3309 state->comstyle = SYNTAX_FLAGS_COMMENT_STYLE (prev_from_syntax, 0);
3310 state->incomment = (SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax) ?
3311 1 : -1);
3312 state->comstr_start = prev_from;
3313 atcomment:
3314 if (commentstop || boundary_stop) goto done;
3315 startincomment:
3316 /* The (from == BEGV) test was to enter the loop in the middle so
3317 that we find a 2-char comment ender even if we start in the
3318 middle of it. We don't want to do that if we're just at the
3319 beginning of the comment (think of (*) ... (*)). */
3320 found = forw_comment (from, from_byte, end,
3321 state->incomment, state->comstyle,
3322 from == BEGV ? 0 : prev_from_syntax,
3323 &out_charpos, &out_bytepos, &state->incomment,
3324 &prev_from_syntax);
3325 from = out_charpos; from_byte = out_bytepos;
3326 /* Beware! prev_from and friends (except prev_from_syntax)
3327 are invalid now. Luckily, the `done' doesn't use them
3328 and the INC_FROM sets them to a sane value without
3329 looking at them. */
3330 if (!found) goto done;
3331 INC_FROM;
3332 state->incomment = 0;
3333 state->comstyle = 0; /* reset the comment style */
3334 prev_from_syntax = Smax; /* For the comment closer */
3335 if (boundary_stop) goto done;
3336 break;
3338 case Sopen:
3339 if (stopbefore) goto stop; /* this arg means stop at sexp start */
3340 depth++;
3341 /* curlevel++->last ran into compiler bug on Apollo */
3342 curlevel->last = prev_from;
3343 if (++curlevel == endlevel)
3344 curlevel--; /* error ("Nesting too deep for parser"); */
3345 curlevel->prev = -1;
3346 curlevel->last = -1;
3347 if (targetdepth == depth) goto done;
3348 break;
3350 case Sclose:
3351 depth--;
3352 if (depth < mindepth)
3353 mindepth = depth;
3354 if (curlevel != levelstart)
3355 curlevel--;
3356 curlevel->prev = curlevel->last;
3357 if (targetdepth == depth) goto done;
3358 break;
3360 case Sstring:
3361 case Sstring_fence:
3362 state->comstr_start = from - 1;
3363 if (stopbefore) goto stop; /* this arg means stop at sexp start */
3364 curlevel->last = prev_from;
3365 state->instring = (code == Sstring
3366 ? (FETCH_CHAR_AS_MULTIBYTE (prev_from_byte))
3367 : ST_STRING_STYLE);
3368 if (boundary_stop) goto done;
3369 startinstring:
3371 nofence = state->instring != ST_STRING_STYLE;
3373 while (1)
3375 int c;
3376 enum syntaxcode c_code;
3378 if (from >= end) goto done;
3379 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
3380 c_code = SYNTAX (c);
3382 /* Check C_CODE here so that if the char has
3383 a syntax-table property which says it is NOT
3384 a string character, it does not end the string. */
3385 if (nofence && c == state->instring && c_code == Sstring)
3386 break;
3388 switch (c_code)
3390 case Sstring_fence:
3391 if (!nofence) goto string_end;
3392 break;
3394 case Scharquote:
3395 case Sescape:
3396 INC_FROM;
3397 startquotedinstring:
3398 if (from >= end) goto endquoted;
3399 break;
3401 default:
3402 break;
3404 INC_FROM;
3405 rarely_quit (++quit_count);
3408 string_end:
3409 state->instring = -1;
3410 curlevel->prev = curlevel->last;
3411 INC_FROM;
3412 if (boundary_stop) goto done;
3413 break;
3415 case Smath:
3416 /* FIXME: We should do something with it. */
3417 break;
3418 default:
3419 /* Ignore whitespace, punctuation, quote, endcomment. */
3420 break;
3423 goto done;
3425 stop: /* Here if stopping before start of sexp. */
3426 from = prev_from; /* We have just fetched the char that starts it; */
3427 from_byte = prev_from_byte;
3428 prev_from_syntax = prev_prev_from_syntax;
3429 goto done; /* but return the position before it. */
3431 endquoted:
3432 state->quoted = 1;
3433 done:
3434 state->depth = depth;
3435 state->mindepth = mindepth;
3436 state->thislevelstart = curlevel->prev;
3437 state->prevlevelstart
3438 = (curlevel == levelstart) ? -1 : (curlevel - 1)->last;
3439 state->location = from;
3440 state->location_byte = from_byte;
3441 state->levelstarts = Qnil;
3442 while (curlevel > levelstart)
3443 state->levelstarts = Fcons (make_number ((--curlevel)->last),
3444 state->levelstarts);
3445 state->prev_syntax = (SYNTAX_FLAGS_COMSTARTEND_FIRST (prev_from_syntax)
3446 || state->quoted) ? prev_from_syntax : Smax;
3449 /* Convert a (lisp) parse state to the internal form used in
3450 scan_sexps_forward. */
3451 static void
3452 internalize_parse_state (Lisp_Object external, struct lisp_parse_state *state)
3454 Lisp_Object tem;
3456 if (NILP (external))
3458 state->depth = 0;
3459 state->instring = -1;
3460 state->incomment = 0;
3461 state->quoted = 0;
3462 state->comstyle = 0; /* comment style a by default. */
3463 state->comstr_start = -1; /* no comment/string seen. */
3464 state->levelstarts = Qnil;
3465 state->prev_syntax = Smax;
3467 else
3469 tem = Fcar (external);
3470 if (!NILP (tem))
3471 state->depth = XINT (tem);
3472 else
3473 state->depth = 0;
3475 external = Fcdr (external);
3476 external = Fcdr (external);
3477 external = Fcdr (external);
3478 tem = Fcar (external);
3479 /* Check whether we are inside string_fence-style string: */
3480 state->instring = (!NILP (tem)
3481 ? (CHARACTERP (tem) ? XFASTINT (tem) : ST_STRING_STYLE)
3482 : -1);
3484 external = Fcdr (external);
3485 tem = Fcar (external);
3486 state->incomment = (!NILP (tem)
3487 ? (INTEGERP (tem) ? XINT (tem) : -1)
3488 : 0);
3490 external = Fcdr (external);
3491 tem = Fcar (external);
3492 state->quoted = !NILP (tem);
3494 /* if the eighth element of the list is nil, we are in comment
3495 style a. If it is non-nil, we are in comment style b */
3496 external = Fcdr (external);
3497 external = Fcdr (external);
3498 tem = Fcar (external);
3499 state->comstyle = (NILP (tem)
3501 : (RANGED_INTEGERP (0, tem, ST_COMMENT_STYLE)
3502 ? XINT (tem)
3503 : ST_COMMENT_STYLE));
3505 external = Fcdr (external);
3506 tem = Fcar (external);
3507 state->comstr_start =
3508 RANGED_INTEGERP (PTRDIFF_MIN, tem, PTRDIFF_MAX) ? XINT (tem) : -1;
3509 external = Fcdr (external);
3510 tem = Fcar (external);
3511 state->levelstarts = tem;
3513 external = Fcdr (external);
3514 tem = Fcar (external);
3515 state->prev_syntax = NILP (tem) ? Smax : XINT (tem);
3519 DEFUN ("parse-partial-sexp", Fparse_partial_sexp, Sparse_partial_sexp, 2, 6, 0,
3520 doc: /* Parse Lisp syntax starting at FROM until TO; return status of parse at TO.
3521 Parsing stops at TO or when certain criteria are met;
3522 point is set to where parsing stops.
3523 If fifth arg OLDSTATE is omitted or nil,
3524 parsing assumes that FROM is the beginning of a function.
3526 Value is a list of elements describing final state of parsing:
3527 0. depth in parens.
3528 1. character address of start of innermost containing list; nil if none.
3529 2. character address of start of last complete sexp terminated.
3530 3. non-nil if inside a string.
3531 (it is the character that will terminate the string,
3532 or t if the string should be terminated by a generic string delimiter.)
3533 4. nil if outside a comment, t if inside a non-nestable comment,
3534 else an integer (the current comment nesting).
3535 5. t if following a quote character.
3536 6. the minimum paren-depth encountered during this scan.
3537 7. style of comment, if any.
3538 8. character address of start of comment or string; nil if not in one.
3539 9. List of positions of currently open parens, outermost first.
3540 10. When the last position scanned holds the first character of a
3541 (potential) two character construct, the syntax of that position,
3542 otherwise nil. That construct can be a two character comment
3543 delimiter or an Escaped or Char-quoted character.
3544 11..... Possible further internal information used by `parse-partial-sexp'.
3546 If third arg TARGETDEPTH is non-nil, parsing stops if the depth
3547 in parentheses becomes equal to TARGETDEPTH.
3548 Fourth arg STOPBEFORE non-nil means stop when we come to
3549 any character that starts a sexp.
3550 Fifth arg OLDSTATE is a list like what this function returns.
3551 It is used to initialize the state of the parse. Elements number 1, 2, 6
3552 are ignored.
3553 Sixth arg COMMENTSTOP non-nil means stop after the start of a comment.
3554 If it is the symbol `syntax-table', stop after the start of a comment or a
3555 string, or after end of a comment or a string. */)
3556 (Lisp_Object from, Lisp_Object to, Lisp_Object targetdepth,
3557 Lisp_Object stopbefore, Lisp_Object oldstate, Lisp_Object commentstop)
3559 struct lisp_parse_state state;
3560 EMACS_INT target;
3562 if (!NILP (targetdepth))
3564 CHECK_NUMBER (targetdepth);
3565 target = XINT (targetdepth);
3567 else
3568 target = TYPE_MINIMUM (EMACS_INT); /* We won't reach this depth. */
3570 validate_region (&from, &to);
3571 internalize_parse_state (oldstate, &state);
3572 scan_sexps_forward (&state, XINT (from), CHAR_TO_BYTE (XINT (from)),
3573 XINT (to),
3574 target, !NILP (stopbefore),
3575 (NILP (commentstop)
3576 ? 0 : (EQ (commentstop, Qsyntax_table) ? -1 : 1)));
3578 SET_PT_BOTH (state.location, state.location_byte);
3580 return
3581 Fcons (make_number (state.depth),
3582 Fcons (state.prevlevelstart < 0
3583 ? Qnil : make_number (state.prevlevelstart),
3584 Fcons (state.thislevelstart < 0
3585 ? Qnil : make_number (state.thislevelstart),
3586 Fcons (state.instring >= 0
3587 ? (state.instring == ST_STRING_STYLE
3588 ? Qt : make_number (state.instring)) : Qnil,
3589 Fcons (state.incomment < 0 ? Qt :
3590 (state.incomment == 0 ? Qnil :
3591 make_number (state.incomment)),
3592 Fcons (state.quoted ? Qt : Qnil,
3593 Fcons (make_number (state.mindepth),
3594 Fcons ((state.comstyle
3595 ? (state.comstyle == ST_COMMENT_STYLE
3596 ? Qsyntax_table
3597 : make_number (state.comstyle))
3598 : Qnil),
3599 Fcons (((state.incomment
3600 || (state.instring >= 0))
3601 ? make_number (state.comstr_start)
3602 : Qnil),
3603 Fcons (state.levelstarts,
3604 Fcons (state.prev_syntax == Smax
3605 ? Qnil
3606 : make_number (state.prev_syntax),
3607 Qnil)))))))))));
3610 void
3611 init_syntax_once (void)
3613 register int i, c;
3614 Lisp_Object temp;
3616 /* This has to be done here, before we call Fmake_char_table. */
3617 DEFSYM (Qsyntax_table, "syntax-table");
3619 /* Create objects which can be shared among syntax tables. */
3620 Vsyntax_code_object = make_uninit_vector (Smax);
3621 for (i = 0; i < Smax; i++)
3622 ASET (Vsyntax_code_object, i, Fcons (make_number (i), Qnil));
3624 /* Now we are ready to set up this property, so we can
3625 create syntax tables. */
3626 Fput (Qsyntax_table, Qchar_table_extra_slots, make_number (0));
3628 temp = AREF (Vsyntax_code_object, Swhitespace);
3630 Vstandard_syntax_table = Fmake_char_table (Qsyntax_table, temp);
3632 /* Control characters should not be whitespace. */
3633 temp = AREF (Vsyntax_code_object, Spunct);
3634 for (i = 0; i <= ' ' - 1; i++)
3635 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3636 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, 0177, temp);
3638 /* Except that a few really are whitespace. */
3639 temp = AREF (Vsyntax_code_object, Swhitespace);
3640 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ' ', temp);
3641 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\t', temp);
3642 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\n', temp);
3643 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, 015, temp);
3644 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, 014, temp);
3646 temp = AREF (Vsyntax_code_object, Sword);
3647 for (i = 'a'; i <= 'z'; i++)
3648 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3649 for (i = 'A'; i <= 'Z'; i++)
3650 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3651 for (i = '0'; i <= '9'; i++)
3652 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3654 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '$', temp);
3655 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '%', temp);
3657 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '(',
3658 Fcons (make_number (Sopen), make_number (')')));
3659 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ')',
3660 Fcons (make_number (Sclose), make_number ('(')));
3661 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '[',
3662 Fcons (make_number (Sopen), make_number (']')));
3663 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ']',
3664 Fcons (make_number (Sclose), make_number ('[')));
3665 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '{',
3666 Fcons (make_number (Sopen), make_number ('}')));
3667 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '}',
3668 Fcons (make_number (Sclose), make_number ('{')));
3669 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '"',
3670 Fcons (make_number (Sstring), Qnil));
3671 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\\',
3672 Fcons (make_number (Sescape), Qnil));
3674 temp = AREF (Vsyntax_code_object, Ssymbol);
3675 for (i = 0; i < 10; i++)
3677 c = "_-+*/&|<>="[i];
3678 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
3681 temp = AREF (Vsyntax_code_object, Spunct);
3682 for (i = 0; i < 12; i++)
3684 c = ".,;:?!#@~^'`"[i];
3685 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
3688 /* All multibyte characters have syntax `word' by default. */
3689 temp = AREF (Vsyntax_code_object, Sword);
3690 char_table_set_range (Vstandard_syntax_table, 0x80, MAX_CHAR, temp);
3693 void
3694 syms_of_syntax (void)
3696 DEFSYM (Qsyntax_table_p, "syntax-table-p");
3698 staticpro (&Vsyntax_code_object);
3700 staticpro (&gl_state.object);
3701 staticpro (&gl_state.global_code);
3702 staticpro (&gl_state.current_syntax_table);
3703 staticpro (&gl_state.old_prop);
3705 /* Defined in regex.c. */
3706 staticpro (&re_match_object);
3708 DEFSYM (Qscan_error, "scan-error");
3709 Fput (Qscan_error, Qerror_conditions,
3710 listn (CONSTYPE_PURE, 2, Qscan_error, Qerror));
3711 Fput (Qscan_error, Qerror_message,
3712 build_pure_c_string ("Scan error"));
3714 DEFVAR_BOOL ("parse-sexp-ignore-comments", parse_sexp_ignore_comments,
3715 doc: /* Non-nil means `forward-sexp', etc., should treat comments as whitespace. */);
3717 DEFVAR_BOOL ("parse-sexp-lookup-properties", parse_sexp_lookup_properties,
3718 doc: /* Non-nil means `forward-sexp', etc., obey `syntax-table' property.
3719 Otherwise, that text property is simply ignored.
3720 See the info node `(elisp)Syntax Properties' for a description of the
3721 `syntax-table' property. */);
3723 DEFVAR_INT ("syntax-propertize--done", syntax_propertize__done,
3724 doc: /* Position up to which syntax-table properties have been set. */);
3725 syntax_propertize__done = -1;
3726 DEFSYM (Qinternal__syntax_propertize, "internal--syntax-propertize");
3727 Fmake_variable_buffer_local (intern ("syntax-propertize--done"));
3729 words_include_escapes = 0;
3730 DEFVAR_BOOL ("words-include-escapes", words_include_escapes,
3731 doc: /* Non-nil means `forward-word', etc., should treat escape chars part of words. */);
3733 DEFVAR_BOOL ("multibyte-syntax-as-symbol", multibyte_syntax_as_symbol,
3734 doc: /* Non-nil means `scan-sexps' treats all multibyte characters as symbol. */);
3735 multibyte_syntax_as_symbol = 0;
3737 DEFVAR_BOOL ("open-paren-in-column-0-is-defun-start",
3738 open_paren_in_column_0_is_defun_start,
3739 doc: /* Non-nil means an open paren in column 0 denotes the start of a defun. */);
3740 open_paren_in_column_0_is_defun_start = 1;
3743 DEFVAR_LISP ("find-word-boundary-function-table",
3744 Vfind_word_boundary_function_table,
3745 doc: /*
3746 Char table of functions to search for the word boundary.
3747 Each function is called with two arguments; POS and LIMIT.
3748 POS and LIMIT are character positions in the current buffer.
3750 If POS is less than LIMIT, POS is at the first character of a word,
3751 and the return value of a function should be a position after the
3752 last character of that word.
3754 If POS is not less than LIMIT, POS is at the last character of a word,
3755 and the return value of a function should be a position at the first
3756 character of that word.
3758 In both cases, LIMIT bounds the search. */);
3759 Vfind_word_boundary_function_table = Fmake_char_table (Qnil, Qnil);
3761 DEFVAR_BOOL ("comment-end-can-be-escaped", Vcomment_end_can_be_escaped,
3762 doc: /* Non-nil means an escaped ender inside a comment doesn't end the comment. */);
3763 Vcomment_end_can_be_escaped = 0;
3764 DEFSYM (Qcomment_end_can_be_escaped, "comment-end-can-be-escaped");
3765 Fmake_variable_buffer_local (Qcomment_end_can_be_escaped);
3767 defsubr (&Ssyntax_table_p);
3768 defsubr (&Ssyntax_table);
3769 defsubr (&Sstandard_syntax_table);
3770 defsubr (&Scopy_syntax_table);
3771 defsubr (&Sset_syntax_table);
3772 defsubr (&Schar_syntax);
3773 defsubr (&Smatching_paren);
3774 defsubr (&Sstring_to_syntax);
3775 defsubr (&Smodify_syntax_entry);
3776 defsubr (&Sinternal_describe_syntax_value);
3778 defsubr (&Sforward_word);
3780 defsubr (&Sskip_chars_forward);
3781 defsubr (&Sskip_chars_backward);
3782 defsubr (&Sskip_syntax_forward);
3783 defsubr (&Sskip_syntax_backward);
3785 defsubr (&Sforward_comment);
3786 defsubr (&Sscan_lists);
3787 defsubr (&Sscan_sexps);
3788 defsubr (&Sbackward_prefix_chars);
3789 defsubr (&Sparse_partial_sexp);