* etc/NEWS.25: Copy from emacs-25 etc/NEWS.
[emacs.git] / src / syntax.c
blobdcaca22f0e2c04f22098411f003ae224bbd15961
1 /* GNU Emacs routines to deal with syntax tables; also word and list parsing.
2 Copyright (C) 1985, 1987, 1993-1995, 1997-1999, 2001-2017 Free
3 Software Foundation, Inc.
5 This file is part of GNU Emacs.
7 GNU Emacs is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or (at
10 your option) any later version.
12 GNU Emacs is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
21 #include <config.h>
23 #include "lisp.h"
24 #include "character.h"
25 #include "buffer.h"
26 #include "regex.h"
27 #include "syntax.h"
28 #include "intervals.h"
29 #include "category.h"
31 /* Make syntax table lookup grant data in gl_state. */
32 #define SYNTAX(c) syntax_property (c, 1)
33 #define SYNTAX_ENTRY(c) syntax_property_entry (c, 1)
34 #define SYNTAX_WITH_FLAGS(c) syntax_property_with_flags (c, 1)
36 /* Eight single-bit flags have the following meanings:
37 1. This character is the first of a two-character comment-start sequence.
38 2. This character is the second of a two-character comment-start sequence.
39 3. This character is the first of a two-character comment-end sequence.
40 4. This character is the second of a two-character comment-end sequence.
41 5. This character is a prefix, for backward-prefix-chars.
42 6. The char is part of a delimiter for comments of style "b".
43 7. This character is part of a nestable comment sequence.
44 8. The char is part of a delimiter for comments of style "c".
45 Note that any two-character sequence whose first character has flag 1
46 and whose second character has flag 2 will be interpreted as a comment start.
48 Bits 6 and 8 discriminate among different comment styles.
49 Languages such as C++ allow two orthogonal syntax start/end pairs
50 and bit 6 determines whether a comment-end or Scommentend
51 ends style a or b. Comment markers can start style a, b, c, or bc.
52 Style a is always the default.
53 For 2-char comment markers, the style b flag is looked up only on the second
54 char of the comment marker and on the first char of the comment ender.
55 For style c (like the nested flag), the flag can be placed on any of
56 the chars. */
58 /* These functions extract specific flags from an integer
59 that holds the syntax code and the flags. */
61 static bool
62 SYNTAX_FLAGS_COMSTART_FIRST (int flags)
64 return (flags >> 16) & 1;
66 static bool
67 SYNTAX_FLAGS_COMSTART_SECOND (int flags)
69 return (flags >> 17) & 1;
71 static bool
72 SYNTAX_FLAGS_COMEND_FIRST (int flags)
74 return (flags >> 18) & 1;
76 static bool
77 SYNTAX_FLAGS_COMEND_SECOND (int flags)
79 return (flags >> 19) & 1;
81 static bool
82 SYNTAX_FLAGS_COMSTARTEND_FIRST (int flags)
84 return (flags & 0x50000) != 0;
86 static bool
87 SYNTAX_FLAGS_PREFIX (int flags)
89 return (flags >> 20) & 1;
91 static bool
92 SYNTAX_FLAGS_COMMENT_STYLEB (int flags)
94 return (flags >> 21) & 1;
96 static bool
97 SYNTAX_FLAGS_COMMENT_STYLEC (int flags)
99 return (flags >> 23) & 1;
101 static int
102 SYNTAX_FLAGS_COMMENT_STYLEC2 (int flags)
104 return (flags >> 22) & 2; /* SYNTAX_FLAGS_COMMENT_STYLEC (flags) * 2 */
106 static bool
107 SYNTAX_FLAGS_COMMENT_NESTED (int flags)
109 return (flags >> 22) & 1;
112 /* FLAGS should be the flags of the main char of the comment marker, e.g.
113 the second for comstart and the first for comend. */
114 static int
115 SYNTAX_FLAGS_COMMENT_STYLE (int flags, int other_flags)
117 return (SYNTAX_FLAGS_COMMENT_STYLEB (flags)
118 | SYNTAX_FLAGS_COMMENT_STYLEC2 (flags)
119 | SYNTAX_FLAGS_COMMENT_STYLEC2 (other_flags));
122 /* Extract a particular flag for a given character. */
124 static bool
125 SYNTAX_COMEND_FIRST (int c)
127 return SYNTAX_FLAGS_COMEND_FIRST (SYNTAX_WITH_FLAGS (c));
130 /* We use these constants in place for comment-style and
131 string-ender-char to distinguish comments/strings started by
132 comment_fence and string_fence codes. */
134 enum
136 ST_COMMENT_STYLE = 256 + 1,
137 ST_STRING_STYLE = 256 + 2
140 /* This is the internal form of the parse state used in parse-partial-sexp. */
142 struct lisp_parse_state
144 EMACS_INT depth; /* Depth at end of parsing. */
145 int instring; /* -1 if not within string, else desired terminator. */
146 EMACS_INT incomment; /* -1 if in unnestable comment else comment nesting */
147 int comstyle; /* comment style a=0, or b=1, or ST_COMMENT_STYLE. */
148 bool quoted; /* True if just after an escape char at end of parsing. */
149 EMACS_INT mindepth; /* Minimum depth seen while scanning. */
150 /* Char number of most recent start-of-expression at current level */
151 ptrdiff_t thislevelstart;
152 /* Char number of start of containing expression */
153 ptrdiff_t prevlevelstart;
154 ptrdiff_t location; /* Char number at which parsing stopped. */
155 ptrdiff_t location_byte; /* Corresponding byte position. */
156 ptrdiff_t comstr_start; /* Position of last comment/string starter. */
157 Lisp_Object levelstarts; /* Char numbers of starts-of-expression
158 of levels (starting from outermost). */
159 int prev_syntax; /* Syntax of previous position scanned, when
160 that position (potentially) holds the first char
161 of a 2-char construct, i.e. comment delimiter
162 or Sescape, etc. Smax otherwise. */
165 /* These variables are a cache for finding the start of a defun.
166 find_start_pos is the place for which the defun start was found.
167 find_start_value is the defun start position found for it.
168 find_start_value_byte is the corresponding byte position.
169 find_start_buffer is the buffer it was found in.
170 find_start_begv is the BEGV value when it was found.
171 find_start_modiff is the value of MODIFF when it was found. */
173 static ptrdiff_t find_start_pos;
174 static ptrdiff_t find_start_value;
175 static ptrdiff_t find_start_value_byte;
176 static struct buffer *find_start_buffer;
177 static ptrdiff_t find_start_begv;
178 static EMACS_INT find_start_modiff;
181 static Lisp_Object skip_chars (bool, Lisp_Object, Lisp_Object, bool);
182 static Lisp_Object skip_syntaxes (bool, Lisp_Object, Lisp_Object);
183 static Lisp_Object scan_lists (EMACS_INT, EMACS_INT, EMACS_INT, bool);
184 static void scan_sexps_forward (struct lisp_parse_state *,
185 ptrdiff_t, ptrdiff_t, ptrdiff_t, EMACS_INT,
186 bool, int);
187 static void internalize_parse_state (Lisp_Object, struct lisp_parse_state *);
188 static bool in_classes (int, Lisp_Object);
189 static void parse_sexp_propertize (ptrdiff_t charpos);
191 /* This setter is used only in this file, so it can be private. */
192 static void
193 bset_syntax_table (struct buffer *b, Lisp_Object val)
195 b->syntax_table_ = val;
198 /* Whether the syntax of the character C has the prefix flag set. */
199 bool
200 syntax_prefix_flag_p (int c)
202 return SYNTAX_FLAGS_PREFIX (SYNTAX_WITH_FLAGS (c));
205 struct gl_state_s gl_state; /* Global state of syntax parser. */
207 enum { INTERVALS_AT_ONCE = 10 }; /* 1 + max-number of intervals
208 to scan to property-change. */
210 /* Set the syntax entry VAL for char C in table TABLE. */
212 static void
213 SET_RAW_SYNTAX_ENTRY (Lisp_Object table, int c, Lisp_Object val)
215 CHAR_TABLE_SET (table, c, val);
218 /* Set the syntax entry VAL for char-range RANGE in table TABLE.
219 RANGE is a cons (FROM . TO) specifying the range of characters. */
221 static void
222 SET_RAW_SYNTAX_ENTRY_RANGE (Lisp_Object table, Lisp_Object range,
223 Lisp_Object val)
225 Fset_char_table_range (table, range, val);
228 /* Extract the information from the entry for character C
229 in the current syntax table. */
231 static Lisp_Object
232 SYNTAX_MATCH (int c)
234 Lisp_Object ent = SYNTAX_ENTRY (c);
235 return CONSP (ent) ? XCDR (ent) : Qnil;
238 /* This should be called with FROM at the start of forward
239 search, or after the last position of the backward search. It
240 makes sure that the first char is picked up with correct table, so
241 one does not need to call UPDATE_SYNTAX_TABLE immediately after the
242 call.
243 Sign of COUNT gives the direction of the search.
246 static void
247 SETUP_SYNTAX_TABLE (ptrdiff_t from, ptrdiff_t count)
249 SETUP_BUFFER_SYNTAX_TABLE ();
250 gl_state.b_property = BEGV;
251 gl_state.e_property = ZV + 1;
252 gl_state.object = Qnil;
253 gl_state.offset = 0;
254 if (parse_sexp_lookup_properties)
256 if (count > 0)
257 update_syntax_table_forward (from, true, Qnil);
258 else if (from > BEGV)
260 update_syntax_table (from - 1, count, true, Qnil);
261 parse_sexp_propertize (from - 1);
266 /* Same as above, but in OBJECT. If OBJECT is nil, use current buffer.
267 If it is t (which is only used in fast_c_string_match_ignore_case),
268 ignore properties altogether.
270 This is meant for regex.c to use. For buffers, regex.c passes arguments
271 to the UPDATE_SYNTAX_TABLE functions which are relative to BEGV.
272 So if it is a buffer, we set the offset field to BEGV. */
274 void
275 SETUP_SYNTAX_TABLE_FOR_OBJECT (Lisp_Object object,
276 ptrdiff_t from, ptrdiff_t count)
278 SETUP_BUFFER_SYNTAX_TABLE ();
279 gl_state.object = object;
280 if (BUFFERP (gl_state.object))
282 struct buffer *buf = XBUFFER (gl_state.object);
283 gl_state.b_property = 1;
284 gl_state.e_property = BUF_ZV (buf) - BUF_BEGV (buf) + 1;
285 gl_state.offset = BUF_BEGV (buf) - 1;
287 else if (NILP (gl_state.object))
289 gl_state.b_property = 1;
290 gl_state.e_property = ZV - BEGV + 1;
291 gl_state.offset = BEGV - 1;
293 else if (EQ (gl_state.object, Qt))
295 gl_state.b_property = 0;
296 gl_state.e_property = PTRDIFF_MAX;
297 gl_state.offset = 0;
299 else
301 gl_state.b_property = 0;
302 gl_state.e_property = 1 + SCHARS (gl_state.object);
303 gl_state.offset = 0;
305 if (parse_sexp_lookup_properties)
306 update_syntax_table (from + gl_state.offset - (count <= 0),
307 count, 1, gl_state.object);
310 /* Update gl_state to an appropriate interval which contains CHARPOS. The
311 sign of COUNT give the relative position of CHARPOS wrt the previously
312 valid interval. If INIT, only [be]_property fields of gl_state are
313 valid at start, the rest is filled basing on OBJECT.
315 `gl_state.*_i' are the intervals, and CHARPOS is further in the search
316 direction than the intervals - or in an interval. We update the
317 current syntax-table basing on the property of this interval, and
318 update the interval to start further than CHARPOS - or be
319 NULL. We also update lim_property to be the next value of
320 charpos to call this subroutine again - or be before/after the
321 start/end of OBJECT. */
323 void
324 update_syntax_table (ptrdiff_t charpos, EMACS_INT count, bool init,
325 Lisp_Object object)
327 Lisp_Object tmp_table;
328 int cnt = 0;
329 bool invalidate = true;
330 INTERVAL i;
332 if (init)
334 gl_state.old_prop = Qnil;
335 gl_state.start = gl_state.b_property;
336 gl_state.stop = gl_state.e_property;
337 i = interval_of (charpos, object);
338 gl_state.backward_i = gl_state.forward_i = i;
339 invalidate = false;
340 if (!i)
341 return;
342 /* interval_of updates only ->position of the return value, so
343 update the parents manually to speed up update_interval. */
344 while (!NULL_PARENT (i))
346 if (AM_RIGHT_CHILD (i))
347 INTERVAL_PARENT (i)->position = i->position
348 - LEFT_TOTAL_LENGTH (i) + TOTAL_LENGTH (i) /* right end */
349 - TOTAL_LENGTH (INTERVAL_PARENT (i))
350 + LEFT_TOTAL_LENGTH (INTERVAL_PARENT (i));
351 else
352 INTERVAL_PARENT (i)->position = i->position - LEFT_TOTAL_LENGTH (i)
353 + TOTAL_LENGTH (i);
354 i = INTERVAL_PARENT (i);
356 i = gl_state.forward_i;
357 gl_state.b_property = i->position - gl_state.offset;
358 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
359 goto update;
361 i = count > 0 ? gl_state.forward_i : gl_state.backward_i;
363 /* We are guaranteed to be called with CHARPOS either in i,
364 or further off. */
365 if (!i)
366 error ("Error in syntax_table logic for to-the-end intervals");
367 else if (charpos < i->position) /* Move left. */
369 if (count > 0)
370 error ("Error in syntax_table logic for intervals <-");
371 /* Update the interval. */
372 i = update_interval (i, charpos);
373 if (INTERVAL_LAST_POS (i) != gl_state.b_property)
375 invalidate = false;
376 gl_state.forward_i = i;
377 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
380 else if (charpos >= INTERVAL_LAST_POS (i)) /* Move right. */
382 if (count < 0)
383 error ("Error in syntax_table logic for intervals ->");
384 /* Update the interval. */
385 i = update_interval (i, charpos);
386 if (i->position != gl_state.e_property)
388 invalidate = false;
389 gl_state.backward_i = i;
390 gl_state.b_property = i->position - gl_state.offset;
394 update:
395 tmp_table = textget (i->plist, Qsyntax_table);
397 if (invalidate)
398 invalidate = !EQ (tmp_table, gl_state.old_prop); /* Need to invalidate? */
400 if (invalidate) /* Did not get to adjacent interval. */
401 { /* with the same table => */
402 /* invalidate the old range. */
403 if (count > 0)
405 gl_state.backward_i = i;
406 gl_state.b_property = i->position - gl_state.offset;
408 else
410 gl_state.forward_i = i;
411 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
415 if (!EQ (tmp_table, gl_state.old_prop))
417 gl_state.current_syntax_table = tmp_table;
418 gl_state.old_prop = tmp_table;
419 if (EQ (Fsyntax_table_p (tmp_table), Qt))
421 gl_state.use_global = 0;
423 else if (CONSP (tmp_table))
425 gl_state.use_global = 1;
426 gl_state.global_code = tmp_table;
428 else
430 gl_state.use_global = 0;
431 gl_state.current_syntax_table = BVAR (current_buffer, syntax_table);
435 while (i)
437 if (cnt && !EQ (tmp_table, textget (i->plist, Qsyntax_table)))
439 if (count > 0)
441 gl_state.e_property = i->position - gl_state.offset;
442 gl_state.forward_i = i;
444 else
446 gl_state.b_property
447 = i->position + LENGTH (i) - gl_state.offset;
448 gl_state.backward_i = i;
450 return;
452 else if (cnt == INTERVALS_AT_ONCE)
454 if (count > 0)
456 gl_state.e_property
457 = i->position + LENGTH (i) - gl_state.offset
458 /* e_property at EOB is not set to ZV but to ZV+1, so that
459 we can do INC(from);UPDATE_SYNTAX_TABLE_FORWARD without
460 having to check eob between the two. */
461 + (next_interval (i) ? 0 : 1);
462 gl_state.forward_i = i;
464 else
466 gl_state.b_property = i->position - gl_state.offset;
467 gl_state.backward_i = i;
469 return;
471 cnt++;
472 i = count > 0 ? next_interval (i) : previous_interval (i);
474 eassert (i == NULL); /* This property goes to the end. */
475 if (count > 0)
477 gl_state.e_property = gl_state.stop;
478 gl_state.forward_i = i;
480 else
481 gl_state.b_property = gl_state.start;
484 static void
485 parse_sexp_propertize (ptrdiff_t charpos)
487 EMACS_INT zv = ZV;
488 if (syntax_propertize__done <= charpos
489 && syntax_propertize__done < zv)
491 EMACS_INT modiffs = CHARS_MODIFF;
492 safe_call1 (Qinternal__syntax_propertize,
493 make_number (min (zv, 1 + charpos)));
494 if (modiffs != CHARS_MODIFF)
495 error ("parse-sexp-propertize-function modified the buffer!");
496 if (syntax_propertize__done <= charpos
497 && syntax_propertize__done < zv)
498 error ("parse-sexp-propertize-function did not move"
499 " syntax-propertize--done");
500 SETUP_SYNTAX_TABLE (charpos, 1);
502 else if (gl_state.e_property > syntax_propertize__done)
504 gl_state.e_property = syntax_propertize__done;
505 gl_state.e_property_truncated = true;
507 else if (gl_state.e_property_truncated
508 && gl_state.e_property < syntax_propertize__done)
509 { /* When moving backward, e_property might be set without resetting
510 e_property_truncated, so the e_property_truncated flag may
511 occasionally be left raised spuriously. This should be rare. */
512 gl_state.e_property_truncated = false;
513 update_syntax_table_forward (charpos, false, Qnil);
517 void
518 update_syntax_table_forward (ptrdiff_t charpos, bool init,
519 Lisp_Object object)
521 if (gl_state.e_property_truncated)
523 eassert (NILP (object));
524 eassert (charpos >= gl_state.e_property);
525 parse_sexp_propertize (charpos);
527 else
529 update_syntax_table (charpos, 1, init, object);
530 if (NILP (object) && gl_state.e_property > syntax_propertize__done)
531 parse_sexp_propertize (charpos);
535 /* Returns true if char at CHARPOS is quoted.
536 Global syntax-table data should be set up already to be good at CHARPOS
537 or after. On return global syntax data is good for lookup at CHARPOS. */
539 static bool
540 char_quoted (ptrdiff_t charpos, ptrdiff_t bytepos)
542 enum syntaxcode code;
543 ptrdiff_t beg = BEGV;
544 bool quoted = 0;
545 ptrdiff_t orig = charpos;
547 while (charpos > beg)
549 int c;
550 DEC_BOTH (charpos, bytepos);
552 UPDATE_SYNTAX_TABLE_BACKWARD (charpos);
553 c = FETCH_CHAR_AS_MULTIBYTE (bytepos);
554 code = SYNTAX (c);
555 if (! (code == Scharquote || code == Sescape))
556 break;
558 quoted = !quoted;
561 UPDATE_SYNTAX_TABLE (orig);
562 return quoted;
565 /* Return the bytepos one character before BYTEPOS.
566 We assume that BYTEPOS is not at the start of the buffer. */
568 static ptrdiff_t
569 dec_bytepos (ptrdiff_t bytepos)
571 if (NILP (BVAR (current_buffer, enable_multibyte_characters)))
572 return bytepos - 1;
574 DEC_POS (bytepos);
575 return bytepos;
578 /* Return a defun-start position before POS and not too far before.
579 It should be the last one before POS, or nearly the last.
581 When open_paren_in_column_0_is_defun_start is nonzero,
582 only the beginning of the buffer is treated as a defun-start.
584 We record the information about where the scan started
585 and what its result was, so that another call in the same area
586 can return the same value very quickly.
588 There is no promise at which position the global syntax data is
589 valid on return from the subroutine, so the caller should explicitly
590 update the global data. */
592 static ptrdiff_t
593 find_defun_start (ptrdiff_t pos, ptrdiff_t pos_byte)
595 ptrdiff_t opoint = PT, opoint_byte = PT_BYTE;
597 /* Use previous finding, if it's valid and applies to this inquiry. */
598 if (current_buffer == find_start_buffer
599 /* Reuse the defun-start even if POS is a little farther on.
600 POS might be in the next defun, but that's ok.
601 Our value may not be the best possible, but will still be usable. */
602 && pos <= find_start_pos + 1000
603 && pos >= find_start_value
604 && BEGV == find_start_begv
605 && MODIFF == find_start_modiff)
606 return find_start_value;
608 if (!open_paren_in_column_0_is_defun_start)
610 find_start_value = BEGV;
611 find_start_value_byte = BEGV_BYTE;
612 goto found;
615 /* Back up to start of line. */
616 scan_newline (pos, pos_byte, BEGV, BEGV_BYTE, -1, 1);
618 /* We optimize syntax-table lookup for rare updates. Thus we accept
619 only those `^\s(' which are good in global _and_ text-property
620 syntax-tables. */
621 SETUP_BUFFER_SYNTAX_TABLE ();
622 while (PT > BEGV)
624 /* Open-paren at start of line means we may have found our
625 defun-start. */
626 int c = FETCH_CHAR_AS_MULTIBYTE (PT_BYTE);
627 if (SYNTAX (c) == Sopen)
629 SETUP_SYNTAX_TABLE (PT + 1, -1); /* Try again... */
630 c = FETCH_CHAR_AS_MULTIBYTE (PT_BYTE);
631 if (SYNTAX (c) == Sopen)
632 break;
633 /* Now fallback to the default value. */
634 SETUP_BUFFER_SYNTAX_TABLE ();
636 /* Move to beg of previous line. */
637 scan_newline (PT, PT_BYTE, BEGV, BEGV_BYTE, -2, 1);
640 /* Record what we found, for the next try. */
641 find_start_value = PT;
642 find_start_value_byte = PT_BYTE;
643 TEMP_SET_PT_BOTH (opoint, opoint_byte);
645 found:
646 find_start_buffer = current_buffer;
647 find_start_modiff = MODIFF;
648 find_start_begv = BEGV;
649 find_start_pos = pos;
651 return find_start_value;
654 /* Return the SYNTAX_COMEND_FIRST of the character before POS, POS_BYTE. */
656 static bool
657 prev_char_comend_first (ptrdiff_t pos, ptrdiff_t pos_byte)
659 int c;
660 bool val;
662 DEC_BOTH (pos, pos_byte);
663 UPDATE_SYNTAX_TABLE_BACKWARD (pos);
664 c = FETCH_CHAR (pos_byte);
665 val = SYNTAX_COMEND_FIRST (c);
666 UPDATE_SYNTAX_TABLE_FORWARD (pos + 1);
667 return val;
670 /* Check whether charpos FROM is at the end of a comment.
671 FROM_BYTE is the bytepos corresponding to FROM.
672 Do not move back before STOP.
674 Return true if we find a comment ending at FROM/FROM_BYTE.
676 If successful, store the charpos of the comment's beginning
677 into *CHARPOS_PTR, and the bytepos into *BYTEPOS_PTR.
679 Global syntax data remains valid for backward search starting at
680 the returned value (or at FROM, if the search was not successful). */
682 static bool
683 back_comment (ptrdiff_t from, ptrdiff_t from_byte, ptrdiff_t stop,
684 bool comnested, int comstyle, ptrdiff_t *charpos_ptr,
685 ptrdiff_t *bytepos_ptr)
687 /* Look back, counting the parity of string-quotes,
688 and recording the comment-starters seen.
689 When we reach a safe place, assume that's not in a string;
690 then step the main scan to the earliest comment-starter seen
691 an even number of string quotes away from the safe place.
693 OFROM[I] is position of the earliest comment-starter seen
694 which is I+2X quotes from the comment-end.
695 PARITY is current parity of quotes from the comment end. */
696 int string_style = -1; /* Presumed outside of any string. */
697 bool string_lossage = 0;
698 /* Not a real lossage: indicates that we have passed a matching comment
699 starter plus a non-matching comment-ender, meaning that any matching
700 comment-starter we might see later could be a false positive (hidden
701 inside another comment).
702 Test case: { a (* b } c (* d *) */
703 bool comment_lossage = 0;
704 ptrdiff_t comment_end = from;
705 ptrdiff_t comment_end_byte = from_byte;
706 ptrdiff_t comstart_pos = 0;
707 ptrdiff_t comstart_byte;
708 /* Place where the containing defun starts,
709 or 0 if we didn't come across it yet. */
710 ptrdiff_t defun_start = 0;
711 ptrdiff_t defun_start_byte = 0;
712 enum syntaxcode code;
713 ptrdiff_t nesting = 1; /* Current comment nesting. */
714 int c;
715 int syntax = 0;
716 unsigned short int quit_count = 0;
718 /* FIXME: A }} comment-ender style leads to incorrect behavior
719 in the case of {{ c }}} because we ignore the last two chars which are
720 assumed to be comment-enders although they aren't. */
722 /* At beginning of range to scan, we're outside of strings;
723 that determines quote parity to the comment-end. */
724 while (from != stop)
726 rarely_quit (++quit_count);
728 ptrdiff_t temp_byte;
729 int prev_syntax;
730 bool com2start, com2end, comstart;
732 /* Move back and examine a character. */
733 DEC_BOTH (from, from_byte);
734 UPDATE_SYNTAX_TABLE_BACKWARD (from);
736 prev_syntax = syntax;
737 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
738 syntax = SYNTAX_WITH_FLAGS (c);
739 code = SYNTAX (c);
741 /* Check for 2-char comment markers. */
742 com2start = (SYNTAX_FLAGS_COMSTART_FIRST (syntax)
743 && SYNTAX_FLAGS_COMSTART_SECOND (prev_syntax)
744 && (comstyle
745 == SYNTAX_FLAGS_COMMENT_STYLE (prev_syntax, syntax))
746 && (SYNTAX_FLAGS_COMMENT_NESTED (prev_syntax)
747 || SYNTAX_FLAGS_COMMENT_NESTED (syntax)) == comnested);
748 com2end = (SYNTAX_FLAGS_COMEND_FIRST (syntax)
749 && SYNTAX_FLAGS_COMEND_SECOND (prev_syntax));
750 comstart = (com2start || code == Scomment);
752 /* Nasty cases with overlapping 2-char comment markers:
753 - snmp-mode: -- c -- foo -- c --
754 --- c --
755 ------ c --
756 - c-mode: *||*
757 |* *|* *|
758 |*| |* |*|
759 /// */
761 /* If a 2-char comment sequence partly overlaps with another,
762 we don't try to be clever. E.g. |*| in C, or }% in modes that
763 have %..\n and %{..}%. */
764 if (from > stop && (com2end || comstart))
766 ptrdiff_t next = from, next_byte = from_byte;
767 int next_c, next_syntax;
768 DEC_BOTH (next, next_byte);
769 UPDATE_SYNTAX_TABLE_BACKWARD (next);
770 next_c = FETCH_CHAR_AS_MULTIBYTE (next_byte);
771 next_syntax = SYNTAX_WITH_FLAGS (next_c);
772 if (((comstart || comnested)
773 && SYNTAX_FLAGS_COMEND_SECOND (syntax)
774 && SYNTAX_FLAGS_COMEND_FIRST (next_syntax))
775 || ((com2end || comnested)
776 && SYNTAX_FLAGS_COMSTART_SECOND (syntax)
777 && (comstyle
778 == SYNTAX_FLAGS_COMMENT_STYLE (syntax, prev_syntax))
779 && SYNTAX_FLAGS_COMSTART_FIRST (next_syntax)))
780 goto lossage;
781 /* UPDATE_SYNTAX_TABLE_FORWARD (next + 1); */
784 if (com2start && comstart_pos == 0)
785 /* We're looking at a comment starter. But it might be a comment
786 ender as well (see snmp-mode). The first time we see one, we
787 need to consider it as a comment starter,
788 and the subsequent times as a comment ender. */
789 com2end = 0;
791 /* Turn a 2-char comment sequences into the appropriate syntax. */
792 if (com2end)
793 code = Sendcomment;
794 else if (com2start)
795 code = Scomment;
796 /* Ignore comment starters of a different style. */
797 else if (code == Scomment
798 && (comstyle != SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0)
799 || SYNTAX_FLAGS_COMMENT_NESTED (syntax) != comnested))
800 continue;
802 /* Ignore escaped characters, except comment-enders which cannot
803 be escaped. */
804 if ((Vcomment_end_can_be_escaped || code != Sendcomment)
805 && char_quoted (from, from_byte))
806 continue;
808 switch (code)
810 case Sstring_fence:
811 case Scomment_fence:
812 c = (code == Sstring_fence ? ST_STRING_STYLE : ST_COMMENT_STYLE);
813 FALLTHROUGH;
814 case Sstring:
815 /* Track parity of quotes. */
816 if (string_style == -1)
817 /* Entering a string. */
818 string_style = c;
819 else if (string_style == c)
820 /* Leaving the string. */
821 string_style = -1;
822 else
823 /* If we have two kinds of string delimiters.
824 There's no way to grok this scanning backwards. */
825 string_lossage = 1;
826 break;
828 case Scomment:
829 /* We've already checked that it is the relevant comstyle. */
830 if (string_style != -1 || comment_lossage || string_lossage)
831 /* There are odd string quotes involved, so let's be careful.
832 Test case in Pascal: " { " a { " } */
833 goto lossage;
835 if (!comnested)
837 /* Record best comment-starter so far. */
838 comstart_pos = from;
839 comstart_byte = from_byte;
841 else if (--nesting <= 0)
842 /* nested comments have to be balanced, so we don't need to
843 keep looking for earlier ones. We use here the same (slightly
844 incorrect) reasoning as below: since it is followed by uniform
845 paired string quotes, this comment-start has to be outside of
846 strings, else the comment-end itself would be inside a string. */
847 goto done;
848 break;
850 case Sendcomment:
851 if (SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0) == comstyle
852 && ((com2end && SYNTAX_FLAGS_COMMENT_NESTED (prev_syntax))
853 || SYNTAX_FLAGS_COMMENT_NESTED (syntax)) == comnested)
854 /* This is the same style of comment ender as ours. */
856 if (comnested)
857 nesting++;
858 else
859 /* Anything before that can't count because it would match
860 this comment-ender rather than ours. */
861 from = stop; /* Break out of the loop. */
863 else if (comstart_pos != 0 || c != '\n')
864 /* We're mixing comment styles here, so we'd better be careful.
865 The (comstart_pos != 0 || c != '\n') check is not quite correct
866 (we should just always set comment_lossage), but removing it
867 would imply that any multiline comment in C would go through
868 lossage, which seems overkill.
869 The failure should only happen in the rare cases such as
870 { (* } *) */
871 comment_lossage = 1;
872 break;
874 case Sopen:
875 /* Assume a defun-start point is outside of strings. */
876 if (open_paren_in_column_0_is_defun_start
877 && (from == stop
878 || (temp_byte = dec_bytepos (from_byte),
879 FETCH_CHAR (temp_byte) == '\n')))
881 defun_start = from;
882 defun_start_byte = from_byte;
883 from = stop; /* Break out of the loop. */
885 break;
887 default:
888 break;
892 if (comstart_pos == 0)
894 from = comment_end;
895 from_byte = comment_end_byte;
896 UPDATE_SYNTAX_TABLE_FORWARD (comment_end);
898 /* If comstart_pos is set and we get here (ie. didn't jump to `lossage'
899 or `done'), then we've found the beginning of the non-nested comment. */
900 else if (1) /* !comnested */
902 from = comstart_pos;
903 from_byte = comstart_byte;
904 UPDATE_SYNTAX_TABLE_FORWARD (from - 1);
906 else lossage:
908 struct lisp_parse_state state;
909 bool adjusted = true;
910 /* We had two kinds of string delimiters mixed up
911 together. Decode this going forwards.
912 Scan fwd from a known safe place (beginning-of-defun)
913 to the one in question; this records where we
914 last passed a comment starter. */
915 /* If we did not already find the defun start, find it now. */
916 if (defun_start == 0)
918 defun_start = find_defun_start (comment_end, comment_end_byte);
919 defun_start_byte = find_start_value_byte;
920 adjusted = (defun_start > BEGV);
924 internalize_parse_state (Qnil, &state);
925 scan_sexps_forward (&state,
926 defun_start, defun_start_byte,
927 comment_end, TYPE_MINIMUM (EMACS_INT),
928 0, 0);
929 defun_start = comment_end;
930 if (!adjusted)
932 adjusted = true;
933 find_start_value
934 = CONSP (state.levelstarts) ? XINT (XCAR (state.levelstarts))
935 : state.thislevelstart >= 0 ? state.thislevelstart
936 : find_start_value;
937 find_start_value_byte = CHAR_TO_BYTE (find_start_value);
940 if (state.incomment == (comnested ? 1 : -1)
941 && state.comstyle == comstyle)
942 from = state.comstr_start;
943 else
945 from = comment_end;
946 if (state.incomment)
947 /* If comment_end is inside some other comment, maybe ours
948 is nested, so we need to try again from within the
949 surrounding comment. Example: { a (* " *) */
951 /* FIXME: We should advance by one or two chars. */
952 defun_start = state.comstr_start + 2;
953 defun_start_byte = CHAR_TO_BYTE (defun_start);
956 rarely_quit (++quit_count);
958 while (defun_start < comment_end);
960 from_byte = CHAR_TO_BYTE (from);
961 UPDATE_SYNTAX_TABLE_FORWARD (from - 1);
964 done:
965 *charpos_ptr = from;
966 *bytepos_ptr = from_byte;
968 return from != comment_end;
971 DEFUN ("syntax-table-p", Fsyntax_table_p, Ssyntax_table_p, 1, 1, 0,
972 doc: /* Return t if OBJECT is a syntax table.
973 Currently, any char-table counts as a syntax table. */)
974 (Lisp_Object object)
976 if (CHAR_TABLE_P (object)
977 && EQ (XCHAR_TABLE (object)->purpose, Qsyntax_table))
978 return Qt;
979 return Qnil;
982 static void
983 check_syntax_table (Lisp_Object obj)
985 CHECK_TYPE (CHAR_TABLE_P (obj) && EQ (XCHAR_TABLE (obj)->purpose, Qsyntax_table),
986 Qsyntax_table_p, obj);
989 DEFUN ("syntax-table", Fsyntax_table, Ssyntax_table, 0, 0, 0,
990 doc: /* Return the current syntax table.
991 This is the one specified by the current buffer. */)
992 (void)
994 return BVAR (current_buffer, syntax_table);
997 DEFUN ("standard-syntax-table", Fstandard_syntax_table,
998 Sstandard_syntax_table, 0, 0, 0,
999 doc: /* Return the standard syntax table.
1000 This is the one used for new buffers. */)
1001 (void)
1003 return Vstandard_syntax_table;
1006 DEFUN ("copy-syntax-table", Fcopy_syntax_table, Scopy_syntax_table, 0, 1, 0,
1007 doc: /* Construct a new syntax table and return it.
1008 It is a copy of the TABLE, which defaults to the standard syntax table. */)
1009 (Lisp_Object table)
1011 Lisp_Object copy;
1013 if (!NILP (table))
1014 check_syntax_table (table);
1015 else
1016 table = Vstandard_syntax_table;
1018 copy = Fcopy_sequence (table);
1020 /* Only the standard syntax table should have a default element.
1021 Other syntax tables should inherit from parents instead. */
1022 set_char_table_defalt (copy, Qnil);
1024 /* Copied syntax tables should all have parents.
1025 If we copied one with no parent, such as the standard syntax table,
1026 use the standard syntax table as the copy's parent. */
1027 if (NILP (XCHAR_TABLE (copy)->parent))
1028 Fset_char_table_parent (copy, Vstandard_syntax_table);
1029 return copy;
1032 DEFUN ("set-syntax-table", Fset_syntax_table, Sset_syntax_table, 1, 1, 0,
1033 doc: /* Select a new syntax table for the current buffer.
1034 One argument, a syntax table. */)
1035 (Lisp_Object table)
1037 int idx;
1038 check_syntax_table (table);
1039 bset_syntax_table (current_buffer, table);
1040 /* Indicate that this buffer now has a specified syntax table. */
1041 idx = PER_BUFFER_VAR_IDX (syntax_table);
1042 SET_PER_BUFFER_VALUE_P (current_buffer, idx, 1);
1043 return table;
1046 /* Convert a letter which signifies a syntax code
1047 into the code it signifies.
1048 This is used by modify-syntax-entry, and other things. */
1050 unsigned char const syntax_spec_code[0400] =
1051 { 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1052 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1053 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1054 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1055 Swhitespace, Scomment_fence, Sstring, 0377, Smath, 0377, 0377, Squote,
1056 Sopen, Sclose, 0377, 0377, 0377, Swhitespace, Spunct, Scharquote,
1057 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1058 0377, 0377, 0377, 0377, Scomment, 0377, Sendcomment, 0377,
1059 Sinherit, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* @, A ... */
1060 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1061 0377, 0377, 0377, 0377, 0377, 0377, 0377, Sword,
1062 0377, 0377, 0377, 0377, Sescape, 0377, 0377, Ssymbol,
1063 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* `, a, ... */
1064 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1065 0377, 0377, 0377, 0377, 0377, 0377, 0377, Sword,
1066 0377, 0377, 0377, 0377, Sstring_fence, 0377, 0377, 0377
1069 /* Indexed by syntax code, give the letter that describes it. */
1071 char const syntax_code_spec[16] =
1073 ' ', '.', 'w', '_', '(', ')', '\'', '\"', '$', '\\', '/', '<', '>', '@',
1074 '!', '|'
1077 /* Indexed by syntax code, give the object (cons of syntax code and
1078 nil) to be stored in syntax table. Since these objects can be
1079 shared among syntax tables, we generate them in advance. By
1080 sharing objects, the function `describe-syntax' can give a more
1081 compact listing. */
1082 static Lisp_Object Vsyntax_code_object;
1085 DEFUN ("char-syntax", Fchar_syntax, Schar_syntax, 1, 1, 0,
1086 doc: /* Return the syntax code of CHARACTER, described by a character.
1087 For example, if CHARACTER is a word constituent, the
1088 character `w' (119) is returned.
1089 The characters that correspond to various syntax codes
1090 are listed in the documentation of `modify-syntax-entry'. */)
1091 (Lisp_Object character)
1093 int char_int;
1094 CHECK_CHARACTER (character);
1095 char_int = XINT (character);
1096 SETUP_BUFFER_SYNTAX_TABLE ();
1097 return make_number (syntax_code_spec[SYNTAX (char_int)]);
1100 DEFUN ("matching-paren", Fmatching_paren, Smatching_paren, 1, 1, 0,
1101 doc: /* Return the matching parenthesis of CHARACTER, or nil if none. */)
1102 (Lisp_Object character)
1104 int char_int;
1105 enum syntaxcode code;
1106 CHECK_CHARACTER (character);
1107 char_int = XINT (character);
1108 SETUP_BUFFER_SYNTAX_TABLE ();
1109 code = SYNTAX (char_int);
1110 if (code == Sopen || code == Sclose)
1111 return SYNTAX_MATCH (char_int);
1112 return Qnil;
1115 DEFUN ("string-to-syntax", Fstring_to_syntax, Sstring_to_syntax, 1, 1, 0,
1116 doc: /* Convert a syntax descriptor STRING into a raw syntax descriptor.
1117 STRING should be a string of the form allowed as argument of
1118 `modify-syntax-entry'. The return value is a raw syntax descriptor: a
1119 cons cell (CODE . MATCHING-CHAR) which can be used, for example, as
1120 the value of a `syntax-table' text property. */)
1121 (Lisp_Object string)
1123 const unsigned char *p;
1124 int val;
1125 Lisp_Object match;
1127 CHECK_STRING (string);
1129 p = SDATA (string);
1130 val = syntax_spec_code[*p++];
1131 if (val == 0377)
1132 error ("Invalid syntax description letter: %c", p[-1]);
1134 if (val == Sinherit)
1135 return Qnil;
1137 if (*p)
1139 int len;
1140 int character = STRING_CHAR_AND_LENGTH (p, len);
1141 XSETINT (match, character);
1142 if (XFASTINT (match) == ' ')
1143 match = Qnil;
1144 p += len;
1146 else
1147 match = Qnil;
1149 while (*p)
1150 switch (*p++)
1152 case '1':
1153 val |= 1 << 16;
1154 break;
1156 case '2':
1157 val |= 1 << 17;
1158 break;
1160 case '3':
1161 val |= 1 << 18;
1162 break;
1164 case '4':
1165 val |= 1 << 19;
1166 break;
1168 case 'p':
1169 val |= 1 << 20;
1170 break;
1172 case 'b':
1173 val |= 1 << 21;
1174 break;
1176 case 'n':
1177 val |= 1 << 22;
1178 break;
1180 case 'c':
1181 val |= 1 << 23;
1182 break;
1185 if (val < ASIZE (Vsyntax_code_object) && NILP (match))
1186 return AREF (Vsyntax_code_object, val);
1187 else
1188 /* Since we can't use a shared object, let's make a new one. */
1189 return Fcons (make_number (val), match);
1192 /* I really don't know why this is interactive
1193 help-form should at least be made useful whilst reading the second arg. */
1194 DEFUN ("modify-syntax-entry", Fmodify_syntax_entry, Smodify_syntax_entry, 2, 3,
1195 "cSet syntax for character: \nsSet syntax for %s to: ",
1196 doc: /* Set syntax for character CHAR according to string NEWENTRY.
1197 The syntax is changed only for table SYNTAX-TABLE, which defaults to
1198 the current buffer's syntax table.
1199 CHAR may be a cons (MIN . MAX), in which case, syntaxes of all characters
1200 in the range MIN to MAX are changed.
1201 The first character of NEWENTRY should be one of the following:
1202 Space or - whitespace syntax. w word constituent.
1203 _ symbol constituent. . punctuation.
1204 ( open-parenthesis. ) close-parenthesis.
1205 " string quote. \\ escape.
1206 $ paired delimiter. \\=' expression quote or prefix operator.
1207 < comment starter. > comment ender.
1208 / character-quote. @ inherit from parent table.
1209 | generic string fence. ! generic comment fence.
1211 Only single-character comment start and end sequences are represented thus.
1212 Two-character sequences are represented as described below.
1213 The second character of NEWENTRY is the matching parenthesis,
1214 used only if the first character is `(' or `)'.
1215 Any additional characters are flags.
1216 Defined flags are the characters 1, 2, 3, 4, b, p, and n.
1217 1 means CHAR is the start of a two-char comment start sequence.
1218 2 means CHAR is the second character of such a sequence.
1219 3 means CHAR is the start of a two-char comment end sequence.
1220 4 means CHAR is the second character of such a sequence.
1222 There can be several orthogonal comment sequences. This is to support
1223 language modes such as C++. By default, all comment sequences are of style
1224 a, but you can set the comment sequence style to b (on the second character
1225 of a comment-start, and the first character of a comment-end sequence) and/or
1226 c (on any of its chars) using this flag:
1227 b means CHAR is part of comment sequence b.
1228 c means CHAR is part of comment sequence c.
1229 n means CHAR is part of a nestable comment sequence.
1231 p means CHAR is a prefix character for `backward-prefix-chars';
1232 such characters are treated as whitespace when they occur
1233 between expressions.
1234 usage: (modify-syntax-entry CHAR NEWENTRY &optional SYNTAX-TABLE) */)
1235 (Lisp_Object c, Lisp_Object newentry, Lisp_Object syntax_table)
1237 if (CONSP (c))
1239 CHECK_CHARACTER_CAR (c);
1240 CHECK_CHARACTER_CDR (c);
1242 else
1243 CHECK_CHARACTER (c);
1245 if (NILP (syntax_table))
1246 syntax_table = BVAR (current_buffer, syntax_table);
1247 else
1248 check_syntax_table (syntax_table);
1250 newentry = Fstring_to_syntax (newentry);
1251 if (CONSP (c))
1252 SET_RAW_SYNTAX_ENTRY_RANGE (syntax_table, c, newentry);
1253 else
1254 SET_RAW_SYNTAX_ENTRY (syntax_table, XINT (c), newentry);
1256 /* We clear the regexp cache, since character classes can now have
1257 different values from those in the compiled regexps.*/
1258 clear_regexp_cache ();
1260 return Qnil;
1263 /* Dump syntax table to buffer in human-readable format */
1265 DEFUN ("internal-describe-syntax-value", Finternal_describe_syntax_value,
1266 Sinternal_describe_syntax_value, 1, 1, 0,
1267 doc: /* Insert a description of the internal syntax description SYNTAX at point. */)
1268 (Lisp_Object syntax)
1270 int code, syntax_code;
1271 bool start1, start2, end1, end2, prefix, comstyleb, comstylec, comnested;
1272 char str[2];
1273 Lisp_Object first, match_lisp, value = syntax;
1275 if (NILP (value))
1277 insert_string ("default");
1278 return syntax;
1281 if (CHAR_TABLE_P (value))
1283 insert_string ("deeper char-table ...");
1284 return syntax;
1287 if (!CONSP (value))
1289 insert_string ("invalid");
1290 return syntax;
1293 first = XCAR (value);
1294 match_lisp = XCDR (value);
1296 if (!INTEGERP (first) || !(NILP (match_lisp) || CHARACTERP (match_lisp)))
1298 insert_string ("invalid");
1299 return syntax;
1302 syntax_code = XINT (first) & INT_MAX;
1303 code = syntax_code & 0377;
1304 start1 = SYNTAX_FLAGS_COMSTART_FIRST (syntax_code);
1305 start2 = SYNTAX_FLAGS_COMSTART_SECOND (syntax_code);
1306 end1 = SYNTAX_FLAGS_COMEND_FIRST (syntax_code);
1307 end2 = SYNTAX_FLAGS_COMEND_SECOND (syntax_code);
1308 prefix = SYNTAX_FLAGS_PREFIX (syntax_code);
1309 comstyleb = SYNTAX_FLAGS_COMMENT_STYLEB (syntax_code);
1310 comstylec = SYNTAX_FLAGS_COMMENT_STYLEC (syntax_code);
1311 comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax_code);
1313 if (Smax <= code)
1315 insert_string ("invalid");
1316 return syntax;
1319 str[0] = syntax_code_spec[code], str[1] = 0;
1320 insert (str, 1);
1322 if (NILP (match_lisp))
1323 insert (" ", 1);
1324 else
1325 insert_char (XINT (match_lisp));
1327 if (start1)
1328 insert ("1", 1);
1329 if (start2)
1330 insert ("2", 1);
1332 if (end1)
1333 insert ("3", 1);
1334 if (end2)
1335 insert ("4", 1);
1337 if (prefix)
1338 insert ("p", 1);
1339 if (comstyleb)
1340 insert ("b", 1);
1341 if (comstylec)
1342 insert ("c", 1);
1343 if (comnested)
1344 insert ("n", 1);
1346 insert_string ("\twhich means: ");
1348 switch (code)
1350 case Swhitespace:
1351 insert_string ("whitespace"); break;
1352 case Spunct:
1353 insert_string ("punctuation"); break;
1354 case Sword:
1355 insert_string ("word"); break;
1356 case Ssymbol:
1357 insert_string ("symbol"); break;
1358 case Sopen:
1359 insert_string ("open"); break;
1360 case Sclose:
1361 insert_string ("close"); break;
1362 case Squote:
1363 insert_string ("prefix"); break;
1364 case Sstring:
1365 insert_string ("string"); break;
1366 case Smath:
1367 insert_string ("math"); break;
1368 case Sescape:
1369 insert_string ("escape"); break;
1370 case Scharquote:
1371 insert_string ("charquote"); break;
1372 case Scomment:
1373 insert_string ("comment"); break;
1374 case Sendcomment:
1375 insert_string ("endcomment"); break;
1376 case Sinherit:
1377 insert_string ("inherit"); break;
1378 case Scomment_fence:
1379 insert_string ("comment fence"); break;
1380 case Sstring_fence:
1381 insert_string ("string fence"); break;
1382 default:
1383 insert_string ("invalid");
1384 return syntax;
1387 if (!NILP (match_lisp))
1389 insert_string (", matches ");
1390 insert_char (XINT (match_lisp));
1393 if (start1)
1394 insert_string (",\n\t is the first character of a comment-start sequence");
1395 if (start2)
1396 insert_string (",\n\t is the second character of a comment-start sequence");
1398 if (end1)
1399 insert_string (",\n\t is the first character of a comment-end sequence");
1400 if (end2)
1401 insert_string (",\n\t is the second character of a comment-end sequence");
1402 if (comstyleb)
1403 insert_string (" (comment style b)");
1404 if (comstylec)
1405 insert_string (" (comment style c)");
1406 if (comnested)
1407 insert_string (" (nestable)");
1409 if (prefix)
1411 AUTO_STRING (prefixdoc,
1412 ",\n\t is a prefix character for `backward-prefix-chars'");
1413 insert1 (Fsubstitute_command_keys (prefixdoc));
1416 return syntax;
1419 /* Return the position across COUNT words from FROM.
1420 If that many words cannot be found before the end of the buffer, return 0.
1421 COUNT negative means scan backward and stop at word beginning. */
1423 ptrdiff_t
1424 scan_words (ptrdiff_t from, EMACS_INT count)
1426 ptrdiff_t beg = BEGV;
1427 ptrdiff_t end = ZV;
1428 ptrdiff_t from_byte = CHAR_TO_BYTE (from);
1429 enum syntaxcode code;
1430 int ch0, ch1;
1431 Lisp_Object func, pos;
1433 SETUP_SYNTAX_TABLE (from, count);
1435 while (count > 0)
1437 while (true)
1439 if (from == end)
1440 return 0;
1441 UPDATE_SYNTAX_TABLE_FORWARD (from);
1442 ch0 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
1443 code = SYNTAX (ch0);
1444 INC_BOTH (from, from_byte);
1445 if (words_include_escapes
1446 && (code == Sescape || code == Scharquote))
1447 break;
1448 if (code == Sword)
1449 break;
1450 rarely_quit (from);
1452 /* Now CH0 is a character which begins a word and FROM is the
1453 position of the next character. */
1454 func = CHAR_TABLE_REF (Vfind_word_boundary_function_table, ch0);
1455 if (! NILP (Ffboundp (func)))
1457 pos = call2 (func, make_number (from - 1), make_number (end));
1458 if (INTEGERP (pos) && from < XINT (pos) && XINT (pos) <= ZV)
1460 from = XINT (pos);
1461 from_byte = CHAR_TO_BYTE (from);
1464 else
1466 while (1)
1468 if (from == end) break;
1469 UPDATE_SYNTAX_TABLE_FORWARD (from);
1470 ch1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
1471 code = SYNTAX (ch1);
1472 if ((code != Sword
1473 && (! words_include_escapes
1474 || (code != Sescape && code != Scharquote)))
1475 || word_boundary_p (ch0, ch1))
1476 break;
1477 INC_BOTH (from, from_byte);
1478 ch0 = ch1;
1479 rarely_quit (from);
1482 count--;
1484 while (count < 0)
1486 while (true)
1488 if (from == beg)
1489 return 0;
1490 DEC_BOTH (from, from_byte);
1491 UPDATE_SYNTAX_TABLE_BACKWARD (from);
1492 ch1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
1493 code = SYNTAX (ch1);
1494 if (words_include_escapes
1495 && (code == Sescape || code == Scharquote))
1496 break;
1497 if (code == Sword)
1498 break;
1499 rarely_quit (from);
1501 /* Now CH1 is a character which ends a word and FROM is the
1502 position of it. */
1503 func = CHAR_TABLE_REF (Vfind_word_boundary_function_table, ch1);
1504 if (! NILP (Ffboundp (func)))
1506 pos = call2 (func, make_number (from), make_number (beg));
1507 if (INTEGERP (pos) && BEGV <= XINT (pos) && XINT (pos) < from)
1509 from = XINT (pos);
1510 from_byte = CHAR_TO_BYTE (from);
1513 else
1515 while (1)
1517 if (from == beg)
1518 break;
1519 DEC_BOTH (from, from_byte);
1520 UPDATE_SYNTAX_TABLE_BACKWARD (from);
1521 ch0 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
1522 code = SYNTAX (ch0);
1523 if ((code != Sword
1524 && (! words_include_escapes
1525 || (code != Sescape && code != Scharquote)))
1526 || word_boundary_p (ch0, ch1))
1528 INC_BOTH (from, from_byte);
1529 break;
1531 ch1 = ch0;
1532 rarely_quit (from);
1535 count++;
1538 return from;
1541 DEFUN ("forward-word", Fforward_word, Sforward_word, 0, 1, "^p",
1542 doc: /* Move point forward ARG words (backward if ARG is negative).
1543 If ARG is omitted or nil, move point forward one word.
1544 Normally returns t.
1545 If an edge of the buffer or a field boundary is reached, point is
1546 left there and the function returns nil. Field boundaries are not
1547 noticed if `inhibit-field-text-motion' is non-nil.
1549 The word boundaries are normally determined by the buffer's syntax
1550 table, but `find-word-boundary-function-table', such as set up
1551 by `subword-mode', can change that. If a Lisp program needs to
1552 move by words determined strictly by the syntax table, it should
1553 use `forward-word-strictly' instead. */)
1554 (Lisp_Object arg)
1556 Lisp_Object tmp;
1557 ptrdiff_t orig_val, val;
1559 if (NILP (arg))
1560 XSETFASTINT (arg, 1);
1561 else
1562 CHECK_NUMBER (arg);
1564 val = orig_val = scan_words (PT, XINT (arg));
1565 if (! orig_val)
1566 val = XINT (arg) > 0 ? ZV : BEGV;
1568 /* Avoid jumping out of an input field. */
1569 tmp = Fconstrain_to_field (make_number (val), make_number (PT),
1570 Qnil, Qnil, Qnil);
1571 val = XFASTINT (tmp);
1573 SET_PT (val);
1574 return val == orig_val ? Qt : Qnil;
1577 DEFUN ("skip-chars-forward", Fskip_chars_forward, Sskip_chars_forward, 1, 2, 0,
1578 doc: /* Move point forward, stopping before a char not in STRING, or at pos LIM.
1579 STRING is like the inside of a `[...]' in a regular expression
1580 except that `]' is never special and `\\' quotes `^', `-' or `\\'
1581 (but not at the end of a range; quoting is never needed there).
1582 Thus, with arg "a-zA-Z", this skips letters stopping before first nonletter.
1583 With arg "^a-zA-Z", skips nonletters stopping before first letter.
1584 Char classes, e.g. `[:alpha:]', are supported.
1586 Returns the distance traveled, either zero or positive. */)
1587 (Lisp_Object string, Lisp_Object lim)
1589 return skip_chars (1, string, lim, 1);
1592 DEFUN ("skip-chars-backward", Fskip_chars_backward, Sskip_chars_backward, 1, 2, 0,
1593 doc: /* Move point backward, stopping after a char not in STRING, or at pos LIM.
1594 See `skip-chars-forward' for details.
1595 Returns the distance traveled, either zero or negative. */)
1596 (Lisp_Object string, Lisp_Object lim)
1598 return skip_chars (0, string, lim, 1);
1601 DEFUN ("skip-syntax-forward", Fskip_syntax_forward, Sskip_syntax_forward, 1, 2, 0,
1602 doc: /* Move point forward across chars in specified syntax classes.
1603 SYNTAX is a string of syntax code characters.
1604 Stop before a char whose syntax is not in SYNTAX, or at position LIM.
1605 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1606 This function returns the distance traveled, either zero or positive. */)
1607 (Lisp_Object syntax, Lisp_Object lim)
1609 return skip_syntaxes (1, syntax, lim);
1612 DEFUN ("skip-syntax-backward", Fskip_syntax_backward, Sskip_syntax_backward, 1, 2, 0,
1613 doc: /* Move point backward across chars in specified syntax classes.
1614 SYNTAX is a string of syntax code characters.
1615 Stop on reaching a char whose syntax is not in SYNTAX, or at position LIM.
1616 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1617 This function returns either zero or a negative number, and the absolute value
1618 of this is the distance traveled. */)
1619 (Lisp_Object syntax, Lisp_Object lim)
1621 return skip_syntaxes (0, syntax, lim);
1624 static Lisp_Object
1625 skip_chars (bool forwardp, Lisp_Object string, Lisp_Object lim,
1626 bool handle_iso_classes)
1628 int c;
1629 char fastmap[0400];
1630 /* Store the ranges of non-ASCII characters. */
1631 int *char_ranges UNINIT;
1632 int n_char_ranges = 0;
1633 bool negate = 0;
1634 ptrdiff_t i, i_byte;
1635 /* True if the current buffer is multibyte and the region contains
1636 non-ASCII chars. */
1637 bool multibyte;
1638 /* True if STRING is multibyte and it contains non-ASCII chars. */
1639 bool string_multibyte;
1640 ptrdiff_t size_byte;
1641 const unsigned char *str;
1642 int len;
1643 Lisp_Object iso_classes;
1644 USE_SAFE_ALLOCA;
1646 CHECK_STRING (string);
1647 iso_classes = Qnil;
1649 if (NILP (lim))
1650 XSETINT (lim, forwardp ? ZV : BEGV);
1651 else
1652 CHECK_NUMBER_COERCE_MARKER (lim);
1654 /* In any case, don't allow scan outside bounds of buffer. */
1655 if (XINT (lim) > ZV)
1656 XSETFASTINT (lim, ZV);
1657 if (XINT (lim) < BEGV)
1658 XSETFASTINT (lim, BEGV);
1660 multibyte = (!NILP (BVAR (current_buffer, enable_multibyte_characters))
1661 && (XINT (lim) - PT != CHAR_TO_BYTE (XINT (lim)) - PT_BYTE));
1662 string_multibyte = SBYTES (string) > SCHARS (string);
1664 memset (fastmap, 0, sizeof fastmap);
1666 str = SDATA (string);
1667 size_byte = SBYTES (string);
1669 i_byte = 0;
1670 if (i_byte < size_byte
1671 && SREF (string, 0) == '^')
1673 negate = 1; i_byte++;
1676 /* Find the characters specified and set their elements of fastmap.
1677 Handle backslashes and ranges specially.
1679 If STRING contains non-ASCII characters, setup char_ranges for
1680 them and use fastmap only for their leading codes. */
1682 if (! string_multibyte)
1684 bool string_has_eight_bit = 0;
1686 /* At first setup fastmap. */
1687 while (i_byte < size_byte)
1689 if (handle_iso_classes)
1691 const unsigned char *ch = str + i_byte;
1692 re_wctype_t cc = re_wctype_parse (&ch, size_byte - i_byte);
1693 if (cc == 0)
1694 error ("Invalid ISO C character class");
1695 if (cc != -1)
1697 iso_classes = Fcons (make_number (cc), iso_classes);
1698 i_byte = ch - str;
1699 continue;
1703 c = str[i_byte++];
1705 if (c == '\\')
1707 if (i_byte == size_byte)
1708 break;
1710 c = str[i_byte++];
1712 /* Treat `-' as range character only if another character
1713 follows. */
1714 if (i_byte + 1 < size_byte
1715 && str[i_byte] == '-')
1717 int c2;
1719 /* Skip over the dash. */
1720 i_byte++;
1722 /* Get the end of the range. */
1723 c2 = str[i_byte++];
1724 if (c2 == '\\'
1725 && i_byte < size_byte)
1726 c2 = str[i_byte++];
1728 if (c <= c2)
1730 int lim2 = c2 + 1;
1731 while (c < lim2)
1732 fastmap[c++] = 1;
1733 if (! ASCII_CHAR_P (c2))
1734 string_has_eight_bit = 1;
1737 else
1739 fastmap[c] = 1;
1740 if (! ASCII_CHAR_P (c))
1741 string_has_eight_bit = 1;
1745 /* If the current range is multibyte and STRING contains
1746 eight-bit chars, arrange fastmap and setup char_ranges for
1747 the corresponding multibyte chars. */
1748 if (multibyte && string_has_eight_bit)
1750 char *p1;
1751 char himap[0200 + 1];
1752 memcpy (himap, fastmap + 0200, 0200);
1753 himap[0200] = 0;
1754 memset (fastmap + 0200, 0, 0200);
1755 SAFE_NALLOCA (char_ranges, 2, 128);
1756 i = 0;
1758 while ((p1 = memchr (himap + i, 1, 0200 - i)))
1760 /* Deduce the next range C..C2 from the next clump of 1s
1761 in HIMAP starting with &HIMAP[I]. HIMAP is the high
1762 order half of the old FASTMAP. */
1763 int c2, leading_code;
1764 i = p1 - himap;
1765 c = BYTE8_TO_CHAR (i + 0200);
1766 i += strlen (p1);
1767 c2 = BYTE8_TO_CHAR (i + 0200 - 1);
1769 char_ranges[n_char_ranges++] = c;
1770 char_ranges[n_char_ranges++] = c2;
1771 leading_code = CHAR_LEADING_CODE (c);
1772 memset (fastmap + leading_code, 1,
1773 CHAR_LEADING_CODE (c2) - leading_code + 1);
1777 else /* STRING is multibyte */
1779 SAFE_NALLOCA (char_ranges, 2, SCHARS (string));
1781 while (i_byte < size_byte)
1783 int leading_code = str[i_byte];
1785 if (handle_iso_classes)
1787 const unsigned char *ch = str + i_byte;
1788 re_wctype_t cc = re_wctype_parse (&ch, size_byte - i_byte);
1789 if (cc == 0)
1790 error ("Invalid ISO C character class");
1791 if (cc != -1)
1793 iso_classes = Fcons (make_number (cc), iso_classes);
1794 i_byte = ch - str;
1795 continue;
1799 if (leading_code== '\\')
1801 if (++i_byte == size_byte)
1802 break;
1804 leading_code = str[i_byte];
1806 c = STRING_CHAR_AND_LENGTH (str + i_byte, len);
1807 i_byte += len;
1810 /* Treat `-' as range character only if another character
1811 follows. */
1812 if (i_byte + 1 < size_byte
1813 && str[i_byte] == '-')
1815 int c2, leading_code2;
1817 /* Skip over the dash. */
1818 i_byte++;
1820 /* Get the end of the range. */
1821 leading_code2 = str[i_byte];
1822 c2 = STRING_CHAR_AND_LENGTH (str + i_byte, len);
1823 i_byte += len;
1825 if (c2 == '\\'
1826 && i_byte < size_byte)
1828 leading_code2 = str[i_byte];
1829 c2 = STRING_CHAR_AND_LENGTH (str + i_byte, len);
1830 i_byte += len;
1833 if (c > c2)
1834 continue;
1835 if (ASCII_CHAR_P (c))
1837 while (c <= c2 && c < 0x80)
1838 fastmap[c++] = 1;
1839 leading_code = CHAR_LEADING_CODE (c);
1841 if (! ASCII_CHAR_P (c))
1843 int lim2 = leading_code2 + 1;
1844 while (leading_code < lim2)
1845 fastmap[leading_code++] = 1;
1846 if (c <= c2)
1848 char_ranges[n_char_ranges++] = c;
1849 char_ranges[n_char_ranges++] = c2;
1853 else
1855 if (ASCII_CHAR_P (c))
1856 fastmap[c] = 1;
1857 else
1859 fastmap[leading_code] = 1;
1860 char_ranges[n_char_ranges++] = c;
1861 char_ranges[n_char_ranges++] = c;
1866 /* If the current range is unibyte and STRING contains non-ASCII
1867 chars, arrange fastmap for the corresponding unibyte
1868 chars. */
1870 if (! multibyte && n_char_ranges > 0)
1872 memset (fastmap + 0200, 0, 0200);
1873 for (i = 0; i < n_char_ranges; i += 2)
1875 int c1 = char_ranges[i];
1876 int lim2 = char_ranges[i + 1] + 1;
1878 for (; c1 < lim2; c1++)
1880 int b = CHAR_TO_BYTE_SAFE (c1);
1881 if (b >= 0)
1882 fastmap[b] = 1;
1888 /* If ^ was the first character, complement the fastmap. */
1889 if (negate)
1891 if (! multibyte)
1892 for (i = 0; i < sizeof fastmap; i++)
1893 fastmap[i] ^= 1;
1894 else
1896 for (i = 0; i < 0200; i++)
1897 fastmap[i] ^= 1;
1898 /* All non-ASCII chars possibly match. */
1899 for (; i < sizeof fastmap; i++)
1900 fastmap[i] = 1;
1905 ptrdiff_t start_point = PT;
1906 ptrdiff_t pos = PT;
1907 ptrdiff_t pos_byte = PT_BYTE;
1908 unsigned char *p = PT_ADDR, *endp, *stop;
1910 if (forwardp)
1912 endp = (XINT (lim) == GPT) ? GPT_ADDR : CHAR_POS_ADDR (XINT (lim));
1913 stop = (pos < GPT && GPT < XINT (lim)) ? GPT_ADDR : endp;
1915 else
1917 endp = CHAR_POS_ADDR (XINT (lim));
1918 stop = (pos >= GPT && GPT > XINT (lim)) ? GAP_END_ADDR : endp;
1921 /* This code may look up syntax tables using functions that rely on the
1922 gl_state object. To make sure this object is not out of date,
1923 let's initialize it manually.
1924 We ignore syntax-table text-properties for now, since that's
1925 what we've done in the past. */
1926 SETUP_BUFFER_SYNTAX_TABLE ();
1927 if (forwardp)
1929 if (multibyte)
1930 while (1)
1932 int nbytes;
1934 if (p >= stop)
1936 if (p >= endp)
1937 break;
1938 p = GAP_END_ADDR;
1939 stop = endp;
1941 c = STRING_CHAR_AND_LENGTH (p, nbytes);
1942 if (! NILP (iso_classes) && in_classes (c, iso_classes))
1944 if (negate)
1945 break;
1946 else
1947 goto fwd_ok;
1950 if (! fastmap[*p])
1951 break;
1952 if (! ASCII_CHAR_P (c))
1954 /* As we are looking at a multibyte character, we
1955 must look up the character in the table
1956 CHAR_RANGES. If there's no data in the table,
1957 that character is not what we want to skip. */
1959 /* The following code do the right thing even if
1960 n_char_ranges is zero (i.e. no data in
1961 CHAR_RANGES). */
1962 for (i = 0; i < n_char_ranges; i += 2)
1963 if (c >= char_ranges[i] && c <= char_ranges[i + 1])
1964 break;
1965 if (!(negate ^ (i < n_char_ranges)))
1966 break;
1968 fwd_ok:
1969 p += nbytes, pos++, pos_byte += nbytes;
1970 rarely_quit (pos);
1972 else
1973 while (true)
1975 if (p >= stop)
1977 if (p >= endp)
1978 break;
1979 p = GAP_END_ADDR;
1980 stop = endp;
1983 if (!NILP (iso_classes) && in_classes (*p, iso_classes))
1985 if (negate)
1986 break;
1987 else
1988 goto fwd_unibyte_ok;
1991 if (!fastmap[*p])
1992 break;
1993 fwd_unibyte_ok:
1994 p++, pos++, pos_byte++;
1995 rarely_quit (pos);
1998 else
2000 if (multibyte)
2001 while (true)
2003 if (p <= stop)
2005 if (p <= endp)
2006 break;
2007 p = GPT_ADDR;
2008 stop = endp;
2010 unsigned char *prev_p = p;
2012 p--;
2013 while (stop <= p && ! CHAR_HEAD_P (*p));
2015 c = STRING_CHAR (p);
2017 if (! NILP (iso_classes) && in_classes (c, iso_classes))
2019 if (negate)
2020 break;
2021 else
2022 goto back_ok;
2025 if (! fastmap[*p])
2026 break;
2027 if (! ASCII_CHAR_P (c))
2029 /* See the comment in the previous similar code. */
2030 for (i = 0; i < n_char_ranges; i += 2)
2031 if (c >= char_ranges[i] && c <= char_ranges[i + 1])
2032 break;
2033 if (!(negate ^ (i < n_char_ranges)))
2034 break;
2036 back_ok:
2037 pos--, pos_byte -= prev_p - p;
2038 rarely_quit (pos);
2040 else
2041 while (true)
2043 if (p <= stop)
2045 if (p <= endp)
2046 break;
2047 p = GPT_ADDR;
2048 stop = endp;
2051 if (! NILP (iso_classes) && in_classes (p[-1], iso_classes))
2053 if (negate)
2054 break;
2055 else
2056 goto back_unibyte_ok;
2059 if (!fastmap[p[-1]])
2060 break;
2061 back_unibyte_ok:
2062 p--, pos--, pos_byte--;
2063 rarely_quit (pos);
2067 SET_PT_BOTH (pos, pos_byte);
2069 SAFE_FREE ();
2070 return make_number (PT - start_point);
2075 static Lisp_Object
2076 skip_syntaxes (bool forwardp, Lisp_Object string, Lisp_Object lim)
2078 int c;
2079 unsigned char fastmap[0400];
2080 bool negate = 0;
2081 ptrdiff_t i, i_byte;
2082 bool multibyte;
2083 ptrdiff_t size_byte;
2084 unsigned char *str;
2086 CHECK_STRING (string);
2088 if (NILP (lim))
2089 XSETINT (lim, forwardp ? ZV : BEGV);
2090 else
2091 CHECK_NUMBER_COERCE_MARKER (lim);
2093 /* In any case, don't allow scan outside bounds of buffer. */
2094 if (XINT (lim) > ZV)
2095 XSETFASTINT (lim, ZV);
2096 if (XINT (lim) < BEGV)
2097 XSETFASTINT (lim, BEGV);
2099 if (forwardp ? (PT >= XFASTINT (lim)) : (PT <= XFASTINT (lim)))
2100 return make_number (0);
2102 multibyte = (!NILP (BVAR (current_buffer, enable_multibyte_characters))
2103 && (XINT (lim) - PT != CHAR_TO_BYTE (XINT (lim)) - PT_BYTE));
2105 memset (fastmap, 0, sizeof fastmap);
2107 if (SBYTES (string) > SCHARS (string))
2108 /* As this is very rare case (syntax spec is ASCII only), don't
2109 consider efficiency. */
2110 string = string_make_unibyte (string);
2112 str = SDATA (string);
2113 size_byte = SBYTES (string);
2115 i_byte = 0;
2116 if (i_byte < size_byte
2117 && SREF (string, 0) == '^')
2119 negate = 1; i_byte++;
2122 /* Find the syntaxes specified and set their elements of fastmap. */
2124 while (i_byte < size_byte)
2126 c = str[i_byte++];
2127 fastmap[syntax_spec_code[c]] = 1;
2130 /* If ^ was the first character, complement the fastmap. */
2131 if (negate)
2132 for (i = 0; i < sizeof fastmap; i++)
2133 fastmap[i] ^= 1;
2136 ptrdiff_t start_point = PT;
2137 ptrdiff_t pos = PT;
2138 ptrdiff_t pos_byte = PT_BYTE;
2139 unsigned char *p, *endp, *stop;
2141 SETUP_SYNTAX_TABLE (pos, forwardp ? 1 : -1);
2143 if (forwardp)
2145 while (true)
2147 p = BYTE_POS_ADDR (pos_byte);
2148 endp = XINT (lim) == GPT ? GPT_ADDR : CHAR_POS_ADDR (XINT (lim));
2149 stop = pos < GPT && GPT < XINT (lim) ? GPT_ADDR : endp;
2153 int nbytes;
2155 if (p >= stop)
2157 if (p >= endp)
2158 goto done;
2159 p = GAP_END_ADDR;
2160 stop = endp;
2162 if (multibyte)
2163 c = STRING_CHAR_AND_LENGTH (p, nbytes);
2164 else
2165 c = *p, nbytes = 1;
2166 if (! fastmap[SYNTAX (c)])
2167 goto done;
2168 p += nbytes, pos++, pos_byte += nbytes;
2169 rarely_quit (pos);
2171 while (!parse_sexp_lookup_properties
2172 || pos < gl_state.e_property);
2174 update_syntax_table_forward (pos + gl_state.offset,
2175 false, gl_state.object);
2178 else
2180 p = BYTE_POS_ADDR (pos_byte);
2181 endp = CHAR_POS_ADDR (XINT (lim));
2182 stop = pos >= GPT && GPT > XINT (lim) ? GAP_END_ADDR : endp;
2184 if (multibyte)
2186 while (true)
2188 if (p <= stop)
2190 if (p <= endp)
2191 break;
2192 p = GPT_ADDR;
2193 stop = endp;
2195 UPDATE_SYNTAX_TABLE_BACKWARD (pos - 1);
2197 unsigned char *prev_p = p;
2199 p--;
2200 while (stop <= p && ! CHAR_HEAD_P (*p));
2202 c = STRING_CHAR (p);
2203 if (! fastmap[SYNTAX (c)])
2204 break;
2205 pos--, pos_byte -= prev_p - p;
2206 rarely_quit (pos);
2209 else
2211 while (true)
2213 if (p <= stop)
2215 if (p <= endp)
2216 break;
2217 p = GPT_ADDR;
2218 stop = endp;
2220 UPDATE_SYNTAX_TABLE_BACKWARD (pos - 1);
2221 if (! fastmap[SYNTAX (p[-1])])
2222 break;
2223 p--, pos--, pos_byte--;
2224 rarely_quit (pos);
2229 done:
2230 SET_PT_BOTH (pos, pos_byte);
2232 return make_number (PT - start_point);
2236 /* Return true if character C belongs to one of the ISO classes
2237 in the list ISO_CLASSES. Each class is represented by an
2238 integer which is its type according to re_wctype. */
2240 static bool
2241 in_classes (int c, Lisp_Object iso_classes)
2243 bool fits_class = 0;
2245 while (CONSP (iso_classes))
2247 Lisp_Object elt;
2248 elt = XCAR (iso_classes);
2249 iso_classes = XCDR (iso_classes);
2251 if (re_iswctype (c, XFASTINT (elt)))
2252 fits_class = 1;
2255 return fits_class;
2258 /* Jump over a comment, assuming we are at the beginning of one.
2259 FROM is the current position.
2260 FROM_BYTE is the bytepos corresponding to FROM.
2261 Do not move past STOP (a charpos).
2262 The comment over which we have to jump is of style STYLE
2263 (either SYNTAX_FLAGS_COMMENT_STYLE (foo) or ST_COMMENT_STYLE).
2264 NESTING should be positive to indicate the nesting at the beginning
2265 for nested comments and should be zero or negative else.
2266 ST_COMMENT_STYLE cannot be nested.
2267 PREV_SYNTAX is the SYNTAX_WITH_FLAGS of the previous character
2268 (or 0 If the search cannot start in the middle of a two-character).
2270 If successful, return true and store the charpos of the comment's
2271 end into *CHARPOS_PTR and the corresponding bytepos into
2272 *BYTEPOS_PTR. Else, return false and store the charpos STOP into
2273 *CHARPOS_PTR, the corresponding bytepos into *BYTEPOS_PTR and the
2274 current nesting (as defined for state->incomment) in
2275 *INCOMMENT_PTR. Should the last character scanned in an incomplete
2276 comment be a possible first character of a two character construct,
2277 we store its SYNTAX_WITH_FLAGS into *last_syntax_ptr. Otherwise,
2278 we store Smax into *last_syntax_ptr.
2280 The comment end is the last character of the comment rather than the
2281 character just after the comment.
2283 Global syntax data is assumed to initially be valid for FROM and
2284 remains valid for forward search starting at the returned position. */
2286 static bool
2287 forw_comment (ptrdiff_t from, ptrdiff_t from_byte, ptrdiff_t stop,
2288 EMACS_INT nesting, int style, int prev_syntax,
2289 ptrdiff_t *charpos_ptr, ptrdiff_t *bytepos_ptr,
2290 EMACS_INT *incomment_ptr, int *last_syntax_ptr)
2292 unsigned short int quit_count = 0;
2293 int c, c1;
2294 enum syntaxcode code;
2295 int syntax, other_syntax;
2297 if (nesting <= 0) nesting = -1;
2299 /* Enter the loop in the middle so that we find
2300 a 2-char comment ender if we start in the middle of it. */
2301 syntax = prev_syntax;
2302 code = syntax & 0xff;
2303 if (syntax != 0 && from < stop) goto forw_incomment;
2305 while (1)
2307 if (from == stop)
2309 *incomment_ptr = nesting;
2310 *charpos_ptr = from;
2311 *bytepos_ptr = from_byte;
2312 *last_syntax_ptr =
2313 (code == Sescape || code == Scharquote
2314 || SYNTAX_FLAGS_COMEND_FIRST (syntax)
2315 || (nesting > 0
2316 && SYNTAX_FLAGS_COMSTART_FIRST (syntax)))
2317 ? syntax : Smax ;
2318 return 0;
2320 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2321 syntax = SYNTAX_WITH_FLAGS (c);
2322 code = syntax & 0xff;
2323 if (code == Sendcomment
2324 && SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0) == style
2325 && (SYNTAX_FLAGS_COMMENT_NESTED (syntax) ?
2326 (nesting > 0 && --nesting == 0) : nesting < 0)
2327 && !(Vcomment_end_can_be_escaped && char_quoted (from, from_byte)))
2328 /* We have encountered a comment end of the same style
2329 as the comment sequence which began this comment
2330 section. */
2331 break;
2332 if (code == Scomment_fence
2333 && style == ST_COMMENT_STYLE)
2334 /* We have encountered a comment end of the same style
2335 as the comment sequence which began this comment
2336 section. */
2337 break;
2338 if (nesting > 0
2339 && code == Scomment
2340 && SYNTAX_FLAGS_COMMENT_NESTED (syntax)
2341 && SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0) == style)
2342 /* We have encountered a nested comment of the same style
2343 as the comment sequence which began this comment section. */
2344 nesting++;
2345 INC_BOTH (from, from_byte);
2346 UPDATE_SYNTAX_TABLE_FORWARD (from);
2348 forw_incomment:
2349 if (from < stop && SYNTAX_FLAGS_COMEND_FIRST (syntax)
2350 && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
2351 other_syntax = SYNTAX_WITH_FLAGS (c1),
2352 SYNTAX_FLAGS_COMEND_SECOND (other_syntax))
2353 && SYNTAX_FLAGS_COMMENT_STYLE (syntax, other_syntax) == style
2354 && ((SYNTAX_FLAGS_COMMENT_NESTED (syntax) ||
2355 SYNTAX_FLAGS_COMMENT_NESTED (other_syntax))
2356 ? nesting > 0 : nesting < 0))
2358 syntax = Smax; /* So that "|#" (lisp) can not return
2359 the syntax of "#" in *last_syntax_ptr. */
2360 if (--nesting <= 0)
2361 /* We have encountered a comment end of the same style
2362 as the comment sequence which began this comment section. */
2363 break;
2364 else
2366 INC_BOTH (from, from_byte);
2367 UPDATE_SYNTAX_TABLE_FORWARD (from);
2370 if (nesting > 0
2371 && from < stop
2372 && SYNTAX_FLAGS_COMSTART_FIRST (syntax)
2373 && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
2374 other_syntax = SYNTAX_WITH_FLAGS (c1),
2375 SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax) == style
2376 && SYNTAX_FLAGS_COMSTART_SECOND (other_syntax))
2377 && (SYNTAX_FLAGS_COMMENT_NESTED (syntax) ||
2378 SYNTAX_FLAGS_COMMENT_NESTED (other_syntax)))
2379 /* We have encountered a nested comment of the same style
2380 as the comment sequence which began this comment section. */
2382 syntax = Smax; /* So that "#|#" isn't also a comment ender. */
2383 INC_BOTH (from, from_byte);
2384 UPDATE_SYNTAX_TABLE_FORWARD (from);
2385 nesting++;
2388 rarely_quit (++quit_count);
2390 *charpos_ptr = from;
2391 *bytepos_ptr = from_byte;
2392 *last_syntax_ptr = Smax; /* Any syntactic power the last byte had is
2393 used up. */
2394 return 1;
2397 DEFUN ("forward-comment", Fforward_comment, Sforward_comment, 1, 1, 0,
2398 doc: /*
2399 Move forward across up to COUNT comments. If COUNT is negative, move backward.
2400 Stop scanning if we find something other than a comment or whitespace.
2401 Set point to where scanning stops.
2402 If COUNT comments are found as expected, with nothing except whitespace
2403 between them, return t; otherwise return nil. */)
2404 (Lisp_Object count)
2406 ptrdiff_t from, from_byte, stop;
2407 int c, c1;
2408 enum syntaxcode code;
2409 int comstyle = 0; /* style of comment encountered */
2410 bool comnested = 0; /* whether the comment is nestable or not */
2411 bool found;
2412 EMACS_INT count1;
2413 ptrdiff_t out_charpos, out_bytepos;
2414 EMACS_INT dummy;
2415 int dummy2;
2416 unsigned short int quit_count = 0;
2418 CHECK_NUMBER (count);
2419 count1 = XINT (count);
2420 stop = count1 > 0 ? ZV : BEGV;
2422 from = PT;
2423 from_byte = PT_BYTE;
2425 SETUP_SYNTAX_TABLE (from, count1);
2426 while (count1 > 0)
2430 bool comstart_first;
2431 int syntax, other_syntax;
2433 if (from == stop)
2435 SET_PT_BOTH (from, from_byte);
2436 return Qnil;
2438 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2439 syntax = SYNTAX_WITH_FLAGS (c);
2440 code = SYNTAX (c);
2441 comstart_first = SYNTAX_FLAGS_COMSTART_FIRST (syntax);
2442 comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax);
2443 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0);
2444 INC_BOTH (from, from_byte);
2445 UPDATE_SYNTAX_TABLE_FORWARD (from);
2446 if (from < stop && comstart_first
2447 && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
2448 other_syntax = SYNTAX_WITH_FLAGS (c1),
2449 SYNTAX_FLAGS_COMSTART_SECOND (other_syntax)))
2451 /* We have encountered a comment start sequence and we
2452 are ignoring all text inside comments. We must record
2453 the comment style this sequence begins so that later,
2454 only a comment end of the same style actually ends
2455 the comment section. */
2456 code = Scomment;
2457 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax);
2458 comnested |= SYNTAX_FLAGS_COMMENT_NESTED (other_syntax);
2459 INC_BOTH (from, from_byte);
2460 UPDATE_SYNTAX_TABLE_FORWARD (from);
2462 rarely_quit (++quit_count);
2464 while (code == Swhitespace || (code == Sendcomment && c == '\n'));
2466 if (code == Scomment_fence)
2467 comstyle = ST_COMMENT_STYLE;
2468 else if (code != Scomment)
2470 DEC_BOTH (from, from_byte);
2471 SET_PT_BOTH (from, from_byte);
2472 return Qnil;
2474 /* We're at the start of a comment. */
2475 found = forw_comment (from, from_byte, stop, comnested, comstyle, 0,
2476 &out_charpos, &out_bytepos, &dummy, &dummy2);
2477 from = out_charpos; from_byte = out_bytepos;
2478 if (!found)
2480 SET_PT_BOTH (from, from_byte);
2481 return Qnil;
2483 INC_BOTH (from, from_byte);
2484 UPDATE_SYNTAX_TABLE_FORWARD (from);
2485 /* We have skipped one comment. */
2486 count1--;
2489 while (count1 < 0)
2491 while (true)
2493 if (from <= stop)
2495 SET_PT_BOTH (BEGV, BEGV_BYTE);
2496 return Qnil;
2499 DEC_BOTH (from, from_byte);
2500 /* char_quoted does UPDATE_SYNTAX_TABLE_BACKWARD (from). */
2501 bool quoted = char_quoted (from, from_byte);
2502 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2503 int syntax = SYNTAX_WITH_FLAGS (c);
2504 code = SYNTAX (c);
2505 comstyle = 0;
2506 comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax);
2507 if (code == Sendcomment)
2508 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0);
2509 if (from > stop && SYNTAX_FLAGS_COMEND_SECOND (syntax)
2510 && prev_char_comend_first (from, from_byte)
2511 && !char_quoted (from - 1, dec_bytepos (from_byte)))
2513 int other_syntax;
2514 /* We must record the comment style encountered so that
2515 later, we can match only the proper comment begin
2516 sequence of the same style. */
2517 DEC_BOTH (from, from_byte);
2518 code = Sendcomment;
2519 /* Calling char_quoted, above, set up global syntax position
2520 at the new value of FROM. */
2521 c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2522 other_syntax = SYNTAX_WITH_FLAGS (c1);
2523 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax);
2524 comnested |= SYNTAX_FLAGS_COMMENT_NESTED (other_syntax);
2527 if (code == Scomment_fence)
2529 /* Skip until first preceding unquoted comment_fence. */
2530 bool fence_found = 0;
2531 ptrdiff_t ini = from, ini_byte = from_byte;
2533 while (1)
2535 DEC_BOTH (from, from_byte);
2536 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2537 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2538 if (SYNTAX (c) == Scomment_fence
2539 && !char_quoted (from, from_byte))
2541 fence_found = 1;
2542 break;
2544 else if (from == stop)
2545 break;
2546 rarely_quit (++quit_count);
2548 if (fence_found == 0)
2550 from = ini; /* Set point to ini + 1. */
2551 from_byte = ini_byte;
2552 goto leave;
2554 else
2555 /* We have skipped one comment. */
2556 break;
2558 else if (code == Sendcomment)
2560 found = back_comment (from, from_byte, stop, comnested, comstyle,
2561 &out_charpos, &out_bytepos);
2562 if (!found)
2564 if (c == '\n')
2565 /* This end-of-line is not an end-of-comment.
2566 Treat it like a whitespace.
2567 CC-mode (and maybe others) relies on this behavior. */
2569 else
2571 /* Failure: we should go back to the end of this
2572 not-quite-endcomment. */
2573 if (SYNTAX (c) != code)
2574 /* It was a two-char Sendcomment. */
2575 INC_BOTH (from, from_byte);
2576 goto leave;
2579 else
2581 /* We have skipped one comment. */
2582 from = out_charpos, from_byte = out_bytepos;
2583 break;
2586 else if (code != Swhitespace || quoted)
2588 leave:
2589 INC_BOTH (from, from_byte);
2590 SET_PT_BOTH (from, from_byte);
2591 return Qnil;
2594 rarely_quit (++quit_count);
2597 count1++;
2600 SET_PT_BOTH (from, from_byte);
2601 return Qt;
2604 /* Return syntax code of character C if C is an ASCII character
2605 or if MULTIBYTE_SYMBOL_P is false. Otherwise, return Ssymbol. */
2607 static enum syntaxcode
2608 syntax_multibyte (int c, bool multibyte_symbol_p)
2610 return ASCII_CHAR_P (c) || !multibyte_symbol_p ? SYNTAX (c) : Ssymbol;
2613 static Lisp_Object
2614 scan_lists (EMACS_INT from, EMACS_INT count, EMACS_INT depth, bool sexpflag)
2616 Lisp_Object val;
2617 ptrdiff_t stop = count > 0 ? ZV : BEGV;
2618 int c, c1;
2619 int stringterm;
2620 bool quoted;
2621 bool mathexit = 0;
2622 enum syntaxcode code;
2623 EMACS_INT min_depth = depth; /* Err out if depth gets less than this. */
2624 int comstyle = 0; /* Style of comment encountered. */
2625 bool comnested = 0; /* Whether the comment is nestable or not. */
2626 ptrdiff_t temp_pos;
2627 EMACS_INT last_good = from;
2628 bool found;
2629 ptrdiff_t from_byte;
2630 ptrdiff_t out_bytepos, out_charpos;
2631 EMACS_INT dummy;
2632 int dummy2;
2633 bool multibyte_symbol_p = sexpflag && multibyte_syntax_as_symbol;
2634 unsigned short int quit_count = 0;
2636 if (depth > 0) min_depth = 0;
2638 if (from > ZV) from = ZV;
2639 if (from < BEGV) from = BEGV;
2641 from_byte = CHAR_TO_BYTE (from);
2643 maybe_quit ();
2645 SETUP_SYNTAX_TABLE (from, count);
2646 while (count > 0)
2648 while (from < stop)
2650 rarely_quit (++quit_count);
2651 bool comstart_first, prefix;
2652 int syntax, other_syntax;
2653 UPDATE_SYNTAX_TABLE_FORWARD (from);
2654 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2655 syntax = SYNTAX_WITH_FLAGS (c);
2656 code = syntax_multibyte (c, multibyte_symbol_p);
2657 comstart_first = SYNTAX_FLAGS_COMSTART_FIRST (syntax);
2658 comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax);
2659 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0);
2660 prefix = SYNTAX_FLAGS_PREFIX (syntax);
2661 if (depth == min_depth)
2662 last_good = from;
2663 INC_BOTH (from, from_byte);
2664 UPDATE_SYNTAX_TABLE_FORWARD (from);
2665 if (from < stop && comstart_first
2666 && (c = FETCH_CHAR_AS_MULTIBYTE (from_byte),
2667 other_syntax = SYNTAX_WITH_FLAGS (c),
2668 SYNTAX_FLAGS_COMSTART_SECOND (other_syntax))
2669 && parse_sexp_ignore_comments)
2671 /* We have encountered a comment start sequence and we
2672 are ignoring all text inside comments. We must record
2673 the comment style this sequence begins so that later,
2674 only a comment end of the same style actually ends
2675 the comment section. */
2676 code = Scomment;
2677 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax);
2678 comnested |= SYNTAX_FLAGS_COMMENT_NESTED (other_syntax);
2679 INC_BOTH (from, from_byte);
2680 UPDATE_SYNTAX_TABLE_FORWARD (from);
2683 if (prefix)
2684 continue;
2686 switch (code)
2688 case Sescape:
2689 case Scharquote:
2690 if (from == stop)
2691 goto lose;
2692 INC_BOTH (from, from_byte);
2693 /* Treat following character as a word constituent. */
2694 FALLTHROUGH;
2695 case Sword:
2696 case Ssymbol:
2697 if (depth || !sexpflag) break;
2698 /* This word counts as a sexp; return at end of it. */
2699 while (from < stop)
2701 UPDATE_SYNTAX_TABLE_FORWARD (from);
2703 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2704 switch (syntax_multibyte (c, multibyte_symbol_p))
2706 case Scharquote:
2707 case Sescape:
2708 INC_BOTH (from, from_byte);
2709 if (from == stop)
2710 goto lose;
2711 break;
2712 case Sword:
2713 case Ssymbol:
2714 case Squote:
2715 break;
2716 default:
2717 goto done;
2719 INC_BOTH (from, from_byte);
2720 rarely_quit (++quit_count);
2722 goto done;
2724 case Scomment_fence:
2725 comstyle = ST_COMMENT_STYLE;
2726 FALLTHROUGH;
2727 case Scomment:
2728 if (!parse_sexp_ignore_comments) break;
2729 UPDATE_SYNTAX_TABLE_FORWARD (from);
2730 found = forw_comment (from, from_byte, stop,
2731 comnested, comstyle, 0,
2732 &out_charpos, &out_bytepos, &dummy,
2733 &dummy2);
2734 from = out_charpos, from_byte = out_bytepos;
2735 if (!found)
2737 if (depth == 0)
2738 goto done;
2739 goto lose;
2741 INC_BOTH (from, from_byte);
2742 UPDATE_SYNTAX_TABLE_FORWARD (from);
2743 break;
2745 case Smath:
2746 if (!sexpflag)
2747 break;
2748 if (from != stop && c == FETCH_CHAR_AS_MULTIBYTE (from_byte))
2750 INC_BOTH (from, from_byte);
2752 if (mathexit)
2754 mathexit = 0;
2755 goto close1;
2757 mathexit = 1;
2758 FALLTHROUGH;
2759 case Sopen:
2760 if (!++depth) goto done;
2761 break;
2763 case Sclose:
2764 close1:
2765 if (!--depth) goto done;
2766 if (depth < min_depth)
2767 xsignal3 (Qscan_error,
2768 build_string ("Containing expression ends prematurely"),
2769 make_number (last_good), make_number (from));
2770 break;
2772 case Sstring:
2773 case Sstring_fence:
2774 temp_pos = dec_bytepos (from_byte);
2775 stringterm = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
2776 while (1)
2778 enum syntaxcode c_code;
2779 if (from >= stop)
2780 goto lose;
2781 UPDATE_SYNTAX_TABLE_FORWARD (from);
2782 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2783 c_code = syntax_multibyte (c, multibyte_symbol_p);
2784 if (code == Sstring
2785 ? c == stringterm && c_code == Sstring
2786 : c_code == Sstring_fence)
2787 break;
2789 if (c_code == Scharquote || c_code == Sescape)
2790 INC_BOTH (from, from_byte);
2791 INC_BOTH (from, from_byte);
2792 rarely_quit (++quit_count);
2794 INC_BOTH (from, from_byte);
2795 if (!depth && sexpflag) goto done;
2796 break;
2797 default:
2798 /* Ignore whitespace, punctuation, quote, endcomment. */
2799 break;
2803 /* Reached end of buffer. Error if within object, return nil if between */
2804 if (depth)
2805 goto lose;
2807 return Qnil;
2809 /* End of object reached */
2810 done:
2811 count--;
2814 while (count < 0)
2816 while (from > stop)
2818 rarely_quit (++quit_count);
2819 DEC_BOTH (from, from_byte);
2820 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2821 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2822 int syntax = SYNTAX_WITH_FLAGS (c);
2823 code = syntax_multibyte (c, multibyte_symbol_p);
2824 if (depth == min_depth)
2825 last_good = from;
2826 comstyle = 0;
2827 comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax);
2828 if (code == Sendcomment)
2829 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0);
2830 if (from > stop && SYNTAX_FLAGS_COMEND_SECOND (syntax)
2831 && prev_char_comend_first (from, from_byte)
2832 && parse_sexp_ignore_comments)
2834 /* We must record the comment style encountered so that
2835 later, we can match only the proper comment begin
2836 sequence of the same style. */
2837 int c2, other_syntax;
2838 DEC_BOTH (from, from_byte);
2839 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2840 code = Sendcomment;
2841 c2 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2842 other_syntax = SYNTAX_WITH_FLAGS (c2);
2843 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax);
2844 comnested |= SYNTAX_FLAGS_COMMENT_NESTED (other_syntax);
2847 /* Quoting turns anything except a comment-ender
2848 into a word character. Note that this cannot be true
2849 if we decremented FROM in the if-statement above. */
2850 if (code != Sendcomment && char_quoted (from, from_byte))
2852 DEC_BOTH (from, from_byte);
2853 code = Sword;
2855 else if (SYNTAX_FLAGS_PREFIX (syntax))
2856 continue;
2858 switch (code)
2860 case Sword:
2861 case Ssymbol:
2862 case Sescape:
2863 case Scharquote:
2864 if (depth || !sexpflag) break;
2865 /* This word counts as a sexp; count object finished
2866 after passing it. */
2867 while (from > stop)
2869 temp_pos = from_byte;
2870 if (! NILP (BVAR (current_buffer, enable_multibyte_characters)))
2871 DEC_POS (temp_pos);
2872 else
2873 temp_pos--;
2874 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
2875 c1 = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
2876 /* Don't allow comment-end to be quoted. */
2877 if (syntax_multibyte (c1, multibyte_symbol_p) == Sendcomment)
2878 goto done2;
2879 quoted = char_quoted (from - 1, temp_pos);
2880 if (quoted)
2882 DEC_BOTH (from, from_byte);
2883 temp_pos = dec_bytepos (temp_pos);
2884 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
2886 c1 = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
2887 if (! quoted)
2888 switch (syntax_multibyte (c1, multibyte_symbol_p))
2890 case Sword: case Ssymbol: case Squote: break;
2891 default: goto done2;
2893 DEC_BOTH (from, from_byte);
2894 rarely_quit (++quit_count);
2896 goto done2;
2898 case Smath:
2899 if (!sexpflag)
2900 break;
2901 if (from > BEGV)
2903 temp_pos = dec_bytepos (from_byte);
2904 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
2905 if (from != stop && c == FETCH_CHAR_AS_MULTIBYTE (temp_pos))
2906 DEC_BOTH (from, from_byte);
2908 if (mathexit)
2910 mathexit = 0;
2911 goto open2;
2913 mathexit = 1;
2914 FALLTHROUGH;
2915 case Sclose:
2916 if (!++depth) goto done2;
2917 break;
2919 case Sopen:
2920 open2:
2921 if (!--depth) goto done2;
2922 if (depth < min_depth)
2923 xsignal3 (Qscan_error,
2924 build_string ("Containing expression ends prematurely"),
2925 make_number (last_good), make_number (from));
2926 break;
2928 case Sendcomment:
2929 if (!parse_sexp_ignore_comments)
2930 break;
2931 found = back_comment (from, from_byte, stop, comnested, comstyle,
2932 &out_charpos, &out_bytepos);
2933 /* FIXME: if !found, it really wasn't a comment-end.
2934 For single-char Sendcomment, we can't do much about it apart
2935 from skipping the char.
2936 For 2-char endcomments, we could try again, taking both
2937 chars as separate entities, but it's a lot of trouble
2938 for very little gain, so we don't bother either. -sm */
2939 if (found)
2940 from = out_charpos, from_byte = out_bytepos;
2941 break;
2943 case Scomment_fence:
2944 case Sstring_fence:
2945 while (1)
2947 if (from == stop)
2948 goto lose;
2949 DEC_BOTH (from, from_byte);
2950 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2951 if (!char_quoted (from, from_byte))
2953 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2954 if (syntax_multibyte (c, multibyte_symbol_p) == code)
2955 break;
2957 rarely_quit (++quit_count);
2959 if (code == Sstring_fence && !depth && sexpflag) goto done2;
2960 break;
2962 case Sstring:
2963 stringterm = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2964 while (true)
2966 if (from == stop)
2967 goto lose;
2968 DEC_BOTH (from, from_byte);
2969 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2970 if (!char_quoted (from, from_byte))
2972 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2973 if (c == stringterm
2974 && (syntax_multibyte (c, multibyte_symbol_p)
2975 == Sstring))
2976 break;
2978 rarely_quit (++quit_count);
2980 if (!depth && sexpflag) goto done2;
2981 break;
2982 default:
2983 /* Ignore whitespace, punctuation, quote, endcomment. */
2984 break;
2988 /* Reached start of buffer. Error if within object, return nil if between */
2989 if (depth)
2990 goto lose;
2992 return Qnil;
2994 done2:
2995 count++;
2999 XSETFASTINT (val, from);
3000 return val;
3002 lose:
3003 xsignal3 (Qscan_error,
3004 build_string ("Unbalanced parentheses"),
3005 make_number (last_good), make_number (from));
3008 DEFUN ("scan-lists", Fscan_lists, Sscan_lists, 3, 3, 0,
3009 doc: /* Scan from character number FROM by COUNT lists.
3010 Scan forward if COUNT is positive, backward if COUNT is negative.
3011 Return the character number of the position thus found.
3013 A \"list", in this context, refers to a balanced parenthetical
3014 grouping, as determined by the syntax table.
3016 If DEPTH is nonzero, treat that as the nesting depth of the starting
3017 point (i.e. the starting point is DEPTH parentheses deep). This
3018 function scans over parentheses until the depth goes to zero COUNT
3019 times. Hence, positive DEPTH moves out that number of levels of
3020 parentheses, while negative DEPTH moves to a deeper level.
3022 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
3024 If we reach the beginning or end of the accessible part of the buffer
3025 before we have scanned over COUNT lists, return nil if the depth at
3026 that point is zero, and signal a error if the depth is nonzero. */)
3027 (Lisp_Object from, Lisp_Object count, Lisp_Object depth)
3029 CHECK_NUMBER (from);
3030 CHECK_NUMBER (count);
3031 CHECK_NUMBER (depth);
3033 return scan_lists (XINT (from), XINT (count), XINT (depth), 0);
3036 DEFUN ("scan-sexps", Fscan_sexps, Sscan_sexps, 2, 2, 0,
3037 doc: /* Scan from character number FROM by COUNT balanced expressions.
3038 If COUNT is negative, scan backwards.
3039 Returns the character number of the position thus found.
3041 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
3043 If the beginning or end of (the accessible part of) the buffer is reached
3044 in the middle of a parenthetical grouping, an error is signaled.
3045 If the beginning or end is reached between groupings
3046 but before count is used up, nil is returned. */)
3047 (Lisp_Object from, Lisp_Object count)
3049 CHECK_NUMBER (from);
3050 CHECK_NUMBER (count);
3052 return scan_lists (XINT (from), XINT (count), 0, 1);
3055 DEFUN ("backward-prefix-chars", Fbackward_prefix_chars, Sbackward_prefix_chars,
3056 0, 0, 0,
3057 doc: /* Move point backward over any number of chars with prefix syntax.
3058 This includes chars with expression prefix syntax class (\\=') and those with
3059 the prefix syntax flag (p). */)
3060 (void)
3062 ptrdiff_t beg = BEGV;
3063 ptrdiff_t opoint = PT;
3064 ptrdiff_t opoint_byte = PT_BYTE;
3065 ptrdiff_t pos = PT;
3066 ptrdiff_t pos_byte = PT_BYTE;
3067 int c;
3069 if (pos <= beg)
3071 SET_PT_BOTH (opoint, opoint_byte);
3073 return Qnil;
3076 SETUP_SYNTAX_TABLE (pos, -1);
3078 DEC_BOTH (pos, pos_byte);
3080 while (!char_quoted (pos, pos_byte)
3081 /* Previous statement updates syntax table. */
3082 && ((c = FETCH_CHAR_AS_MULTIBYTE (pos_byte), SYNTAX (c) == Squote)
3083 || syntax_prefix_flag_p (c)))
3085 opoint = pos;
3086 opoint_byte = pos_byte;
3088 if (pos <= beg)
3089 break;
3090 DEC_BOTH (pos, pos_byte);
3091 rarely_quit (pos);
3094 SET_PT_BOTH (opoint, opoint_byte);
3096 return Qnil;
3100 /* If the character at FROM_BYTE is the second part of a 2-character
3101 comment opener based on PREV_FROM_SYNTAX, update STATE and return
3102 true. */
3103 static bool
3104 in_2char_comment_start (struct lisp_parse_state *state,
3105 int prev_from_syntax,
3106 ptrdiff_t prev_from,
3107 ptrdiff_t from_byte)
3109 int c1, syntax;
3110 if (SYNTAX_FLAGS_COMSTART_FIRST (prev_from_syntax)
3111 && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
3112 syntax = SYNTAX_WITH_FLAGS (c1),
3113 SYNTAX_FLAGS_COMSTART_SECOND (syntax)))
3115 /* Record the comment style we have entered so that only
3116 the comment-end sequence of the same style actually
3117 terminates the comment section. */
3118 state->comstyle
3119 = SYNTAX_FLAGS_COMMENT_STYLE (syntax, prev_from_syntax);
3120 bool comnested = (SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax)
3121 | SYNTAX_FLAGS_COMMENT_NESTED (syntax));
3122 state->incomment = comnested ? 1 : -1;
3123 state->comstr_start = prev_from;
3124 return true;
3126 return false;
3129 /* Parse forward from FROM / FROM_BYTE to END,
3130 assuming that FROM has state STATE,
3131 and return a description of the state of the parse at END.
3132 If STOPBEFORE, stop at the start of an atom.
3133 If COMMENTSTOP is 1, stop at the start of a comment.
3134 If COMMENTSTOP is -1, stop at the start or end of a comment,
3135 after the beginning of a string, or after the end of a string. */
3137 static void
3138 scan_sexps_forward (struct lisp_parse_state *state,
3139 ptrdiff_t from, ptrdiff_t from_byte, ptrdiff_t end,
3140 EMACS_INT targetdepth, bool stopbefore,
3141 int commentstop)
3143 enum syntaxcode code;
3144 struct level { ptrdiff_t last, prev; };
3145 struct level levelstart[100];
3146 struct level *curlevel = levelstart;
3147 struct level *endlevel = levelstart + 100;
3148 EMACS_INT depth; /* Paren depth of current scanning location.
3149 level - levelstart equals this except
3150 when the depth becomes negative. */
3151 EMACS_INT mindepth; /* Lowest DEPTH value seen. */
3152 bool start_quoted = 0; /* True means starting after a char quote. */
3153 Lisp_Object tem;
3154 ptrdiff_t prev_from; /* Keep one character before FROM. */
3155 ptrdiff_t prev_from_byte;
3156 int prev_from_syntax, prev_prev_from_syntax;
3157 bool boundary_stop = commentstop == -1;
3158 bool nofence;
3159 bool found;
3160 ptrdiff_t out_bytepos, out_charpos;
3161 int temp;
3162 unsigned short int quit_count = 0;
3164 prev_from = from;
3165 prev_from_byte = from_byte;
3166 if (from != BEGV)
3167 DEC_BOTH (prev_from, prev_from_byte);
3169 /* Use this macro instead of `from++'. */
3170 #define INC_FROM \
3171 do { prev_from = from; \
3172 prev_from_byte = from_byte; \
3173 temp = FETCH_CHAR_AS_MULTIBYTE (prev_from_byte); \
3174 prev_prev_from_syntax = prev_from_syntax; \
3175 prev_from_syntax = SYNTAX_WITH_FLAGS (temp); \
3176 INC_BOTH (from, from_byte); \
3177 if (from < end) \
3178 UPDATE_SYNTAX_TABLE_FORWARD (from); \
3179 } while (0)
3181 maybe_quit ();
3183 depth = state->depth;
3184 start_quoted = state->quoted;
3185 prev_prev_from_syntax = Smax;
3186 prev_from_syntax = state->prev_syntax;
3188 tem = state->levelstarts;
3189 while (!NILP (tem)) /* >= second enclosing sexps. */
3191 Lisp_Object temhd = Fcar (tem);
3192 if (RANGED_INTEGERP (PTRDIFF_MIN, temhd, PTRDIFF_MAX))
3193 curlevel->last = XINT (temhd);
3194 if (++curlevel == endlevel)
3195 curlevel--; /* error ("Nesting too deep for parser"); */
3196 curlevel->prev = -1;
3197 curlevel->last = -1;
3198 tem = Fcdr (tem);
3200 curlevel->prev = -1;
3201 curlevel->last = -1;
3203 state->quoted = 0;
3204 mindepth = depth;
3206 SETUP_SYNTAX_TABLE (from, 1);
3208 /* Enter the loop at a place appropriate for initial state. */
3210 if (state->incomment)
3211 goto startincomment;
3212 if (state->instring >= 0)
3214 nofence = state->instring != ST_STRING_STYLE;
3215 if (start_quoted)
3216 goto startquotedinstring;
3217 goto startinstring;
3219 else if (start_quoted)
3220 goto startquoted;
3221 else if ((from < end)
3222 && (in_2char_comment_start (state, prev_from_syntax,
3223 prev_from, from_byte)))
3225 INC_FROM;
3226 prev_from_syntax = Smax; /* the syntax has already been "used up". */
3227 goto atcomment;
3230 while (from < end)
3232 rarely_quit (++quit_count);
3233 INC_FROM;
3235 if ((from < end)
3236 && (in_2char_comment_start (state, prev_from_syntax,
3237 prev_from, from_byte)))
3239 INC_FROM;
3240 prev_from_syntax = Smax; /* the syntax has already been "used up". */
3241 goto atcomment;
3244 if (SYNTAX_FLAGS_PREFIX (prev_from_syntax))
3245 continue;
3246 code = prev_from_syntax & 0xff;
3247 switch (code)
3249 case Sescape:
3250 case Scharquote:
3251 if (stopbefore) goto stop; /* this arg means stop at sexp start */
3252 curlevel->last = prev_from;
3253 startquoted:
3254 if (from == end) goto endquoted;
3255 INC_FROM;
3256 goto symstarted;
3257 /* treat following character as a word constituent */
3258 case Sword:
3259 case Ssymbol:
3260 if (stopbefore) goto stop; /* this arg means stop at sexp start */
3261 curlevel->last = prev_from;
3262 symstarted:
3263 while (from < end)
3265 if (in_2char_comment_start (state, prev_from_syntax,
3266 prev_from, from_byte))
3268 INC_FROM;
3269 prev_from_syntax = Smax; /* the syntax has already been "used up". */
3270 goto atcomment;
3273 int symchar = FETCH_CHAR_AS_MULTIBYTE (from_byte);
3274 switch (SYNTAX (symchar))
3276 case Scharquote:
3277 case Sescape:
3278 INC_FROM;
3279 if (from == end) goto endquoted;
3280 break;
3281 case Sword:
3282 case Ssymbol:
3283 case Squote:
3284 break;
3285 default:
3286 goto symdone;
3288 INC_FROM;
3289 rarely_quit (++quit_count);
3291 symdone:
3292 curlevel->prev = curlevel->last;
3293 break;
3295 case Scomment_fence:
3296 /* Record the comment style we have entered so that only
3297 the comment-end sequence of the same style actually
3298 terminates the comment section. */
3299 state->comstyle = ST_COMMENT_STYLE;
3300 state->incomment = -1;
3301 state->comstr_start = prev_from;
3302 goto atcomment;
3303 case Scomment:
3304 state->comstyle = SYNTAX_FLAGS_COMMENT_STYLE (prev_from_syntax, 0);
3305 state->incomment = (SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax) ?
3306 1 : -1);
3307 state->comstr_start = prev_from;
3308 atcomment:
3309 if (commentstop || boundary_stop) goto done;
3310 startincomment:
3311 /* The (from == BEGV) test was to enter the loop in the middle so
3312 that we find a 2-char comment ender even if we start in the
3313 middle of it. We don't want to do that if we're just at the
3314 beginning of the comment (think of (*) ... (*)). */
3315 found = forw_comment (from, from_byte, end,
3316 state->incomment, state->comstyle,
3317 from == BEGV ? 0 : prev_from_syntax,
3318 &out_charpos, &out_bytepos, &state->incomment,
3319 &prev_from_syntax);
3320 from = out_charpos; from_byte = out_bytepos;
3321 /* Beware! prev_from and friends (except prev_from_syntax)
3322 are invalid now. Luckily, the `done' doesn't use them
3323 and the INC_FROM sets them to a sane value without
3324 looking at them. */
3325 if (!found) goto done;
3326 INC_FROM;
3327 state->incomment = 0;
3328 state->comstyle = 0; /* reset the comment style */
3329 prev_from_syntax = Smax; /* For the comment closer */
3330 if (boundary_stop) goto done;
3331 break;
3333 case Sopen:
3334 if (stopbefore) goto stop; /* this arg means stop at sexp start */
3335 depth++;
3336 /* curlevel++->last ran into compiler bug on Apollo */
3337 curlevel->last = prev_from;
3338 if (++curlevel == endlevel)
3339 curlevel--; /* error ("Nesting too deep for parser"); */
3340 curlevel->prev = -1;
3341 curlevel->last = -1;
3342 if (targetdepth == depth) goto done;
3343 break;
3345 case Sclose:
3346 depth--;
3347 if (depth < mindepth)
3348 mindepth = depth;
3349 if (curlevel != levelstart)
3350 curlevel--;
3351 curlevel->prev = curlevel->last;
3352 if (targetdepth == depth) goto done;
3353 break;
3355 case Sstring:
3356 case Sstring_fence:
3357 state->comstr_start = from - 1;
3358 if (stopbefore) goto stop; /* this arg means stop at sexp start */
3359 curlevel->last = prev_from;
3360 state->instring = (code == Sstring
3361 ? (FETCH_CHAR_AS_MULTIBYTE (prev_from_byte))
3362 : ST_STRING_STYLE);
3363 if (boundary_stop) goto done;
3364 startinstring:
3366 nofence = state->instring != ST_STRING_STYLE;
3368 while (1)
3370 int c;
3371 enum syntaxcode c_code;
3373 if (from >= end) goto done;
3374 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
3375 c_code = SYNTAX (c);
3377 /* Check C_CODE here so that if the char has
3378 a syntax-table property which says it is NOT
3379 a string character, it does not end the string. */
3380 if (nofence && c == state->instring && c_code == Sstring)
3381 break;
3383 switch (c_code)
3385 case Sstring_fence:
3386 if (!nofence) goto string_end;
3387 break;
3389 case Scharquote:
3390 case Sescape:
3391 INC_FROM;
3392 startquotedinstring:
3393 if (from >= end) goto endquoted;
3394 break;
3396 default:
3397 break;
3399 INC_FROM;
3400 rarely_quit (++quit_count);
3403 string_end:
3404 state->instring = -1;
3405 curlevel->prev = curlevel->last;
3406 INC_FROM;
3407 if (boundary_stop) goto done;
3408 break;
3410 case Smath:
3411 /* FIXME: We should do something with it. */
3412 break;
3413 default:
3414 /* Ignore whitespace, punctuation, quote, endcomment. */
3415 break;
3418 goto done;
3420 stop: /* Here if stopping before start of sexp. */
3421 from = prev_from; /* We have just fetched the char that starts it; */
3422 from_byte = prev_from_byte;
3423 prev_from_syntax = prev_prev_from_syntax;
3424 goto done; /* but return the position before it. */
3426 endquoted:
3427 state->quoted = 1;
3428 done:
3429 state->depth = depth;
3430 state->mindepth = mindepth;
3431 state->thislevelstart = curlevel->prev;
3432 state->prevlevelstart
3433 = (curlevel == levelstart) ? -1 : (curlevel - 1)->last;
3434 state->location = from;
3435 state->location_byte = from_byte;
3436 state->levelstarts = Qnil;
3437 while (curlevel > levelstart)
3438 state->levelstarts = Fcons (make_number ((--curlevel)->last),
3439 state->levelstarts);
3440 state->prev_syntax = (SYNTAX_FLAGS_COMSTARTEND_FIRST (prev_from_syntax)
3441 || state->quoted) ? prev_from_syntax : Smax;
3444 /* Convert a (lisp) parse state to the internal form used in
3445 scan_sexps_forward. */
3446 static void
3447 internalize_parse_state (Lisp_Object external, struct lisp_parse_state *state)
3449 Lisp_Object tem;
3451 if (NILP (external))
3453 state->depth = 0;
3454 state->instring = -1;
3455 state->incomment = 0;
3456 state->quoted = 0;
3457 state->comstyle = 0; /* comment style a by default. */
3458 state->comstr_start = -1; /* no comment/string seen. */
3459 state->levelstarts = Qnil;
3460 state->prev_syntax = Smax;
3462 else
3464 tem = Fcar (external);
3465 if (!NILP (tem))
3466 state->depth = XINT (tem);
3467 else
3468 state->depth = 0;
3470 external = Fcdr (external);
3471 external = Fcdr (external);
3472 external = Fcdr (external);
3473 tem = Fcar (external);
3474 /* Check whether we are inside string_fence-style string: */
3475 state->instring = (!NILP (tem)
3476 ? (CHARACTERP (tem) ? XFASTINT (tem) : ST_STRING_STYLE)
3477 : -1);
3479 external = Fcdr (external);
3480 tem = Fcar (external);
3481 state->incomment = (!NILP (tem)
3482 ? (INTEGERP (tem) ? XINT (tem) : -1)
3483 : 0);
3485 external = Fcdr (external);
3486 tem = Fcar (external);
3487 state->quoted = !NILP (tem);
3489 /* if the eighth element of the list is nil, we are in comment
3490 style a. If it is non-nil, we are in comment style b */
3491 external = Fcdr (external);
3492 external = Fcdr (external);
3493 tem = Fcar (external);
3494 state->comstyle = (NILP (tem)
3496 : (RANGED_INTEGERP (0, tem, ST_COMMENT_STYLE)
3497 ? XINT (tem)
3498 : ST_COMMENT_STYLE));
3500 external = Fcdr (external);
3501 tem = Fcar (external);
3502 state->comstr_start =
3503 RANGED_INTEGERP (PTRDIFF_MIN, tem, PTRDIFF_MAX) ? XINT (tem) : -1;
3504 external = Fcdr (external);
3505 tem = Fcar (external);
3506 state->levelstarts = tem;
3508 external = Fcdr (external);
3509 tem = Fcar (external);
3510 state->prev_syntax = NILP (tem) ? Smax : XINT (tem);
3514 DEFUN ("parse-partial-sexp", Fparse_partial_sexp, Sparse_partial_sexp, 2, 6, 0,
3515 doc: /* Parse Lisp syntax starting at FROM until TO; return status of parse at TO.
3516 Parsing stops at TO or when certain criteria are met;
3517 point is set to where parsing stops.
3518 If fifth arg OLDSTATE is omitted or nil,
3519 parsing assumes that FROM is the beginning of a function.
3521 Value is a list of elements describing final state of parsing:
3522 0. depth in parens.
3523 1. character address of start of innermost containing list; nil if none.
3524 2. character address of start of last complete sexp terminated.
3525 3. non-nil if inside a string.
3526 (it is the character that will terminate the string,
3527 or t if the string should be terminated by a generic string delimiter.)
3528 4. nil if outside a comment, t if inside a non-nestable comment,
3529 else an integer (the current comment nesting).
3530 5. t if following a quote character.
3531 6. the minimum paren-depth encountered during this scan.
3532 7. style of comment, if any.
3533 8. character address of start of comment or string; nil if not in one.
3534 9. List of positions of currently open parens, outermost first.
3535 10. When the last position scanned holds the first character of a
3536 (potential) two character construct, the syntax of that position,
3537 otherwise nil. That construct can be a two character comment
3538 delimiter or an Escaped or Char-quoted character.
3539 11..... Possible further internal information used by `parse-partial-sexp'.
3541 If third arg TARGETDEPTH is non-nil, parsing stops if the depth
3542 in parentheses becomes equal to TARGETDEPTH.
3543 Fourth arg STOPBEFORE non-nil means stop when we come to
3544 any character that starts a sexp.
3545 Fifth arg OLDSTATE is a list like what this function returns.
3546 It is used to initialize the state of the parse. Elements number 1, 2, 6
3547 are ignored.
3548 Sixth arg COMMENTSTOP non-nil means stop after the start of a comment.
3549 If it is the symbol `syntax-table', stop after the start of a comment or a
3550 string, or after end of a comment or a string. */)
3551 (Lisp_Object from, Lisp_Object to, Lisp_Object targetdepth,
3552 Lisp_Object stopbefore, Lisp_Object oldstate, Lisp_Object commentstop)
3554 struct lisp_parse_state state;
3555 EMACS_INT target;
3557 if (!NILP (targetdepth))
3559 CHECK_NUMBER (targetdepth);
3560 target = XINT (targetdepth);
3562 else
3563 target = TYPE_MINIMUM (EMACS_INT); /* We won't reach this depth. */
3565 validate_region (&from, &to);
3566 internalize_parse_state (oldstate, &state);
3567 scan_sexps_forward (&state, XINT (from), CHAR_TO_BYTE (XINT (from)),
3568 XINT (to),
3569 target, !NILP (stopbefore),
3570 (NILP (commentstop)
3571 ? 0 : (EQ (commentstop, Qsyntax_table) ? -1 : 1)));
3573 SET_PT_BOTH (state.location, state.location_byte);
3575 return
3576 Fcons (make_number (state.depth),
3577 Fcons (state.prevlevelstart < 0
3578 ? Qnil : make_number (state.prevlevelstart),
3579 Fcons (state.thislevelstart < 0
3580 ? Qnil : make_number (state.thislevelstart),
3581 Fcons (state.instring >= 0
3582 ? (state.instring == ST_STRING_STYLE
3583 ? Qt : make_number (state.instring)) : Qnil,
3584 Fcons (state.incomment < 0 ? Qt :
3585 (state.incomment == 0 ? Qnil :
3586 make_number (state.incomment)),
3587 Fcons (state.quoted ? Qt : Qnil,
3588 Fcons (make_number (state.mindepth),
3589 Fcons ((state.comstyle
3590 ? (state.comstyle == ST_COMMENT_STYLE
3591 ? Qsyntax_table
3592 : make_number (state.comstyle))
3593 : Qnil),
3594 Fcons (((state.incomment
3595 || (state.instring >= 0))
3596 ? make_number (state.comstr_start)
3597 : Qnil),
3598 Fcons (state.levelstarts,
3599 Fcons (state.prev_syntax == Smax
3600 ? Qnil
3601 : make_number (state.prev_syntax),
3602 Qnil)))))))))));
3605 void
3606 init_syntax_once (void)
3608 register int i, c;
3609 Lisp_Object temp;
3611 /* This has to be done here, before we call Fmake_char_table. */
3612 DEFSYM (Qsyntax_table, "syntax-table");
3614 /* Create objects which can be shared among syntax tables. */
3615 Vsyntax_code_object = make_uninit_vector (Smax);
3616 for (i = 0; i < Smax; i++)
3617 ASET (Vsyntax_code_object, i, Fcons (make_number (i), Qnil));
3619 /* Now we are ready to set up this property, so we can
3620 create syntax tables. */
3621 Fput (Qsyntax_table, Qchar_table_extra_slots, make_number (0));
3623 temp = AREF (Vsyntax_code_object, Swhitespace);
3625 Vstandard_syntax_table = Fmake_char_table (Qsyntax_table, temp);
3627 /* Control characters should not be whitespace. */
3628 temp = AREF (Vsyntax_code_object, Spunct);
3629 for (i = 0; i <= ' ' - 1; i++)
3630 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3631 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, 0177, temp);
3633 /* Except that a few really are whitespace. */
3634 temp = AREF (Vsyntax_code_object, Swhitespace);
3635 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ' ', temp);
3636 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\t', temp);
3637 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\n', temp);
3638 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, 015, temp);
3639 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, 014, temp);
3641 temp = AREF (Vsyntax_code_object, Sword);
3642 for (i = 'a'; i <= 'z'; i++)
3643 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3644 for (i = 'A'; i <= 'Z'; i++)
3645 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3646 for (i = '0'; i <= '9'; i++)
3647 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3649 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '$', temp);
3650 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '%', temp);
3652 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '(',
3653 Fcons (make_number (Sopen), make_number (')')));
3654 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ')',
3655 Fcons (make_number (Sclose), make_number ('(')));
3656 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '[',
3657 Fcons (make_number (Sopen), make_number (']')));
3658 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ']',
3659 Fcons (make_number (Sclose), make_number ('[')));
3660 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '{',
3661 Fcons (make_number (Sopen), make_number ('}')));
3662 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '}',
3663 Fcons (make_number (Sclose), make_number ('{')));
3664 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '"',
3665 Fcons (make_number (Sstring), Qnil));
3666 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\\',
3667 Fcons (make_number (Sescape), Qnil));
3669 temp = AREF (Vsyntax_code_object, Ssymbol);
3670 for (i = 0; i < 10; i++)
3672 c = "_-+*/&|<>="[i];
3673 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
3676 temp = AREF (Vsyntax_code_object, Spunct);
3677 for (i = 0; i < 12; i++)
3679 c = ".,;:?!#@~^'`"[i];
3680 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
3683 /* All multibyte characters have syntax `word' by default. */
3684 temp = AREF (Vsyntax_code_object, Sword);
3685 char_table_set_range (Vstandard_syntax_table, 0x80, MAX_CHAR, temp);
3688 void
3689 syms_of_syntax (void)
3691 DEFSYM (Qsyntax_table_p, "syntax-table-p");
3693 staticpro (&Vsyntax_code_object);
3695 staticpro (&gl_state.object);
3696 staticpro (&gl_state.global_code);
3697 staticpro (&gl_state.current_syntax_table);
3698 staticpro (&gl_state.old_prop);
3700 /* Defined in regex.c. */
3701 staticpro (&re_match_object);
3703 DEFSYM (Qscan_error, "scan-error");
3704 Fput (Qscan_error, Qerror_conditions,
3705 listn (CONSTYPE_PURE, 2, Qscan_error, Qerror));
3706 Fput (Qscan_error, Qerror_message,
3707 build_pure_c_string ("Scan error"));
3709 DEFVAR_BOOL ("parse-sexp-ignore-comments", parse_sexp_ignore_comments,
3710 doc: /* Non-nil means `forward-sexp', etc., should treat comments as whitespace. */);
3712 DEFVAR_BOOL ("parse-sexp-lookup-properties", parse_sexp_lookup_properties,
3713 doc: /* Non-nil means `forward-sexp', etc., obey `syntax-table' property.
3714 Otherwise, that text property is simply ignored.
3715 See the info node `(elisp)Syntax Properties' for a description of the
3716 `syntax-table' property. */);
3718 DEFVAR_INT ("syntax-propertize--done", syntax_propertize__done,
3719 doc: /* Position up to which syntax-table properties have been set. */);
3720 syntax_propertize__done = -1;
3721 DEFSYM (Qinternal__syntax_propertize, "internal--syntax-propertize");
3722 Fmake_variable_buffer_local (intern ("syntax-propertize--done"));
3724 words_include_escapes = 0;
3725 DEFVAR_BOOL ("words-include-escapes", words_include_escapes,
3726 doc: /* Non-nil means `forward-word', etc., should treat escape chars part of words. */);
3728 DEFVAR_BOOL ("multibyte-syntax-as-symbol", multibyte_syntax_as_symbol,
3729 doc: /* Non-nil means `scan-sexps' treats all multibyte characters as symbol. */);
3730 multibyte_syntax_as_symbol = 0;
3732 DEFVAR_BOOL ("open-paren-in-column-0-is-defun-start",
3733 open_paren_in_column_0_is_defun_start,
3734 doc: /* Non-nil means an open paren in column 0 denotes the start of a defun. */);
3735 open_paren_in_column_0_is_defun_start = 1;
3738 DEFVAR_LISP ("find-word-boundary-function-table",
3739 Vfind_word_boundary_function_table,
3740 doc: /*
3741 Char table of functions to search for the word boundary.
3742 Each function is called with two arguments; POS and LIMIT.
3743 POS and LIMIT are character positions in the current buffer.
3745 If POS is less than LIMIT, POS is at the first character of a word,
3746 and the return value of a function should be a position after the
3747 last character of that word.
3749 If POS is not less than LIMIT, POS is at the last character of a word,
3750 and the return value of a function should be a position at the first
3751 character of that word.
3753 In both cases, LIMIT bounds the search. */);
3754 Vfind_word_boundary_function_table = Fmake_char_table (Qnil, Qnil);
3756 DEFVAR_BOOL ("comment-end-can-be-escaped", Vcomment_end_can_be_escaped,
3757 doc: /* Non-nil means an escaped ender inside a comment doesn't end the comment. */);
3758 Vcomment_end_can_be_escaped = 0;
3759 DEFSYM (Qcomment_end_can_be_escaped, "comment-end-can-be-escaped");
3760 Fmake_variable_buffer_local (Qcomment_end_can_be_escaped);
3762 defsubr (&Ssyntax_table_p);
3763 defsubr (&Ssyntax_table);
3764 defsubr (&Sstandard_syntax_table);
3765 defsubr (&Scopy_syntax_table);
3766 defsubr (&Sset_syntax_table);
3767 defsubr (&Schar_syntax);
3768 defsubr (&Smatching_paren);
3769 defsubr (&Sstring_to_syntax);
3770 defsubr (&Smodify_syntax_entry);
3771 defsubr (&Sinternal_describe_syntax_value);
3773 defsubr (&Sforward_word);
3775 defsubr (&Sskip_chars_forward);
3776 defsubr (&Sskip_chars_backward);
3777 defsubr (&Sskip_syntax_forward);
3778 defsubr (&Sskip_syntax_backward);
3780 defsubr (&Sforward_comment);
3781 defsubr (&Sscan_lists);
3782 defsubr (&Sscan_sexps);
3783 defsubr (&Sbackward_prefix_chars);
3784 defsubr (&Sparse_partial_sexp);