; * lisp/ldefs-boot.el: Update.
[emacs.git] / src / syntax.c
blob3cc32094a8c38d8364e40f3e0cb1bfbef68bf9c0
1 /* GNU Emacs routines to deal with syntax tables; also word and list parsing.
2 Copyright (C) 1985, 1987, 1993-1995, 1997-1999, 2001-2019 Free
3 Software Foundation, Inc.
5 This file is part of GNU Emacs.
7 GNU Emacs is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or (at
10 your option) any later version.
12 GNU Emacs is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */
21 #include <config.h>
23 #include "lisp.h"
24 #include "character.h"
25 #include "buffer.h"
26 #include "regex.h"
27 #include "syntax.h"
28 #include "intervals.h"
29 #include "category.h"
31 /* Make syntax table lookup grant data in gl_state. */
32 #define SYNTAX(c) syntax_property (c, 1)
33 #define SYNTAX_ENTRY(c) syntax_property_entry (c, 1)
34 #define SYNTAX_WITH_FLAGS(c) syntax_property_with_flags (c, 1)
36 /* Eight single-bit flags have the following meanings:
37 1. This character is the first of a two-character comment-start sequence.
38 2. This character is the second of a two-character comment-start sequence.
39 3. This character is the first of a two-character comment-end sequence.
40 4. This character is the second of a two-character comment-end sequence.
41 5. This character is a prefix, for backward-prefix-chars.
42 6. The char is part of a delimiter for comments of style "b".
43 7. This character is part of a nestable comment sequence.
44 8. The char is part of a delimiter for comments of style "c".
45 Note that any two-character sequence whose first character has flag 1
46 and whose second character has flag 2 will be interpreted as a comment start.
48 Bits 6 and 8 discriminate among different comment styles.
49 Languages such as C++ allow two orthogonal syntax start/end pairs
50 and bit 6 determines whether a comment-end or Scommentend
51 ends style a or b. Comment markers can start style a, b, c, or bc.
52 Style a is always the default.
53 For 2-char comment markers, the style b flag is looked up only on the second
54 char of the comment marker and on the first char of the comment ender.
55 For style c (like the nested flag), the flag can be placed on any of
56 the chars. */
58 /* These functions extract specific flags from an integer
59 that holds the syntax code and the flags. */
61 static bool
62 SYNTAX_FLAGS_COMSTART_FIRST (int flags)
64 return (flags >> 16) & 1;
66 static bool
67 SYNTAX_FLAGS_COMSTART_SECOND (int flags)
69 return (flags >> 17) & 1;
71 static bool
72 SYNTAX_FLAGS_COMEND_FIRST (int flags)
74 return (flags >> 18) & 1;
76 static bool
77 SYNTAX_FLAGS_COMEND_SECOND (int flags)
79 return (flags >> 19) & 1;
81 static bool
82 SYNTAX_FLAGS_COMSTARTEND_FIRST (int flags)
84 return (flags & 0x50000) != 0;
86 static bool
87 SYNTAX_FLAGS_PREFIX (int flags)
89 return (flags >> 20) & 1;
91 static bool
92 SYNTAX_FLAGS_COMMENT_STYLEB (int flags)
94 return (flags >> 21) & 1;
96 static bool
97 SYNTAX_FLAGS_COMMENT_STYLEC (int flags)
99 return (flags >> 23) & 1;
101 static int
102 SYNTAX_FLAGS_COMMENT_STYLEC2 (int flags)
104 return (flags >> 22) & 2; /* SYNTAX_FLAGS_COMMENT_STYLEC (flags) * 2 */
106 static bool
107 SYNTAX_FLAGS_COMMENT_NESTED (int flags)
109 return (flags >> 22) & 1;
112 /* FLAGS should be the flags of the main char of the comment marker, e.g.
113 the second for comstart and the first for comend. */
114 static int
115 SYNTAX_FLAGS_COMMENT_STYLE (int flags, int other_flags)
117 return (SYNTAX_FLAGS_COMMENT_STYLEB (flags)
118 | SYNTAX_FLAGS_COMMENT_STYLEC2 (flags)
119 | SYNTAX_FLAGS_COMMENT_STYLEC2 (other_flags));
122 /* Extract a particular flag for a given character. */
124 static bool
125 SYNTAX_COMEND_FIRST (int c)
127 return SYNTAX_FLAGS_COMEND_FIRST (SYNTAX_WITH_FLAGS (c));
130 /* We use these constants in place for comment-style and
131 string-ender-char to distinguish comments/strings started by
132 comment_fence and string_fence codes. */
134 enum
136 ST_COMMENT_STYLE = 256 + 1,
137 ST_STRING_STYLE = 256 + 2
140 /* This is the internal form of the parse state used in parse-partial-sexp. */
142 struct lisp_parse_state
144 EMACS_INT depth; /* Depth at end of parsing. */
145 int instring; /* -1 if not within string, else desired terminator. */
146 EMACS_INT incomment; /* -1 if in unnestable comment else comment nesting */
147 int comstyle; /* comment style a=0, or b=1, or ST_COMMENT_STYLE. */
148 bool quoted; /* True if just after an escape char at end of parsing. */
149 EMACS_INT mindepth; /* Minimum depth seen while scanning. */
150 /* Char number of most recent start-of-expression at current level */
151 ptrdiff_t thislevelstart;
152 /* Char number of start of containing expression */
153 ptrdiff_t prevlevelstart;
154 ptrdiff_t location; /* Char number at which parsing stopped. */
155 ptrdiff_t location_byte; /* Corresponding byte position. */
156 ptrdiff_t comstr_start; /* Position of last comment/string starter. */
157 Lisp_Object levelstarts; /* Char numbers of starts-of-expression
158 of levels (starting from outermost). */
159 int prev_syntax; /* Syntax of previous position scanned, when
160 that position (potentially) holds the first char
161 of a 2-char construct, i.e. comment delimiter
162 or Sescape, etc. Smax otherwise. */
165 /* These variables are a cache for finding the start of a defun.
166 find_start_pos is the place for which the defun start was found.
167 find_start_value is the defun start position found for it.
168 find_start_value_byte is the corresponding byte position.
169 find_start_buffer is the buffer it was found in.
170 find_start_begv is the BEGV value when it was found.
171 find_start_modiff is the value of MODIFF when it was found. */
173 static ptrdiff_t find_start_pos;
174 static ptrdiff_t find_start_value;
175 static ptrdiff_t find_start_value_byte;
176 static struct buffer *find_start_buffer;
177 static ptrdiff_t find_start_begv;
178 static EMACS_INT find_start_modiff;
181 static Lisp_Object skip_chars (bool, Lisp_Object, Lisp_Object, bool);
182 static Lisp_Object skip_syntaxes (bool, Lisp_Object, Lisp_Object);
183 static Lisp_Object scan_lists (EMACS_INT, EMACS_INT, EMACS_INT, bool);
184 static void scan_sexps_forward (struct lisp_parse_state *,
185 ptrdiff_t, ptrdiff_t, ptrdiff_t, EMACS_INT,
186 bool, int);
187 static void internalize_parse_state (Lisp_Object, struct lisp_parse_state *);
188 static bool in_classes (int, Lisp_Object);
189 static void parse_sexp_propertize (ptrdiff_t charpos);
191 /* This setter is used only in this file, so it can be private. */
192 static void
193 bset_syntax_table (struct buffer *b, Lisp_Object val)
195 b->syntax_table_ = val;
198 /* Whether the syntax of the character C has the prefix flag set. */
199 bool
200 syntax_prefix_flag_p (int c)
202 return SYNTAX_FLAGS_PREFIX (SYNTAX_WITH_FLAGS (c));
205 struct gl_state_s gl_state; /* Global state of syntax parser. */
207 enum { INTERVALS_AT_ONCE = 10 }; /* 1 + max-number of intervals
208 to scan to property-change. */
210 /* Set the syntax entry VAL for char C in table TABLE. */
212 static void
213 SET_RAW_SYNTAX_ENTRY (Lisp_Object table, int c, Lisp_Object val)
215 CHAR_TABLE_SET (table, c, val);
218 /* Set the syntax entry VAL for char-range RANGE in table TABLE.
219 RANGE is a cons (FROM . TO) specifying the range of characters. */
221 static void
222 SET_RAW_SYNTAX_ENTRY_RANGE (Lisp_Object table, Lisp_Object range,
223 Lisp_Object val)
225 Fset_char_table_range (table, range, val);
228 /* Extract the information from the entry for character C
229 in the current syntax table. */
231 static Lisp_Object
232 SYNTAX_MATCH (int c)
234 Lisp_Object ent = SYNTAX_ENTRY (c);
235 return CONSP (ent) ? XCDR (ent) : Qnil;
238 /* This should be called with FROM at the start of forward
239 search, or after the last position of the backward search. It
240 makes sure that the first char is picked up with correct table, so
241 one does not need to call UPDATE_SYNTAX_TABLE immediately after the
242 call.
243 Sign of COUNT gives the direction of the search.
246 static void
247 SETUP_SYNTAX_TABLE (ptrdiff_t from, ptrdiff_t count)
249 SETUP_BUFFER_SYNTAX_TABLE ();
250 gl_state.b_property = BEGV;
251 gl_state.e_property = ZV + 1;
252 gl_state.object = Qnil;
253 gl_state.offset = 0;
254 if (parse_sexp_lookup_properties)
256 if (count > 0)
257 update_syntax_table_forward (from, true, Qnil);
258 else if (from > BEGV)
260 update_syntax_table (from - 1, count, true, Qnil);
261 parse_sexp_propertize (from - 1);
266 /* Same as above, but in OBJECT. If OBJECT is nil, use current buffer.
267 If it is t (which is only used in fast_c_string_match_ignore_case),
268 ignore properties altogether.
270 This is meant for regex.c to use. For buffers, regex.c passes arguments
271 to the UPDATE_SYNTAX_TABLE functions which are relative to BEGV.
272 So if it is a buffer, we set the offset field to BEGV. */
274 void
275 SETUP_SYNTAX_TABLE_FOR_OBJECT (Lisp_Object object,
276 ptrdiff_t from, ptrdiff_t count)
278 SETUP_BUFFER_SYNTAX_TABLE ();
279 gl_state.object = object;
280 if (BUFFERP (gl_state.object))
282 struct buffer *buf = XBUFFER (gl_state.object);
283 gl_state.b_property = 1;
284 gl_state.e_property = BUF_ZV (buf) - BUF_BEGV (buf) + 1;
285 gl_state.offset = BUF_BEGV (buf) - 1;
287 else if (NILP (gl_state.object))
289 gl_state.b_property = 1;
290 gl_state.e_property = ZV - BEGV + 1;
291 gl_state.offset = BEGV - 1;
293 else if (EQ (gl_state.object, Qt))
295 gl_state.b_property = 0;
296 gl_state.e_property = PTRDIFF_MAX;
297 gl_state.offset = 0;
299 else
301 gl_state.b_property = 0;
302 gl_state.e_property = 1 + SCHARS (gl_state.object);
303 gl_state.offset = 0;
305 if (parse_sexp_lookup_properties)
306 update_syntax_table (from + gl_state.offset - (count <= 0),
307 count, 1, gl_state.object);
310 /* Update gl_state to an appropriate interval which contains CHARPOS. The
311 sign of COUNT give the relative position of CHARPOS wrt the previously
312 valid interval. If INIT, only [be]_property fields of gl_state are
313 valid at start, the rest is filled basing on OBJECT.
315 `gl_state.*_i' are the intervals, and CHARPOS is further in the search
316 direction than the intervals - or in an interval. We update the
317 current syntax-table basing on the property of this interval, and
318 update the interval to start further than CHARPOS - or be
319 NULL. We also update lim_property to be the next value of
320 charpos to call this subroutine again - or be before/after the
321 start/end of OBJECT. */
323 void
324 update_syntax_table (ptrdiff_t charpos, EMACS_INT count, bool init,
325 Lisp_Object object)
327 Lisp_Object tmp_table;
328 int cnt = 0;
329 bool invalidate = true;
330 INTERVAL i;
332 if (init)
334 gl_state.old_prop = Qnil;
335 gl_state.start = gl_state.b_property;
336 gl_state.stop = gl_state.e_property;
337 i = interval_of (charpos, object);
338 gl_state.backward_i = gl_state.forward_i = i;
339 invalidate = false;
340 if (!i)
341 return;
342 /* interval_of updates only ->position of the return value, so
343 update the parents manually to speed up update_interval. */
344 while (!NULL_PARENT (i))
346 if (AM_RIGHT_CHILD (i))
347 INTERVAL_PARENT (i)->position = i->position
348 - LEFT_TOTAL_LENGTH (i) + TOTAL_LENGTH (i) /* right end */
349 - TOTAL_LENGTH (INTERVAL_PARENT (i))
350 + LEFT_TOTAL_LENGTH (INTERVAL_PARENT (i));
351 else
352 INTERVAL_PARENT (i)->position = i->position - LEFT_TOTAL_LENGTH (i)
353 + TOTAL_LENGTH (i);
354 i = INTERVAL_PARENT (i);
356 i = gl_state.forward_i;
357 gl_state.b_property = i->position - gl_state.offset;
358 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
359 goto update;
361 i = count > 0 ? gl_state.forward_i : gl_state.backward_i;
363 /* We are guaranteed to be called with CHARPOS either in i,
364 or further off. */
365 if (!i)
366 error ("Error in syntax_table logic for to-the-end intervals");
367 else if (charpos < i->position) /* Move left. */
369 if (count > 0)
370 error ("Error in syntax_table logic for intervals <-");
371 /* Update the interval. */
372 i = update_interval (i, charpos);
373 if (INTERVAL_LAST_POS (i) != gl_state.b_property)
375 invalidate = false;
376 gl_state.forward_i = i;
377 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
380 else if (charpos >= INTERVAL_LAST_POS (i)) /* Move right. */
382 if (count < 0)
383 error ("Error in syntax_table logic for intervals ->");
384 /* Update the interval. */
385 i = update_interval (i, charpos);
386 if (i->position != gl_state.e_property)
388 invalidate = false;
389 gl_state.backward_i = i;
390 gl_state.b_property = i->position - gl_state.offset;
394 update:
395 tmp_table = textget (i->plist, Qsyntax_table);
397 if (invalidate)
398 invalidate = !EQ (tmp_table, gl_state.old_prop); /* Need to invalidate? */
400 if (invalidate) /* Did not get to adjacent interval. */
401 { /* with the same table => */
402 /* invalidate the old range. */
403 if (count > 0)
405 gl_state.backward_i = i;
406 gl_state.b_property = i->position - gl_state.offset;
408 else
410 gl_state.forward_i = i;
411 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
415 if (!EQ (tmp_table, gl_state.old_prop))
417 gl_state.current_syntax_table = tmp_table;
418 gl_state.old_prop = tmp_table;
419 if (EQ (Fsyntax_table_p (tmp_table), Qt))
421 gl_state.use_global = 0;
423 else if (CONSP (tmp_table))
425 gl_state.use_global = 1;
426 gl_state.global_code = tmp_table;
428 else
430 gl_state.use_global = 0;
431 gl_state.current_syntax_table = BVAR (current_buffer, syntax_table);
435 while (i)
437 if (cnt && !EQ (tmp_table, textget (i->plist, Qsyntax_table)))
439 if (count > 0)
441 gl_state.e_property = i->position - gl_state.offset;
442 gl_state.forward_i = i;
444 else
446 gl_state.b_property
447 = i->position + LENGTH (i) - gl_state.offset;
448 gl_state.backward_i = i;
450 return;
452 else if (cnt == INTERVALS_AT_ONCE)
454 if (count > 0)
456 gl_state.e_property
457 = i->position + LENGTH (i) - gl_state.offset
458 /* e_property at EOB is not set to ZV but to ZV+1, so that
459 we can do INC(from);UPDATE_SYNTAX_TABLE_FORWARD without
460 having to check eob between the two. */
461 + (next_interval (i) ? 0 : 1);
462 gl_state.forward_i = i;
464 else
466 gl_state.b_property = i->position - gl_state.offset;
467 gl_state.backward_i = i;
469 return;
471 cnt++;
472 i = count > 0 ? next_interval (i) : previous_interval (i);
474 eassert (i == NULL); /* This property goes to the end. */
475 if (count > 0)
477 gl_state.e_property = gl_state.stop;
478 gl_state.forward_i = i;
480 else
481 gl_state.b_property = gl_state.start;
484 static void
485 parse_sexp_propertize (ptrdiff_t charpos)
487 EMACS_INT zv = ZV;
488 if (syntax_propertize__done <= charpos
489 && syntax_propertize__done < zv)
491 EMACS_INT modiffs = CHARS_MODIFF;
492 safe_call1 (Qinternal__syntax_propertize,
493 make_number (min (zv, 1 + charpos)));
494 if (modiffs != CHARS_MODIFF)
495 error ("parse-sexp-propertize-function modified the buffer!");
496 if (syntax_propertize__done <= charpos
497 && syntax_propertize__done < zv)
498 error ("parse-sexp-propertize-function did not move"
499 " syntax-propertize--done");
500 SETUP_SYNTAX_TABLE (charpos, 1);
502 else if (gl_state.e_property > syntax_propertize__done)
504 gl_state.e_property = syntax_propertize__done;
505 gl_state.e_property_truncated = true;
507 else if (gl_state.e_property_truncated
508 && gl_state.e_property < syntax_propertize__done)
509 { /* When moving backward, e_property might be set without resetting
510 e_property_truncated, so the e_property_truncated flag may
511 occasionally be left raised spuriously. This should be rare. */
512 gl_state.e_property_truncated = false;
513 update_syntax_table_forward (charpos, false, Qnil);
517 void
518 update_syntax_table_forward (ptrdiff_t charpos, bool init,
519 Lisp_Object object)
521 if (gl_state.e_property_truncated)
523 eassert (NILP (object));
524 eassert (charpos >= gl_state.e_property);
525 parse_sexp_propertize (charpos);
527 else
529 update_syntax_table (charpos, 1, init, object);
530 if (NILP (object) && gl_state.e_property > syntax_propertize__done)
531 parse_sexp_propertize (charpos);
535 /* Returns true if char at CHARPOS is quoted.
536 Global syntax-table data should be set up already to be good at CHARPOS
537 or after. On return global syntax data is good for lookup at CHARPOS. */
539 static bool
540 char_quoted (ptrdiff_t charpos, ptrdiff_t bytepos)
542 enum syntaxcode code;
543 ptrdiff_t beg = BEGV;
544 bool quoted = 0;
545 ptrdiff_t orig = charpos;
547 while (charpos > beg)
549 int c;
550 DEC_BOTH (charpos, bytepos);
552 UPDATE_SYNTAX_TABLE_BACKWARD (charpos);
553 c = FETCH_CHAR_AS_MULTIBYTE (bytepos);
554 code = SYNTAX (c);
555 if (! (code == Scharquote || code == Sescape))
556 break;
558 quoted = !quoted;
561 UPDATE_SYNTAX_TABLE (orig);
562 return quoted;
565 /* Return the bytepos one character before BYTEPOS.
566 We assume that BYTEPOS is not at the start of the buffer. */
568 static ptrdiff_t
569 dec_bytepos (ptrdiff_t bytepos)
571 if (NILP (BVAR (current_buffer, enable_multibyte_characters)))
572 return bytepos - 1;
574 DEC_POS (bytepos);
575 return bytepos;
578 /* Return a defun-start position before POS and not too far before.
579 It should be the last one before POS, or nearly the last.
581 When open_paren_in_column_0_is_defun_start is nonzero,
582 only the beginning of the buffer is treated as a defun-start.
584 We record the information about where the scan started
585 and what its result was, so that another call in the same area
586 can return the same value very quickly.
588 There is no promise at which position the global syntax data is
589 valid on return from the subroutine, so the caller should explicitly
590 update the global data. */
592 static ptrdiff_t
593 find_defun_start (ptrdiff_t pos, ptrdiff_t pos_byte)
595 ptrdiff_t opoint = PT, opoint_byte = PT_BYTE;
597 /* Use previous finding, if it's valid and applies to this inquiry. */
598 if (current_buffer == find_start_buffer
599 /* Reuse the defun-start even if POS is a little farther on.
600 POS might be in the next defun, but that's ok.
601 Our value may not be the best possible, but will still be usable. */
602 && pos <= find_start_pos + 1000
603 && pos >= find_start_value
604 && BEGV == find_start_begv
605 && MODIFF == find_start_modiff)
606 return find_start_value;
608 if (!open_paren_in_column_0_is_defun_start)
610 find_start_value = BEGV;
611 find_start_value_byte = BEGV_BYTE;
612 goto found;
615 /* Back up to start of line. */
616 scan_newline (pos, pos_byte, BEGV, BEGV_BYTE, -1, 1);
618 /* We optimize syntax-table lookup for rare updates. Thus we accept
619 only those `^\s(' which are good in global _and_ text-property
620 syntax-tables. */
621 SETUP_BUFFER_SYNTAX_TABLE ();
622 while (PT > BEGV)
624 /* Open-paren at start of line means we may have found our
625 defun-start. */
626 int c = FETCH_CHAR_AS_MULTIBYTE (PT_BYTE);
627 if (SYNTAX (c) == Sopen)
629 SETUP_SYNTAX_TABLE (PT + 1, -1); /* Try again... */
630 c = FETCH_CHAR_AS_MULTIBYTE (PT_BYTE);
631 if (SYNTAX (c) == Sopen)
632 break;
633 /* Now fallback to the default value. */
634 SETUP_BUFFER_SYNTAX_TABLE ();
636 /* Move to beg of previous line. */
637 scan_newline (PT, PT_BYTE, BEGV, BEGV_BYTE, -2, 1);
640 /* Record what we found, for the next try. */
641 find_start_value = PT;
642 find_start_value_byte = PT_BYTE;
643 TEMP_SET_PT_BOTH (opoint, opoint_byte);
645 found:
646 find_start_buffer = current_buffer;
647 find_start_modiff = MODIFF;
648 find_start_begv = BEGV;
649 find_start_pos = pos;
651 return find_start_value;
654 /* Return the SYNTAX_COMEND_FIRST of the character before POS, POS_BYTE. */
656 static bool
657 prev_char_comend_first (ptrdiff_t pos, ptrdiff_t pos_byte)
659 int c;
660 bool val;
662 DEC_BOTH (pos, pos_byte);
663 UPDATE_SYNTAX_TABLE_BACKWARD (pos);
664 c = FETCH_CHAR (pos_byte);
665 val = SYNTAX_COMEND_FIRST (c);
666 UPDATE_SYNTAX_TABLE_FORWARD (pos + 1);
667 return val;
670 /* Check whether charpos FROM is at the end of a comment.
671 FROM_BYTE is the bytepos corresponding to FROM.
672 Do not move back before STOP.
674 Return true if we find a comment ending at FROM/FROM_BYTE.
676 If successful, store the charpos of the comment's beginning
677 into *CHARPOS_PTR, and the bytepos into *BYTEPOS_PTR.
679 Global syntax data remains valid for backward search starting at
680 the returned value (or at FROM, if the search was not successful). */
682 static bool
683 back_comment (ptrdiff_t from, ptrdiff_t from_byte, ptrdiff_t stop,
684 bool comnested, int comstyle, ptrdiff_t *charpos_ptr,
685 ptrdiff_t *bytepos_ptr)
687 /* Look back, counting the parity of string-quotes,
688 and recording the comment-starters seen.
689 When we reach a safe place, assume that's not in a string;
690 then step the main scan to the earliest comment-starter seen
691 an even number of string quotes away from the safe place.
693 OFROM[I] is position of the earliest comment-starter seen
694 which is I+2X quotes from the comment-end.
695 PARITY is current parity of quotes from the comment end. */
696 int string_style = -1; /* Presumed outside of any string. */
697 bool string_lossage = 0;
698 /* Not a real lossage: indicates that we have passed a matching comment
699 starter plus a non-matching comment-ender, meaning that any matching
700 comment-starter we might see later could be a false positive (hidden
701 inside another comment).
702 Test case: { a (* b } c (* d *) */
703 bool comment_lossage = 0;
704 ptrdiff_t comment_end = from;
705 ptrdiff_t comment_end_byte = from_byte;
706 ptrdiff_t comstart_pos = 0;
707 ptrdiff_t comstart_byte;
708 /* Place where the containing defun starts,
709 or 0 if we didn't come across it yet. */
710 ptrdiff_t defun_start = 0;
711 ptrdiff_t defun_start_byte = 0;
712 enum syntaxcode code;
713 ptrdiff_t nesting = 1; /* Current comment nesting. */
714 int c;
715 int syntax = 0;
716 unsigned short int quit_count = 0;
718 /* FIXME: A }} comment-ender style leads to incorrect behavior
719 in the case of {{ c }}} because we ignore the last two chars which are
720 assumed to be comment-enders although they aren't. */
722 /* At beginning of range to scan, we're outside of strings;
723 that determines quote parity to the comment-end. */
724 while (from != stop)
726 rarely_quit (++quit_count);
728 ptrdiff_t temp_byte;
729 int prev_syntax;
730 bool com2start, com2end, comstart;
732 /* Move back and examine a character. */
733 DEC_BOTH (from, from_byte);
734 UPDATE_SYNTAX_TABLE_BACKWARD (from);
736 prev_syntax = syntax;
737 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
738 syntax = SYNTAX_WITH_FLAGS (c);
739 code = SYNTAX (c);
741 /* Check for 2-char comment markers. */
742 com2start = (SYNTAX_FLAGS_COMSTART_FIRST (syntax)
743 && SYNTAX_FLAGS_COMSTART_SECOND (prev_syntax)
744 && (comstyle
745 == SYNTAX_FLAGS_COMMENT_STYLE (prev_syntax, syntax))
746 && (SYNTAX_FLAGS_COMMENT_NESTED (prev_syntax)
747 || SYNTAX_FLAGS_COMMENT_NESTED (syntax)) == comnested);
748 com2end = (SYNTAX_FLAGS_COMEND_FIRST (syntax)
749 && SYNTAX_FLAGS_COMEND_SECOND (prev_syntax));
750 comstart = (com2start || code == Scomment);
752 /* Nasty cases with overlapping 2-char comment markers:
753 - snmp-mode: -- c -- foo -- c --
754 --- c --
755 ------ c --
756 - c-mode: *||*
757 |* *|* *|
758 |*| |* |*|
759 /// */
761 /* If a 2-char comment sequence partly overlaps with another,
762 we don't try to be clever. E.g. |*| in C, or }% in modes that
763 have %..\n and %{..}%. */
764 if (from > stop && (com2end || comstart))
766 ptrdiff_t next = from, next_byte = from_byte;
767 int next_c, next_syntax;
768 DEC_BOTH (next, next_byte);
769 UPDATE_SYNTAX_TABLE_BACKWARD (next);
770 next_c = FETCH_CHAR_AS_MULTIBYTE (next_byte);
771 next_syntax = SYNTAX_WITH_FLAGS (next_c);
772 if (((comstart || comnested)
773 && SYNTAX_FLAGS_COMEND_SECOND (syntax)
774 && SYNTAX_FLAGS_COMEND_FIRST (next_syntax))
775 || ((com2end || comnested)
776 && SYNTAX_FLAGS_COMSTART_SECOND (syntax)
777 && (comstyle
778 == SYNTAX_FLAGS_COMMENT_STYLE (syntax, prev_syntax))
779 && SYNTAX_FLAGS_COMSTART_FIRST (next_syntax)))
780 goto lossage;
781 /* UPDATE_SYNTAX_TABLE_FORWARD (next + 1); */
784 if (com2start && comstart_pos == 0)
785 /* We're looking at a comment starter. But it might be a comment
786 ender as well (see snmp-mode). The first time we see one, we
787 need to consider it as a comment starter,
788 and the subsequent times as a comment ender. */
789 com2end = 0;
791 /* Turn a 2-char comment sequences into the appropriate syntax. */
792 if (com2end)
793 code = Sendcomment;
794 else if (com2start)
795 code = Scomment;
796 /* Ignore comment starters of a different style. */
797 else if (code == Scomment
798 && (comstyle != SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0)
799 || SYNTAX_FLAGS_COMMENT_NESTED (syntax) != comnested))
800 continue;
802 /* Ignore escaped characters, except comment-enders which cannot
803 be escaped. */
804 if ((Vcomment_end_can_be_escaped || code != Sendcomment)
805 && char_quoted (from, from_byte))
806 continue;
808 switch (code)
810 case Sstring_fence:
811 case Scomment_fence:
812 c = (code == Sstring_fence ? ST_STRING_STYLE : ST_COMMENT_STYLE);
813 FALLTHROUGH;
814 case Sstring:
815 /* Track parity of quotes. */
816 if (string_style == -1)
817 /* Entering a string. */
818 string_style = c;
819 else if (string_style == c)
820 /* Leaving the string. */
821 string_style = -1;
822 else
823 /* If we have two kinds of string delimiters.
824 There's no way to grok this scanning backwards. */
825 string_lossage = 1;
826 break;
828 case Scomment:
829 /* We've already checked that it is the relevant comstyle. */
830 if (string_style != -1 || comment_lossage || string_lossage)
831 /* There are odd string quotes involved, so let's be careful.
832 Test case in Pascal: " { " a { " } */
833 goto lossage;
835 if (!comnested)
837 /* Record best comment-starter so far. */
838 comstart_pos = from;
839 comstart_byte = from_byte;
841 else if (--nesting <= 0)
842 /* nested comments have to be balanced, so we don't need to
843 keep looking for earlier ones. We use here the same (slightly
844 incorrect) reasoning as below: since it is followed by uniform
845 paired string quotes, this comment-start has to be outside of
846 strings, else the comment-end itself would be inside a string. */
847 goto done;
848 break;
850 case Sendcomment:
851 if (SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0) == comstyle
852 && ((com2end && SYNTAX_FLAGS_COMMENT_NESTED (prev_syntax))
853 || SYNTAX_FLAGS_COMMENT_NESTED (syntax)) == comnested)
854 /* This is the same style of comment ender as ours. */
856 if (comnested)
857 nesting++;
858 else
859 /* Anything before that can't count because it would match
860 this comment-ender rather than ours. */
861 from = stop; /* Break out of the loop. */
863 else if (comstart_pos != 0 || c != '\n')
864 /* We're mixing comment styles here, so we'd better be careful.
865 The (comstart_pos != 0 || c != '\n') check is not quite correct
866 (we should just always set comment_lossage), but removing it
867 would imply that any multiline comment in C would go through
868 lossage, which seems overkill.
869 The failure should only happen in the rare cases such as
870 { (* } *) */
871 comment_lossage = 1;
872 break;
874 case Sopen:
875 /* Assume a defun-start point is outside of strings. */
876 if (open_paren_in_column_0_is_defun_start
877 && (from == stop
878 || (temp_byte = dec_bytepos (from_byte),
879 FETCH_CHAR (temp_byte) == '\n')))
881 defun_start = from;
882 defun_start_byte = from_byte;
883 from = stop; /* Break out of the loop. */
885 break;
887 default:
888 break;
892 if (comstart_pos == 0)
894 from = comment_end;
895 from_byte = comment_end_byte;
896 UPDATE_SYNTAX_TABLE_FORWARD (comment_end);
898 /* If comstart_pos is set and we get here (ie. didn't jump to `lossage'
899 or `done'), then we've found the beginning of the non-nested comment. */
900 else if (1) /* !comnested */
902 from = comstart_pos;
903 from_byte = comstart_byte;
904 UPDATE_SYNTAX_TABLE_FORWARD (from - 1);
906 else lossage:
908 struct lisp_parse_state state;
909 bool adjusted = true;
910 /* We had two kinds of string delimiters mixed up
911 together. Decode this going forwards.
912 Scan fwd from a known safe place (beginning-of-defun)
913 to the one in question; this records where we
914 last passed a comment starter. */
915 /* If we did not already find the defun start, find it now. */
916 if (defun_start == 0)
918 defun_start = find_defun_start (comment_end, comment_end_byte);
919 defun_start_byte = find_start_value_byte;
920 adjusted = (defun_start > BEGV);
924 internalize_parse_state (Qnil, &state);
925 scan_sexps_forward (&state,
926 defun_start, defun_start_byte,
927 comment_end, TYPE_MINIMUM (EMACS_INT),
928 0, 0);
929 defun_start = comment_end;
930 if (!adjusted)
932 adjusted = true;
933 find_start_value
934 = CONSP (state.levelstarts) ? XINT (XCAR (state.levelstarts))
935 : state.thislevelstart >= 0 ? state.thislevelstart
936 : find_start_value;
937 find_start_value_byte = CHAR_TO_BYTE (find_start_value);
940 if (state.incomment == (comnested ? 1 : -1)
941 && state.comstyle == comstyle)
942 from = state.comstr_start;
943 else
945 from = comment_end;
946 if (state.incomment)
947 /* If comment_end is inside some other comment, maybe ours
948 is nested, so we need to try again from within the
949 surrounding comment. Example: { a (* " *) */
951 /* FIXME: We should advance by one or two chars. */
952 defun_start = state.comstr_start + 2;
953 defun_start_byte = CHAR_TO_BYTE (defun_start);
956 rarely_quit (++quit_count);
958 while (defun_start < comment_end);
960 from_byte = CHAR_TO_BYTE (from);
961 UPDATE_SYNTAX_TABLE_FORWARD (from - 1);
964 done:
965 *charpos_ptr = from;
966 *bytepos_ptr = from_byte;
968 return from != comment_end;
971 DEFUN ("syntax-table-p", Fsyntax_table_p, Ssyntax_table_p, 1, 1, 0,
972 doc: /* Return t if OBJECT is a syntax table.
973 Currently, any char-table counts as a syntax table. */)
974 (Lisp_Object object)
976 if (CHAR_TABLE_P (object)
977 && EQ (XCHAR_TABLE (object)->purpose, Qsyntax_table))
978 return Qt;
979 return Qnil;
982 static void
983 check_syntax_table (Lisp_Object obj)
985 CHECK_TYPE (CHAR_TABLE_P (obj) && EQ (XCHAR_TABLE (obj)->purpose, Qsyntax_table),
986 Qsyntax_table_p, obj);
989 DEFUN ("syntax-table", Fsyntax_table, Ssyntax_table, 0, 0, 0,
990 doc: /* Return the current syntax table.
991 This is the one specified by the current buffer. */)
992 (void)
994 return BVAR (current_buffer, syntax_table);
997 DEFUN ("standard-syntax-table", Fstandard_syntax_table,
998 Sstandard_syntax_table, 0, 0, 0,
999 doc: /* Return the standard syntax table.
1000 This is the one used for new buffers. */)
1001 (void)
1003 return Vstandard_syntax_table;
1006 DEFUN ("copy-syntax-table", Fcopy_syntax_table, Scopy_syntax_table, 0, 1, 0,
1007 doc: /* Construct a new syntax table and return it.
1008 It is a copy of the TABLE, which defaults to the standard syntax table. */)
1009 (Lisp_Object table)
1011 Lisp_Object copy;
1013 if (!NILP (table))
1014 check_syntax_table (table);
1015 else
1016 table = Vstandard_syntax_table;
1018 copy = Fcopy_sequence (table);
1020 /* Only the standard syntax table should have a default element.
1021 Other syntax tables should inherit from parents instead. */
1022 set_char_table_defalt (copy, Qnil);
1024 /* Copied syntax tables should all have parents.
1025 If we copied one with no parent, such as the standard syntax table,
1026 use the standard syntax table as the copy's parent. */
1027 if (NILP (XCHAR_TABLE (copy)->parent))
1028 Fset_char_table_parent (copy, Vstandard_syntax_table);
1029 return copy;
1032 DEFUN ("set-syntax-table", Fset_syntax_table, Sset_syntax_table, 1, 1, 0,
1033 doc: /* Select a new syntax table for the current buffer.
1034 One argument, a syntax table. */)
1035 (Lisp_Object table)
1037 int idx;
1038 check_syntax_table (table);
1039 bset_syntax_table (current_buffer, table);
1040 /* Indicate that this buffer now has a specified syntax table. */
1041 idx = PER_BUFFER_VAR_IDX (syntax_table);
1042 SET_PER_BUFFER_VALUE_P (current_buffer, idx, 1);
1043 return table;
1046 /* Convert a letter which signifies a syntax code
1047 into the code it signifies.
1048 This is used by modify-syntax-entry, and other things. */
1050 unsigned char const syntax_spec_code[0400] =
1051 { 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1052 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1053 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1054 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1055 Swhitespace, Scomment_fence, Sstring, 0377, Smath, 0377, 0377, Squote,
1056 Sopen, Sclose, 0377, 0377, 0377, Swhitespace, Spunct, Scharquote,
1057 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1058 0377, 0377, 0377, 0377, Scomment, 0377, Sendcomment, 0377,
1059 Sinherit, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* @, A ... */
1060 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1061 0377, 0377, 0377, 0377, 0377, 0377, 0377, Sword,
1062 0377, 0377, 0377, 0377, Sescape, 0377, 0377, Ssymbol,
1063 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* `, a, ... */
1064 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1065 0377, 0377, 0377, 0377, 0377, 0377, 0377, Sword,
1066 0377, 0377, 0377, 0377, Sstring_fence, 0377, 0377, 0377
1069 /* Indexed by syntax code, give the letter that describes it. */
1071 char const syntax_code_spec[16] =
1073 ' ', '.', 'w', '_', '(', ')', '\'', '\"', '$', '\\', '/', '<', '>', '@',
1074 '!', '|'
1077 /* Indexed by syntax code, give the object (cons of syntax code and
1078 nil) to be stored in syntax table. Since these objects can be
1079 shared among syntax tables, we generate them in advance. By
1080 sharing objects, the function `describe-syntax' can give a more
1081 compact listing. */
1082 static Lisp_Object Vsyntax_code_object;
1085 DEFUN ("char-syntax", Fchar_syntax, Schar_syntax, 1, 1, 0,
1086 doc: /* Return the syntax code of CHARACTER, described by a character.
1087 For example, if CHARACTER is a word constituent, the
1088 character `w' (119) is returned.
1089 The characters that correspond to various syntax codes
1090 are listed in the documentation of `modify-syntax-entry'.
1092 If you're trying to determine the syntax of characters in the buffer,
1093 this is probably the wrong function to use, because it can't take
1094 `syntax-table' text properties into account. Consider using
1095 `syntax-after' instead. */)
1096 (Lisp_Object character)
1098 int char_int;
1099 CHECK_CHARACTER (character);
1100 char_int = XINT (character);
1101 SETUP_BUFFER_SYNTAX_TABLE ();
1102 return make_number (syntax_code_spec[SYNTAX (char_int)]);
1105 DEFUN ("matching-paren", Fmatching_paren, Smatching_paren, 1, 1, 0,
1106 doc: /* Return the matching parenthesis of CHARACTER, or nil if none. */)
1107 (Lisp_Object character)
1109 int char_int;
1110 enum syntaxcode code;
1111 CHECK_CHARACTER (character);
1112 char_int = XINT (character);
1113 SETUP_BUFFER_SYNTAX_TABLE ();
1114 code = SYNTAX (char_int);
1115 if (code == Sopen || code == Sclose)
1116 return SYNTAX_MATCH (char_int);
1117 return Qnil;
1120 DEFUN ("string-to-syntax", Fstring_to_syntax, Sstring_to_syntax, 1, 1, 0,
1121 doc: /* Convert a syntax descriptor STRING into a raw syntax descriptor.
1122 STRING should be a string of the form allowed as argument of
1123 `modify-syntax-entry'. The return value is a raw syntax descriptor: a
1124 cons cell (CODE . MATCHING-CHAR) which can be used, for example, as
1125 the value of a `syntax-table' text property. */)
1126 (Lisp_Object string)
1128 const unsigned char *p;
1129 int val;
1130 Lisp_Object match;
1132 CHECK_STRING (string);
1134 p = SDATA (string);
1135 val = syntax_spec_code[*p++];
1136 if (val == 0377)
1137 error ("Invalid syntax description letter: %c", p[-1]);
1139 if (val == Sinherit)
1140 return Qnil;
1142 if (*p)
1144 int len;
1145 int character = STRING_CHAR_AND_LENGTH (p, len);
1146 XSETINT (match, character);
1147 if (XFASTINT (match) == ' ')
1148 match = Qnil;
1149 p += len;
1151 else
1152 match = Qnil;
1154 while (*p)
1155 switch (*p++)
1157 case '1':
1158 val |= 1 << 16;
1159 break;
1161 case '2':
1162 val |= 1 << 17;
1163 break;
1165 case '3':
1166 val |= 1 << 18;
1167 break;
1169 case '4':
1170 val |= 1 << 19;
1171 break;
1173 case 'p':
1174 val |= 1 << 20;
1175 break;
1177 case 'b':
1178 val |= 1 << 21;
1179 break;
1181 case 'n':
1182 val |= 1 << 22;
1183 break;
1185 case 'c':
1186 val |= 1 << 23;
1187 break;
1190 if (val < ASIZE (Vsyntax_code_object) && NILP (match))
1191 return AREF (Vsyntax_code_object, val);
1192 else
1193 /* Since we can't use a shared object, let's make a new one. */
1194 return Fcons (make_number (val), match);
1197 /* I really don't know why this is interactive
1198 help-form should at least be made useful whilst reading the second arg. */
1199 DEFUN ("modify-syntax-entry", Fmodify_syntax_entry, Smodify_syntax_entry, 2, 3,
1200 "cSet syntax for character: \nsSet syntax for %s to: ",
1201 doc: /* Set syntax for character CHAR according to string NEWENTRY.
1202 The syntax is changed only for table SYNTAX-TABLE, which defaults to
1203 the current buffer's syntax table.
1204 CHAR may be a cons (MIN . MAX), in which case, syntaxes of all characters
1205 in the range MIN to MAX are changed.
1206 The first character of NEWENTRY should be one of the following:
1207 Space or - whitespace syntax. w word constituent.
1208 _ symbol constituent. . punctuation.
1209 ( open-parenthesis. ) close-parenthesis.
1210 " string quote. \\ escape.
1211 $ paired delimiter. \\=' expression quote or prefix operator.
1212 < comment starter. > comment ender.
1213 / character-quote. @ inherit from parent table.
1214 | generic string fence. ! generic comment fence.
1216 Only single-character comment start and end sequences are represented thus.
1217 Two-character sequences are represented as described below.
1218 The second character of NEWENTRY is the matching parenthesis,
1219 used only if the first character is `(' or `)'.
1220 Any additional characters are flags.
1221 Defined flags are the characters 1, 2, 3, 4, b, p, and n.
1222 1 means CHAR is the start of a two-char comment start sequence.
1223 2 means CHAR is the second character of such a sequence.
1224 3 means CHAR is the start of a two-char comment end sequence.
1225 4 means CHAR is the second character of such a sequence.
1227 There can be several orthogonal comment sequences. This is to support
1228 language modes such as C++. By default, all comment sequences are of style
1229 a, but you can set the comment sequence style to b (on the second character
1230 of a comment-start, and the first character of a comment-end sequence) and/or
1231 c (on any of its chars) using this flag:
1232 b means CHAR is part of comment sequence b.
1233 c means CHAR is part of comment sequence c.
1234 n means CHAR is part of a nestable comment sequence.
1236 p means CHAR is a prefix character for `backward-prefix-chars';
1237 such characters are treated as whitespace when they occur
1238 between expressions.
1239 usage: (modify-syntax-entry CHAR NEWENTRY &optional SYNTAX-TABLE) */)
1240 (Lisp_Object c, Lisp_Object newentry, Lisp_Object syntax_table)
1242 if (CONSP (c))
1244 CHECK_CHARACTER_CAR (c);
1245 CHECK_CHARACTER_CDR (c);
1247 else
1248 CHECK_CHARACTER (c);
1250 if (NILP (syntax_table))
1251 syntax_table = BVAR (current_buffer, syntax_table);
1252 else
1253 check_syntax_table (syntax_table);
1255 newentry = Fstring_to_syntax (newentry);
1256 if (CONSP (c))
1257 SET_RAW_SYNTAX_ENTRY_RANGE (syntax_table, c, newentry);
1258 else
1259 SET_RAW_SYNTAX_ENTRY (syntax_table, XINT (c), newentry);
1261 /* We clear the regexp cache, since character classes can now have
1262 different values from those in the compiled regexps.*/
1263 clear_regexp_cache ();
1265 return Qnil;
1268 /* Dump syntax table to buffer in human-readable format */
1270 DEFUN ("internal-describe-syntax-value", Finternal_describe_syntax_value,
1271 Sinternal_describe_syntax_value, 1, 1, 0,
1272 doc: /* Insert a description of the internal syntax description SYNTAX at point. */)
1273 (Lisp_Object syntax)
1275 int code, syntax_code;
1276 bool start1, start2, end1, end2, prefix, comstyleb, comstylec, comnested;
1277 char str[2];
1278 Lisp_Object first, match_lisp, value = syntax;
1280 if (NILP (value))
1282 insert_string ("default");
1283 return syntax;
1286 if (CHAR_TABLE_P (value))
1288 insert_string ("deeper char-table ...");
1289 return syntax;
1292 if (!CONSP (value))
1294 insert_string ("invalid");
1295 return syntax;
1298 first = XCAR (value);
1299 match_lisp = XCDR (value);
1301 if (!INTEGERP (first) || !(NILP (match_lisp) || CHARACTERP (match_lisp)))
1303 insert_string ("invalid");
1304 return syntax;
1307 syntax_code = XINT (first) & INT_MAX;
1308 code = syntax_code & 0377;
1309 start1 = SYNTAX_FLAGS_COMSTART_FIRST (syntax_code);
1310 start2 = SYNTAX_FLAGS_COMSTART_SECOND (syntax_code);
1311 end1 = SYNTAX_FLAGS_COMEND_FIRST (syntax_code);
1312 end2 = SYNTAX_FLAGS_COMEND_SECOND (syntax_code);
1313 prefix = SYNTAX_FLAGS_PREFIX (syntax_code);
1314 comstyleb = SYNTAX_FLAGS_COMMENT_STYLEB (syntax_code);
1315 comstylec = SYNTAX_FLAGS_COMMENT_STYLEC (syntax_code);
1316 comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax_code);
1318 if (Smax <= code)
1320 insert_string ("invalid");
1321 return syntax;
1324 str[0] = syntax_code_spec[code], str[1] = 0;
1325 insert (str, 1);
1327 if (NILP (match_lisp))
1328 insert (" ", 1);
1329 else
1330 insert_char (XINT (match_lisp));
1332 if (start1)
1333 insert ("1", 1);
1334 if (start2)
1335 insert ("2", 1);
1337 if (end1)
1338 insert ("3", 1);
1339 if (end2)
1340 insert ("4", 1);
1342 if (prefix)
1343 insert ("p", 1);
1344 if (comstyleb)
1345 insert ("b", 1);
1346 if (comstylec)
1347 insert ("c", 1);
1348 if (comnested)
1349 insert ("n", 1);
1351 insert_string ("\twhich means: ");
1353 switch (code)
1355 case Swhitespace:
1356 insert_string ("whitespace"); break;
1357 case Spunct:
1358 insert_string ("punctuation"); break;
1359 case Sword:
1360 insert_string ("word"); break;
1361 case Ssymbol:
1362 insert_string ("symbol"); break;
1363 case Sopen:
1364 insert_string ("open"); break;
1365 case Sclose:
1366 insert_string ("close"); break;
1367 case Squote:
1368 insert_string ("prefix"); break;
1369 case Sstring:
1370 insert_string ("string"); break;
1371 case Smath:
1372 insert_string ("math"); break;
1373 case Sescape:
1374 insert_string ("escape"); break;
1375 case Scharquote:
1376 insert_string ("charquote"); break;
1377 case Scomment:
1378 insert_string ("comment"); break;
1379 case Sendcomment:
1380 insert_string ("endcomment"); break;
1381 case Sinherit:
1382 insert_string ("inherit"); break;
1383 case Scomment_fence:
1384 insert_string ("comment fence"); break;
1385 case Sstring_fence:
1386 insert_string ("string fence"); break;
1387 default:
1388 insert_string ("invalid");
1389 return syntax;
1392 if (!NILP (match_lisp))
1394 insert_string (", matches ");
1395 insert_char (XINT (match_lisp));
1398 if (start1)
1399 insert_string (",\n\t is the first character of a comment-start sequence");
1400 if (start2)
1401 insert_string (",\n\t is the second character of a comment-start sequence");
1403 if (end1)
1404 insert_string (",\n\t is the first character of a comment-end sequence");
1405 if (end2)
1406 insert_string (",\n\t is the second character of a comment-end sequence");
1407 if (comstyleb)
1408 insert_string (" (comment style b)");
1409 if (comstylec)
1410 insert_string (" (comment style c)");
1411 if (comnested)
1412 insert_string (" (nestable)");
1414 if (prefix)
1416 AUTO_STRING (prefixdoc,
1417 ",\n\t is a prefix character for `backward-prefix-chars'");
1418 insert1 (Fsubstitute_command_keys (prefixdoc));
1421 return syntax;
1424 /* Return the position across COUNT words from FROM.
1425 If that many words cannot be found before the end of the buffer, return 0.
1426 COUNT negative means scan backward and stop at word beginning. */
1428 ptrdiff_t
1429 scan_words (ptrdiff_t from, EMACS_INT count)
1431 ptrdiff_t beg = BEGV;
1432 ptrdiff_t end = ZV;
1433 ptrdiff_t from_byte = CHAR_TO_BYTE (from);
1434 enum syntaxcode code;
1435 int ch0, ch1;
1436 Lisp_Object func, pos;
1438 SETUP_SYNTAX_TABLE (from, count);
1440 while (count > 0)
1442 while (true)
1444 if (from == end)
1445 return 0;
1446 UPDATE_SYNTAX_TABLE_FORWARD (from);
1447 ch0 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
1448 code = SYNTAX (ch0);
1449 INC_BOTH (from, from_byte);
1450 if (words_include_escapes
1451 && (code == Sescape || code == Scharquote))
1452 break;
1453 if (code == Sword)
1454 break;
1455 rarely_quit (from);
1457 /* Now CH0 is a character which begins a word and FROM is the
1458 position of the next character. */
1459 func = CHAR_TABLE_REF (Vfind_word_boundary_function_table, ch0);
1460 if (! NILP (Ffboundp (func)))
1462 pos = call2 (func, make_number (from - 1), make_number (end));
1463 if (INTEGERP (pos) && from < XINT (pos) && XINT (pos) <= ZV)
1465 from = XINT (pos);
1466 from_byte = CHAR_TO_BYTE (from);
1469 else
1471 while (1)
1473 if (from == end) break;
1474 UPDATE_SYNTAX_TABLE_FORWARD (from);
1475 ch1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
1476 code = SYNTAX (ch1);
1477 if ((code != Sword
1478 && (! words_include_escapes
1479 || (code != Sescape && code != Scharquote)))
1480 || word_boundary_p (ch0, ch1))
1481 break;
1482 INC_BOTH (from, from_byte);
1483 ch0 = ch1;
1484 rarely_quit (from);
1487 count--;
1489 while (count < 0)
1491 while (true)
1493 if (from == beg)
1494 return 0;
1495 DEC_BOTH (from, from_byte);
1496 UPDATE_SYNTAX_TABLE_BACKWARD (from);
1497 ch1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
1498 code = SYNTAX (ch1);
1499 if (words_include_escapes
1500 && (code == Sescape || code == Scharquote))
1501 break;
1502 if (code == Sword)
1503 break;
1504 rarely_quit (from);
1506 /* Now CH1 is a character which ends a word and FROM is the
1507 position of it. */
1508 func = CHAR_TABLE_REF (Vfind_word_boundary_function_table, ch1);
1509 if (! NILP (Ffboundp (func)))
1511 pos = call2 (func, make_number (from), make_number (beg));
1512 if (INTEGERP (pos) && BEGV <= XINT (pos) && XINT (pos) < from)
1514 from = XINT (pos);
1515 from_byte = CHAR_TO_BYTE (from);
1518 else
1520 while (1)
1522 if (from == beg)
1523 break;
1524 DEC_BOTH (from, from_byte);
1525 UPDATE_SYNTAX_TABLE_BACKWARD (from);
1526 ch0 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
1527 code = SYNTAX (ch0);
1528 if ((code != Sword
1529 && (! words_include_escapes
1530 || (code != Sescape && code != Scharquote)))
1531 || word_boundary_p (ch0, ch1))
1533 INC_BOTH (from, from_byte);
1534 break;
1536 ch1 = ch0;
1537 rarely_quit (from);
1540 count++;
1543 return from;
1546 DEFUN ("forward-word", Fforward_word, Sforward_word, 0, 1, "^p",
1547 doc: /* Move point forward ARG words (backward if ARG is negative).
1548 If ARG is omitted or nil, move point forward one word.
1549 Normally returns t.
1550 If an edge of the buffer or a field boundary is reached, point is
1551 left there and the function returns nil. Field boundaries are not
1552 noticed if `inhibit-field-text-motion' is non-nil.
1554 The word boundaries are normally determined by the buffer's syntax
1555 table and character script (according to `char-script-table'), but
1556 `find-word-boundary-function-table', such as set up by `subword-mode',
1557 can change that. If a Lisp program needs to move by words determined
1558 strictly by the syntax table, it should use `forward-word-strictly'
1559 instead. See Info node `(elisp) Word Motion' for details. */)
1560 (Lisp_Object arg)
1562 Lisp_Object tmp;
1563 ptrdiff_t orig_val, val;
1565 if (NILP (arg))
1566 XSETFASTINT (arg, 1);
1567 else
1568 CHECK_NUMBER (arg);
1570 val = orig_val = scan_words (PT, XINT (arg));
1571 if (! orig_val)
1572 val = XINT (arg) > 0 ? ZV : BEGV;
1574 /* Avoid jumping out of an input field. */
1575 tmp = Fconstrain_to_field (make_number (val), make_number (PT),
1576 Qnil, Qnil, Qnil);
1577 val = XFASTINT (tmp);
1579 SET_PT (val);
1580 return val == orig_val ? Qt : Qnil;
1583 DEFUN ("skip-chars-forward", Fskip_chars_forward, Sskip_chars_forward, 1, 2, 0,
1584 doc: /* Move point forward, stopping before a char not in STRING, or at pos LIM.
1585 STRING is like the inside of a `[...]' in a regular expression
1586 except that `]' is never special and `\\' quotes `^', `-' or `\\'
1587 (but not at the end of a range; quoting is never needed there).
1588 Thus, with arg "a-zA-Z", this skips letters stopping before first nonletter.
1589 With arg "^a-zA-Z", skips nonletters stopping before first letter.
1590 Char classes, e.g. `[:alpha:]', are supported.
1592 Returns the distance traveled, either zero or positive. */)
1593 (Lisp_Object string, Lisp_Object lim)
1595 return skip_chars (1, string, lim, 1);
1598 DEFUN ("skip-chars-backward", Fskip_chars_backward, Sskip_chars_backward, 1, 2, 0,
1599 doc: /* Move point backward, stopping after a char not in STRING, or at pos LIM.
1600 See `skip-chars-forward' for details.
1601 Returns the distance traveled, either zero or negative. */)
1602 (Lisp_Object string, Lisp_Object lim)
1604 return skip_chars (0, string, lim, 1);
1607 DEFUN ("skip-syntax-forward", Fskip_syntax_forward, Sskip_syntax_forward, 1, 2, 0,
1608 doc: /* Move point forward across chars in specified syntax classes.
1609 SYNTAX is a string of syntax code characters.
1610 Stop before a char whose syntax is not in SYNTAX, or at position LIM.
1611 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1612 This function returns the distance traveled, either zero or positive. */)
1613 (Lisp_Object syntax, Lisp_Object lim)
1615 return skip_syntaxes (1, syntax, lim);
1618 DEFUN ("skip-syntax-backward", Fskip_syntax_backward, Sskip_syntax_backward, 1, 2, 0,
1619 doc: /* Move point backward across chars in specified syntax classes.
1620 SYNTAX is a string of syntax code characters.
1621 Stop on reaching a char whose syntax is not in SYNTAX, or at position LIM.
1622 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1623 This function returns either zero or a negative number, and the absolute value
1624 of this is the distance traveled. */)
1625 (Lisp_Object syntax, Lisp_Object lim)
1627 return skip_syntaxes (0, syntax, lim);
1630 static Lisp_Object
1631 skip_chars (bool forwardp, Lisp_Object string, Lisp_Object lim,
1632 bool handle_iso_classes)
1634 int c;
1635 char fastmap[0400];
1636 /* Store the ranges of non-ASCII characters. */
1637 int *char_ranges UNINIT;
1638 int n_char_ranges = 0;
1639 bool negate = 0;
1640 ptrdiff_t i, i_byte;
1641 /* True if the current buffer is multibyte and the region contains
1642 non-ASCII chars. */
1643 bool multibyte;
1644 /* True if STRING is multibyte and it contains non-ASCII chars. */
1645 bool string_multibyte;
1646 ptrdiff_t size_byte;
1647 const unsigned char *str;
1648 int len;
1649 Lisp_Object iso_classes;
1650 USE_SAFE_ALLOCA;
1652 CHECK_STRING (string);
1653 iso_classes = Qnil;
1655 if (NILP (lim))
1656 XSETINT (lim, forwardp ? ZV : BEGV);
1657 else
1658 CHECK_NUMBER_COERCE_MARKER (lim);
1660 /* In any case, don't allow scan outside bounds of buffer. */
1661 if (XINT (lim) > ZV)
1662 XSETFASTINT (lim, ZV);
1663 if (XINT (lim) < BEGV)
1664 XSETFASTINT (lim, BEGV);
1666 multibyte = (!NILP (BVAR (current_buffer, enable_multibyte_characters))
1667 && (XINT (lim) - PT != CHAR_TO_BYTE (XINT (lim)) - PT_BYTE));
1668 string_multibyte = SBYTES (string) > SCHARS (string);
1670 memset (fastmap, 0, sizeof fastmap);
1672 str = SDATA (string);
1673 size_byte = SBYTES (string);
1675 i_byte = 0;
1676 if (i_byte < size_byte
1677 && SREF (string, 0) == '^')
1679 negate = 1; i_byte++;
1682 /* Find the characters specified and set their elements of fastmap.
1683 Handle backslashes and ranges specially.
1685 If STRING contains non-ASCII characters, setup char_ranges for
1686 them and use fastmap only for their leading codes. */
1688 if (! string_multibyte)
1690 bool string_has_eight_bit = 0;
1692 /* At first setup fastmap. */
1693 while (i_byte < size_byte)
1695 if (handle_iso_classes)
1697 const unsigned char *ch = str + i_byte;
1698 re_wctype_t cc = re_wctype_parse (&ch, size_byte - i_byte);
1699 if (cc == 0)
1700 error ("Invalid ISO C character class");
1701 if (cc != -1)
1703 iso_classes = Fcons (make_number (cc), iso_classes);
1704 i_byte = ch - str;
1705 continue;
1709 c = str[i_byte++];
1711 if (c == '\\')
1713 if (i_byte == size_byte)
1714 break;
1716 c = str[i_byte++];
1718 /* Treat `-' as range character only if another character
1719 follows. */
1720 if (i_byte + 1 < size_byte
1721 && str[i_byte] == '-')
1723 int c2;
1725 /* Skip over the dash. */
1726 i_byte++;
1728 /* Get the end of the range. */
1729 c2 = str[i_byte++];
1730 if (c2 == '\\'
1731 && i_byte < size_byte)
1732 c2 = str[i_byte++];
1734 if (c <= c2)
1736 int lim2 = c2 + 1;
1737 while (c < lim2)
1738 fastmap[c++] = 1;
1739 if (! ASCII_CHAR_P (c2))
1740 string_has_eight_bit = 1;
1743 else
1745 fastmap[c] = 1;
1746 if (! ASCII_CHAR_P (c))
1747 string_has_eight_bit = 1;
1751 /* If the current range is multibyte and STRING contains
1752 eight-bit chars, arrange fastmap and setup char_ranges for
1753 the corresponding multibyte chars. */
1754 if (multibyte && string_has_eight_bit)
1756 char *p1;
1757 char himap[0200 + 1];
1758 memcpy (himap, fastmap + 0200, 0200);
1759 himap[0200] = 0;
1760 memset (fastmap + 0200, 0, 0200);
1761 SAFE_NALLOCA (char_ranges, 2, 128);
1762 i = 0;
1764 while ((p1 = memchr (himap + i, 1, 0200 - i)))
1766 /* Deduce the next range C..C2 from the next clump of 1s
1767 in HIMAP starting with &HIMAP[I]. HIMAP is the high
1768 order half of the old FASTMAP. */
1769 int c2, leading_code;
1770 i = p1 - himap;
1771 c = BYTE8_TO_CHAR (i + 0200);
1772 i += strlen (p1);
1773 c2 = BYTE8_TO_CHAR (i + 0200 - 1);
1775 char_ranges[n_char_ranges++] = c;
1776 char_ranges[n_char_ranges++] = c2;
1777 leading_code = CHAR_LEADING_CODE (c);
1778 memset (fastmap + leading_code, 1,
1779 CHAR_LEADING_CODE (c2) - leading_code + 1);
1783 else /* STRING is multibyte */
1785 SAFE_NALLOCA (char_ranges, 2, SCHARS (string));
1787 while (i_byte < size_byte)
1789 int leading_code = str[i_byte];
1791 if (handle_iso_classes)
1793 const unsigned char *ch = str + i_byte;
1794 re_wctype_t cc = re_wctype_parse (&ch, size_byte - i_byte);
1795 if (cc == 0)
1796 error ("Invalid ISO C character class");
1797 if (cc != -1)
1799 iso_classes = Fcons (make_number (cc), iso_classes);
1800 i_byte = ch - str;
1801 continue;
1805 if (leading_code== '\\')
1807 if (++i_byte == size_byte)
1808 break;
1810 leading_code = str[i_byte];
1812 c = STRING_CHAR_AND_LENGTH (str + i_byte, len);
1813 i_byte += len;
1816 /* Treat `-' as range character only if another character
1817 follows. */
1818 if (i_byte + 1 < size_byte
1819 && str[i_byte] == '-')
1821 int c2, leading_code2;
1823 /* Skip over the dash. */
1824 i_byte++;
1826 /* Get the end of the range. */
1827 leading_code2 = str[i_byte];
1828 c2 = STRING_CHAR_AND_LENGTH (str + i_byte, len);
1829 i_byte += len;
1831 if (c2 == '\\'
1832 && i_byte < size_byte)
1834 leading_code2 = str[i_byte];
1835 c2 = STRING_CHAR_AND_LENGTH (str + i_byte, len);
1836 i_byte += len;
1839 if (c > c2)
1840 continue;
1841 if (ASCII_CHAR_P (c))
1843 while (c <= c2 && c < 0x80)
1844 fastmap[c++] = 1;
1845 leading_code = CHAR_LEADING_CODE (c);
1847 if (! ASCII_CHAR_P (c))
1849 int lim2 = leading_code2 + 1;
1850 while (leading_code < lim2)
1851 fastmap[leading_code++] = 1;
1852 if (c <= c2)
1854 char_ranges[n_char_ranges++] = c;
1855 char_ranges[n_char_ranges++] = c2;
1859 else
1861 if (ASCII_CHAR_P (c))
1862 fastmap[c] = 1;
1863 else
1865 fastmap[leading_code] = 1;
1866 char_ranges[n_char_ranges++] = c;
1867 char_ranges[n_char_ranges++] = c;
1872 /* If the current range is unibyte and STRING contains non-ASCII
1873 chars, arrange fastmap for the corresponding unibyte
1874 chars. */
1876 if (! multibyte && n_char_ranges > 0)
1878 memset (fastmap + 0200, 0, 0200);
1879 for (i = 0; i < n_char_ranges; i += 2)
1881 int c1 = char_ranges[i];
1882 int lim2 = char_ranges[i + 1] + 1;
1884 for (; c1 < lim2; c1++)
1886 int b = CHAR_TO_BYTE_SAFE (c1);
1887 if (b >= 0)
1888 fastmap[b] = 1;
1894 /* If ^ was the first character, complement the fastmap. */
1895 if (negate)
1897 if (! multibyte)
1898 for (i = 0; i < sizeof fastmap; i++)
1899 fastmap[i] ^= 1;
1900 else
1902 for (i = 0; i < 0200; i++)
1903 fastmap[i] ^= 1;
1904 /* All non-ASCII chars possibly match. */
1905 for (; i < sizeof fastmap; i++)
1906 fastmap[i] = 1;
1911 ptrdiff_t start_point = PT;
1912 ptrdiff_t pos = PT;
1913 ptrdiff_t pos_byte = PT_BYTE;
1914 unsigned char *p = PT_ADDR, *endp, *stop;
1916 if (forwardp)
1918 endp = (XINT (lim) == GPT) ? GPT_ADDR : CHAR_POS_ADDR (XINT (lim));
1919 stop = (pos < GPT && GPT < XINT (lim)) ? GPT_ADDR : endp;
1921 else
1923 endp = CHAR_POS_ADDR (XINT (lim));
1924 stop = (pos >= GPT && GPT > XINT (lim)) ? GAP_END_ADDR : endp;
1927 /* This code may look up syntax tables using functions that rely on the
1928 gl_state object. To make sure this object is not out of date,
1929 let's initialize it manually.
1930 We ignore syntax-table text-properties for now, since that's
1931 what we've done in the past. */
1932 SETUP_BUFFER_SYNTAX_TABLE ();
1933 if (forwardp)
1935 if (multibyte)
1936 while (1)
1938 int nbytes;
1940 if (p >= stop)
1942 if (p >= endp)
1943 break;
1944 p = GAP_END_ADDR;
1945 stop = endp;
1947 c = STRING_CHAR_AND_LENGTH (p, nbytes);
1948 if (! NILP (iso_classes) && in_classes (c, iso_classes))
1950 if (negate)
1951 break;
1952 else
1953 goto fwd_ok;
1956 if (! fastmap[*p])
1957 break;
1958 if (! ASCII_CHAR_P (c))
1960 /* As we are looking at a multibyte character, we
1961 must look up the character in the table
1962 CHAR_RANGES. If there's no data in the table,
1963 that character is not what we want to skip. */
1965 /* The following code do the right thing even if
1966 n_char_ranges is zero (i.e. no data in
1967 CHAR_RANGES). */
1968 for (i = 0; i < n_char_ranges; i += 2)
1969 if (c >= char_ranges[i] && c <= char_ranges[i + 1])
1970 break;
1971 if (!(negate ^ (i < n_char_ranges)))
1972 break;
1974 fwd_ok:
1975 p += nbytes, pos++, pos_byte += nbytes;
1976 rarely_quit (pos);
1978 else
1979 while (true)
1981 if (p >= stop)
1983 if (p >= endp)
1984 break;
1985 p = GAP_END_ADDR;
1986 stop = endp;
1989 if (!NILP (iso_classes) && in_classes (*p, iso_classes))
1991 if (negate)
1992 break;
1993 else
1994 goto fwd_unibyte_ok;
1997 if (!fastmap[*p])
1998 break;
1999 fwd_unibyte_ok:
2000 p++, pos++, pos_byte++;
2001 rarely_quit (pos);
2004 else
2006 if (multibyte)
2007 while (true)
2009 if (p <= stop)
2011 if (p <= endp)
2012 break;
2013 p = GPT_ADDR;
2014 stop = endp;
2016 unsigned char *prev_p = p;
2018 p--;
2019 while (stop <= p && ! CHAR_HEAD_P (*p));
2021 c = STRING_CHAR (p);
2023 if (! NILP (iso_classes) && in_classes (c, iso_classes))
2025 if (negate)
2026 break;
2027 else
2028 goto back_ok;
2031 if (! fastmap[*p])
2032 break;
2033 if (! ASCII_CHAR_P (c))
2035 /* See the comment in the previous similar code. */
2036 for (i = 0; i < n_char_ranges; i += 2)
2037 if (c >= char_ranges[i] && c <= char_ranges[i + 1])
2038 break;
2039 if (!(negate ^ (i < n_char_ranges)))
2040 break;
2042 back_ok:
2043 pos--, pos_byte -= prev_p - p;
2044 rarely_quit (pos);
2046 else
2047 while (true)
2049 if (p <= stop)
2051 if (p <= endp)
2052 break;
2053 p = GPT_ADDR;
2054 stop = endp;
2057 if (! NILP (iso_classes) && in_classes (p[-1], iso_classes))
2059 if (negate)
2060 break;
2061 else
2062 goto back_unibyte_ok;
2065 if (!fastmap[p[-1]])
2066 break;
2067 back_unibyte_ok:
2068 p--, pos--, pos_byte--;
2069 rarely_quit (pos);
2073 SET_PT_BOTH (pos, pos_byte);
2075 SAFE_FREE ();
2076 return make_number (PT - start_point);
2081 static Lisp_Object
2082 skip_syntaxes (bool forwardp, Lisp_Object string, Lisp_Object lim)
2084 int c;
2085 unsigned char fastmap[0400];
2086 bool negate = 0;
2087 ptrdiff_t i, i_byte;
2088 bool multibyte;
2089 ptrdiff_t size_byte;
2090 unsigned char *str;
2092 CHECK_STRING (string);
2094 if (NILP (lim))
2095 XSETINT (lim, forwardp ? ZV : BEGV);
2096 else
2097 CHECK_NUMBER_COERCE_MARKER (lim);
2099 /* In any case, don't allow scan outside bounds of buffer. */
2100 if (XINT (lim) > ZV)
2101 XSETFASTINT (lim, ZV);
2102 if (XINT (lim) < BEGV)
2103 XSETFASTINT (lim, BEGV);
2105 if (forwardp ? (PT >= XFASTINT (lim)) : (PT <= XFASTINT (lim)))
2106 return make_number (0);
2108 multibyte = (!NILP (BVAR (current_buffer, enable_multibyte_characters))
2109 && (XINT (lim) - PT != CHAR_TO_BYTE (XINT (lim)) - PT_BYTE));
2111 memset (fastmap, 0, sizeof fastmap);
2113 if (SBYTES (string) > SCHARS (string))
2114 /* As this is very rare case (syntax spec is ASCII only), don't
2115 consider efficiency. */
2116 string = string_make_unibyte (string);
2118 str = SDATA (string);
2119 size_byte = SBYTES (string);
2121 i_byte = 0;
2122 if (i_byte < size_byte
2123 && SREF (string, 0) == '^')
2125 negate = 1; i_byte++;
2128 /* Find the syntaxes specified and set their elements of fastmap. */
2130 while (i_byte < size_byte)
2132 c = str[i_byte++];
2133 fastmap[syntax_spec_code[c]] = 1;
2136 /* If ^ was the first character, complement the fastmap. */
2137 if (negate)
2138 for (i = 0; i < sizeof fastmap; i++)
2139 fastmap[i] ^= 1;
2142 ptrdiff_t start_point = PT;
2143 ptrdiff_t pos = PT;
2144 ptrdiff_t pos_byte = PT_BYTE;
2145 unsigned char *p, *endp, *stop;
2147 SETUP_SYNTAX_TABLE (pos, forwardp ? 1 : -1);
2149 if (forwardp)
2151 while (true)
2153 p = BYTE_POS_ADDR (pos_byte);
2154 endp = XINT (lim) == GPT ? GPT_ADDR : CHAR_POS_ADDR (XINT (lim));
2155 stop = pos < GPT && GPT < XINT (lim) ? GPT_ADDR : endp;
2159 int nbytes;
2161 if (p >= stop)
2163 if (p >= endp)
2164 goto done;
2165 p = GAP_END_ADDR;
2166 stop = endp;
2168 if (multibyte)
2169 c = STRING_CHAR_AND_LENGTH (p, nbytes);
2170 else
2171 c = *p, nbytes = 1;
2172 if (! fastmap[SYNTAX (c)])
2173 goto done;
2174 p += nbytes, pos++, pos_byte += nbytes;
2175 rarely_quit (pos);
2177 while (!parse_sexp_lookup_properties
2178 || pos < gl_state.e_property);
2180 update_syntax_table_forward (pos + gl_state.offset,
2181 false, gl_state.object);
2184 else
2186 p = BYTE_POS_ADDR (pos_byte);
2187 endp = CHAR_POS_ADDR (XINT (lim));
2188 stop = pos >= GPT && GPT > XINT (lim) ? GAP_END_ADDR : endp;
2190 if (multibyte)
2192 while (true)
2194 if (p <= stop)
2196 if (p <= endp)
2197 break;
2198 p = GPT_ADDR;
2199 stop = endp;
2201 UPDATE_SYNTAX_TABLE_BACKWARD (pos - 1);
2203 unsigned char *prev_p = p;
2205 p--;
2206 while (stop <= p && ! CHAR_HEAD_P (*p));
2208 c = STRING_CHAR (p);
2209 if (! fastmap[SYNTAX (c)])
2210 break;
2211 pos--, pos_byte -= prev_p - p;
2212 rarely_quit (pos);
2215 else
2217 while (true)
2219 if (p <= stop)
2221 if (p <= endp)
2222 break;
2223 p = GPT_ADDR;
2224 stop = endp;
2226 UPDATE_SYNTAX_TABLE_BACKWARD (pos - 1);
2227 if (! fastmap[SYNTAX (p[-1])])
2228 break;
2229 p--, pos--, pos_byte--;
2230 rarely_quit (pos);
2235 done:
2236 SET_PT_BOTH (pos, pos_byte);
2238 return make_number (PT - start_point);
2242 /* Return true if character C belongs to one of the ISO classes
2243 in the list ISO_CLASSES. Each class is represented by an
2244 integer which is its type according to re_wctype. */
2246 static bool
2247 in_classes (int c, Lisp_Object iso_classes)
2249 bool fits_class = 0;
2251 while (CONSP (iso_classes))
2253 Lisp_Object elt;
2254 elt = XCAR (iso_classes);
2255 iso_classes = XCDR (iso_classes);
2257 if (re_iswctype (c, XFASTINT (elt)))
2258 fits_class = 1;
2261 return fits_class;
2264 /* Jump over a comment, assuming we are at the beginning of one.
2265 FROM is the current position.
2266 FROM_BYTE is the bytepos corresponding to FROM.
2267 Do not move past STOP (a charpos).
2268 The comment over which we have to jump is of style STYLE
2269 (either SYNTAX_FLAGS_COMMENT_STYLE (foo) or ST_COMMENT_STYLE).
2270 NESTING should be positive to indicate the nesting at the beginning
2271 for nested comments and should be zero or negative else.
2272 ST_COMMENT_STYLE cannot be nested.
2273 PREV_SYNTAX is the SYNTAX_WITH_FLAGS of the previous character
2274 (or 0 If the search cannot start in the middle of a two-character).
2276 If successful, return true and store the charpos of the comment's
2277 end into *CHARPOS_PTR and the corresponding bytepos into
2278 *BYTEPOS_PTR. Else, return false and store the charpos STOP into
2279 *CHARPOS_PTR, the corresponding bytepos into *BYTEPOS_PTR and the
2280 current nesting (as defined for state->incomment) in
2281 *INCOMMENT_PTR. Should the last character scanned in an incomplete
2282 comment be a possible first character of a two character construct,
2283 we store its SYNTAX_WITH_FLAGS into *last_syntax_ptr. Otherwise,
2284 we store Smax into *last_syntax_ptr.
2286 The comment end is the last character of the comment rather than the
2287 character just after the comment.
2289 Global syntax data is assumed to initially be valid for FROM and
2290 remains valid for forward search starting at the returned position. */
2292 static bool
2293 forw_comment (ptrdiff_t from, ptrdiff_t from_byte, ptrdiff_t stop,
2294 EMACS_INT nesting, int style, int prev_syntax,
2295 ptrdiff_t *charpos_ptr, ptrdiff_t *bytepos_ptr,
2296 EMACS_INT *incomment_ptr, int *last_syntax_ptr)
2298 unsigned short int quit_count = 0;
2299 int c, c1;
2300 enum syntaxcode code;
2301 int syntax, other_syntax;
2303 if (nesting <= 0) nesting = -1;
2305 /* Enter the loop in the middle so that we find
2306 a 2-char comment ender if we start in the middle of it. */
2307 syntax = prev_syntax;
2308 code = syntax & 0xff;
2309 if (syntax != 0 && from < stop) goto forw_incomment;
2311 while (1)
2313 if (from == stop)
2315 *incomment_ptr = nesting;
2316 *charpos_ptr = from;
2317 *bytepos_ptr = from_byte;
2318 *last_syntax_ptr =
2319 (code == Sescape || code == Scharquote
2320 || SYNTAX_FLAGS_COMEND_FIRST (syntax)
2321 || (nesting > 0
2322 && SYNTAX_FLAGS_COMSTART_FIRST (syntax)))
2323 ? syntax : Smax ;
2324 return 0;
2326 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2327 syntax = SYNTAX_WITH_FLAGS (c);
2328 code = syntax & 0xff;
2329 if (code == Sendcomment
2330 && SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0) == style
2331 && (SYNTAX_FLAGS_COMMENT_NESTED (syntax) ?
2332 (nesting > 0 && --nesting == 0) : nesting < 0)
2333 && !(Vcomment_end_can_be_escaped && char_quoted (from, from_byte)))
2334 /* We have encountered a comment end of the same style
2335 as the comment sequence which began this comment
2336 section. */
2337 break;
2338 if (code == Scomment_fence
2339 && style == ST_COMMENT_STYLE)
2340 /* We have encountered a comment end of the same style
2341 as the comment sequence which began this comment
2342 section. */
2343 break;
2344 if (nesting > 0
2345 && code == Scomment
2346 && SYNTAX_FLAGS_COMMENT_NESTED (syntax)
2347 && SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0) == style)
2348 /* We have encountered a nested comment of the same style
2349 as the comment sequence which began this comment section. */
2350 nesting++;
2351 INC_BOTH (from, from_byte);
2352 UPDATE_SYNTAX_TABLE_FORWARD (from);
2354 forw_incomment:
2355 if (from < stop && SYNTAX_FLAGS_COMEND_FIRST (syntax)
2356 && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
2357 other_syntax = SYNTAX_WITH_FLAGS (c1),
2358 SYNTAX_FLAGS_COMEND_SECOND (other_syntax))
2359 && SYNTAX_FLAGS_COMMENT_STYLE (syntax, other_syntax) == style
2360 && ((SYNTAX_FLAGS_COMMENT_NESTED (syntax) ||
2361 SYNTAX_FLAGS_COMMENT_NESTED (other_syntax))
2362 ? nesting > 0 : nesting < 0))
2364 syntax = Smax; /* So that "|#" (lisp) can not return
2365 the syntax of "#" in *last_syntax_ptr. */
2366 if (--nesting <= 0)
2367 /* We have encountered a comment end of the same style
2368 as the comment sequence which began this comment section. */
2369 break;
2370 else
2372 INC_BOTH (from, from_byte);
2373 UPDATE_SYNTAX_TABLE_FORWARD (from);
2376 if (nesting > 0
2377 && from < stop
2378 && SYNTAX_FLAGS_COMSTART_FIRST (syntax)
2379 && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
2380 other_syntax = SYNTAX_WITH_FLAGS (c1),
2381 SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax) == style
2382 && SYNTAX_FLAGS_COMSTART_SECOND (other_syntax))
2383 && (SYNTAX_FLAGS_COMMENT_NESTED (syntax) ||
2384 SYNTAX_FLAGS_COMMENT_NESTED (other_syntax)))
2385 /* We have encountered a nested comment of the same style
2386 as the comment sequence which began this comment section. */
2388 syntax = Smax; /* So that "#|#" isn't also a comment ender. */
2389 INC_BOTH (from, from_byte);
2390 UPDATE_SYNTAX_TABLE_FORWARD (from);
2391 nesting++;
2394 rarely_quit (++quit_count);
2396 *charpos_ptr = from;
2397 *bytepos_ptr = from_byte;
2398 *last_syntax_ptr = Smax; /* Any syntactic power the last byte had is
2399 used up. */
2400 return 1;
2403 DEFUN ("forward-comment", Fforward_comment, Sforward_comment, 1, 1, 0,
2404 doc: /*
2405 Move forward across up to COUNT comments. If COUNT is negative, move backward.
2406 Stop scanning if we find something other than a comment or whitespace.
2407 Set point to where scanning stops.
2408 If COUNT comments are found as expected, with nothing except whitespace
2409 between them, return t; otherwise return nil. */)
2410 (Lisp_Object count)
2412 ptrdiff_t from, from_byte, stop;
2413 int c, c1;
2414 enum syntaxcode code;
2415 int comstyle = 0; /* style of comment encountered */
2416 bool comnested = 0; /* whether the comment is nestable or not */
2417 bool found;
2418 EMACS_INT count1;
2419 ptrdiff_t out_charpos, out_bytepos;
2420 EMACS_INT dummy;
2421 int dummy2;
2422 unsigned short int quit_count = 0;
2424 CHECK_NUMBER (count);
2425 count1 = XINT (count);
2426 stop = count1 > 0 ? ZV : BEGV;
2428 from = PT;
2429 from_byte = PT_BYTE;
2431 SETUP_SYNTAX_TABLE (from, count1);
2432 while (count1 > 0)
2436 bool comstart_first;
2437 int syntax, other_syntax;
2439 if (from == stop)
2441 SET_PT_BOTH (from, from_byte);
2442 return Qnil;
2444 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2445 syntax = SYNTAX_WITH_FLAGS (c);
2446 code = SYNTAX (c);
2447 comstart_first = SYNTAX_FLAGS_COMSTART_FIRST (syntax);
2448 comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax);
2449 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0);
2450 INC_BOTH (from, from_byte);
2451 UPDATE_SYNTAX_TABLE_FORWARD (from);
2452 if (from < stop && comstart_first
2453 && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
2454 other_syntax = SYNTAX_WITH_FLAGS (c1),
2455 SYNTAX_FLAGS_COMSTART_SECOND (other_syntax)))
2457 /* We have encountered a comment start sequence and we
2458 are ignoring all text inside comments. We must record
2459 the comment style this sequence begins so that later,
2460 only a comment end of the same style actually ends
2461 the comment section. */
2462 code = Scomment;
2463 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax);
2464 comnested |= SYNTAX_FLAGS_COMMENT_NESTED (other_syntax);
2465 INC_BOTH (from, from_byte);
2466 UPDATE_SYNTAX_TABLE_FORWARD (from);
2468 rarely_quit (++quit_count);
2470 while (code == Swhitespace || (code == Sendcomment && c == '\n'));
2472 if (code == Scomment_fence)
2473 comstyle = ST_COMMENT_STYLE;
2474 else if (code != Scomment)
2476 DEC_BOTH (from, from_byte);
2477 SET_PT_BOTH (from, from_byte);
2478 return Qnil;
2480 /* We're at the start of a comment. */
2481 found = forw_comment (from, from_byte, stop, comnested, comstyle, 0,
2482 &out_charpos, &out_bytepos, &dummy, &dummy2);
2483 from = out_charpos; from_byte = out_bytepos;
2484 if (!found)
2486 SET_PT_BOTH (from, from_byte);
2487 return Qnil;
2489 INC_BOTH (from, from_byte);
2490 UPDATE_SYNTAX_TABLE_FORWARD (from);
2491 /* We have skipped one comment. */
2492 count1--;
2495 while (count1 < 0)
2497 while (true)
2499 if (from <= stop)
2501 SET_PT_BOTH (BEGV, BEGV_BYTE);
2502 return Qnil;
2505 DEC_BOTH (from, from_byte);
2506 /* char_quoted does UPDATE_SYNTAX_TABLE_BACKWARD (from). */
2507 bool quoted = char_quoted (from, from_byte);
2508 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2509 int syntax = SYNTAX_WITH_FLAGS (c);
2510 code = SYNTAX (c);
2511 comstyle = 0;
2512 comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax);
2513 if (code == Sendcomment)
2514 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0);
2515 if (from > stop && SYNTAX_FLAGS_COMEND_SECOND (syntax)
2516 && prev_char_comend_first (from, from_byte)
2517 && !char_quoted (from - 1, dec_bytepos (from_byte)))
2519 int other_syntax;
2520 /* We must record the comment style encountered so that
2521 later, we can match only the proper comment begin
2522 sequence of the same style. */
2523 DEC_BOTH (from, from_byte);
2524 code = Sendcomment;
2525 /* Calling char_quoted, above, set up global syntax position
2526 at the new value of FROM. */
2527 c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2528 other_syntax = SYNTAX_WITH_FLAGS (c1);
2529 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax);
2530 comnested |= SYNTAX_FLAGS_COMMENT_NESTED (other_syntax);
2533 if (code == Scomment_fence)
2535 /* Skip until first preceding unquoted comment_fence. */
2536 bool fence_found = 0;
2537 ptrdiff_t ini = from, ini_byte = from_byte;
2539 while (1)
2541 DEC_BOTH (from, from_byte);
2542 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2543 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2544 if (SYNTAX (c) == Scomment_fence
2545 && !char_quoted (from, from_byte))
2547 fence_found = 1;
2548 break;
2550 else if (from == stop)
2551 break;
2552 rarely_quit (++quit_count);
2554 if (fence_found == 0)
2556 from = ini; /* Set point to ini + 1. */
2557 from_byte = ini_byte;
2558 goto leave;
2560 else
2561 /* We have skipped one comment. */
2562 break;
2564 else if (code == Sendcomment)
2566 found = back_comment (from, from_byte, stop, comnested, comstyle,
2567 &out_charpos, &out_bytepos);
2568 if (!found)
2570 if (c == '\n')
2571 /* This end-of-line is not an end-of-comment.
2572 Treat it like a whitespace.
2573 CC-mode (and maybe others) relies on this behavior. */
2575 else
2577 /* Failure: we should go back to the end of this
2578 not-quite-endcomment. */
2579 if (SYNTAX (c) != code)
2580 /* It was a two-char Sendcomment. */
2581 INC_BOTH (from, from_byte);
2582 goto leave;
2585 else
2587 /* We have skipped one comment. */
2588 from = out_charpos, from_byte = out_bytepos;
2589 break;
2592 else if (code != Swhitespace || quoted)
2594 leave:
2595 INC_BOTH (from, from_byte);
2596 SET_PT_BOTH (from, from_byte);
2597 return Qnil;
2600 rarely_quit (++quit_count);
2603 count1++;
2606 SET_PT_BOTH (from, from_byte);
2607 return Qt;
2610 /* Return syntax code of character C if C is an ASCII character
2611 or if MULTIBYTE_SYMBOL_P is false. Otherwise, return Ssymbol. */
2613 static enum syntaxcode
2614 syntax_multibyte (int c, bool multibyte_symbol_p)
2616 return ASCII_CHAR_P (c) || !multibyte_symbol_p ? SYNTAX (c) : Ssymbol;
2619 static Lisp_Object
2620 scan_lists (EMACS_INT from, EMACS_INT count, EMACS_INT depth, bool sexpflag)
2622 Lisp_Object val;
2623 ptrdiff_t stop = count > 0 ? ZV : BEGV;
2624 int c, c1;
2625 int stringterm;
2626 bool quoted;
2627 bool mathexit = 0;
2628 enum syntaxcode code;
2629 EMACS_INT min_depth = depth; /* Err out if depth gets less than this. */
2630 int comstyle = 0; /* Style of comment encountered. */
2631 bool comnested = 0; /* Whether the comment is nestable or not. */
2632 ptrdiff_t temp_pos;
2633 EMACS_INT last_good = from;
2634 bool found;
2635 ptrdiff_t from_byte;
2636 ptrdiff_t out_bytepos, out_charpos;
2637 EMACS_INT dummy;
2638 int dummy2;
2639 bool multibyte_symbol_p = sexpflag && multibyte_syntax_as_symbol;
2640 unsigned short int quit_count = 0;
2642 if (depth > 0) min_depth = 0;
2644 if (from > ZV) from = ZV;
2645 if (from < BEGV) from = BEGV;
2647 from_byte = CHAR_TO_BYTE (from);
2649 maybe_quit ();
2651 SETUP_SYNTAX_TABLE (from, count);
2652 while (count > 0)
2654 while (from < stop)
2656 rarely_quit (++quit_count);
2657 bool comstart_first, prefix;
2658 int syntax, other_syntax;
2659 UPDATE_SYNTAX_TABLE_FORWARD (from);
2660 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2661 syntax = SYNTAX_WITH_FLAGS (c);
2662 code = syntax_multibyte (c, multibyte_symbol_p);
2663 comstart_first = SYNTAX_FLAGS_COMSTART_FIRST (syntax);
2664 comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax);
2665 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0);
2666 prefix = SYNTAX_FLAGS_PREFIX (syntax);
2667 if (depth == min_depth)
2668 last_good = from;
2669 INC_BOTH (from, from_byte);
2670 UPDATE_SYNTAX_TABLE_FORWARD (from);
2671 if (from < stop && comstart_first
2672 && (c = FETCH_CHAR_AS_MULTIBYTE (from_byte),
2673 other_syntax = SYNTAX_WITH_FLAGS (c),
2674 SYNTAX_FLAGS_COMSTART_SECOND (other_syntax))
2675 && parse_sexp_ignore_comments)
2677 /* We have encountered a comment start sequence and we
2678 are ignoring all text inside comments. We must record
2679 the comment style this sequence begins so that later,
2680 only a comment end of the same style actually ends
2681 the comment section. */
2682 code = Scomment;
2683 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax);
2684 comnested |= SYNTAX_FLAGS_COMMENT_NESTED (other_syntax);
2685 INC_BOTH (from, from_byte);
2686 UPDATE_SYNTAX_TABLE_FORWARD (from);
2689 if (prefix)
2690 continue;
2692 switch (code)
2694 case Sescape:
2695 case Scharquote:
2696 if (from == stop)
2697 goto lose;
2698 INC_BOTH (from, from_byte);
2699 /* Treat following character as a word constituent. */
2700 FALLTHROUGH;
2701 case Sword:
2702 case Ssymbol:
2703 if (depth || !sexpflag) break;
2704 /* This word counts as a sexp; return at end of it. */
2705 while (from < stop)
2707 UPDATE_SYNTAX_TABLE_FORWARD (from);
2709 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2710 switch (syntax_multibyte (c, multibyte_symbol_p))
2712 case Scharquote:
2713 case Sescape:
2714 INC_BOTH (from, from_byte);
2715 if (from == stop)
2716 goto lose;
2717 break;
2718 case Sword:
2719 case Ssymbol:
2720 case Squote:
2721 break;
2722 default:
2723 goto done;
2725 INC_BOTH (from, from_byte);
2726 rarely_quit (++quit_count);
2728 goto done;
2730 case Scomment_fence:
2731 comstyle = ST_COMMENT_STYLE;
2732 FALLTHROUGH;
2733 case Scomment:
2734 if (!parse_sexp_ignore_comments) break;
2735 UPDATE_SYNTAX_TABLE_FORWARD (from);
2736 found = forw_comment (from, from_byte, stop,
2737 comnested, comstyle, 0,
2738 &out_charpos, &out_bytepos, &dummy,
2739 &dummy2);
2740 from = out_charpos, from_byte = out_bytepos;
2741 if (!found)
2743 if (depth == 0)
2744 goto done;
2745 goto lose;
2747 INC_BOTH (from, from_byte);
2748 UPDATE_SYNTAX_TABLE_FORWARD (from);
2749 break;
2751 case Smath:
2752 if (!sexpflag)
2753 break;
2754 if (from != stop && c == FETCH_CHAR_AS_MULTIBYTE (from_byte))
2756 INC_BOTH (from, from_byte);
2758 if (mathexit)
2760 mathexit = 0;
2761 goto close1;
2763 mathexit = 1;
2764 FALLTHROUGH;
2765 case Sopen:
2766 if (!++depth) goto done;
2767 break;
2769 case Sclose:
2770 close1:
2771 if (!--depth) goto done;
2772 if (depth < min_depth)
2773 xsignal3 (Qscan_error,
2774 build_string ("Containing expression ends prematurely"),
2775 make_number (last_good), make_number (from));
2776 break;
2778 case Sstring:
2779 case Sstring_fence:
2780 temp_pos = dec_bytepos (from_byte);
2781 stringterm = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
2782 while (1)
2784 enum syntaxcode c_code;
2785 if (from >= stop)
2786 goto lose;
2787 UPDATE_SYNTAX_TABLE_FORWARD (from);
2788 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2789 c_code = syntax_multibyte (c, multibyte_symbol_p);
2790 if (code == Sstring
2791 ? c == stringterm && c_code == Sstring
2792 : c_code == Sstring_fence)
2793 break;
2795 if (c_code == Scharquote || c_code == Sescape)
2796 INC_BOTH (from, from_byte);
2797 INC_BOTH (from, from_byte);
2798 rarely_quit (++quit_count);
2800 INC_BOTH (from, from_byte);
2801 if (!depth && sexpflag) goto done;
2802 break;
2803 default:
2804 /* Ignore whitespace, punctuation, quote, endcomment. */
2805 break;
2809 /* Reached end of buffer. Error if within object, return nil if between */
2810 if (depth)
2811 goto lose;
2813 return Qnil;
2815 /* End of object reached */
2816 done:
2817 count--;
2820 while (count < 0)
2822 while (from > stop)
2824 rarely_quit (++quit_count);
2825 DEC_BOTH (from, from_byte);
2826 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2827 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2828 int syntax = SYNTAX_WITH_FLAGS (c);
2829 code = syntax_multibyte (c, multibyte_symbol_p);
2830 if (depth == min_depth)
2831 last_good = from;
2832 comstyle = 0;
2833 comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax);
2834 if (code == Sendcomment)
2835 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0);
2836 if (from > stop && SYNTAX_FLAGS_COMEND_SECOND (syntax)
2837 && prev_char_comend_first (from, from_byte)
2838 && parse_sexp_ignore_comments)
2840 /* We must record the comment style encountered so that
2841 later, we can match only the proper comment begin
2842 sequence of the same style. */
2843 int c2, other_syntax;
2844 DEC_BOTH (from, from_byte);
2845 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2846 code = Sendcomment;
2847 c2 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2848 other_syntax = SYNTAX_WITH_FLAGS (c2);
2849 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax);
2850 comnested |= SYNTAX_FLAGS_COMMENT_NESTED (other_syntax);
2853 /* Quoting turns anything except a comment-ender
2854 into a word character. Note that this cannot be true
2855 if we decremented FROM in the if-statement above. */
2856 if (code != Sendcomment && char_quoted (from, from_byte))
2858 DEC_BOTH (from, from_byte);
2859 code = Sword;
2861 else if (SYNTAX_FLAGS_PREFIX (syntax))
2862 continue;
2864 switch (code)
2866 case Sword:
2867 case Ssymbol:
2868 case Sescape:
2869 case Scharquote:
2870 if (depth || !sexpflag) break;
2871 /* This word counts as a sexp; count object finished
2872 after passing it. */
2873 while (from > stop)
2875 temp_pos = from_byte;
2876 if (! NILP (BVAR (current_buffer, enable_multibyte_characters)))
2877 DEC_POS (temp_pos);
2878 else
2879 temp_pos--;
2880 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
2881 c1 = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
2882 /* Don't allow comment-end to be quoted. */
2883 if (syntax_multibyte (c1, multibyte_symbol_p) == Sendcomment)
2884 goto done2;
2885 quoted = char_quoted (from - 1, temp_pos);
2886 if (quoted)
2888 DEC_BOTH (from, from_byte);
2889 temp_pos = dec_bytepos (temp_pos);
2890 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
2892 c1 = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
2893 if (! quoted)
2894 switch (syntax_multibyte (c1, multibyte_symbol_p))
2896 case Sword: case Ssymbol: case Squote: break;
2897 default: goto done2;
2899 DEC_BOTH (from, from_byte);
2900 rarely_quit (++quit_count);
2902 goto done2;
2904 case Smath:
2905 if (!sexpflag)
2906 break;
2907 if (from > BEGV)
2909 temp_pos = dec_bytepos (from_byte);
2910 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
2911 if (from != stop && c == FETCH_CHAR_AS_MULTIBYTE (temp_pos))
2912 DEC_BOTH (from, from_byte);
2914 if (mathexit)
2916 mathexit = 0;
2917 goto open2;
2919 mathexit = 1;
2920 FALLTHROUGH;
2921 case Sclose:
2922 if (!++depth) goto done2;
2923 break;
2925 case Sopen:
2926 open2:
2927 if (!--depth) goto done2;
2928 if (depth < min_depth)
2929 xsignal3 (Qscan_error,
2930 build_string ("Containing expression ends prematurely"),
2931 make_number (last_good), make_number (from));
2932 break;
2934 case Sendcomment:
2935 if (!parse_sexp_ignore_comments)
2936 break;
2937 found = back_comment (from, from_byte, stop, comnested, comstyle,
2938 &out_charpos, &out_bytepos);
2939 /* FIXME: if !found, it really wasn't a comment-end.
2940 For single-char Sendcomment, we can't do much about it apart
2941 from skipping the char.
2942 For 2-char endcomments, we could try again, taking both
2943 chars as separate entities, but it's a lot of trouble
2944 for very little gain, so we don't bother either. -sm */
2945 if (found)
2946 from = out_charpos, from_byte = out_bytepos;
2947 break;
2949 case Scomment_fence:
2950 case Sstring_fence:
2951 while (1)
2953 if (from == stop)
2954 goto lose;
2955 DEC_BOTH (from, from_byte);
2956 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2957 if (!char_quoted (from, from_byte))
2959 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2960 if (syntax_multibyte (c, multibyte_symbol_p) == code)
2961 break;
2963 rarely_quit (++quit_count);
2965 if (code == Sstring_fence && !depth && sexpflag) goto done2;
2966 break;
2968 case Sstring:
2969 stringterm = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2970 while (true)
2972 if (from == stop)
2973 goto lose;
2974 DEC_BOTH (from, from_byte);
2975 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2976 if (!char_quoted (from, from_byte))
2978 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2979 if (c == stringterm
2980 && (syntax_multibyte (c, multibyte_symbol_p)
2981 == Sstring))
2982 break;
2984 rarely_quit (++quit_count);
2986 if (!depth && sexpflag) goto done2;
2987 break;
2988 default:
2989 /* Ignore whitespace, punctuation, quote, endcomment. */
2990 break;
2994 /* Reached start of buffer. Error if within object, return nil if between */
2995 if (depth)
2996 goto lose;
2998 return Qnil;
3000 done2:
3001 count++;
3005 XSETFASTINT (val, from);
3006 return val;
3008 lose:
3009 xsignal3 (Qscan_error,
3010 build_string ("Unbalanced parentheses"),
3011 make_number (last_good), make_number (from));
3014 DEFUN ("scan-lists", Fscan_lists, Sscan_lists, 3, 3, 0,
3015 doc: /* Scan from character number FROM by COUNT lists.
3016 Scan forward if COUNT is positive, backward if COUNT is negative.
3017 Return the character number of the position thus found.
3019 A \"list", in this context, refers to a balanced parenthetical
3020 grouping, as determined by the syntax table.
3022 If DEPTH is nonzero, treat that as the nesting depth of the starting
3023 point (i.e. the starting point is DEPTH parentheses deep). This
3024 function scans over parentheses until the depth goes to zero COUNT
3025 times. Hence, positive DEPTH moves out that number of levels of
3026 parentheses, while negative DEPTH moves to a deeper level.
3028 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
3030 If we reach the beginning or end of the accessible part of the buffer
3031 before we have scanned over COUNT lists, return nil if the depth at
3032 that point is zero, and signal an error if the depth is nonzero. */)
3033 (Lisp_Object from, Lisp_Object count, Lisp_Object depth)
3035 CHECK_NUMBER (from);
3036 CHECK_NUMBER (count);
3037 CHECK_NUMBER (depth);
3039 return scan_lists (XINT (from), XINT (count), XINT (depth), 0);
3042 DEFUN ("scan-sexps", Fscan_sexps, Sscan_sexps, 2, 2, 0,
3043 doc: /* Scan from character number FROM by COUNT balanced expressions.
3044 If COUNT is negative, scan backwards.
3045 Returns the character number of the position thus found.
3047 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
3049 If the beginning or end of (the accessible part of) the buffer is reached
3050 in the middle of a parenthetical grouping, an error is signaled.
3051 If the beginning or end is reached between groupings
3052 but before count is used up, nil is returned. */)
3053 (Lisp_Object from, Lisp_Object count)
3055 CHECK_NUMBER (from);
3056 CHECK_NUMBER (count);
3058 return scan_lists (XINT (from), XINT (count), 0, 1);
3061 DEFUN ("backward-prefix-chars", Fbackward_prefix_chars, Sbackward_prefix_chars,
3062 0, 0, 0,
3063 doc: /* Move point backward over any number of chars with prefix syntax.
3064 This includes chars with expression prefix syntax class (\\=') and those with
3065 the prefix syntax flag (p). */)
3066 (void)
3068 ptrdiff_t beg = BEGV;
3069 ptrdiff_t opoint = PT;
3070 ptrdiff_t opoint_byte = PT_BYTE;
3071 ptrdiff_t pos = PT;
3072 ptrdiff_t pos_byte = PT_BYTE;
3073 int c;
3075 if (pos <= beg)
3077 SET_PT_BOTH (opoint, opoint_byte);
3079 return Qnil;
3082 SETUP_SYNTAX_TABLE (pos, -1);
3084 DEC_BOTH (pos, pos_byte);
3086 while (!char_quoted (pos, pos_byte)
3087 /* Previous statement updates syntax table. */
3088 && ((c = FETCH_CHAR_AS_MULTIBYTE (pos_byte), SYNTAX (c) == Squote)
3089 || syntax_prefix_flag_p (c)))
3091 opoint = pos;
3092 opoint_byte = pos_byte;
3094 if (pos <= beg)
3095 break;
3096 DEC_BOTH (pos, pos_byte);
3097 rarely_quit (pos);
3100 SET_PT_BOTH (opoint, opoint_byte);
3102 return Qnil;
3106 /* If the character at FROM_BYTE is the second part of a 2-character
3107 comment opener based on PREV_FROM_SYNTAX, update STATE and return
3108 true. */
3109 static bool
3110 in_2char_comment_start (struct lisp_parse_state *state,
3111 int prev_from_syntax,
3112 ptrdiff_t prev_from,
3113 ptrdiff_t from_byte)
3115 int c1, syntax;
3116 if (SYNTAX_FLAGS_COMSTART_FIRST (prev_from_syntax)
3117 && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
3118 syntax = SYNTAX_WITH_FLAGS (c1),
3119 SYNTAX_FLAGS_COMSTART_SECOND (syntax)))
3121 /* Record the comment style we have entered so that only
3122 the comment-end sequence of the same style actually
3123 terminates the comment section. */
3124 state->comstyle
3125 = SYNTAX_FLAGS_COMMENT_STYLE (syntax, prev_from_syntax);
3126 bool comnested = (SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax)
3127 | SYNTAX_FLAGS_COMMENT_NESTED (syntax));
3128 state->incomment = comnested ? 1 : -1;
3129 state->comstr_start = prev_from;
3130 return true;
3132 return false;
3135 /* Parse forward from FROM / FROM_BYTE to END,
3136 assuming that FROM has state STATE,
3137 and return a description of the state of the parse at END.
3138 If STOPBEFORE, stop at the start of an atom.
3139 If COMMENTSTOP is 1, stop at the start of a comment.
3140 If COMMENTSTOP is -1, stop at the start or end of a comment,
3141 after the beginning of a string, or after the end of a string. */
3143 static void
3144 scan_sexps_forward (struct lisp_parse_state *state,
3145 ptrdiff_t from, ptrdiff_t from_byte, ptrdiff_t end,
3146 EMACS_INT targetdepth, bool stopbefore,
3147 int commentstop)
3149 enum syntaxcode code;
3150 struct level { ptrdiff_t last, prev; };
3151 struct level levelstart[100];
3152 struct level *curlevel = levelstart;
3153 struct level *endlevel = levelstart + 100;
3154 EMACS_INT depth; /* Paren depth of current scanning location.
3155 level - levelstart equals this except
3156 when the depth becomes negative. */
3157 EMACS_INT mindepth; /* Lowest DEPTH value seen. */
3158 bool start_quoted = 0; /* True means starting after a char quote. */
3159 Lisp_Object tem;
3160 ptrdiff_t prev_from; /* Keep one character before FROM. */
3161 ptrdiff_t prev_from_byte;
3162 int prev_from_syntax, prev_prev_from_syntax;
3163 bool boundary_stop = commentstop == -1;
3164 bool nofence;
3165 bool found;
3166 ptrdiff_t out_bytepos, out_charpos;
3167 int temp;
3168 unsigned short int quit_count = 0;
3170 prev_from = from;
3171 prev_from_byte = from_byte;
3172 if (from != BEGV)
3173 DEC_BOTH (prev_from, prev_from_byte);
3175 /* Use this macro instead of `from++'. */
3176 #define INC_FROM \
3177 do { prev_from = from; \
3178 prev_from_byte = from_byte; \
3179 temp = FETCH_CHAR_AS_MULTIBYTE (prev_from_byte); \
3180 prev_prev_from_syntax = prev_from_syntax; \
3181 prev_from_syntax = SYNTAX_WITH_FLAGS (temp); \
3182 INC_BOTH (from, from_byte); \
3183 if (from < end) \
3184 UPDATE_SYNTAX_TABLE_FORWARD (from); \
3185 } while (0)
3187 maybe_quit ();
3189 depth = state->depth;
3190 start_quoted = state->quoted;
3191 prev_prev_from_syntax = Smax;
3192 prev_from_syntax = state->prev_syntax;
3194 tem = state->levelstarts;
3195 while (!NILP (tem)) /* >= second enclosing sexps. */
3197 Lisp_Object temhd = Fcar (tem);
3198 if (RANGED_INTEGERP (PTRDIFF_MIN, temhd, PTRDIFF_MAX))
3199 curlevel->last = XINT (temhd);
3200 if (++curlevel == endlevel)
3201 curlevel--; /* error ("Nesting too deep for parser"); */
3202 curlevel->prev = -1;
3203 curlevel->last = -1;
3204 tem = Fcdr (tem);
3206 curlevel->prev = -1;
3207 curlevel->last = -1;
3209 state->quoted = 0;
3210 mindepth = depth;
3212 SETUP_SYNTAX_TABLE (from, 1);
3214 /* Enter the loop at a place appropriate for initial state. */
3216 if (state->incomment)
3217 goto startincomment;
3218 if (state->instring >= 0)
3220 nofence = state->instring != ST_STRING_STYLE;
3221 if (start_quoted)
3222 goto startquotedinstring;
3223 goto startinstring;
3225 else if (start_quoted)
3226 goto startquoted;
3227 else if ((from < end)
3228 && (in_2char_comment_start (state, prev_from_syntax,
3229 prev_from, from_byte)))
3231 INC_FROM;
3232 prev_from_syntax = Smax; /* the syntax has already been "used up". */
3233 goto atcomment;
3236 while (from < end)
3238 rarely_quit (++quit_count);
3239 INC_FROM;
3241 if ((from < end)
3242 && (in_2char_comment_start (state, prev_from_syntax,
3243 prev_from, from_byte)))
3245 INC_FROM;
3246 prev_from_syntax = Smax; /* the syntax has already been "used up". */
3247 goto atcomment;
3250 if (SYNTAX_FLAGS_PREFIX (prev_from_syntax))
3251 continue;
3252 code = prev_from_syntax & 0xff;
3253 switch (code)
3255 case Sescape:
3256 case Scharquote:
3257 if (stopbefore) goto stop; /* this arg means stop at sexp start */
3258 curlevel->last = prev_from;
3259 startquoted:
3260 if (from == end) goto endquoted;
3261 INC_FROM;
3262 goto symstarted;
3263 /* treat following character as a word constituent */
3264 case Sword:
3265 case Ssymbol:
3266 if (stopbefore) goto stop; /* this arg means stop at sexp start */
3267 curlevel->last = prev_from;
3268 symstarted:
3269 while (from < end)
3271 if (in_2char_comment_start (state, prev_from_syntax,
3272 prev_from, from_byte))
3274 INC_FROM;
3275 prev_from_syntax = Smax; /* the syntax has already been "used up". */
3276 goto atcomment;
3279 int symchar = FETCH_CHAR_AS_MULTIBYTE (from_byte);
3280 switch (SYNTAX (symchar))
3282 case Scharquote:
3283 case Sescape:
3284 INC_FROM;
3285 if (from == end) goto endquoted;
3286 break;
3287 case Sword:
3288 case Ssymbol:
3289 case Squote:
3290 break;
3291 default:
3292 goto symdone;
3294 INC_FROM;
3295 rarely_quit (++quit_count);
3297 symdone:
3298 curlevel->prev = curlevel->last;
3299 break;
3301 case Scomment_fence:
3302 /* Record the comment style we have entered so that only
3303 the comment-end sequence of the same style actually
3304 terminates the comment section. */
3305 state->comstyle = ST_COMMENT_STYLE;
3306 state->incomment = -1;
3307 state->comstr_start = prev_from;
3308 goto atcomment;
3309 case Scomment:
3310 state->comstyle = SYNTAX_FLAGS_COMMENT_STYLE (prev_from_syntax, 0);
3311 state->incomment = (SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax) ?
3312 1 : -1);
3313 state->comstr_start = prev_from;
3314 atcomment:
3315 if (commentstop || boundary_stop) goto done;
3316 startincomment:
3317 /* The (from == BEGV) test was to enter the loop in the middle so
3318 that we find a 2-char comment ender even if we start in the
3319 middle of it. We don't want to do that if we're just at the
3320 beginning of the comment (think of (*) ... (*)). */
3321 found = forw_comment (from, from_byte, end,
3322 state->incomment, state->comstyle,
3323 from == BEGV ? 0 : prev_from_syntax,
3324 &out_charpos, &out_bytepos, &state->incomment,
3325 &prev_from_syntax);
3326 from = out_charpos; from_byte = out_bytepos;
3327 /* Beware! prev_from and friends (except prev_from_syntax)
3328 are invalid now. Luckily, the `done' doesn't use them
3329 and the INC_FROM sets them to a sane value without
3330 looking at them. */
3331 if (!found) goto done;
3332 INC_FROM;
3333 state->incomment = 0;
3334 state->comstyle = 0; /* reset the comment style */
3335 prev_from_syntax = Smax; /* For the comment closer */
3336 if (boundary_stop) goto done;
3337 break;
3339 case Sopen:
3340 if (stopbefore) goto stop; /* this arg means stop at sexp start */
3341 depth++;
3342 /* curlevel++->last ran into compiler bug on Apollo */
3343 curlevel->last = prev_from;
3344 if (++curlevel == endlevel)
3345 curlevel--; /* error ("Nesting too deep for parser"); */
3346 curlevel->prev = -1;
3347 curlevel->last = -1;
3348 if (targetdepth == depth) goto done;
3349 break;
3351 case Sclose:
3352 depth--;
3353 if (depth < mindepth)
3354 mindepth = depth;
3355 if (curlevel != levelstart)
3356 curlevel--;
3357 curlevel->prev = curlevel->last;
3358 if (targetdepth == depth) goto done;
3359 break;
3361 case Sstring:
3362 case Sstring_fence:
3363 state->comstr_start = from - 1;
3364 if (stopbefore) goto stop; /* this arg means stop at sexp start */
3365 curlevel->last = prev_from;
3366 state->instring = (code == Sstring
3367 ? (FETCH_CHAR_AS_MULTIBYTE (prev_from_byte))
3368 : ST_STRING_STYLE);
3369 if (boundary_stop) goto done;
3370 startinstring:
3372 nofence = state->instring != ST_STRING_STYLE;
3374 while (1)
3376 int c;
3377 enum syntaxcode c_code;
3379 if (from >= end) goto done;
3380 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
3381 c_code = SYNTAX (c);
3383 /* Check C_CODE here so that if the char has
3384 a syntax-table property which says it is NOT
3385 a string character, it does not end the string. */
3386 if (nofence && c == state->instring && c_code == Sstring)
3387 break;
3389 switch (c_code)
3391 case Sstring_fence:
3392 if (!nofence) goto string_end;
3393 break;
3395 case Scharquote:
3396 case Sescape:
3397 INC_FROM;
3398 startquotedinstring:
3399 if (from >= end) goto endquoted;
3400 break;
3402 default:
3403 break;
3405 INC_FROM;
3406 rarely_quit (++quit_count);
3409 string_end:
3410 state->instring = -1;
3411 curlevel->prev = curlevel->last;
3412 INC_FROM;
3413 if (boundary_stop) goto done;
3414 break;
3416 case Smath:
3417 /* FIXME: We should do something with it. */
3418 break;
3419 default:
3420 /* Ignore whitespace, punctuation, quote, endcomment. */
3421 break;
3424 goto done;
3426 stop: /* Here if stopping before start of sexp. */
3427 from = prev_from; /* We have just fetched the char that starts it; */
3428 from_byte = prev_from_byte;
3429 prev_from_syntax = prev_prev_from_syntax;
3430 goto done; /* but return the position before it. */
3432 endquoted:
3433 state->quoted = 1;
3434 done:
3435 state->depth = depth;
3436 state->mindepth = mindepth;
3437 state->thislevelstart = curlevel->prev;
3438 state->prevlevelstart
3439 = (curlevel == levelstart) ? -1 : (curlevel - 1)->last;
3440 state->location = from;
3441 state->location_byte = from_byte;
3442 state->levelstarts = Qnil;
3443 while (curlevel > levelstart)
3444 state->levelstarts = Fcons (make_number ((--curlevel)->last),
3445 state->levelstarts);
3446 state->prev_syntax = (SYNTAX_FLAGS_COMSTARTEND_FIRST (prev_from_syntax)
3447 || state->quoted) ? prev_from_syntax : Smax;
3450 /* Convert a (lisp) parse state to the internal form used in
3451 scan_sexps_forward. */
3452 static void
3453 internalize_parse_state (Lisp_Object external, struct lisp_parse_state *state)
3455 Lisp_Object tem;
3457 if (NILP (external))
3459 state->depth = 0;
3460 state->instring = -1;
3461 state->incomment = 0;
3462 state->quoted = 0;
3463 state->comstyle = 0; /* comment style a by default. */
3464 state->comstr_start = -1; /* no comment/string seen. */
3465 state->levelstarts = Qnil;
3466 state->prev_syntax = Smax;
3468 else
3470 tem = Fcar (external);
3471 if (!NILP (tem))
3472 state->depth = XINT (tem);
3473 else
3474 state->depth = 0;
3476 external = Fcdr (external);
3477 external = Fcdr (external);
3478 external = Fcdr (external);
3479 tem = Fcar (external);
3480 /* Check whether we are inside string_fence-style string: */
3481 state->instring = (!NILP (tem)
3482 ? (CHARACTERP (tem) ? XFASTINT (tem) : ST_STRING_STYLE)
3483 : -1);
3485 external = Fcdr (external);
3486 tem = Fcar (external);
3487 state->incomment = (!NILP (tem)
3488 ? (INTEGERP (tem) ? XINT (tem) : -1)
3489 : 0);
3491 external = Fcdr (external);
3492 tem = Fcar (external);
3493 state->quoted = !NILP (tem);
3495 /* if the eighth element of the list is nil, we are in comment
3496 style a. If it is non-nil, we are in comment style b */
3497 external = Fcdr (external);
3498 external = Fcdr (external);
3499 tem = Fcar (external);
3500 state->comstyle = (NILP (tem)
3502 : (RANGED_INTEGERP (0, tem, ST_COMMENT_STYLE)
3503 ? XINT (tem)
3504 : ST_COMMENT_STYLE));
3506 external = Fcdr (external);
3507 tem = Fcar (external);
3508 state->comstr_start =
3509 RANGED_INTEGERP (PTRDIFF_MIN, tem, PTRDIFF_MAX) ? XINT (tem) : -1;
3510 external = Fcdr (external);
3511 tem = Fcar (external);
3512 state->levelstarts = tem;
3514 external = Fcdr (external);
3515 tem = Fcar (external);
3516 state->prev_syntax = NILP (tem) ? Smax : XINT (tem);
3520 DEFUN ("parse-partial-sexp", Fparse_partial_sexp, Sparse_partial_sexp, 2, 6, 0,
3521 doc: /* Parse Lisp syntax starting at FROM until TO; return status of parse at TO.
3522 Parsing stops at TO or when certain criteria are met;
3523 point is set to where parsing stops.
3524 If fifth arg OLDSTATE is omitted or nil,
3525 parsing assumes that FROM is the beginning of a function.
3527 Value is a list of elements describing final state of parsing:
3528 0. depth in parens.
3529 1. character address of start of innermost containing list; nil if none.
3530 2. character address of start of last complete sexp terminated.
3531 3. non-nil if inside a string.
3532 (it is the character that will terminate the string,
3533 or t if the string should be terminated by a generic string delimiter.)
3534 4. nil if outside a comment, t if inside a non-nestable comment,
3535 else an integer (the current comment nesting).
3536 5. t if following a quote character.
3537 6. the minimum paren-depth encountered during this scan.
3538 7. style of comment, if any.
3539 8. character address of start of comment or string; nil if not in one.
3540 9. List of positions of currently open parens, outermost first.
3541 10. When the last position scanned holds the first character of a
3542 (potential) two character construct, the syntax of that position,
3543 otherwise nil. That construct can be a two character comment
3544 delimiter or an Escaped or Char-quoted character.
3545 11..... Possible further internal information used by `parse-partial-sexp'.
3547 If third arg TARGETDEPTH is non-nil, parsing stops if the depth
3548 in parentheses becomes equal to TARGETDEPTH.
3549 Fourth arg STOPBEFORE non-nil means stop when we come to
3550 any character that starts a sexp.
3551 Fifth arg OLDSTATE is a list like what this function returns.
3552 It is used to initialize the state of the parse. Elements number 1, 2, 6
3553 are ignored.
3554 Sixth arg COMMENTSTOP non-nil means stop after the start of a comment.
3555 If it is the symbol `syntax-table', stop after the start of a comment or a
3556 string, or after end of a comment or a string. */)
3557 (Lisp_Object from, Lisp_Object to, Lisp_Object targetdepth,
3558 Lisp_Object stopbefore, Lisp_Object oldstate, Lisp_Object commentstop)
3560 struct lisp_parse_state state;
3561 EMACS_INT target;
3563 if (!NILP (targetdepth))
3565 CHECK_NUMBER (targetdepth);
3566 target = XINT (targetdepth);
3568 else
3569 target = TYPE_MINIMUM (EMACS_INT); /* We won't reach this depth. */
3571 validate_region (&from, &to);
3572 internalize_parse_state (oldstate, &state);
3573 scan_sexps_forward (&state, XINT (from), CHAR_TO_BYTE (XINT (from)),
3574 XINT (to),
3575 target, !NILP (stopbefore),
3576 (NILP (commentstop)
3577 ? 0 : (EQ (commentstop, Qsyntax_table) ? -1 : 1)));
3579 SET_PT_BOTH (state.location, state.location_byte);
3581 return
3582 Fcons (make_number (state.depth),
3583 Fcons (state.prevlevelstart < 0
3584 ? Qnil : make_number (state.prevlevelstart),
3585 Fcons (state.thislevelstart < 0
3586 ? Qnil : make_number (state.thislevelstart),
3587 Fcons (state.instring >= 0
3588 ? (state.instring == ST_STRING_STYLE
3589 ? Qt : make_number (state.instring)) : Qnil,
3590 Fcons (state.incomment < 0 ? Qt :
3591 (state.incomment == 0 ? Qnil :
3592 make_number (state.incomment)),
3593 Fcons (state.quoted ? Qt : Qnil,
3594 Fcons (make_number (state.mindepth),
3595 Fcons ((state.comstyle
3596 ? (state.comstyle == ST_COMMENT_STYLE
3597 ? Qsyntax_table
3598 : make_number (state.comstyle))
3599 : Qnil),
3600 Fcons (((state.incomment
3601 || (state.instring >= 0))
3602 ? make_number (state.comstr_start)
3603 : Qnil),
3604 Fcons (state.levelstarts,
3605 Fcons (state.prev_syntax == Smax
3606 ? Qnil
3607 : make_number (state.prev_syntax),
3608 Qnil)))))))))));
3611 void
3612 init_syntax_once (void)
3614 register int i, c;
3615 Lisp_Object temp;
3617 /* This has to be done here, before we call Fmake_char_table. */
3618 DEFSYM (Qsyntax_table, "syntax-table");
3620 /* Create objects which can be shared among syntax tables. */
3621 Vsyntax_code_object = make_uninit_vector (Smax);
3622 for (i = 0; i < Smax; i++)
3623 ASET (Vsyntax_code_object, i, Fcons (make_number (i), Qnil));
3625 /* Now we are ready to set up this property, so we can
3626 create syntax tables. */
3627 Fput (Qsyntax_table, Qchar_table_extra_slots, make_number (0));
3629 temp = AREF (Vsyntax_code_object, Swhitespace);
3631 Vstandard_syntax_table = Fmake_char_table (Qsyntax_table, temp);
3633 /* Control characters should not be whitespace. */
3634 temp = AREF (Vsyntax_code_object, Spunct);
3635 for (i = 0; i <= ' ' - 1; i++)
3636 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3637 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, 0177, temp);
3639 /* Except that a few really are whitespace. */
3640 temp = AREF (Vsyntax_code_object, Swhitespace);
3641 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ' ', temp);
3642 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\t', temp);
3643 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\n', temp);
3644 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, 015, temp);
3645 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, 014, temp);
3647 temp = AREF (Vsyntax_code_object, Sword);
3648 for (i = 'a'; i <= 'z'; i++)
3649 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3650 for (i = 'A'; i <= 'Z'; i++)
3651 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3652 for (i = '0'; i <= '9'; i++)
3653 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3655 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '$', temp);
3656 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '%', temp);
3658 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '(',
3659 Fcons (make_number (Sopen), make_number (')')));
3660 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ')',
3661 Fcons (make_number (Sclose), make_number ('(')));
3662 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '[',
3663 Fcons (make_number (Sopen), make_number (']')));
3664 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ']',
3665 Fcons (make_number (Sclose), make_number ('[')));
3666 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '{',
3667 Fcons (make_number (Sopen), make_number ('}')));
3668 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '}',
3669 Fcons (make_number (Sclose), make_number ('{')));
3670 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '"',
3671 Fcons (make_number (Sstring), Qnil));
3672 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\\',
3673 Fcons (make_number (Sescape), Qnil));
3675 temp = AREF (Vsyntax_code_object, Ssymbol);
3676 for (i = 0; i < 10; i++)
3678 c = "_-+*/&|<>="[i];
3679 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
3682 temp = AREF (Vsyntax_code_object, Spunct);
3683 for (i = 0; i < 12; i++)
3685 c = ".,;:?!#@~^'`"[i];
3686 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
3689 /* All multibyte characters have syntax `word' by default. */
3690 temp = AREF (Vsyntax_code_object, Sword);
3691 char_table_set_range (Vstandard_syntax_table, 0x80, MAX_CHAR, temp);
3694 void
3695 syms_of_syntax (void)
3697 DEFSYM (Qsyntax_table_p, "syntax-table-p");
3699 staticpro (&Vsyntax_code_object);
3701 staticpro (&gl_state.object);
3702 staticpro (&gl_state.global_code);
3703 staticpro (&gl_state.current_syntax_table);
3704 staticpro (&gl_state.old_prop);
3706 /* Defined in regex.c. */
3707 staticpro (&re_match_object);
3709 DEFSYM (Qscan_error, "scan-error");
3710 Fput (Qscan_error, Qerror_conditions,
3711 listn (CONSTYPE_PURE, 2, Qscan_error, Qerror));
3712 Fput (Qscan_error, Qerror_message,
3713 build_pure_c_string ("Scan error"));
3715 DEFVAR_BOOL ("parse-sexp-ignore-comments", parse_sexp_ignore_comments,
3716 doc: /* Non-nil means `forward-sexp', etc., should treat comments as whitespace. */);
3718 DEFVAR_BOOL ("parse-sexp-lookup-properties", parse_sexp_lookup_properties,
3719 doc: /* Non-nil means `forward-sexp', etc., obey `syntax-table' property.
3720 Otherwise, that text property is simply ignored.
3721 See the info node `(elisp)Syntax Properties' for a description of the
3722 `syntax-table' property. */);
3724 DEFVAR_INT ("syntax-propertize--done", syntax_propertize__done,
3725 doc: /* Position up to which syntax-table properties have been set. */);
3726 syntax_propertize__done = -1;
3727 DEFSYM (Qinternal__syntax_propertize, "internal--syntax-propertize");
3728 Fmake_variable_buffer_local (intern ("syntax-propertize--done"));
3730 words_include_escapes = 0;
3731 DEFVAR_BOOL ("words-include-escapes", words_include_escapes,
3732 doc: /* Non-nil means `forward-word', etc., should treat escape chars part of words. */);
3734 DEFVAR_BOOL ("multibyte-syntax-as-symbol", multibyte_syntax_as_symbol,
3735 doc: /* Non-nil means `scan-sexps' treats all multibyte characters as symbol. */);
3736 multibyte_syntax_as_symbol = 0;
3738 DEFVAR_BOOL ("open-paren-in-column-0-is-defun-start",
3739 open_paren_in_column_0_is_defun_start,
3740 doc: /* Non-nil means an open paren in column 0 denotes the start of a defun. */);
3741 open_paren_in_column_0_is_defun_start = 1;
3744 DEFVAR_LISP ("find-word-boundary-function-table",
3745 Vfind_word_boundary_function_table,
3746 doc: /*
3747 Char table of functions to search for the word boundary.
3748 Each function is called with two arguments; POS and LIMIT.
3749 POS and LIMIT are character positions in the current buffer.
3751 If POS is less than LIMIT, POS is at the first character of a word,
3752 and the return value of a function should be a position after the
3753 last character of that word.
3755 If POS is not less than LIMIT, POS is at the last character of a word,
3756 and the return value of a function should be a position at the first
3757 character of that word.
3759 In both cases, LIMIT bounds the search. */);
3760 Vfind_word_boundary_function_table = Fmake_char_table (Qnil, Qnil);
3762 DEFVAR_BOOL ("comment-end-can-be-escaped", Vcomment_end_can_be_escaped,
3763 doc: /* Non-nil means an escaped ender inside a comment doesn't end the comment. */);
3764 Vcomment_end_can_be_escaped = 0;
3765 DEFSYM (Qcomment_end_can_be_escaped, "comment-end-can-be-escaped");
3766 Fmake_variable_buffer_local (Qcomment_end_can_be_escaped);
3768 defsubr (&Ssyntax_table_p);
3769 defsubr (&Ssyntax_table);
3770 defsubr (&Sstandard_syntax_table);
3771 defsubr (&Scopy_syntax_table);
3772 defsubr (&Sset_syntax_table);
3773 defsubr (&Schar_syntax);
3774 defsubr (&Smatching_paren);
3775 defsubr (&Sstring_to_syntax);
3776 defsubr (&Smodify_syntax_entry);
3777 defsubr (&Sinternal_describe_syntax_value);
3779 defsubr (&Sforward_word);
3781 defsubr (&Sskip_chars_forward);
3782 defsubr (&Sskip_chars_backward);
3783 defsubr (&Sskip_syntax_forward);
3784 defsubr (&Sskip_syntax_backward);
3786 defsubr (&Sforward_comment);
3787 defsubr (&Sscan_lists);
3788 defsubr (&Sscan_sexps);
3789 defsubr (&Sbackward_prefix_chars);
3790 defsubr (&Sparse_partial_sexp);