Do not use switch-to-buffer for working in a temp buffer
[emacs.git] / src / syntax.c
blob7aa43e6e5c77fbb954a0cb20392f9cec9670b876
1 /* GNU Emacs routines to deal with syntax tables; also word and list parsing.
2 Copyright (C) 1985, 1987, 1993-1995, 1997-1999, 2001-2017 Free
3 Software Foundation, Inc.
5 This file is part of GNU Emacs.
7 GNU Emacs is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or (at
10 your option) any later version.
12 GNU Emacs is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
21 #include <config.h>
23 #include "lisp.h"
24 #include "character.h"
25 #include "buffer.h"
26 #include "regex.h"
27 #include "syntax.h"
28 #include "intervals.h"
29 #include "category.h"
31 /* Make syntax table lookup grant data in gl_state. */
32 #define SYNTAX(c) syntax_property (c, 1)
33 #define SYNTAX_ENTRY(c) syntax_property_entry (c, 1)
34 #define SYNTAX_WITH_FLAGS(c) syntax_property_with_flags (c, 1)
36 /* Eight single-bit flags have the following meanings:
37 1. This character is the first of a two-character comment-start sequence.
38 2. This character is the second of a two-character comment-start sequence.
39 3. This character is the first of a two-character comment-end sequence.
40 4. This character is the second of a two-character comment-end sequence.
41 5. This character is a prefix, for backward-prefix-chars.
42 6. The char is part of a delimiter for comments of style "b".
43 7. This character is part of a nestable comment sequence.
44 8. The char is part of a delimiter for comments of style "c".
45 Note that any two-character sequence whose first character has flag 1
46 and whose second character has flag 2 will be interpreted as a comment start.
48 Bits 6 and 8 discriminate among different comment styles.
49 Languages such as C++ allow two orthogonal syntax start/end pairs
50 and bit 6 determines whether a comment-end or Scommentend
51 ends style a or b. Comment markers can start style a, b, c, or bc.
52 Style a is always the default.
53 For 2-char comment markers, the style b flag is looked up only on the second
54 char of the comment marker and on the first char of the comment ender.
55 For style c (like the nested flag), the flag can be placed on any of
56 the chars. */
58 /* These functions extract specific flags from an integer
59 that holds the syntax code and the flags. */
61 static bool
62 SYNTAX_FLAGS_COMSTART_FIRST (int flags)
64 return (flags >> 16) & 1;
66 static bool
67 SYNTAX_FLAGS_COMSTART_SECOND (int flags)
69 return (flags >> 17) & 1;
71 static bool
72 SYNTAX_FLAGS_COMEND_FIRST (int flags)
74 return (flags >> 18) & 1;
76 static bool
77 SYNTAX_FLAGS_COMEND_SECOND (int flags)
79 return (flags >> 19) & 1;
81 static bool
82 SYNTAX_FLAGS_COMSTARTEND_FIRST (int flags)
84 return (flags & 0x50000) != 0;
86 static bool
87 SYNTAX_FLAGS_PREFIX (int flags)
89 return (flags >> 20) & 1;
91 static bool
92 SYNTAX_FLAGS_COMMENT_STYLEB (int flags)
94 return (flags >> 21) & 1;
96 static bool
97 SYNTAX_FLAGS_COMMENT_STYLEC (int flags)
99 return (flags >> 23) & 1;
101 static int
102 SYNTAX_FLAGS_COMMENT_STYLEC2 (int flags)
104 return (flags >> 22) & 2; /* SYNTAX_FLAGS_COMMENT_STYLEC (flags) * 2 */
106 static bool
107 SYNTAX_FLAGS_COMMENT_NESTED (int flags)
109 return (flags >> 22) & 1;
112 /* FLAGS should be the flags of the main char of the comment marker, e.g.
113 the second for comstart and the first for comend. */
114 static int
115 SYNTAX_FLAGS_COMMENT_STYLE (int flags, int other_flags)
117 return (SYNTAX_FLAGS_COMMENT_STYLEB (flags)
118 | SYNTAX_FLAGS_COMMENT_STYLEC2 (flags)
119 | SYNTAX_FLAGS_COMMENT_STYLEC2 (other_flags));
122 /* Extract a particular flag for a given character. */
124 static bool
125 SYNTAX_COMEND_FIRST (int c)
127 return SYNTAX_FLAGS_COMEND_FIRST (SYNTAX_WITH_FLAGS (c));
130 /* We use these constants in place for comment-style and
131 string-ender-char to distinguish comments/strings started by
132 comment_fence and string_fence codes. */
134 enum
136 ST_COMMENT_STYLE = 256 + 1,
137 ST_STRING_STYLE = 256 + 2
140 /* This is the internal form of the parse state used in parse-partial-sexp. */
142 struct lisp_parse_state
144 EMACS_INT depth; /* Depth at end of parsing. */
145 int instring; /* -1 if not within string, else desired terminator. */
146 EMACS_INT incomment; /* -1 if in unnestable comment else comment nesting */
147 int comstyle; /* comment style a=0, or b=1, or ST_COMMENT_STYLE. */
148 bool quoted; /* True if just after an escape char at end of parsing. */
149 EMACS_INT mindepth; /* Minimum depth seen while scanning. */
150 /* Char number of most recent start-of-expression at current level */
151 ptrdiff_t thislevelstart;
152 /* Char number of start of containing expression */
153 ptrdiff_t prevlevelstart;
154 ptrdiff_t location; /* Char number at which parsing stopped. */
155 ptrdiff_t location_byte; /* Corresponding byte position. */
156 ptrdiff_t comstr_start; /* Position of last comment/string starter. */
157 Lisp_Object levelstarts; /* Char numbers of starts-of-expression
158 of levels (starting from outermost). */
159 int prev_syntax; /* Syntax of previous position scanned, when
160 that position (potentially) holds the first char
161 of a 2-char construct, i.e. comment delimiter
162 or Sescape, etc. Smax otherwise. */
165 /* These variables are a cache for finding the start of a defun.
166 find_start_pos is the place for which the defun start was found.
167 find_start_value is the defun start position found for it.
168 find_start_value_byte is the corresponding byte position.
169 find_start_buffer is the buffer it was found in.
170 find_start_begv is the BEGV value when it was found.
171 find_start_modiff is the value of MODIFF when it was found. */
173 static ptrdiff_t find_start_pos;
174 static ptrdiff_t find_start_value;
175 static ptrdiff_t find_start_value_byte;
176 static struct buffer *find_start_buffer;
177 static ptrdiff_t find_start_begv;
178 static EMACS_INT find_start_modiff;
181 static Lisp_Object skip_chars (bool, Lisp_Object, Lisp_Object, bool);
182 static Lisp_Object skip_syntaxes (bool, Lisp_Object, Lisp_Object);
183 static Lisp_Object scan_lists (EMACS_INT, EMACS_INT, EMACS_INT, bool);
184 static void scan_sexps_forward (struct lisp_parse_state *,
185 ptrdiff_t, ptrdiff_t, ptrdiff_t, EMACS_INT,
186 bool, int);
187 static void internalize_parse_state (Lisp_Object, struct lisp_parse_state *);
188 static bool in_classes (int, Lisp_Object);
189 static void parse_sexp_propertize (ptrdiff_t charpos);
191 /* This setter is used only in this file, so it can be private. */
192 static void
193 bset_syntax_table (struct buffer *b, Lisp_Object val)
195 b->syntax_table_ = val;
198 /* Whether the syntax of the character C has the prefix flag set. */
199 bool
200 syntax_prefix_flag_p (int c)
202 return SYNTAX_FLAGS_PREFIX (SYNTAX_WITH_FLAGS (c));
205 struct gl_state_s gl_state; /* Global state of syntax parser. */
207 enum { INTERVALS_AT_ONCE = 10 }; /* 1 + max-number of intervals
208 to scan to property-change. */
210 /* Set the syntax entry VAL for char C in table TABLE. */
212 static void
213 SET_RAW_SYNTAX_ENTRY (Lisp_Object table, int c, Lisp_Object val)
215 CHAR_TABLE_SET (table, c, val);
218 /* Set the syntax entry VAL for char-range RANGE in table TABLE.
219 RANGE is a cons (FROM . TO) specifying the range of characters. */
221 static void
222 SET_RAW_SYNTAX_ENTRY_RANGE (Lisp_Object table, Lisp_Object range,
223 Lisp_Object val)
225 Fset_char_table_range (table, range, val);
228 /* Extract the information from the entry for character C
229 in the current syntax table. */
231 static Lisp_Object
232 SYNTAX_MATCH (int c)
234 Lisp_Object ent = SYNTAX_ENTRY (c);
235 return CONSP (ent) ? XCDR (ent) : Qnil;
238 /* This should be called with FROM at the start of forward
239 search, or after the last position of the backward search. It
240 makes sure that the first char is picked up with correct table, so
241 one does not need to call UPDATE_SYNTAX_TABLE immediately after the
242 call.
243 Sign of COUNT gives the direction of the search.
246 static void
247 SETUP_SYNTAX_TABLE (ptrdiff_t from, ptrdiff_t count)
249 SETUP_BUFFER_SYNTAX_TABLE ();
250 gl_state.b_property = BEGV;
251 gl_state.e_property = ZV + 1;
252 gl_state.object = Qnil;
253 gl_state.offset = 0;
254 if (parse_sexp_lookup_properties)
256 if (count > 0)
257 update_syntax_table_forward (from, true, Qnil);
258 else if (from > BEGV)
260 update_syntax_table (from - 1, count, true, Qnil);
261 parse_sexp_propertize (from - 1);
266 /* Same as above, but in OBJECT. If OBJECT is nil, use current buffer.
267 If it is t (which is only used in fast_c_string_match_ignore_case),
268 ignore properties altogether.
270 This is meant for regex.c to use. For buffers, regex.c passes arguments
271 to the UPDATE_SYNTAX_TABLE functions which are relative to BEGV.
272 So if it is a buffer, we set the offset field to BEGV. */
274 void
275 SETUP_SYNTAX_TABLE_FOR_OBJECT (Lisp_Object object,
276 ptrdiff_t from, ptrdiff_t count)
278 SETUP_BUFFER_SYNTAX_TABLE ();
279 gl_state.object = object;
280 if (BUFFERP (gl_state.object))
282 struct buffer *buf = XBUFFER (gl_state.object);
283 gl_state.b_property = 1;
284 gl_state.e_property = BUF_ZV (buf) - BUF_BEGV (buf) + 1;
285 gl_state.offset = BUF_BEGV (buf) - 1;
287 else if (NILP (gl_state.object))
289 gl_state.b_property = 1;
290 gl_state.e_property = ZV - BEGV + 1;
291 gl_state.offset = BEGV - 1;
293 else if (EQ (gl_state.object, Qt))
295 gl_state.b_property = 0;
296 gl_state.e_property = PTRDIFF_MAX;
297 gl_state.offset = 0;
299 else
301 gl_state.b_property = 0;
302 gl_state.e_property = 1 + SCHARS (gl_state.object);
303 gl_state.offset = 0;
305 if (parse_sexp_lookup_properties)
306 update_syntax_table (from + gl_state.offset - (count <= 0),
307 count, 1, gl_state.object);
310 /* Update gl_state to an appropriate interval which contains CHARPOS. The
311 sign of COUNT give the relative position of CHARPOS wrt the previously
312 valid interval. If INIT, only [be]_property fields of gl_state are
313 valid at start, the rest is filled basing on OBJECT.
315 `gl_state.*_i' are the intervals, and CHARPOS is further in the search
316 direction than the intervals - or in an interval. We update the
317 current syntax-table basing on the property of this interval, and
318 update the interval to start further than CHARPOS - or be
319 NULL. We also update lim_property to be the next value of
320 charpos to call this subroutine again - or be before/after the
321 start/end of OBJECT. */
323 void
324 update_syntax_table (ptrdiff_t charpos, EMACS_INT count, bool init,
325 Lisp_Object object)
327 Lisp_Object tmp_table;
328 int cnt = 0;
329 bool invalidate = true;
330 INTERVAL i;
332 if (init)
334 gl_state.old_prop = Qnil;
335 gl_state.start = gl_state.b_property;
336 gl_state.stop = gl_state.e_property;
337 i = interval_of (charpos, object);
338 gl_state.backward_i = gl_state.forward_i = i;
339 invalidate = false;
340 if (!i)
341 return;
342 /* interval_of updates only ->position of the return value, so
343 update the parents manually to speed up update_interval. */
344 while (!NULL_PARENT (i))
346 if (AM_RIGHT_CHILD (i))
347 INTERVAL_PARENT (i)->position = i->position
348 - LEFT_TOTAL_LENGTH (i) + TOTAL_LENGTH (i) /* right end */
349 - TOTAL_LENGTH (INTERVAL_PARENT (i))
350 + LEFT_TOTAL_LENGTH (INTERVAL_PARENT (i));
351 else
352 INTERVAL_PARENT (i)->position = i->position - LEFT_TOTAL_LENGTH (i)
353 + TOTAL_LENGTH (i);
354 i = INTERVAL_PARENT (i);
356 i = gl_state.forward_i;
357 gl_state.b_property = i->position - gl_state.offset;
358 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
359 goto update;
361 i = count > 0 ? gl_state.forward_i : gl_state.backward_i;
363 /* We are guaranteed to be called with CHARPOS either in i,
364 or further off. */
365 if (!i)
366 error ("Error in syntax_table logic for to-the-end intervals");
367 else if (charpos < i->position) /* Move left. */
369 if (count > 0)
370 error ("Error in syntax_table logic for intervals <-");
371 /* Update the interval. */
372 i = update_interval (i, charpos);
373 if (INTERVAL_LAST_POS (i) != gl_state.b_property)
375 invalidate = false;
376 gl_state.forward_i = i;
377 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
380 else if (charpos >= INTERVAL_LAST_POS (i)) /* Move right. */
382 if (count < 0)
383 error ("Error in syntax_table logic for intervals ->");
384 /* Update the interval. */
385 i = update_interval (i, charpos);
386 if (i->position != gl_state.e_property)
388 invalidate = false;
389 gl_state.backward_i = i;
390 gl_state.b_property = i->position - gl_state.offset;
394 update:
395 tmp_table = textget (i->plist, Qsyntax_table);
397 if (invalidate)
398 invalidate = !EQ (tmp_table, gl_state.old_prop); /* Need to invalidate? */
400 if (invalidate) /* Did not get to adjacent interval. */
401 { /* with the same table => */
402 /* invalidate the old range. */
403 if (count > 0)
405 gl_state.backward_i = i;
406 gl_state.b_property = i->position - gl_state.offset;
408 else
410 gl_state.forward_i = i;
411 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
415 if (!EQ (tmp_table, gl_state.old_prop))
417 gl_state.current_syntax_table = tmp_table;
418 gl_state.old_prop = tmp_table;
419 if (EQ (Fsyntax_table_p (tmp_table), Qt))
421 gl_state.use_global = 0;
423 else if (CONSP (tmp_table))
425 gl_state.use_global = 1;
426 gl_state.global_code = tmp_table;
428 else
430 gl_state.use_global = 0;
431 gl_state.current_syntax_table = BVAR (current_buffer, syntax_table);
435 while (i)
437 if (cnt && !EQ (tmp_table, textget (i->plist, Qsyntax_table)))
439 if (count > 0)
441 gl_state.e_property = i->position - gl_state.offset;
442 gl_state.forward_i = i;
444 else
446 gl_state.b_property
447 = i->position + LENGTH (i) - gl_state.offset;
448 gl_state.backward_i = i;
450 return;
452 else if (cnt == INTERVALS_AT_ONCE)
454 if (count > 0)
456 gl_state.e_property
457 = i->position + LENGTH (i) - gl_state.offset
458 /* e_property at EOB is not set to ZV but to ZV+1, so that
459 we can do INC(from);UPDATE_SYNTAX_TABLE_FORWARD without
460 having to check eob between the two. */
461 + (next_interval (i) ? 0 : 1);
462 gl_state.forward_i = i;
464 else
466 gl_state.b_property = i->position - gl_state.offset;
467 gl_state.backward_i = i;
469 return;
471 cnt++;
472 i = count > 0 ? next_interval (i) : previous_interval (i);
474 eassert (i == NULL); /* This property goes to the end. */
475 if (count > 0)
477 gl_state.e_property = gl_state.stop;
478 gl_state.forward_i = i;
480 else
481 gl_state.b_property = gl_state.start;
484 static void
485 parse_sexp_propertize (ptrdiff_t charpos)
487 EMACS_INT zv = ZV;
488 if (syntax_propertize__done <= charpos
489 && syntax_propertize__done < zv)
491 EMACS_INT modiffs = CHARS_MODIFF;
492 safe_call1 (Qinternal__syntax_propertize,
493 make_number (min (zv, 1 + charpos)));
494 if (modiffs != CHARS_MODIFF)
495 error ("parse-sexp-propertize-function modified the buffer!");
496 if (syntax_propertize__done <= charpos
497 && syntax_propertize__done < zv)
498 error ("parse-sexp-propertize-function did not move"
499 " syntax-propertize--done");
500 SETUP_SYNTAX_TABLE (charpos, 1);
502 else if (gl_state.e_property > syntax_propertize__done)
504 gl_state.e_property = syntax_propertize__done;
505 gl_state.e_property_truncated = true;
507 else if (gl_state.e_property_truncated
508 && gl_state.e_property < syntax_propertize__done)
509 { /* When moving backward, e_property might be set without resetting
510 e_property_truncated, so the e_property_truncated flag may
511 occasionally be left raised spuriously. This should be rare. */
512 gl_state.e_property_truncated = false;
513 update_syntax_table_forward (charpos, false, Qnil);
517 void
518 update_syntax_table_forward (ptrdiff_t charpos, bool init,
519 Lisp_Object object)
521 if (gl_state.e_property_truncated)
523 eassert (NILP (object));
524 eassert (charpos >= gl_state.e_property);
525 parse_sexp_propertize (charpos);
527 else
529 update_syntax_table (charpos, 1, init, object);
530 if (NILP (object) && gl_state.e_property > syntax_propertize__done)
531 parse_sexp_propertize (charpos);
535 /* Returns true if char at CHARPOS is quoted.
536 Global syntax-table data should be set up already to be good at CHARPOS
537 or after. On return global syntax data is good for lookup at CHARPOS. */
539 static bool
540 char_quoted (ptrdiff_t charpos, ptrdiff_t bytepos)
542 enum syntaxcode code;
543 ptrdiff_t beg = BEGV;
544 bool quoted = 0;
545 ptrdiff_t orig = charpos;
547 while (charpos > beg)
549 int c;
550 DEC_BOTH (charpos, bytepos);
552 UPDATE_SYNTAX_TABLE_BACKWARD (charpos);
553 c = FETCH_CHAR_AS_MULTIBYTE (bytepos);
554 code = SYNTAX (c);
555 if (! (code == Scharquote || code == Sescape))
556 break;
558 quoted = !quoted;
561 UPDATE_SYNTAX_TABLE (orig);
562 return quoted;
565 /* Return the bytepos one character before BYTEPOS.
566 We assume that BYTEPOS is not at the start of the buffer. */
568 static ptrdiff_t
569 dec_bytepos (ptrdiff_t bytepos)
571 if (NILP (BVAR (current_buffer, enable_multibyte_characters)))
572 return bytepos - 1;
574 DEC_POS (bytepos);
575 return bytepos;
578 /* Return a defun-start position before POS and not too far before.
579 It should be the last one before POS, or nearly the last.
581 When open_paren_in_column_0_is_defun_start is nonzero,
582 only the beginning of the buffer is treated as a defun-start.
584 We record the information about where the scan started
585 and what its result was, so that another call in the same area
586 can return the same value very quickly.
588 There is no promise at which position the global syntax data is
589 valid on return from the subroutine, so the caller should explicitly
590 update the global data. */
592 static ptrdiff_t
593 find_defun_start (ptrdiff_t pos, ptrdiff_t pos_byte)
595 ptrdiff_t opoint = PT, opoint_byte = PT_BYTE;
597 /* Use previous finding, if it's valid and applies to this inquiry. */
598 if (current_buffer == find_start_buffer
599 /* Reuse the defun-start even if POS is a little farther on.
600 POS might be in the next defun, but that's ok.
601 Our value may not be the best possible, but will still be usable. */
602 && pos <= find_start_pos + 1000
603 && pos >= find_start_value
604 && BEGV == find_start_begv
605 && MODIFF == find_start_modiff)
606 return find_start_value;
608 if (!open_paren_in_column_0_is_defun_start)
610 find_start_value = BEGV;
611 find_start_value_byte = BEGV_BYTE;
612 goto found;
615 /* Back up to start of line. */
616 scan_newline (pos, pos_byte, BEGV, BEGV_BYTE, -1, 1);
618 /* We optimize syntax-table lookup for rare updates. Thus we accept
619 only those `^\s(' which are good in global _and_ text-property
620 syntax-tables. */
621 SETUP_BUFFER_SYNTAX_TABLE ();
622 while (PT > BEGV)
624 /* Open-paren at start of line means we may have found our
625 defun-start. */
626 int c = FETCH_CHAR_AS_MULTIBYTE (PT_BYTE);
627 if (SYNTAX (c) == Sopen)
629 SETUP_SYNTAX_TABLE (PT + 1, -1); /* Try again... */
630 c = FETCH_CHAR_AS_MULTIBYTE (PT_BYTE);
631 if (SYNTAX (c) == Sopen)
632 break;
633 /* Now fallback to the default value. */
634 SETUP_BUFFER_SYNTAX_TABLE ();
636 /* Move to beg of previous line. */
637 scan_newline (PT, PT_BYTE, BEGV, BEGV_BYTE, -2, 1);
640 /* Record what we found, for the next try. */
641 find_start_value = PT;
642 find_start_value_byte = PT_BYTE;
643 TEMP_SET_PT_BOTH (opoint, opoint_byte);
645 found:
646 find_start_buffer = current_buffer;
647 find_start_modiff = MODIFF;
648 find_start_begv = BEGV;
649 find_start_pos = pos;
651 return find_start_value;
654 /* Return the SYNTAX_COMEND_FIRST of the character before POS, POS_BYTE. */
656 static bool
657 prev_char_comend_first (ptrdiff_t pos, ptrdiff_t pos_byte)
659 int c;
660 bool val;
662 DEC_BOTH (pos, pos_byte);
663 UPDATE_SYNTAX_TABLE_BACKWARD (pos);
664 c = FETCH_CHAR (pos_byte);
665 val = SYNTAX_COMEND_FIRST (c);
666 UPDATE_SYNTAX_TABLE_FORWARD (pos + 1);
667 return val;
670 /* Check whether charpos FROM is at the end of a comment.
671 FROM_BYTE is the bytepos corresponding to FROM.
672 Do not move back before STOP.
674 Return true if we find a comment ending at FROM/FROM_BYTE.
676 If successful, store the charpos of the comment's beginning
677 into *CHARPOS_PTR, and the bytepos into *BYTEPOS_PTR.
679 Global syntax data remains valid for backward search starting at
680 the returned value (or at FROM, if the search was not successful). */
682 static bool
683 back_comment (ptrdiff_t from, ptrdiff_t from_byte, ptrdiff_t stop,
684 bool comnested, int comstyle, ptrdiff_t *charpos_ptr,
685 ptrdiff_t *bytepos_ptr)
687 /* Look back, counting the parity of string-quotes,
688 and recording the comment-starters seen.
689 When we reach a safe place, assume that's not in a string;
690 then step the main scan to the earliest comment-starter seen
691 an even number of string quotes away from the safe place.
693 OFROM[I] is position of the earliest comment-starter seen
694 which is I+2X quotes from the comment-end.
695 PARITY is current parity of quotes from the comment end. */
696 int string_style = -1; /* Presumed outside of any string. */
697 bool string_lossage = 0;
698 /* Not a real lossage: indicates that we have passed a matching comment
699 starter plus a non-matching comment-ender, meaning that any matching
700 comment-starter we might see later could be a false positive (hidden
701 inside another comment).
702 Test case: { a (* b } c (* d *) */
703 bool comment_lossage = 0;
704 ptrdiff_t comment_end = from;
705 ptrdiff_t comment_end_byte = from_byte;
706 ptrdiff_t comstart_pos = 0;
707 ptrdiff_t comstart_byte;
708 /* Place where the containing defun starts,
709 or 0 if we didn't come across it yet. */
710 ptrdiff_t defun_start = 0;
711 ptrdiff_t defun_start_byte = 0;
712 enum syntaxcode code;
713 ptrdiff_t nesting = 1; /* Current comment nesting. */
714 int c;
715 int syntax = 0;
716 unsigned short int quit_count = 0;
718 /* FIXME: A }} comment-ender style leads to incorrect behavior
719 in the case of {{ c }}} because we ignore the last two chars which are
720 assumed to be comment-enders although they aren't. */
722 /* At beginning of range to scan, we're outside of strings;
723 that determines quote parity to the comment-end. */
724 while (from != stop)
726 rarely_quit (++quit_count);
728 ptrdiff_t temp_byte;
729 int prev_syntax;
730 bool com2start, com2end, comstart;
732 /* Move back and examine a character. */
733 DEC_BOTH (from, from_byte);
734 UPDATE_SYNTAX_TABLE_BACKWARD (from);
736 prev_syntax = syntax;
737 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
738 syntax = SYNTAX_WITH_FLAGS (c);
739 code = SYNTAX (c);
741 /* Check for 2-char comment markers. */
742 com2start = (SYNTAX_FLAGS_COMSTART_FIRST (syntax)
743 && SYNTAX_FLAGS_COMSTART_SECOND (prev_syntax)
744 && (comstyle
745 == SYNTAX_FLAGS_COMMENT_STYLE (prev_syntax, syntax))
746 && (SYNTAX_FLAGS_COMMENT_NESTED (prev_syntax)
747 || SYNTAX_FLAGS_COMMENT_NESTED (syntax)) == comnested);
748 com2end = (SYNTAX_FLAGS_COMEND_FIRST (syntax)
749 && SYNTAX_FLAGS_COMEND_SECOND (prev_syntax));
750 comstart = (com2start || code == Scomment);
752 /* Nasty cases with overlapping 2-char comment markers:
753 - snmp-mode: -- c -- foo -- c --
754 --- c --
755 ------ c --
756 - c-mode: *||*
757 |* *|* *|
758 |*| |* |*|
759 /// */
761 /* If a 2-char comment sequence partly overlaps with another,
762 we don't try to be clever. E.g. |*| in C, or }% in modes that
763 have %..\n and %{..}%. */
764 if (from > stop && (com2end || comstart))
766 ptrdiff_t next = from, next_byte = from_byte;
767 int next_c, next_syntax;
768 DEC_BOTH (next, next_byte);
769 UPDATE_SYNTAX_TABLE_BACKWARD (next);
770 next_c = FETCH_CHAR_AS_MULTIBYTE (next_byte);
771 next_syntax = SYNTAX_WITH_FLAGS (next_c);
772 if (((comstart || comnested)
773 && SYNTAX_FLAGS_COMEND_SECOND (syntax)
774 && SYNTAX_FLAGS_COMEND_FIRST (next_syntax))
775 || ((com2end || comnested)
776 && SYNTAX_FLAGS_COMSTART_SECOND (syntax)
777 && (comstyle
778 == SYNTAX_FLAGS_COMMENT_STYLE (syntax, prev_syntax))
779 && SYNTAX_FLAGS_COMSTART_FIRST (next_syntax)))
780 goto lossage;
781 /* UPDATE_SYNTAX_TABLE_FORWARD (next + 1); */
784 if (com2start && comstart_pos == 0)
785 /* We're looking at a comment starter. But it might be a comment
786 ender as well (see snmp-mode). The first time we see one, we
787 need to consider it as a comment starter,
788 and the subsequent times as a comment ender. */
789 com2end = 0;
791 /* Turn a 2-char comment sequences into the appropriate syntax. */
792 if (com2end)
793 code = Sendcomment;
794 else if (com2start)
795 code = Scomment;
796 /* Ignore comment starters of a different style. */
797 else if (code == Scomment
798 && (comstyle != SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0)
799 || SYNTAX_FLAGS_COMMENT_NESTED (syntax) != comnested))
800 continue;
802 /* Ignore escaped characters, except comment-enders which cannot
803 be escaped. */
804 if ((Vcomment_end_can_be_escaped || code != Sendcomment)
805 && char_quoted (from, from_byte))
806 continue;
808 switch (code)
810 case Sstring_fence:
811 case Scomment_fence:
812 c = (code == Sstring_fence ? ST_STRING_STYLE : ST_COMMENT_STYLE);
813 case Sstring:
814 /* Track parity of quotes. */
815 if (string_style == -1)
816 /* Entering a string. */
817 string_style = c;
818 else if (string_style == c)
819 /* Leaving the string. */
820 string_style = -1;
821 else
822 /* If we have two kinds of string delimiters.
823 There's no way to grok this scanning backwards. */
824 string_lossage = 1;
825 break;
827 case Scomment:
828 /* We've already checked that it is the relevant comstyle. */
829 if (string_style != -1 || comment_lossage || string_lossage)
830 /* There are odd string quotes involved, so let's be careful.
831 Test case in Pascal: " { " a { " } */
832 goto lossage;
834 if (!comnested)
836 /* Record best comment-starter so far. */
837 comstart_pos = from;
838 comstart_byte = from_byte;
840 else if (--nesting <= 0)
841 /* nested comments have to be balanced, so we don't need to
842 keep looking for earlier ones. We use here the same (slightly
843 incorrect) reasoning as below: since it is followed by uniform
844 paired string quotes, this comment-start has to be outside of
845 strings, else the comment-end itself would be inside a string. */
846 goto done;
847 break;
849 case Sendcomment:
850 if (SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0) == comstyle
851 && ((com2end && SYNTAX_FLAGS_COMMENT_NESTED (prev_syntax))
852 || SYNTAX_FLAGS_COMMENT_NESTED (syntax)) == comnested)
853 /* This is the same style of comment ender as ours. */
855 if (comnested)
856 nesting++;
857 else
858 /* Anything before that can't count because it would match
859 this comment-ender rather than ours. */
860 from = stop; /* Break out of the loop. */
862 else if (comstart_pos != 0 || c != '\n')
863 /* We're mixing comment styles here, so we'd better be careful.
864 The (comstart_pos != 0 || c != '\n') check is not quite correct
865 (we should just always set comment_lossage), but removing it
866 would imply that any multiline comment in C would go through
867 lossage, which seems overkill.
868 The failure should only happen in the rare cases such as
869 { (* } *) */
870 comment_lossage = 1;
871 break;
873 case Sopen:
874 /* Assume a defun-start point is outside of strings. */
875 if (open_paren_in_column_0_is_defun_start
876 && (from == stop
877 || (temp_byte = dec_bytepos (from_byte),
878 FETCH_CHAR (temp_byte) == '\n')))
880 defun_start = from;
881 defun_start_byte = from_byte;
882 from = stop; /* Break out of the loop. */
884 break;
886 default:
887 break;
891 if (comstart_pos == 0)
893 from = comment_end;
894 from_byte = comment_end_byte;
895 UPDATE_SYNTAX_TABLE_FORWARD (comment_end);
897 /* If comstart_pos is set and we get here (ie. didn't jump to `lossage'
898 or `done'), then we've found the beginning of the non-nested comment. */
899 else if (1) /* !comnested */
901 from = comstart_pos;
902 from_byte = comstart_byte;
903 UPDATE_SYNTAX_TABLE_FORWARD (from - 1);
905 else lossage:
907 struct lisp_parse_state state;
908 bool adjusted = true;
909 /* We had two kinds of string delimiters mixed up
910 together. Decode this going forwards.
911 Scan fwd from a known safe place (beginning-of-defun)
912 to the one in question; this records where we
913 last passed a comment starter. */
914 /* If we did not already find the defun start, find it now. */
915 if (defun_start == 0)
917 defun_start = find_defun_start (comment_end, comment_end_byte);
918 defun_start_byte = find_start_value_byte;
919 adjusted = (defun_start > BEGV);
923 internalize_parse_state (Qnil, &state);
924 scan_sexps_forward (&state,
925 defun_start, defun_start_byte,
926 comment_end, TYPE_MINIMUM (EMACS_INT),
927 0, 0);
928 defun_start = comment_end;
929 if (!adjusted)
931 adjusted = true;
932 find_start_value
933 = CONSP (state.levelstarts) ? XINT (XCAR (state.levelstarts))
934 : state.thislevelstart >= 0 ? state.thislevelstart
935 : find_start_value;
936 find_start_value_byte = CHAR_TO_BYTE (find_start_value);
939 if (state.incomment == (comnested ? 1 : -1)
940 && state.comstyle == comstyle)
941 from = state.comstr_start;
942 else
944 from = comment_end;
945 if (state.incomment)
946 /* If comment_end is inside some other comment, maybe ours
947 is nested, so we need to try again from within the
948 surrounding comment. Example: { a (* " *) */
950 /* FIXME: We should advance by one or two chars. */
951 defun_start = state.comstr_start + 2;
952 defun_start_byte = CHAR_TO_BYTE (defun_start);
955 rarely_quit (++quit_count);
957 while (defun_start < comment_end);
959 from_byte = CHAR_TO_BYTE (from);
960 UPDATE_SYNTAX_TABLE_FORWARD (from - 1);
963 done:
964 *charpos_ptr = from;
965 *bytepos_ptr = from_byte;
967 return from != comment_end;
970 DEFUN ("syntax-table-p", Fsyntax_table_p, Ssyntax_table_p, 1, 1, 0,
971 doc: /* Return t if OBJECT is a syntax table.
972 Currently, any char-table counts as a syntax table. */)
973 (Lisp_Object object)
975 if (CHAR_TABLE_P (object)
976 && EQ (XCHAR_TABLE (object)->purpose, Qsyntax_table))
977 return Qt;
978 return Qnil;
981 static void
982 check_syntax_table (Lisp_Object obj)
984 CHECK_TYPE (CHAR_TABLE_P (obj) && EQ (XCHAR_TABLE (obj)->purpose, Qsyntax_table),
985 Qsyntax_table_p, obj);
988 DEFUN ("syntax-table", Fsyntax_table, Ssyntax_table, 0, 0, 0,
989 doc: /* Return the current syntax table.
990 This is the one specified by the current buffer. */)
991 (void)
993 return BVAR (current_buffer, syntax_table);
996 DEFUN ("standard-syntax-table", Fstandard_syntax_table,
997 Sstandard_syntax_table, 0, 0, 0,
998 doc: /* Return the standard syntax table.
999 This is the one used for new buffers. */)
1000 (void)
1002 return Vstandard_syntax_table;
1005 DEFUN ("copy-syntax-table", Fcopy_syntax_table, Scopy_syntax_table, 0, 1, 0,
1006 doc: /* Construct a new syntax table and return it.
1007 It is a copy of the TABLE, which defaults to the standard syntax table. */)
1008 (Lisp_Object table)
1010 Lisp_Object copy;
1012 if (!NILP (table))
1013 check_syntax_table (table);
1014 else
1015 table = Vstandard_syntax_table;
1017 copy = Fcopy_sequence (table);
1019 /* Only the standard syntax table should have a default element.
1020 Other syntax tables should inherit from parents instead. */
1021 set_char_table_defalt (copy, Qnil);
1023 /* Copied syntax tables should all have parents.
1024 If we copied one with no parent, such as the standard syntax table,
1025 use the standard syntax table as the copy's parent. */
1026 if (NILP (XCHAR_TABLE (copy)->parent))
1027 Fset_char_table_parent (copy, Vstandard_syntax_table);
1028 return copy;
1031 DEFUN ("set-syntax-table", Fset_syntax_table, Sset_syntax_table, 1, 1, 0,
1032 doc: /* Select a new syntax table for the current buffer.
1033 One argument, a syntax table. */)
1034 (Lisp_Object table)
1036 int idx;
1037 check_syntax_table (table);
1038 bset_syntax_table (current_buffer, table);
1039 /* Indicate that this buffer now has a specified syntax table. */
1040 idx = PER_BUFFER_VAR_IDX (syntax_table);
1041 SET_PER_BUFFER_VALUE_P (current_buffer, idx, 1);
1042 return table;
1045 /* Convert a letter which signifies a syntax code
1046 into the code it signifies.
1047 This is used by modify-syntax-entry, and other things. */
1049 unsigned char const syntax_spec_code[0400] =
1050 { 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1051 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1052 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1053 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1054 Swhitespace, Scomment_fence, Sstring, 0377, Smath, 0377, 0377, Squote,
1055 Sopen, Sclose, 0377, 0377, 0377, Swhitespace, Spunct, Scharquote,
1056 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1057 0377, 0377, 0377, 0377, Scomment, 0377, Sendcomment, 0377,
1058 Sinherit, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* @, A ... */
1059 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1060 0377, 0377, 0377, 0377, 0377, 0377, 0377, Sword,
1061 0377, 0377, 0377, 0377, Sescape, 0377, 0377, Ssymbol,
1062 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* `, a, ... */
1063 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1064 0377, 0377, 0377, 0377, 0377, 0377, 0377, Sword,
1065 0377, 0377, 0377, 0377, Sstring_fence, 0377, 0377, 0377
1068 /* Indexed by syntax code, give the letter that describes it. */
1070 char const syntax_code_spec[16] =
1072 ' ', '.', 'w', '_', '(', ')', '\'', '\"', '$', '\\', '/', '<', '>', '@',
1073 '!', '|'
1076 /* Indexed by syntax code, give the object (cons of syntax code and
1077 nil) to be stored in syntax table. Since these objects can be
1078 shared among syntax tables, we generate them in advance. By
1079 sharing objects, the function `describe-syntax' can give a more
1080 compact listing. */
1081 static Lisp_Object Vsyntax_code_object;
1084 DEFUN ("char-syntax", Fchar_syntax, Schar_syntax, 1, 1, 0,
1085 doc: /* Return the syntax code of CHARACTER, described by a character.
1086 For example, if CHARACTER is a word constituent, the
1087 character `w' (119) is returned.
1088 The characters that correspond to various syntax codes
1089 are listed in the documentation of `modify-syntax-entry'. */)
1090 (Lisp_Object character)
1092 int char_int;
1093 CHECK_CHARACTER (character);
1094 char_int = XINT (character);
1095 SETUP_BUFFER_SYNTAX_TABLE ();
1096 return make_number (syntax_code_spec[SYNTAX (char_int)]);
1099 DEFUN ("matching-paren", Fmatching_paren, Smatching_paren, 1, 1, 0,
1100 doc: /* Return the matching parenthesis of CHARACTER, or nil if none. */)
1101 (Lisp_Object character)
1103 int char_int;
1104 enum syntaxcode code;
1105 CHECK_CHARACTER (character);
1106 char_int = XINT (character);
1107 SETUP_BUFFER_SYNTAX_TABLE ();
1108 code = SYNTAX (char_int);
1109 if (code == Sopen || code == Sclose)
1110 return SYNTAX_MATCH (char_int);
1111 return Qnil;
1114 DEFUN ("string-to-syntax", Fstring_to_syntax, Sstring_to_syntax, 1, 1, 0,
1115 doc: /* Convert a syntax descriptor STRING into a raw syntax descriptor.
1116 STRING should be a string of the form allowed as argument of
1117 `modify-syntax-entry'. The return value is a raw syntax descriptor: a
1118 cons cell (CODE . MATCHING-CHAR) which can be used, for example, as
1119 the value of a `syntax-table' text property. */)
1120 (Lisp_Object string)
1122 const unsigned char *p;
1123 int val;
1124 Lisp_Object match;
1126 CHECK_STRING (string);
1128 p = SDATA (string);
1129 val = syntax_spec_code[*p++];
1130 if (val == 0377)
1131 error ("Invalid syntax description letter: %c", p[-1]);
1133 if (val == Sinherit)
1134 return Qnil;
1136 if (*p)
1138 int len;
1139 int character = STRING_CHAR_AND_LENGTH (p, len);
1140 XSETINT (match, character);
1141 if (XFASTINT (match) == ' ')
1142 match = Qnil;
1143 p += len;
1145 else
1146 match = Qnil;
1148 while (*p)
1149 switch (*p++)
1151 case '1':
1152 val |= 1 << 16;
1153 break;
1155 case '2':
1156 val |= 1 << 17;
1157 break;
1159 case '3':
1160 val |= 1 << 18;
1161 break;
1163 case '4':
1164 val |= 1 << 19;
1165 break;
1167 case 'p':
1168 val |= 1 << 20;
1169 break;
1171 case 'b':
1172 val |= 1 << 21;
1173 break;
1175 case 'n':
1176 val |= 1 << 22;
1177 break;
1179 case 'c':
1180 val |= 1 << 23;
1181 break;
1184 if (val < ASIZE (Vsyntax_code_object) && NILP (match))
1185 return AREF (Vsyntax_code_object, val);
1186 else
1187 /* Since we can't use a shared object, let's make a new one. */
1188 return Fcons (make_number (val), match);
1191 /* I really don't know why this is interactive
1192 help-form should at least be made useful whilst reading the second arg. */
1193 DEFUN ("modify-syntax-entry", Fmodify_syntax_entry, Smodify_syntax_entry, 2, 3,
1194 "cSet syntax for character: \nsSet syntax for %s to: ",
1195 doc: /* Set syntax for character CHAR according to string NEWENTRY.
1196 The syntax is changed only for table SYNTAX-TABLE, which defaults to
1197 the current buffer's syntax table.
1198 CHAR may be a cons (MIN . MAX), in which case, syntaxes of all characters
1199 in the range MIN to MAX are changed.
1200 The first character of NEWENTRY should be one of the following:
1201 Space or - whitespace syntax. w word constituent.
1202 _ symbol constituent. . punctuation.
1203 ( open-parenthesis. ) close-parenthesis.
1204 " string quote. \\ escape.
1205 $ paired delimiter. \\=' expression quote or prefix operator.
1206 < comment starter. > comment ender.
1207 / character-quote. @ inherit from parent table.
1208 | generic string fence. ! generic comment fence.
1210 Only single-character comment start and end sequences are represented thus.
1211 Two-character sequences are represented as described below.
1212 The second character of NEWENTRY is the matching parenthesis,
1213 used only if the first character is `(' or `)'.
1214 Any additional characters are flags.
1215 Defined flags are the characters 1, 2, 3, 4, b, p, and n.
1216 1 means CHAR is the start of a two-char comment start sequence.
1217 2 means CHAR is the second character of such a sequence.
1218 3 means CHAR is the start of a two-char comment end sequence.
1219 4 means CHAR is the second character of such a sequence.
1221 There can be several orthogonal comment sequences. This is to support
1222 language modes such as C++. By default, all comment sequences are of style
1223 a, but you can set the comment sequence style to b (on the second character
1224 of a comment-start, and the first character of a comment-end sequence) and/or
1225 c (on any of its chars) using this flag:
1226 b means CHAR is part of comment sequence b.
1227 c means CHAR is part of comment sequence c.
1228 n means CHAR is part of a nestable comment sequence.
1230 p means CHAR is a prefix character for `backward-prefix-chars';
1231 such characters are treated as whitespace when they occur
1232 between expressions.
1233 usage: (modify-syntax-entry CHAR NEWENTRY &optional SYNTAX-TABLE) */)
1234 (Lisp_Object c, Lisp_Object newentry, Lisp_Object syntax_table)
1236 if (CONSP (c))
1238 CHECK_CHARACTER_CAR (c);
1239 CHECK_CHARACTER_CDR (c);
1241 else
1242 CHECK_CHARACTER (c);
1244 if (NILP (syntax_table))
1245 syntax_table = BVAR (current_buffer, syntax_table);
1246 else
1247 check_syntax_table (syntax_table);
1249 newentry = Fstring_to_syntax (newentry);
1250 if (CONSP (c))
1251 SET_RAW_SYNTAX_ENTRY_RANGE (syntax_table, c, newentry);
1252 else
1253 SET_RAW_SYNTAX_ENTRY (syntax_table, XINT (c), newentry);
1255 /* We clear the regexp cache, since character classes can now have
1256 different values from those in the compiled regexps.*/
1257 clear_regexp_cache ();
1259 return Qnil;
1262 /* Dump syntax table to buffer in human-readable format */
1264 DEFUN ("internal-describe-syntax-value", Finternal_describe_syntax_value,
1265 Sinternal_describe_syntax_value, 1, 1, 0,
1266 doc: /* Insert a description of the internal syntax description SYNTAX at point. */)
1267 (Lisp_Object syntax)
1269 int code, syntax_code;
1270 bool start1, start2, end1, end2, prefix, comstyleb, comstylec, comnested;
1271 char str[2];
1272 Lisp_Object first, match_lisp, value = syntax;
1274 if (NILP (value))
1276 insert_string ("default");
1277 return syntax;
1280 if (CHAR_TABLE_P (value))
1282 insert_string ("deeper char-table ...");
1283 return syntax;
1286 if (!CONSP (value))
1288 insert_string ("invalid");
1289 return syntax;
1292 first = XCAR (value);
1293 match_lisp = XCDR (value);
1295 if (!INTEGERP (first) || !(NILP (match_lisp) || CHARACTERP (match_lisp)))
1297 insert_string ("invalid");
1298 return syntax;
1301 syntax_code = XINT (first) & INT_MAX;
1302 code = syntax_code & 0377;
1303 start1 = SYNTAX_FLAGS_COMSTART_FIRST (syntax_code);
1304 start2 = SYNTAX_FLAGS_COMSTART_SECOND (syntax_code);
1305 end1 = SYNTAX_FLAGS_COMEND_FIRST (syntax_code);
1306 end2 = SYNTAX_FLAGS_COMEND_SECOND (syntax_code);
1307 prefix = SYNTAX_FLAGS_PREFIX (syntax_code);
1308 comstyleb = SYNTAX_FLAGS_COMMENT_STYLEB (syntax_code);
1309 comstylec = SYNTAX_FLAGS_COMMENT_STYLEC (syntax_code);
1310 comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax_code);
1312 if (Smax <= code)
1314 insert_string ("invalid");
1315 return syntax;
1318 str[0] = syntax_code_spec[code], str[1] = 0;
1319 insert (str, 1);
1321 if (NILP (match_lisp))
1322 insert (" ", 1);
1323 else
1324 insert_char (XINT (match_lisp));
1326 if (start1)
1327 insert ("1", 1);
1328 if (start2)
1329 insert ("2", 1);
1331 if (end1)
1332 insert ("3", 1);
1333 if (end2)
1334 insert ("4", 1);
1336 if (prefix)
1337 insert ("p", 1);
1338 if (comstyleb)
1339 insert ("b", 1);
1340 if (comstylec)
1341 insert ("c", 1);
1342 if (comnested)
1343 insert ("n", 1);
1345 insert_string ("\twhich means: ");
1347 switch (code)
1349 case Swhitespace:
1350 insert_string ("whitespace"); break;
1351 case Spunct:
1352 insert_string ("punctuation"); break;
1353 case Sword:
1354 insert_string ("word"); break;
1355 case Ssymbol:
1356 insert_string ("symbol"); break;
1357 case Sopen:
1358 insert_string ("open"); break;
1359 case Sclose:
1360 insert_string ("close"); break;
1361 case Squote:
1362 insert_string ("prefix"); break;
1363 case Sstring:
1364 insert_string ("string"); break;
1365 case Smath:
1366 insert_string ("math"); break;
1367 case Sescape:
1368 insert_string ("escape"); break;
1369 case Scharquote:
1370 insert_string ("charquote"); break;
1371 case Scomment:
1372 insert_string ("comment"); break;
1373 case Sendcomment:
1374 insert_string ("endcomment"); break;
1375 case Sinherit:
1376 insert_string ("inherit"); break;
1377 case Scomment_fence:
1378 insert_string ("comment fence"); break;
1379 case Sstring_fence:
1380 insert_string ("string fence"); break;
1381 default:
1382 insert_string ("invalid");
1383 return syntax;
1386 if (!NILP (match_lisp))
1388 insert_string (", matches ");
1389 insert_char (XINT (match_lisp));
1392 if (start1)
1393 insert_string (",\n\t is the first character of a comment-start sequence");
1394 if (start2)
1395 insert_string (",\n\t is the second character of a comment-start sequence");
1397 if (end1)
1398 insert_string (",\n\t is the first character of a comment-end sequence");
1399 if (end2)
1400 insert_string (",\n\t is the second character of a comment-end sequence");
1401 if (comstyleb)
1402 insert_string (" (comment style b)");
1403 if (comstylec)
1404 insert_string (" (comment style c)");
1405 if (comnested)
1406 insert_string (" (nestable)");
1408 if (prefix)
1410 AUTO_STRING (prefixdoc,
1411 ",\n\t is a prefix character for `backward-prefix-chars'");
1412 insert1 (Fsubstitute_command_keys (prefixdoc));
1415 return syntax;
1418 /* Return the position across COUNT words from FROM.
1419 If that many words cannot be found before the end of the buffer, return 0.
1420 COUNT negative means scan backward and stop at word beginning. */
1422 ptrdiff_t
1423 scan_words (ptrdiff_t from, EMACS_INT count)
1425 ptrdiff_t beg = BEGV;
1426 ptrdiff_t end = ZV;
1427 ptrdiff_t from_byte = CHAR_TO_BYTE (from);
1428 enum syntaxcode code;
1429 int ch0, ch1;
1430 Lisp_Object func, pos;
1432 SETUP_SYNTAX_TABLE (from, count);
1434 while (count > 0)
1436 while (true)
1438 if (from == end)
1439 return 0;
1440 UPDATE_SYNTAX_TABLE_FORWARD (from);
1441 ch0 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
1442 code = SYNTAX (ch0);
1443 INC_BOTH (from, from_byte);
1444 if (words_include_escapes
1445 && (code == Sescape || code == Scharquote))
1446 break;
1447 if (code == Sword)
1448 break;
1449 rarely_quit (from);
1451 /* Now CH0 is a character which begins a word and FROM is the
1452 position of the next character. */
1453 func = CHAR_TABLE_REF (Vfind_word_boundary_function_table, ch0);
1454 if (! NILP (Ffboundp (func)))
1456 pos = call2 (func, make_number (from - 1), make_number (end));
1457 if (INTEGERP (pos) && from < XINT (pos) && XINT (pos) <= ZV)
1459 from = XINT (pos);
1460 from_byte = CHAR_TO_BYTE (from);
1463 else
1465 while (1)
1467 if (from == end) break;
1468 UPDATE_SYNTAX_TABLE_FORWARD (from);
1469 ch1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
1470 code = SYNTAX (ch1);
1471 if ((code != Sword
1472 && (! words_include_escapes
1473 || (code != Sescape && code != Scharquote)))
1474 || word_boundary_p (ch0, ch1))
1475 break;
1476 INC_BOTH (from, from_byte);
1477 ch0 = ch1;
1478 rarely_quit (from);
1481 count--;
1483 while (count < 0)
1485 while (true)
1487 if (from == beg)
1488 return 0;
1489 DEC_BOTH (from, from_byte);
1490 UPDATE_SYNTAX_TABLE_BACKWARD (from);
1491 ch1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
1492 code = SYNTAX (ch1);
1493 if (words_include_escapes
1494 && (code == Sescape || code == Scharquote))
1495 break;
1496 if (code == Sword)
1497 break;
1498 rarely_quit (from);
1500 /* Now CH1 is a character which ends a word and FROM is the
1501 position of it. */
1502 func = CHAR_TABLE_REF (Vfind_word_boundary_function_table, ch1);
1503 if (! NILP (Ffboundp (func)))
1505 pos = call2 (func, make_number (from), make_number (beg));
1506 if (INTEGERP (pos) && BEGV <= XINT (pos) && XINT (pos) < from)
1508 from = XINT (pos);
1509 from_byte = CHAR_TO_BYTE (from);
1512 else
1514 while (1)
1516 if (from == beg)
1517 break;
1518 DEC_BOTH (from, from_byte);
1519 UPDATE_SYNTAX_TABLE_BACKWARD (from);
1520 ch0 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
1521 code = SYNTAX (ch0);
1522 if ((code != Sword
1523 && (! words_include_escapes
1524 || (code != Sescape && code != Scharquote)))
1525 || word_boundary_p (ch0, ch1))
1527 INC_BOTH (from, from_byte);
1528 break;
1530 ch1 = ch0;
1531 rarely_quit (from);
1534 count++;
1537 return from;
1540 DEFUN ("forward-word", Fforward_word, Sforward_word, 0, 1, "^p",
1541 doc: /* Move point forward ARG words (backward if ARG is negative).
1542 If ARG is omitted or nil, move point forward one word.
1543 Normally returns t.
1544 If an edge of the buffer or a field boundary is reached, point is
1545 left there and the function returns nil. Field boundaries are not
1546 noticed if `inhibit-field-text-motion' is non-nil.
1548 The word boundaries are normally determined by the buffer's syntax
1549 table, but `find-word-boundary-function-table', such as set up
1550 by `subword-mode', can change that. If a Lisp program needs to
1551 move by words determined strictly by the syntax table, it should
1552 use `forward-word-strictly' instead. */)
1553 (Lisp_Object arg)
1555 Lisp_Object tmp;
1556 ptrdiff_t orig_val, val;
1558 if (NILP (arg))
1559 XSETFASTINT (arg, 1);
1560 else
1561 CHECK_NUMBER (arg);
1563 val = orig_val = scan_words (PT, XINT (arg));
1564 if (! orig_val)
1565 val = XINT (arg) > 0 ? ZV : BEGV;
1567 /* Avoid jumping out of an input field. */
1568 tmp = Fconstrain_to_field (make_number (val), make_number (PT),
1569 Qnil, Qnil, Qnil);
1570 val = XFASTINT (tmp);
1572 SET_PT (val);
1573 return val == orig_val ? Qt : Qnil;
1576 DEFUN ("skip-chars-forward", Fskip_chars_forward, Sskip_chars_forward, 1, 2, 0,
1577 doc: /* Move point forward, stopping before a char not in STRING, or at pos LIM.
1578 STRING is like the inside of a `[...]' in a regular expression
1579 except that `]' is never special and `\\' quotes `^', `-' or `\\'
1580 (but not at the end of a range; quoting is never needed there).
1581 Thus, with arg "a-zA-Z", this skips letters stopping before first nonletter.
1582 With arg "^a-zA-Z", skips nonletters stopping before first letter.
1583 Char classes, e.g. `[:alpha:]', are supported.
1585 Returns the distance traveled, either zero or positive. */)
1586 (Lisp_Object string, Lisp_Object lim)
1588 return skip_chars (1, string, lim, 1);
1591 DEFUN ("skip-chars-backward", Fskip_chars_backward, Sskip_chars_backward, 1, 2, 0,
1592 doc: /* Move point backward, stopping after a char not in STRING, or at pos LIM.
1593 See `skip-chars-forward' for details.
1594 Returns the distance traveled, either zero or negative. */)
1595 (Lisp_Object string, Lisp_Object lim)
1597 return skip_chars (0, string, lim, 1);
1600 DEFUN ("skip-syntax-forward", Fskip_syntax_forward, Sskip_syntax_forward, 1, 2, 0,
1601 doc: /* Move point forward across chars in specified syntax classes.
1602 SYNTAX is a string of syntax code characters.
1603 Stop before a char whose syntax is not in SYNTAX, or at position LIM.
1604 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1605 This function returns the distance traveled, either zero or positive. */)
1606 (Lisp_Object syntax, Lisp_Object lim)
1608 return skip_syntaxes (1, syntax, lim);
1611 DEFUN ("skip-syntax-backward", Fskip_syntax_backward, Sskip_syntax_backward, 1, 2, 0,
1612 doc: /* Move point backward across chars in specified syntax classes.
1613 SYNTAX is a string of syntax code characters.
1614 Stop on reaching a char whose syntax is not in SYNTAX, or at position LIM.
1615 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1616 This function returns either zero or a negative number, and the absolute value
1617 of this is the distance traveled. */)
1618 (Lisp_Object syntax, Lisp_Object lim)
1620 return skip_syntaxes (0, syntax, lim);
1623 static Lisp_Object
1624 skip_chars (bool forwardp, Lisp_Object string, Lisp_Object lim,
1625 bool handle_iso_classes)
1627 int c;
1628 char fastmap[0400];
1629 /* Store the ranges of non-ASCII characters. */
1630 int *char_ranges UNINIT;
1631 int n_char_ranges = 0;
1632 bool negate = 0;
1633 ptrdiff_t i, i_byte;
1634 /* True if the current buffer is multibyte and the region contains
1635 non-ASCII chars. */
1636 bool multibyte;
1637 /* True if STRING is multibyte and it contains non-ASCII chars. */
1638 bool string_multibyte;
1639 ptrdiff_t size_byte;
1640 const unsigned char *str;
1641 int len;
1642 Lisp_Object iso_classes;
1643 USE_SAFE_ALLOCA;
1645 CHECK_STRING (string);
1646 iso_classes = Qnil;
1648 if (NILP (lim))
1649 XSETINT (lim, forwardp ? ZV : BEGV);
1650 else
1651 CHECK_NUMBER_COERCE_MARKER (lim);
1653 /* In any case, don't allow scan outside bounds of buffer. */
1654 if (XINT (lim) > ZV)
1655 XSETFASTINT (lim, ZV);
1656 if (XINT (lim) < BEGV)
1657 XSETFASTINT (lim, BEGV);
1659 multibyte = (!NILP (BVAR (current_buffer, enable_multibyte_characters))
1660 && (XINT (lim) - PT != CHAR_TO_BYTE (XINT (lim)) - PT_BYTE));
1661 string_multibyte = SBYTES (string) > SCHARS (string);
1663 memset (fastmap, 0, sizeof fastmap);
1665 str = SDATA (string);
1666 size_byte = SBYTES (string);
1668 i_byte = 0;
1669 if (i_byte < size_byte
1670 && SREF (string, 0) == '^')
1672 negate = 1; i_byte++;
1675 /* Find the characters specified and set their elements of fastmap.
1676 Handle backslashes and ranges specially.
1678 If STRING contains non-ASCII characters, setup char_ranges for
1679 them and use fastmap only for their leading codes. */
1681 if (! string_multibyte)
1683 bool string_has_eight_bit = 0;
1685 /* At first setup fastmap. */
1686 while (i_byte < size_byte)
1688 if (handle_iso_classes)
1690 const unsigned char *ch = str + i_byte;
1691 re_wctype_t cc = re_wctype_parse (&ch, size_byte - i_byte);
1692 if (cc == 0)
1693 error ("Invalid ISO C character class");
1694 if (cc != -1)
1696 iso_classes = Fcons (make_number (cc), iso_classes);
1697 i_byte = ch - str;
1698 continue;
1702 c = str[i_byte++];
1704 if (c == '\\')
1706 if (i_byte == size_byte)
1707 break;
1709 c = str[i_byte++];
1711 /* Treat `-' as range character only if another character
1712 follows. */
1713 if (i_byte + 1 < size_byte
1714 && str[i_byte] == '-')
1716 int c2;
1718 /* Skip over the dash. */
1719 i_byte++;
1721 /* Get the end of the range. */
1722 c2 = str[i_byte++];
1723 if (c2 == '\\'
1724 && i_byte < size_byte)
1725 c2 = str[i_byte++];
1727 if (c <= c2)
1729 int lim2 = c2 + 1;
1730 while (c < lim2)
1731 fastmap[c++] = 1;
1732 if (! ASCII_CHAR_P (c2))
1733 string_has_eight_bit = 1;
1736 else
1738 fastmap[c] = 1;
1739 if (! ASCII_CHAR_P (c))
1740 string_has_eight_bit = 1;
1744 /* If the current range is multibyte and STRING contains
1745 eight-bit chars, arrange fastmap and setup char_ranges for
1746 the corresponding multibyte chars. */
1747 if (multibyte && string_has_eight_bit)
1749 char *p1;
1750 char himap[0200 + 1];
1751 memcpy (himap, fastmap + 0200, 0200);
1752 himap[0200] = 0;
1753 memset (fastmap + 0200, 0, 0200);
1754 SAFE_NALLOCA (char_ranges, 2, 128);
1755 i = 0;
1757 while ((p1 = memchr (himap + i, 1, 0200 - i)))
1759 /* Deduce the next range C..C2 from the next clump of 1s
1760 in HIMAP starting with &HIMAP[I]. HIMAP is the high
1761 order half of the old FASTMAP. */
1762 int c2, leading_code;
1763 i = p1 - himap;
1764 c = BYTE8_TO_CHAR (i + 0200);
1765 i += strlen (p1);
1766 c2 = BYTE8_TO_CHAR (i + 0200 - 1);
1768 char_ranges[n_char_ranges++] = c;
1769 char_ranges[n_char_ranges++] = c2;
1770 leading_code = CHAR_LEADING_CODE (c);
1771 memset (fastmap + leading_code, 1,
1772 CHAR_LEADING_CODE (c2) - leading_code + 1);
1776 else /* STRING is multibyte */
1778 SAFE_NALLOCA (char_ranges, 2, SCHARS (string));
1780 while (i_byte < size_byte)
1782 int leading_code = str[i_byte];
1784 if (handle_iso_classes)
1786 const unsigned char *ch = str + i_byte;
1787 re_wctype_t cc = re_wctype_parse (&ch, size_byte - i_byte);
1788 if (cc == 0)
1789 error ("Invalid ISO C character class");
1790 if (cc != -1)
1792 iso_classes = Fcons (make_number (cc), iso_classes);
1793 i_byte = ch - str;
1794 continue;
1798 if (leading_code== '\\')
1800 if (++i_byte == size_byte)
1801 break;
1803 leading_code = str[i_byte];
1805 c = STRING_CHAR_AND_LENGTH (str + i_byte, len);
1806 i_byte += len;
1809 /* Treat `-' as range character only if another character
1810 follows. */
1811 if (i_byte + 1 < size_byte
1812 && str[i_byte] == '-')
1814 int c2, leading_code2;
1816 /* Skip over the dash. */
1817 i_byte++;
1819 /* Get the end of the range. */
1820 leading_code2 = str[i_byte];
1821 c2 = STRING_CHAR_AND_LENGTH (str + i_byte, len);
1822 i_byte += len;
1824 if (c2 == '\\'
1825 && i_byte < size_byte)
1827 leading_code2 = str[i_byte];
1828 c2 = STRING_CHAR_AND_LENGTH (str + i_byte, len);
1829 i_byte += len;
1832 if (c > c2)
1833 continue;
1834 if (ASCII_CHAR_P (c))
1836 while (c <= c2 && c < 0x80)
1837 fastmap[c++] = 1;
1838 leading_code = CHAR_LEADING_CODE (c);
1840 if (! ASCII_CHAR_P (c))
1842 int lim2 = leading_code2 + 1;
1843 while (leading_code < lim2)
1844 fastmap[leading_code++] = 1;
1845 if (c <= c2)
1847 char_ranges[n_char_ranges++] = c;
1848 char_ranges[n_char_ranges++] = c2;
1852 else
1854 if (ASCII_CHAR_P (c))
1855 fastmap[c] = 1;
1856 else
1858 fastmap[leading_code] = 1;
1859 char_ranges[n_char_ranges++] = c;
1860 char_ranges[n_char_ranges++] = c;
1865 /* If the current range is unibyte and STRING contains non-ASCII
1866 chars, arrange fastmap for the corresponding unibyte
1867 chars. */
1869 if (! multibyte && n_char_ranges > 0)
1871 memset (fastmap + 0200, 0, 0200);
1872 for (i = 0; i < n_char_ranges; i += 2)
1874 int c1 = char_ranges[i];
1875 int lim2 = char_ranges[i + 1] + 1;
1877 for (; c1 < lim2; c1++)
1879 int b = CHAR_TO_BYTE_SAFE (c1);
1880 if (b >= 0)
1881 fastmap[b] = 1;
1887 /* If ^ was the first character, complement the fastmap. */
1888 if (negate)
1890 if (! multibyte)
1891 for (i = 0; i < sizeof fastmap; i++)
1892 fastmap[i] ^= 1;
1893 else
1895 for (i = 0; i < 0200; i++)
1896 fastmap[i] ^= 1;
1897 /* All non-ASCII chars possibly match. */
1898 for (; i < sizeof fastmap; i++)
1899 fastmap[i] = 1;
1904 ptrdiff_t start_point = PT;
1905 ptrdiff_t pos = PT;
1906 ptrdiff_t pos_byte = PT_BYTE;
1907 unsigned char *p = PT_ADDR, *endp, *stop;
1909 if (forwardp)
1911 endp = (XINT (lim) == GPT) ? GPT_ADDR : CHAR_POS_ADDR (XINT (lim));
1912 stop = (pos < GPT && GPT < XINT (lim)) ? GPT_ADDR : endp;
1914 else
1916 endp = CHAR_POS_ADDR (XINT (lim));
1917 stop = (pos >= GPT && GPT > XINT (lim)) ? GAP_END_ADDR : endp;
1920 /* This code may look up syntax tables using functions that rely on the
1921 gl_state object. To make sure this object is not out of date,
1922 let's initialize it manually.
1923 We ignore syntax-table text-properties for now, since that's
1924 what we've done in the past. */
1925 SETUP_BUFFER_SYNTAX_TABLE ();
1926 if (forwardp)
1928 if (multibyte)
1929 while (1)
1931 int nbytes;
1933 if (p >= stop)
1935 if (p >= endp)
1936 break;
1937 p = GAP_END_ADDR;
1938 stop = endp;
1940 c = STRING_CHAR_AND_LENGTH (p, nbytes);
1941 if (! NILP (iso_classes) && in_classes (c, iso_classes))
1943 if (negate)
1944 break;
1945 else
1946 goto fwd_ok;
1949 if (! fastmap[*p])
1950 break;
1951 if (! ASCII_CHAR_P (c))
1953 /* As we are looking at a multibyte character, we
1954 must look up the character in the table
1955 CHAR_RANGES. If there's no data in the table,
1956 that character is not what we want to skip. */
1958 /* The following code do the right thing even if
1959 n_char_ranges is zero (i.e. no data in
1960 CHAR_RANGES). */
1961 for (i = 0; i < n_char_ranges; i += 2)
1962 if (c >= char_ranges[i] && c <= char_ranges[i + 1])
1963 break;
1964 if (!(negate ^ (i < n_char_ranges)))
1965 break;
1967 fwd_ok:
1968 p += nbytes, pos++, pos_byte += nbytes;
1969 rarely_quit (pos);
1971 else
1972 while (true)
1974 if (p >= stop)
1976 if (p >= endp)
1977 break;
1978 p = GAP_END_ADDR;
1979 stop = endp;
1982 if (!NILP (iso_classes) && in_classes (*p, iso_classes))
1984 if (negate)
1985 break;
1986 else
1987 goto fwd_unibyte_ok;
1990 if (!fastmap[*p])
1991 break;
1992 fwd_unibyte_ok:
1993 p++, pos++, pos_byte++;
1994 rarely_quit (pos);
1997 else
1999 if (multibyte)
2000 while (true)
2002 if (p <= stop)
2004 if (p <= endp)
2005 break;
2006 p = GPT_ADDR;
2007 stop = endp;
2009 unsigned char *prev_p = p;
2011 p--;
2012 while (stop <= p && ! CHAR_HEAD_P (*p));
2014 c = STRING_CHAR (p);
2016 if (! NILP (iso_classes) && in_classes (c, iso_classes))
2018 if (negate)
2019 break;
2020 else
2021 goto back_ok;
2024 if (! fastmap[*p])
2025 break;
2026 if (! ASCII_CHAR_P (c))
2028 /* See the comment in the previous similar code. */
2029 for (i = 0; i < n_char_ranges; i += 2)
2030 if (c >= char_ranges[i] && c <= char_ranges[i + 1])
2031 break;
2032 if (!(negate ^ (i < n_char_ranges)))
2033 break;
2035 back_ok:
2036 pos--, pos_byte -= prev_p - p;
2037 rarely_quit (pos);
2039 else
2040 while (true)
2042 if (p <= stop)
2044 if (p <= endp)
2045 break;
2046 p = GPT_ADDR;
2047 stop = endp;
2050 if (! NILP (iso_classes) && in_classes (p[-1], iso_classes))
2052 if (negate)
2053 break;
2054 else
2055 goto back_unibyte_ok;
2058 if (!fastmap[p[-1]])
2059 break;
2060 back_unibyte_ok:
2061 p--, pos--, pos_byte--;
2062 rarely_quit (pos);
2066 SET_PT_BOTH (pos, pos_byte);
2068 SAFE_FREE ();
2069 return make_number (PT - start_point);
2074 static Lisp_Object
2075 skip_syntaxes (bool forwardp, Lisp_Object string, Lisp_Object lim)
2077 int c;
2078 unsigned char fastmap[0400];
2079 bool negate = 0;
2080 ptrdiff_t i, i_byte;
2081 bool multibyte;
2082 ptrdiff_t size_byte;
2083 unsigned char *str;
2085 CHECK_STRING (string);
2087 if (NILP (lim))
2088 XSETINT (lim, forwardp ? ZV : BEGV);
2089 else
2090 CHECK_NUMBER_COERCE_MARKER (lim);
2092 /* In any case, don't allow scan outside bounds of buffer. */
2093 if (XINT (lim) > ZV)
2094 XSETFASTINT (lim, ZV);
2095 if (XINT (lim) < BEGV)
2096 XSETFASTINT (lim, BEGV);
2098 if (forwardp ? (PT >= XFASTINT (lim)) : (PT <= XFASTINT (lim)))
2099 return make_number (0);
2101 multibyte = (!NILP (BVAR (current_buffer, enable_multibyte_characters))
2102 && (XINT (lim) - PT != CHAR_TO_BYTE (XINT (lim)) - PT_BYTE));
2104 memset (fastmap, 0, sizeof fastmap);
2106 if (SBYTES (string) > SCHARS (string))
2107 /* As this is very rare case (syntax spec is ASCII only), don't
2108 consider efficiency. */
2109 string = string_make_unibyte (string);
2111 str = SDATA (string);
2112 size_byte = SBYTES (string);
2114 i_byte = 0;
2115 if (i_byte < size_byte
2116 && SREF (string, 0) == '^')
2118 negate = 1; i_byte++;
2121 /* Find the syntaxes specified and set their elements of fastmap. */
2123 while (i_byte < size_byte)
2125 c = str[i_byte++];
2126 fastmap[syntax_spec_code[c]] = 1;
2129 /* If ^ was the first character, complement the fastmap. */
2130 if (negate)
2131 for (i = 0; i < sizeof fastmap; i++)
2132 fastmap[i] ^= 1;
2135 ptrdiff_t start_point = PT;
2136 ptrdiff_t pos = PT;
2137 ptrdiff_t pos_byte = PT_BYTE;
2138 unsigned char *p, *endp, *stop;
2140 SETUP_SYNTAX_TABLE (pos, forwardp ? 1 : -1);
2142 if (forwardp)
2144 while (true)
2146 p = BYTE_POS_ADDR (pos_byte);
2147 endp = XINT (lim) == GPT ? GPT_ADDR : CHAR_POS_ADDR (XINT (lim));
2148 stop = pos < GPT && GPT < XINT (lim) ? GPT_ADDR : endp;
2152 int nbytes;
2154 if (p >= stop)
2156 if (p >= endp)
2157 goto done;
2158 p = GAP_END_ADDR;
2159 stop = endp;
2161 if (multibyte)
2162 c = STRING_CHAR_AND_LENGTH (p, nbytes);
2163 else
2164 c = *p, nbytes = 1;
2165 if (! fastmap[SYNTAX (c)])
2166 goto done;
2167 p += nbytes, pos++, pos_byte += nbytes;
2168 rarely_quit (pos);
2170 while (!parse_sexp_lookup_properties
2171 || pos < gl_state.e_property);
2173 update_syntax_table_forward (pos + gl_state.offset,
2174 false, gl_state.object);
2177 else
2179 p = BYTE_POS_ADDR (pos_byte);
2180 endp = CHAR_POS_ADDR (XINT (lim));
2181 stop = pos >= GPT && GPT > XINT (lim) ? GAP_END_ADDR : endp;
2183 if (multibyte)
2185 while (true)
2187 if (p <= stop)
2189 if (p <= endp)
2190 break;
2191 p = GPT_ADDR;
2192 stop = endp;
2194 UPDATE_SYNTAX_TABLE_BACKWARD (pos - 1);
2196 unsigned char *prev_p = p;
2198 p--;
2199 while (stop <= p && ! CHAR_HEAD_P (*p));
2201 c = STRING_CHAR (p);
2202 if (! fastmap[SYNTAX (c)])
2203 break;
2204 pos--, pos_byte -= prev_p - p;
2205 rarely_quit (pos);
2208 else
2210 while (true)
2212 if (p <= stop)
2214 if (p <= endp)
2215 break;
2216 p = GPT_ADDR;
2217 stop = endp;
2219 UPDATE_SYNTAX_TABLE_BACKWARD (pos - 1);
2220 if (! fastmap[SYNTAX (p[-1])])
2221 break;
2222 p--, pos--, pos_byte--;
2223 rarely_quit (pos);
2228 done:
2229 SET_PT_BOTH (pos, pos_byte);
2231 return make_number (PT - start_point);
2235 /* Return true if character C belongs to one of the ISO classes
2236 in the list ISO_CLASSES. Each class is represented by an
2237 integer which is its type according to re_wctype. */
2239 static bool
2240 in_classes (int c, Lisp_Object iso_classes)
2242 bool fits_class = 0;
2244 while (CONSP (iso_classes))
2246 Lisp_Object elt;
2247 elt = XCAR (iso_classes);
2248 iso_classes = XCDR (iso_classes);
2250 if (re_iswctype (c, XFASTINT (elt)))
2251 fits_class = 1;
2254 return fits_class;
2257 /* Jump over a comment, assuming we are at the beginning of one.
2258 FROM is the current position.
2259 FROM_BYTE is the bytepos corresponding to FROM.
2260 Do not move past STOP (a charpos).
2261 The comment over which we have to jump is of style STYLE
2262 (either SYNTAX_FLAGS_COMMENT_STYLE (foo) or ST_COMMENT_STYLE).
2263 NESTING should be positive to indicate the nesting at the beginning
2264 for nested comments and should be zero or negative else.
2265 ST_COMMENT_STYLE cannot be nested.
2266 PREV_SYNTAX is the SYNTAX_WITH_FLAGS of the previous character
2267 (or 0 If the search cannot start in the middle of a two-character).
2269 If successful, return true and store the charpos of the comment's
2270 end into *CHARPOS_PTR and the corresponding bytepos into
2271 *BYTEPOS_PTR. Else, return false and store the charpos STOP into
2272 *CHARPOS_PTR, the corresponding bytepos into *BYTEPOS_PTR and the
2273 current nesting (as defined for state->incomment) in
2274 *INCOMMENT_PTR. Should the last character scanned in an incomplete
2275 comment be a possible first character of a two character construct,
2276 we store its SYNTAX_WITH_FLAGS into *last_syntax_ptr. Otherwise,
2277 we store Smax into *last_syntax_ptr.
2279 The comment end is the last character of the comment rather than the
2280 character just after the comment.
2282 Global syntax data is assumed to initially be valid for FROM and
2283 remains valid for forward search starting at the returned position. */
2285 static bool
2286 forw_comment (ptrdiff_t from, ptrdiff_t from_byte, ptrdiff_t stop,
2287 EMACS_INT nesting, int style, int prev_syntax,
2288 ptrdiff_t *charpos_ptr, ptrdiff_t *bytepos_ptr,
2289 EMACS_INT *incomment_ptr, int *last_syntax_ptr)
2291 unsigned short int quit_count = 0;
2292 int c, c1;
2293 enum syntaxcode code;
2294 int syntax, other_syntax;
2296 if (nesting <= 0) nesting = -1;
2298 /* Enter the loop in the middle so that we find
2299 a 2-char comment ender if we start in the middle of it. */
2300 syntax = prev_syntax;
2301 code = syntax & 0xff;
2302 if (syntax != 0 && from < stop) goto forw_incomment;
2304 while (1)
2306 if (from == stop)
2308 *incomment_ptr = nesting;
2309 *charpos_ptr = from;
2310 *bytepos_ptr = from_byte;
2311 *last_syntax_ptr =
2312 (code == Sescape || code == Scharquote
2313 || SYNTAX_FLAGS_COMEND_FIRST (syntax)
2314 || (nesting > 0
2315 && SYNTAX_FLAGS_COMSTART_FIRST (syntax)))
2316 ? syntax : Smax ;
2317 return 0;
2319 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2320 syntax = SYNTAX_WITH_FLAGS (c);
2321 code = syntax & 0xff;
2322 if (code == Sendcomment
2323 && SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0) == style
2324 && (SYNTAX_FLAGS_COMMENT_NESTED (syntax) ?
2325 (nesting > 0 && --nesting == 0) : nesting < 0)
2326 && !(Vcomment_end_can_be_escaped && char_quoted (from, from_byte)))
2327 /* We have encountered a comment end of the same style
2328 as the comment sequence which began this comment
2329 section. */
2330 break;
2331 if (code == Scomment_fence
2332 && style == ST_COMMENT_STYLE)
2333 /* We have encountered a comment end of the same style
2334 as the comment sequence which began this comment
2335 section. */
2336 break;
2337 if (nesting > 0
2338 && code == Scomment
2339 && SYNTAX_FLAGS_COMMENT_NESTED (syntax)
2340 && SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0) == style)
2341 /* We have encountered a nested comment of the same style
2342 as the comment sequence which began this comment section. */
2343 nesting++;
2344 INC_BOTH (from, from_byte);
2345 UPDATE_SYNTAX_TABLE_FORWARD (from);
2347 forw_incomment:
2348 if (from < stop && SYNTAX_FLAGS_COMEND_FIRST (syntax)
2349 && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
2350 other_syntax = SYNTAX_WITH_FLAGS (c1),
2351 SYNTAX_FLAGS_COMEND_SECOND (other_syntax))
2352 && SYNTAX_FLAGS_COMMENT_STYLE (syntax, other_syntax) == style
2353 && ((SYNTAX_FLAGS_COMMENT_NESTED (syntax) ||
2354 SYNTAX_FLAGS_COMMENT_NESTED (other_syntax))
2355 ? nesting > 0 : nesting < 0))
2357 syntax = Smax; /* So that "|#" (lisp) can not return
2358 the syntax of "#" in *last_syntax_ptr. */
2359 if (--nesting <= 0)
2360 /* We have encountered a comment end of the same style
2361 as the comment sequence which began this comment section. */
2362 break;
2363 else
2365 INC_BOTH (from, from_byte);
2366 UPDATE_SYNTAX_TABLE_FORWARD (from);
2369 if (nesting > 0
2370 && from < stop
2371 && SYNTAX_FLAGS_COMSTART_FIRST (syntax)
2372 && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
2373 other_syntax = SYNTAX_WITH_FLAGS (c1),
2374 SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax) == style
2375 && SYNTAX_FLAGS_COMSTART_SECOND (other_syntax))
2376 && (SYNTAX_FLAGS_COMMENT_NESTED (syntax) ||
2377 SYNTAX_FLAGS_COMMENT_NESTED (other_syntax)))
2378 /* We have encountered a nested comment of the same style
2379 as the comment sequence which began this comment section. */
2381 syntax = Smax; /* So that "#|#" isn't also a comment ender. */
2382 INC_BOTH (from, from_byte);
2383 UPDATE_SYNTAX_TABLE_FORWARD (from);
2384 nesting++;
2387 rarely_quit (++quit_count);
2389 *charpos_ptr = from;
2390 *bytepos_ptr = from_byte;
2391 *last_syntax_ptr = Smax; /* Any syntactic power the last byte had is
2392 used up. */
2393 return 1;
2396 DEFUN ("forward-comment", Fforward_comment, Sforward_comment, 1, 1, 0,
2397 doc: /*
2398 Move forward across up to COUNT comments. If COUNT is negative, move backward.
2399 Stop scanning if we find something other than a comment or whitespace.
2400 Set point to where scanning stops.
2401 If COUNT comments are found as expected, with nothing except whitespace
2402 between them, return t; otherwise return nil. */)
2403 (Lisp_Object count)
2405 ptrdiff_t from, from_byte, stop;
2406 int c, c1;
2407 enum syntaxcode code;
2408 int comstyle = 0; /* style of comment encountered */
2409 bool comnested = 0; /* whether the comment is nestable or not */
2410 bool found;
2411 EMACS_INT count1;
2412 ptrdiff_t out_charpos, out_bytepos;
2413 EMACS_INT dummy;
2414 int dummy2;
2415 unsigned short int quit_count = 0;
2417 CHECK_NUMBER (count);
2418 count1 = XINT (count);
2419 stop = count1 > 0 ? ZV : BEGV;
2421 from = PT;
2422 from_byte = PT_BYTE;
2424 SETUP_SYNTAX_TABLE (from, count1);
2425 while (count1 > 0)
2429 bool comstart_first;
2430 int syntax, other_syntax;
2432 if (from == stop)
2434 SET_PT_BOTH (from, from_byte);
2435 return Qnil;
2437 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2438 syntax = SYNTAX_WITH_FLAGS (c);
2439 code = SYNTAX (c);
2440 comstart_first = SYNTAX_FLAGS_COMSTART_FIRST (syntax);
2441 comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax);
2442 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0);
2443 INC_BOTH (from, from_byte);
2444 UPDATE_SYNTAX_TABLE_FORWARD (from);
2445 if (from < stop && comstart_first
2446 && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
2447 other_syntax = SYNTAX_WITH_FLAGS (c1),
2448 SYNTAX_FLAGS_COMSTART_SECOND (other_syntax)))
2450 /* We have encountered a comment start sequence and we
2451 are ignoring all text inside comments. We must record
2452 the comment style this sequence begins so that later,
2453 only a comment end of the same style actually ends
2454 the comment section. */
2455 code = Scomment;
2456 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax);
2457 comnested |= SYNTAX_FLAGS_COMMENT_NESTED (other_syntax);
2458 INC_BOTH (from, from_byte);
2459 UPDATE_SYNTAX_TABLE_FORWARD (from);
2461 rarely_quit (++quit_count);
2463 while (code == Swhitespace || (code == Sendcomment && c == '\n'));
2465 if (code == Scomment_fence)
2466 comstyle = ST_COMMENT_STYLE;
2467 else if (code != Scomment)
2469 DEC_BOTH (from, from_byte);
2470 SET_PT_BOTH (from, from_byte);
2471 return Qnil;
2473 /* We're at the start of a comment. */
2474 found = forw_comment (from, from_byte, stop, comnested, comstyle, 0,
2475 &out_charpos, &out_bytepos, &dummy, &dummy2);
2476 from = out_charpos; from_byte = out_bytepos;
2477 if (!found)
2479 SET_PT_BOTH (from, from_byte);
2480 return Qnil;
2482 INC_BOTH (from, from_byte);
2483 UPDATE_SYNTAX_TABLE_FORWARD (from);
2484 /* We have skipped one comment. */
2485 count1--;
2488 while (count1 < 0)
2490 while (true)
2492 if (from <= stop)
2494 SET_PT_BOTH (BEGV, BEGV_BYTE);
2495 return Qnil;
2498 DEC_BOTH (from, from_byte);
2499 /* char_quoted does UPDATE_SYNTAX_TABLE_BACKWARD (from). */
2500 bool quoted = char_quoted (from, from_byte);
2501 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2502 int syntax = SYNTAX_WITH_FLAGS (c);
2503 code = SYNTAX (c);
2504 comstyle = 0;
2505 comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax);
2506 if (code == Sendcomment)
2507 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0);
2508 if (from > stop && SYNTAX_FLAGS_COMEND_SECOND (syntax)
2509 && prev_char_comend_first (from, from_byte)
2510 && !char_quoted (from - 1, dec_bytepos (from_byte)))
2512 int other_syntax;
2513 /* We must record the comment style encountered so that
2514 later, we can match only the proper comment begin
2515 sequence of the same style. */
2516 DEC_BOTH (from, from_byte);
2517 code = Sendcomment;
2518 /* Calling char_quoted, above, set up global syntax position
2519 at the new value of FROM. */
2520 c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2521 other_syntax = SYNTAX_WITH_FLAGS (c1);
2522 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax);
2523 comnested |= SYNTAX_FLAGS_COMMENT_NESTED (other_syntax);
2526 if (code == Scomment_fence)
2528 /* Skip until first preceding unquoted comment_fence. */
2529 bool fence_found = 0;
2530 ptrdiff_t ini = from, ini_byte = from_byte;
2532 while (1)
2534 DEC_BOTH (from, from_byte);
2535 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2536 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2537 if (SYNTAX (c) == Scomment_fence
2538 && !char_quoted (from, from_byte))
2540 fence_found = 1;
2541 break;
2543 else if (from == stop)
2544 break;
2545 rarely_quit (++quit_count);
2547 if (fence_found == 0)
2549 from = ini; /* Set point to ini + 1. */
2550 from_byte = ini_byte;
2551 goto leave;
2553 else
2554 /* We have skipped one comment. */
2555 break;
2557 else if (code == Sendcomment)
2559 found = back_comment (from, from_byte, stop, comnested, comstyle,
2560 &out_charpos, &out_bytepos);
2561 if (!found)
2563 if (c == '\n')
2564 /* This end-of-line is not an end-of-comment.
2565 Treat it like a whitespace.
2566 CC-mode (and maybe others) relies on this behavior. */
2568 else
2570 /* Failure: we should go back to the end of this
2571 not-quite-endcomment. */
2572 if (SYNTAX (c) != code)
2573 /* It was a two-char Sendcomment. */
2574 INC_BOTH (from, from_byte);
2575 goto leave;
2578 else
2580 /* We have skipped one comment. */
2581 from = out_charpos, from_byte = out_bytepos;
2582 break;
2585 else if (code != Swhitespace || quoted)
2587 leave:
2588 INC_BOTH (from, from_byte);
2589 SET_PT_BOTH (from, from_byte);
2590 return Qnil;
2593 rarely_quit (++quit_count);
2596 count1++;
2599 SET_PT_BOTH (from, from_byte);
2600 return Qt;
2603 /* Return syntax code of character C if C is an ASCII character
2604 or if MULTIBYTE_SYMBOL_P is false. Otherwise, return Ssymbol. */
2606 static enum syntaxcode
2607 syntax_multibyte (int c, bool multibyte_symbol_p)
2609 return ASCII_CHAR_P (c) || !multibyte_symbol_p ? SYNTAX (c) : Ssymbol;
2612 static Lisp_Object
2613 scan_lists (EMACS_INT from, EMACS_INT count, EMACS_INT depth, bool sexpflag)
2615 Lisp_Object val;
2616 ptrdiff_t stop = count > 0 ? ZV : BEGV;
2617 int c, c1;
2618 int stringterm;
2619 bool quoted;
2620 bool mathexit = 0;
2621 enum syntaxcode code;
2622 EMACS_INT min_depth = depth; /* Err out if depth gets less than this. */
2623 int comstyle = 0; /* Style of comment encountered. */
2624 bool comnested = 0; /* Whether the comment is nestable or not. */
2625 ptrdiff_t temp_pos;
2626 EMACS_INT last_good = from;
2627 bool found;
2628 ptrdiff_t from_byte;
2629 ptrdiff_t out_bytepos, out_charpos;
2630 EMACS_INT dummy;
2631 int dummy2;
2632 bool multibyte_symbol_p = sexpflag && multibyte_syntax_as_symbol;
2633 unsigned short int quit_count = 0;
2635 if (depth > 0) min_depth = 0;
2637 if (from > ZV) from = ZV;
2638 if (from < BEGV) from = BEGV;
2640 from_byte = CHAR_TO_BYTE (from);
2642 maybe_quit ();
2644 SETUP_SYNTAX_TABLE (from, count);
2645 while (count > 0)
2647 while (from < stop)
2649 rarely_quit (++quit_count);
2650 bool comstart_first, prefix;
2651 int syntax, other_syntax;
2652 UPDATE_SYNTAX_TABLE_FORWARD (from);
2653 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2654 syntax = SYNTAX_WITH_FLAGS (c);
2655 code = syntax_multibyte (c, multibyte_symbol_p);
2656 comstart_first = SYNTAX_FLAGS_COMSTART_FIRST (syntax);
2657 comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax);
2658 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0);
2659 prefix = SYNTAX_FLAGS_PREFIX (syntax);
2660 if (depth == min_depth)
2661 last_good = from;
2662 INC_BOTH (from, from_byte);
2663 UPDATE_SYNTAX_TABLE_FORWARD (from);
2664 if (from < stop && comstart_first
2665 && (c = FETCH_CHAR_AS_MULTIBYTE (from_byte),
2666 other_syntax = SYNTAX_WITH_FLAGS (c),
2667 SYNTAX_FLAGS_COMSTART_SECOND (other_syntax))
2668 && parse_sexp_ignore_comments)
2670 /* We have encountered a comment start sequence and we
2671 are ignoring all text inside comments. We must record
2672 the comment style this sequence begins so that later,
2673 only a comment end of the same style actually ends
2674 the comment section. */
2675 code = Scomment;
2676 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax);
2677 comnested |= SYNTAX_FLAGS_COMMENT_NESTED (other_syntax);
2678 INC_BOTH (from, from_byte);
2679 UPDATE_SYNTAX_TABLE_FORWARD (from);
2682 if (prefix)
2683 continue;
2685 switch (code)
2687 case Sescape:
2688 case Scharquote:
2689 if (from == stop)
2690 goto lose;
2691 INC_BOTH (from, from_byte);
2692 /* Treat following character as a word constituent. */
2693 case Sword:
2694 case Ssymbol:
2695 if (depth || !sexpflag) break;
2696 /* This word counts as a sexp; return at end of it. */
2697 while (from < stop)
2699 UPDATE_SYNTAX_TABLE_FORWARD (from);
2701 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2702 switch (syntax_multibyte (c, multibyte_symbol_p))
2704 case Scharquote:
2705 case Sescape:
2706 INC_BOTH (from, from_byte);
2707 if (from == stop)
2708 goto lose;
2709 break;
2710 case Sword:
2711 case Ssymbol:
2712 case Squote:
2713 break;
2714 default:
2715 goto done;
2717 INC_BOTH (from, from_byte);
2718 rarely_quit (++quit_count);
2720 goto done;
2722 case Scomment_fence:
2723 comstyle = ST_COMMENT_STYLE;
2724 /* FALLTHROUGH */
2725 case Scomment:
2726 if (!parse_sexp_ignore_comments) break;
2727 UPDATE_SYNTAX_TABLE_FORWARD (from);
2728 found = forw_comment (from, from_byte, stop,
2729 comnested, comstyle, 0,
2730 &out_charpos, &out_bytepos, &dummy,
2731 &dummy2);
2732 from = out_charpos, from_byte = out_bytepos;
2733 if (!found)
2735 if (depth == 0)
2736 goto done;
2737 goto lose;
2739 INC_BOTH (from, from_byte);
2740 UPDATE_SYNTAX_TABLE_FORWARD (from);
2741 break;
2743 case Smath:
2744 if (!sexpflag)
2745 break;
2746 if (from != stop && c == FETCH_CHAR_AS_MULTIBYTE (from_byte))
2748 INC_BOTH (from, from_byte);
2750 if (mathexit)
2752 mathexit = 0;
2753 goto close1;
2755 mathexit = 1;
2757 case Sopen:
2758 if (!++depth) goto done;
2759 break;
2761 case Sclose:
2762 close1:
2763 if (!--depth) goto done;
2764 if (depth < min_depth)
2765 xsignal3 (Qscan_error,
2766 build_string ("Containing expression ends prematurely"),
2767 make_number (last_good), make_number (from));
2768 break;
2770 case Sstring:
2771 case Sstring_fence:
2772 temp_pos = dec_bytepos (from_byte);
2773 stringterm = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
2774 while (1)
2776 enum syntaxcode c_code;
2777 if (from >= stop)
2778 goto lose;
2779 UPDATE_SYNTAX_TABLE_FORWARD (from);
2780 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2781 c_code = syntax_multibyte (c, multibyte_symbol_p);
2782 if (code == Sstring
2783 ? c == stringterm && c_code == Sstring
2784 : c_code == Sstring_fence)
2785 break;
2787 if (c_code == Scharquote || c_code == Sescape)
2788 INC_BOTH (from, from_byte);
2789 INC_BOTH (from, from_byte);
2790 rarely_quit (++quit_count);
2792 INC_BOTH (from, from_byte);
2793 if (!depth && sexpflag) goto done;
2794 break;
2795 default:
2796 /* Ignore whitespace, punctuation, quote, endcomment. */
2797 break;
2801 /* Reached end of buffer. Error if within object, return nil if between */
2802 if (depth)
2803 goto lose;
2805 return Qnil;
2807 /* End of object reached */
2808 done:
2809 count--;
2812 while (count < 0)
2814 while (from > stop)
2816 rarely_quit (++quit_count);
2817 DEC_BOTH (from, from_byte);
2818 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2819 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2820 int syntax = SYNTAX_WITH_FLAGS (c);
2821 code = syntax_multibyte (c, multibyte_symbol_p);
2822 if (depth == min_depth)
2823 last_good = from;
2824 comstyle = 0;
2825 comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax);
2826 if (code == Sendcomment)
2827 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0);
2828 if (from > stop && SYNTAX_FLAGS_COMEND_SECOND (syntax)
2829 && prev_char_comend_first (from, from_byte)
2830 && parse_sexp_ignore_comments)
2832 /* We must record the comment style encountered so that
2833 later, we can match only the proper comment begin
2834 sequence of the same style. */
2835 int c2, other_syntax;
2836 DEC_BOTH (from, from_byte);
2837 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2838 code = Sendcomment;
2839 c2 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2840 other_syntax = SYNTAX_WITH_FLAGS (c2);
2841 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax);
2842 comnested |= SYNTAX_FLAGS_COMMENT_NESTED (other_syntax);
2845 /* Quoting turns anything except a comment-ender
2846 into a word character. Note that this cannot be true
2847 if we decremented FROM in the if-statement above. */
2848 if (code != Sendcomment && char_quoted (from, from_byte))
2850 DEC_BOTH (from, from_byte);
2851 code = Sword;
2853 else if (SYNTAX_FLAGS_PREFIX (syntax))
2854 continue;
2856 switch (code)
2858 case Sword:
2859 case Ssymbol:
2860 case Sescape:
2861 case Scharquote:
2862 if (depth || !sexpflag) break;
2863 /* This word counts as a sexp; count object finished
2864 after passing it. */
2865 while (from > stop)
2867 temp_pos = from_byte;
2868 if (! NILP (BVAR (current_buffer, enable_multibyte_characters)))
2869 DEC_POS (temp_pos);
2870 else
2871 temp_pos--;
2872 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
2873 c1 = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
2874 /* Don't allow comment-end to be quoted. */
2875 if (syntax_multibyte (c1, multibyte_symbol_p) == Sendcomment)
2876 goto done2;
2877 quoted = char_quoted (from - 1, temp_pos);
2878 if (quoted)
2880 DEC_BOTH (from, from_byte);
2881 temp_pos = dec_bytepos (temp_pos);
2882 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
2884 c1 = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
2885 if (! quoted)
2886 switch (syntax_multibyte (c1, multibyte_symbol_p))
2888 case Sword: case Ssymbol: case Squote: break;
2889 default: goto done2;
2891 DEC_BOTH (from, from_byte);
2892 rarely_quit (++quit_count);
2894 goto done2;
2896 case Smath:
2897 if (!sexpflag)
2898 break;
2899 if (from > BEGV)
2901 temp_pos = dec_bytepos (from_byte);
2902 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
2903 if (from != stop && c == FETCH_CHAR_AS_MULTIBYTE (temp_pos))
2904 DEC_BOTH (from, from_byte);
2906 if (mathexit)
2908 mathexit = 0;
2909 goto open2;
2911 mathexit = 1;
2913 case Sclose:
2914 if (!++depth) goto done2;
2915 break;
2917 case Sopen:
2918 open2:
2919 if (!--depth) goto done2;
2920 if (depth < min_depth)
2921 xsignal3 (Qscan_error,
2922 build_string ("Containing expression ends prematurely"),
2923 make_number (last_good), make_number (from));
2924 break;
2926 case Sendcomment:
2927 if (!parse_sexp_ignore_comments)
2928 break;
2929 found = back_comment (from, from_byte, stop, comnested, comstyle,
2930 &out_charpos, &out_bytepos);
2931 /* FIXME: if !found, it really wasn't a comment-end.
2932 For single-char Sendcomment, we can't do much about it apart
2933 from skipping the char.
2934 For 2-char endcomments, we could try again, taking both
2935 chars as separate entities, but it's a lot of trouble
2936 for very little gain, so we don't bother either. -sm */
2937 if (found)
2938 from = out_charpos, from_byte = out_bytepos;
2939 break;
2941 case Scomment_fence:
2942 case Sstring_fence:
2943 while (1)
2945 if (from == stop)
2946 goto lose;
2947 DEC_BOTH (from, from_byte);
2948 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2949 if (!char_quoted (from, from_byte))
2951 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2952 if (syntax_multibyte (c, multibyte_symbol_p) == code)
2953 break;
2955 rarely_quit (++quit_count);
2957 if (code == Sstring_fence && !depth && sexpflag) goto done2;
2958 break;
2960 case Sstring:
2961 stringterm = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2962 while (true)
2964 if (from == stop)
2965 goto lose;
2966 DEC_BOTH (from, from_byte);
2967 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2968 if (!char_quoted (from, from_byte))
2970 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2971 if (c == stringterm
2972 && (syntax_multibyte (c, multibyte_symbol_p)
2973 == Sstring))
2974 break;
2976 rarely_quit (++quit_count);
2978 if (!depth && sexpflag) goto done2;
2979 break;
2980 default:
2981 /* Ignore whitespace, punctuation, quote, endcomment. */
2982 break;
2986 /* Reached start of buffer. Error if within object, return nil if between */
2987 if (depth)
2988 goto lose;
2990 return Qnil;
2992 done2:
2993 count++;
2997 XSETFASTINT (val, from);
2998 return val;
3000 lose:
3001 xsignal3 (Qscan_error,
3002 build_string ("Unbalanced parentheses"),
3003 make_number (last_good), make_number (from));
3006 DEFUN ("scan-lists", Fscan_lists, Sscan_lists, 3, 3, 0,
3007 doc: /* Scan from character number FROM by COUNT lists.
3008 Scan forward if COUNT is positive, backward if COUNT is negative.
3009 Return the character number of the position thus found.
3011 A \"list", in this context, refers to a balanced parenthetical
3012 grouping, as determined by the syntax table.
3014 If DEPTH is nonzero, treat that as the nesting depth of the starting
3015 point (i.e. the starting point is DEPTH parentheses deep). This
3016 function scans over parentheses until the depth goes to zero COUNT
3017 times. Hence, positive DEPTH moves out that number of levels of
3018 parentheses, while negative DEPTH moves to a deeper level.
3020 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
3022 If we reach the beginning or end of the accessible part of the buffer
3023 before we have scanned over COUNT lists, return nil if the depth at
3024 that point is zero, and signal a error if the depth is nonzero. */)
3025 (Lisp_Object from, Lisp_Object count, Lisp_Object depth)
3027 CHECK_NUMBER (from);
3028 CHECK_NUMBER (count);
3029 CHECK_NUMBER (depth);
3031 return scan_lists (XINT (from), XINT (count), XINT (depth), 0);
3034 DEFUN ("scan-sexps", Fscan_sexps, Sscan_sexps, 2, 2, 0,
3035 doc: /* Scan from character number FROM by COUNT balanced expressions.
3036 If COUNT is negative, scan backwards.
3037 Returns the character number of the position thus found.
3039 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
3041 If the beginning or end of (the accessible part of) the buffer is reached
3042 in the middle of a parenthetical grouping, an error is signaled.
3043 If the beginning or end is reached between groupings
3044 but before count is used up, nil is returned. */)
3045 (Lisp_Object from, Lisp_Object count)
3047 CHECK_NUMBER (from);
3048 CHECK_NUMBER (count);
3050 return scan_lists (XINT (from), XINT (count), 0, 1);
3053 DEFUN ("backward-prefix-chars", Fbackward_prefix_chars, Sbackward_prefix_chars,
3054 0, 0, 0,
3055 doc: /* Move point backward over any number of chars with prefix syntax.
3056 This includes chars with expression prefix syntax class (\\=') and those with
3057 the prefix syntax flag (p). */)
3058 (void)
3060 ptrdiff_t beg = BEGV;
3061 ptrdiff_t opoint = PT;
3062 ptrdiff_t opoint_byte = PT_BYTE;
3063 ptrdiff_t pos = PT;
3064 ptrdiff_t pos_byte = PT_BYTE;
3065 int c;
3067 if (pos <= beg)
3069 SET_PT_BOTH (opoint, opoint_byte);
3071 return Qnil;
3074 SETUP_SYNTAX_TABLE (pos, -1);
3076 DEC_BOTH (pos, pos_byte);
3078 while (!char_quoted (pos, pos_byte)
3079 /* Previous statement updates syntax table. */
3080 && ((c = FETCH_CHAR_AS_MULTIBYTE (pos_byte), SYNTAX (c) == Squote)
3081 || syntax_prefix_flag_p (c)))
3083 opoint = pos;
3084 opoint_byte = pos_byte;
3086 if (pos <= beg)
3087 break;
3088 DEC_BOTH (pos, pos_byte);
3089 rarely_quit (pos);
3092 SET_PT_BOTH (opoint, opoint_byte);
3094 return Qnil;
3098 /* If the character at FROM_BYTE is the second part of a 2-character
3099 comment opener based on PREV_FROM_SYNTAX, update STATE and return
3100 true. */
3101 static bool
3102 in_2char_comment_start (struct lisp_parse_state *state,
3103 int prev_from_syntax,
3104 ptrdiff_t prev_from,
3105 ptrdiff_t from_byte)
3107 int c1, syntax;
3108 if (SYNTAX_FLAGS_COMSTART_FIRST (prev_from_syntax)
3109 && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
3110 syntax = SYNTAX_WITH_FLAGS (c1),
3111 SYNTAX_FLAGS_COMSTART_SECOND (syntax)))
3113 /* Record the comment style we have entered so that only
3114 the comment-end sequence of the same style actually
3115 terminates the comment section. */
3116 state->comstyle
3117 = SYNTAX_FLAGS_COMMENT_STYLE (syntax, prev_from_syntax);
3118 bool comnested = (SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax)
3119 | SYNTAX_FLAGS_COMMENT_NESTED (syntax));
3120 state->incomment = comnested ? 1 : -1;
3121 state->comstr_start = prev_from;
3122 return true;
3124 return false;
3127 /* Parse forward from FROM / FROM_BYTE to END,
3128 assuming that FROM has state STATE,
3129 and return a description of the state of the parse at END.
3130 If STOPBEFORE, stop at the start of an atom.
3131 If COMMENTSTOP is 1, stop at the start of a comment.
3132 If COMMENTSTOP is -1, stop at the start or end of a comment,
3133 after the beginning of a string, or after the end of a string. */
3135 static void
3136 scan_sexps_forward (struct lisp_parse_state *state,
3137 ptrdiff_t from, ptrdiff_t from_byte, ptrdiff_t end,
3138 EMACS_INT targetdepth, bool stopbefore,
3139 int commentstop)
3141 enum syntaxcode code;
3142 struct level { ptrdiff_t last, prev; };
3143 struct level levelstart[100];
3144 struct level *curlevel = levelstart;
3145 struct level *endlevel = levelstart + 100;
3146 EMACS_INT depth; /* Paren depth of current scanning location.
3147 level - levelstart equals this except
3148 when the depth becomes negative. */
3149 EMACS_INT mindepth; /* Lowest DEPTH value seen. */
3150 bool start_quoted = 0; /* True means starting after a char quote. */
3151 Lisp_Object tem;
3152 ptrdiff_t prev_from; /* Keep one character before FROM. */
3153 ptrdiff_t prev_from_byte;
3154 int prev_from_syntax, prev_prev_from_syntax;
3155 bool boundary_stop = commentstop == -1;
3156 bool nofence;
3157 bool found;
3158 ptrdiff_t out_bytepos, out_charpos;
3159 int temp;
3160 unsigned short int quit_count = 0;
3162 prev_from = from;
3163 prev_from_byte = from_byte;
3164 if (from != BEGV)
3165 DEC_BOTH (prev_from, prev_from_byte);
3167 /* Use this macro instead of `from++'. */
3168 #define INC_FROM \
3169 do { prev_from = from; \
3170 prev_from_byte = from_byte; \
3171 temp = FETCH_CHAR_AS_MULTIBYTE (prev_from_byte); \
3172 prev_prev_from_syntax = prev_from_syntax; \
3173 prev_from_syntax = SYNTAX_WITH_FLAGS (temp); \
3174 INC_BOTH (from, from_byte); \
3175 if (from < end) \
3176 UPDATE_SYNTAX_TABLE_FORWARD (from); \
3177 } while (0)
3179 maybe_quit ();
3181 depth = state->depth;
3182 start_quoted = state->quoted;
3183 prev_prev_from_syntax = Smax;
3184 prev_from_syntax = state->prev_syntax;
3186 tem = state->levelstarts;
3187 while (!NILP (tem)) /* >= second enclosing sexps. */
3189 Lisp_Object temhd = Fcar (tem);
3190 if (RANGED_INTEGERP (PTRDIFF_MIN, temhd, PTRDIFF_MAX))
3191 curlevel->last = XINT (temhd);
3192 if (++curlevel == endlevel)
3193 curlevel--; /* error ("Nesting too deep for parser"); */
3194 curlevel->prev = -1;
3195 curlevel->last = -1;
3196 tem = Fcdr (tem);
3198 curlevel->prev = -1;
3199 curlevel->last = -1;
3201 state->quoted = 0;
3202 mindepth = depth;
3204 SETUP_SYNTAX_TABLE (from, 1);
3206 /* Enter the loop at a place appropriate for initial state. */
3208 if (state->incomment)
3209 goto startincomment;
3210 if (state->instring >= 0)
3212 nofence = state->instring != ST_STRING_STYLE;
3213 if (start_quoted)
3214 goto startquotedinstring;
3215 goto startinstring;
3217 else if (start_quoted)
3218 goto startquoted;
3219 else if ((from < end)
3220 && (in_2char_comment_start (state, prev_from_syntax,
3221 prev_from, from_byte)))
3223 INC_FROM;
3224 prev_from_syntax = Smax; /* the syntax has already been "used up". */
3225 goto atcomment;
3228 while (from < end)
3230 rarely_quit (++quit_count);
3231 INC_FROM;
3233 if ((from < end)
3234 && (in_2char_comment_start (state, prev_from_syntax,
3235 prev_from, from_byte)))
3237 INC_FROM;
3238 prev_from_syntax = Smax; /* the syntax has already been "used up". */
3239 goto atcomment;
3242 if (SYNTAX_FLAGS_PREFIX (prev_from_syntax))
3243 continue;
3244 code = prev_from_syntax & 0xff;
3245 switch (code)
3247 case Sescape:
3248 case Scharquote:
3249 if (stopbefore) goto stop; /* this arg means stop at sexp start */
3250 curlevel->last = prev_from;
3251 startquoted:
3252 if (from == end) goto endquoted;
3253 INC_FROM;
3254 goto symstarted;
3255 /* treat following character as a word constituent */
3256 case Sword:
3257 case Ssymbol:
3258 if (stopbefore) goto stop; /* this arg means stop at sexp start */
3259 curlevel->last = prev_from;
3260 symstarted:
3261 while (from < end)
3263 if (in_2char_comment_start (state, prev_from_syntax,
3264 prev_from, from_byte))
3266 INC_FROM;
3267 prev_from_syntax = Smax; /* the syntax has already been "used up". */
3268 goto atcomment;
3271 int symchar = FETCH_CHAR_AS_MULTIBYTE (from_byte);
3272 switch (SYNTAX (symchar))
3274 case Scharquote:
3275 case Sescape:
3276 INC_FROM;
3277 if (from == end) goto endquoted;
3278 break;
3279 case Sword:
3280 case Ssymbol:
3281 case Squote:
3282 break;
3283 default:
3284 goto symdone;
3286 INC_FROM;
3287 rarely_quit (++quit_count);
3289 symdone:
3290 curlevel->prev = curlevel->last;
3291 break;
3293 case Scomment_fence:
3294 /* Record the comment style we have entered so that only
3295 the comment-end sequence of the same style actually
3296 terminates the comment section. */
3297 state->comstyle = ST_COMMENT_STYLE;
3298 state->incomment = -1;
3299 state->comstr_start = prev_from;
3300 goto atcomment;
3301 case Scomment:
3302 state->comstyle = SYNTAX_FLAGS_COMMENT_STYLE (prev_from_syntax, 0);
3303 state->incomment = (SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax) ?
3304 1 : -1);
3305 state->comstr_start = prev_from;
3306 atcomment:
3307 if (commentstop || boundary_stop) goto done;
3308 startincomment:
3309 /* The (from == BEGV) test was to enter the loop in the middle so
3310 that we find a 2-char comment ender even if we start in the
3311 middle of it. We don't want to do that if we're just at the
3312 beginning of the comment (think of (*) ... (*)). */
3313 found = forw_comment (from, from_byte, end,
3314 state->incomment, state->comstyle,
3315 from == BEGV ? 0 : prev_from_syntax,
3316 &out_charpos, &out_bytepos, &state->incomment,
3317 &prev_from_syntax);
3318 from = out_charpos; from_byte = out_bytepos;
3319 /* Beware! prev_from and friends (except prev_from_syntax)
3320 are invalid now. Luckily, the `done' doesn't use them
3321 and the INC_FROM sets them to a sane value without
3322 looking at them. */
3323 if (!found) goto done;
3324 INC_FROM;
3325 state->incomment = 0;
3326 state->comstyle = 0; /* reset the comment style */
3327 prev_from_syntax = Smax; /* For the comment closer */
3328 if (boundary_stop) goto done;
3329 break;
3331 case Sopen:
3332 if (stopbefore) goto stop; /* this arg means stop at sexp start */
3333 depth++;
3334 /* curlevel++->last ran into compiler bug on Apollo */
3335 curlevel->last = prev_from;
3336 if (++curlevel == endlevel)
3337 curlevel--; /* error ("Nesting too deep for parser"); */
3338 curlevel->prev = -1;
3339 curlevel->last = -1;
3340 if (targetdepth == depth) goto done;
3341 break;
3343 case Sclose:
3344 depth--;
3345 if (depth < mindepth)
3346 mindepth = depth;
3347 if (curlevel != levelstart)
3348 curlevel--;
3349 curlevel->prev = curlevel->last;
3350 if (targetdepth == depth) goto done;
3351 break;
3353 case Sstring:
3354 case Sstring_fence:
3355 state->comstr_start = from - 1;
3356 if (stopbefore) goto stop; /* this arg means stop at sexp start */
3357 curlevel->last = prev_from;
3358 state->instring = (code == Sstring
3359 ? (FETCH_CHAR_AS_MULTIBYTE (prev_from_byte))
3360 : ST_STRING_STYLE);
3361 if (boundary_stop) goto done;
3362 startinstring:
3364 nofence = state->instring != ST_STRING_STYLE;
3366 while (1)
3368 int c;
3369 enum syntaxcode c_code;
3371 if (from >= end) goto done;
3372 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
3373 c_code = SYNTAX (c);
3375 /* Check C_CODE here so that if the char has
3376 a syntax-table property which says it is NOT
3377 a string character, it does not end the string. */
3378 if (nofence && c == state->instring && c_code == Sstring)
3379 break;
3381 switch (c_code)
3383 case Sstring_fence:
3384 if (!nofence) goto string_end;
3385 break;
3387 case Scharquote:
3388 case Sescape:
3389 INC_FROM;
3390 startquotedinstring:
3391 if (from >= end) goto endquoted;
3392 break;
3394 default:
3395 break;
3397 INC_FROM;
3398 rarely_quit (++quit_count);
3401 string_end:
3402 state->instring = -1;
3403 curlevel->prev = curlevel->last;
3404 INC_FROM;
3405 if (boundary_stop) goto done;
3406 break;
3408 case Smath:
3409 /* FIXME: We should do something with it. */
3410 break;
3411 default:
3412 /* Ignore whitespace, punctuation, quote, endcomment. */
3413 break;
3416 goto done;
3418 stop: /* Here if stopping before start of sexp. */
3419 from = prev_from; /* We have just fetched the char that starts it; */
3420 from_byte = prev_from_byte;
3421 prev_from_syntax = prev_prev_from_syntax;
3422 goto done; /* but return the position before it. */
3424 endquoted:
3425 state->quoted = 1;
3426 done:
3427 state->depth = depth;
3428 state->mindepth = mindepth;
3429 state->thislevelstart = curlevel->prev;
3430 state->prevlevelstart
3431 = (curlevel == levelstart) ? -1 : (curlevel - 1)->last;
3432 state->location = from;
3433 state->location_byte = from_byte;
3434 state->levelstarts = Qnil;
3435 while (curlevel > levelstart)
3436 state->levelstarts = Fcons (make_number ((--curlevel)->last),
3437 state->levelstarts);
3438 state->prev_syntax = (SYNTAX_FLAGS_COMSTARTEND_FIRST (prev_from_syntax)
3439 || state->quoted) ? prev_from_syntax : Smax;
3442 /* Convert a (lisp) parse state to the internal form used in
3443 scan_sexps_forward. */
3444 static void
3445 internalize_parse_state (Lisp_Object external, struct lisp_parse_state *state)
3447 Lisp_Object tem;
3449 if (NILP (external))
3451 state->depth = 0;
3452 state->instring = -1;
3453 state->incomment = 0;
3454 state->quoted = 0;
3455 state->comstyle = 0; /* comment style a by default. */
3456 state->comstr_start = -1; /* no comment/string seen. */
3457 state->levelstarts = Qnil;
3458 state->prev_syntax = Smax;
3460 else
3462 tem = Fcar (external);
3463 if (!NILP (tem))
3464 state->depth = XINT (tem);
3465 else
3466 state->depth = 0;
3468 external = Fcdr (external);
3469 external = Fcdr (external);
3470 external = Fcdr (external);
3471 tem = Fcar (external);
3472 /* Check whether we are inside string_fence-style string: */
3473 state->instring = (!NILP (tem)
3474 ? (CHARACTERP (tem) ? XFASTINT (tem) : ST_STRING_STYLE)
3475 : -1);
3477 external = Fcdr (external);
3478 tem = Fcar (external);
3479 state->incomment = (!NILP (tem)
3480 ? (INTEGERP (tem) ? XINT (tem) : -1)
3481 : 0);
3483 external = Fcdr (external);
3484 tem = Fcar (external);
3485 state->quoted = !NILP (tem);
3487 /* if the eighth element of the list is nil, we are in comment
3488 style a. If it is non-nil, we are in comment style b */
3489 external = Fcdr (external);
3490 external = Fcdr (external);
3491 tem = Fcar (external);
3492 state->comstyle = (NILP (tem)
3494 : (RANGED_INTEGERP (0, tem, ST_COMMENT_STYLE)
3495 ? XINT (tem)
3496 : ST_COMMENT_STYLE));
3498 external = Fcdr (external);
3499 tem = Fcar (external);
3500 state->comstr_start =
3501 RANGED_INTEGERP (PTRDIFF_MIN, tem, PTRDIFF_MAX) ? XINT (tem) : -1;
3502 external = Fcdr (external);
3503 tem = Fcar (external);
3504 state->levelstarts = tem;
3506 external = Fcdr (external);
3507 tem = Fcar (external);
3508 state->prev_syntax = NILP (tem) ? Smax : XINT (tem);
3512 DEFUN ("parse-partial-sexp", Fparse_partial_sexp, Sparse_partial_sexp, 2, 6, 0,
3513 doc: /* Parse Lisp syntax starting at FROM until TO; return status of parse at TO.
3514 Parsing stops at TO or when certain criteria are met;
3515 point is set to where parsing stops.
3516 If fifth arg OLDSTATE is omitted or nil,
3517 parsing assumes that FROM is the beginning of a function.
3519 Value is a list of elements describing final state of parsing:
3520 0. depth in parens.
3521 1. character address of start of innermost containing list; nil if none.
3522 2. character address of start of last complete sexp terminated.
3523 3. non-nil if inside a string.
3524 (it is the character that will terminate the string,
3525 or t if the string should be terminated by a generic string delimiter.)
3526 4. nil if outside a comment, t if inside a non-nestable comment,
3527 else an integer (the current comment nesting).
3528 5. t if following a quote character.
3529 6. the minimum paren-depth encountered during this scan.
3530 7. style of comment, if any.
3531 8. character address of start of comment or string; nil if not in one.
3532 9. List of positions of currently open parens, outermost first.
3533 10. When the last position scanned holds the first character of a
3534 (potential) two character construct, the syntax of that position,
3535 otherwise nil. That construct can be a two character comment
3536 delimiter or an Escaped or Char-quoted character.
3537 11..... Possible further internal information used by `parse-partial-sexp'.
3539 If third arg TARGETDEPTH is non-nil, parsing stops if the depth
3540 in parentheses becomes equal to TARGETDEPTH.
3541 Fourth arg STOPBEFORE non-nil means stop when we come to
3542 any character that starts a sexp.
3543 Fifth arg OLDSTATE is a list like what this function returns.
3544 It is used to initialize the state of the parse. Elements number 1, 2, 6
3545 are ignored.
3546 Sixth arg COMMENTSTOP non-nil means stop after the start of a comment.
3547 If it is the symbol `syntax-table', stop after the start of a comment or a
3548 string, or after end of a comment or a string. */)
3549 (Lisp_Object from, Lisp_Object to, Lisp_Object targetdepth,
3550 Lisp_Object stopbefore, Lisp_Object oldstate, Lisp_Object commentstop)
3552 struct lisp_parse_state state;
3553 EMACS_INT target;
3555 if (!NILP (targetdepth))
3557 CHECK_NUMBER (targetdepth);
3558 target = XINT (targetdepth);
3560 else
3561 target = TYPE_MINIMUM (EMACS_INT); /* We won't reach this depth. */
3563 validate_region (&from, &to);
3564 internalize_parse_state (oldstate, &state);
3565 scan_sexps_forward (&state, XINT (from), CHAR_TO_BYTE (XINT (from)),
3566 XINT (to),
3567 target, !NILP (stopbefore),
3568 (NILP (commentstop)
3569 ? 0 : (EQ (commentstop, Qsyntax_table) ? -1 : 1)));
3571 SET_PT_BOTH (state.location, state.location_byte);
3573 return
3574 Fcons (make_number (state.depth),
3575 Fcons (state.prevlevelstart < 0
3576 ? Qnil : make_number (state.prevlevelstart),
3577 Fcons (state.thislevelstart < 0
3578 ? Qnil : make_number (state.thislevelstart),
3579 Fcons (state.instring >= 0
3580 ? (state.instring == ST_STRING_STYLE
3581 ? Qt : make_number (state.instring)) : Qnil,
3582 Fcons (state.incomment < 0 ? Qt :
3583 (state.incomment == 0 ? Qnil :
3584 make_number (state.incomment)),
3585 Fcons (state.quoted ? Qt : Qnil,
3586 Fcons (make_number (state.mindepth),
3587 Fcons ((state.comstyle
3588 ? (state.comstyle == ST_COMMENT_STYLE
3589 ? Qsyntax_table
3590 : make_number (state.comstyle))
3591 : Qnil),
3592 Fcons (((state.incomment
3593 || (state.instring >= 0))
3594 ? make_number (state.comstr_start)
3595 : Qnil),
3596 Fcons (state.levelstarts,
3597 Fcons (state.prev_syntax == Smax
3598 ? Qnil
3599 : make_number (state.prev_syntax),
3600 Qnil)))))))))));
3603 void
3604 init_syntax_once (void)
3606 register int i, c;
3607 Lisp_Object temp;
3609 /* This has to be done here, before we call Fmake_char_table. */
3610 DEFSYM (Qsyntax_table, "syntax-table");
3612 /* Create objects which can be shared among syntax tables. */
3613 Vsyntax_code_object = make_uninit_vector (Smax);
3614 for (i = 0; i < Smax; i++)
3615 ASET (Vsyntax_code_object, i, Fcons (make_number (i), Qnil));
3617 /* Now we are ready to set up this property, so we can
3618 create syntax tables. */
3619 Fput (Qsyntax_table, Qchar_table_extra_slots, make_number (0));
3621 temp = AREF (Vsyntax_code_object, Swhitespace);
3623 Vstandard_syntax_table = Fmake_char_table (Qsyntax_table, temp);
3625 /* Control characters should not be whitespace. */
3626 temp = AREF (Vsyntax_code_object, Spunct);
3627 for (i = 0; i <= ' ' - 1; i++)
3628 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3629 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, 0177, temp);
3631 /* Except that a few really are whitespace. */
3632 temp = AREF (Vsyntax_code_object, Swhitespace);
3633 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ' ', temp);
3634 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\t', temp);
3635 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\n', temp);
3636 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, 015, temp);
3637 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, 014, temp);
3639 temp = AREF (Vsyntax_code_object, Sword);
3640 for (i = 'a'; i <= 'z'; i++)
3641 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3642 for (i = 'A'; i <= 'Z'; i++)
3643 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3644 for (i = '0'; i <= '9'; i++)
3645 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3647 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '$', temp);
3648 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '%', temp);
3650 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '(',
3651 Fcons (make_number (Sopen), make_number (')')));
3652 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ')',
3653 Fcons (make_number (Sclose), make_number ('(')));
3654 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '[',
3655 Fcons (make_number (Sopen), make_number (']')));
3656 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ']',
3657 Fcons (make_number (Sclose), make_number ('[')));
3658 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '{',
3659 Fcons (make_number (Sopen), make_number ('}')));
3660 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '}',
3661 Fcons (make_number (Sclose), make_number ('{')));
3662 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '"',
3663 Fcons (make_number (Sstring), Qnil));
3664 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\\',
3665 Fcons (make_number (Sescape), Qnil));
3667 temp = AREF (Vsyntax_code_object, Ssymbol);
3668 for (i = 0; i < 10; i++)
3670 c = "_-+*/&|<>="[i];
3671 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
3674 temp = AREF (Vsyntax_code_object, Spunct);
3675 for (i = 0; i < 12; i++)
3677 c = ".,;:?!#@~^'`"[i];
3678 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
3681 /* All multibyte characters have syntax `word' by default. */
3682 temp = AREF (Vsyntax_code_object, Sword);
3683 char_table_set_range (Vstandard_syntax_table, 0x80, MAX_CHAR, temp);
3686 void
3687 syms_of_syntax (void)
3689 DEFSYM (Qsyntax_table_p, "syntax-table-p");
3691 staticpro (&Vsyntax_code_object);
3693 staticpro (&gl_state.object);
3694 staticpro (&gl_state.global_code);
3695 staticpro (&gl_state.current_syntax_table);
3696 staticpro (&gl_state.old_prop);
3698 /* Defined in regex.c. */
3699 staticpro (&re_match_object);
3701 DEFSYM (Qscan_error, "scan-error");
3702 Fput (Qscan_error, Qerror_conditions,
3703 listn (CONSTYPE_PURE, 2, Qscan_error, Qerror));
3704 Fput (Qscan_error, Qerror_message,
3705 build_pure_c_string ("Scan error"));
3707 DEFVAR_BOOL ("parse-sexp-ignore-comments", parse_sexp_ignore_comments,
3708 doc: /* Non-nil means `forward-sexp', etc., should treat comments as whitespace. */);
3710 DEFVAR_BOOL ("parse-sexp-lookup-properties", parse_sexp_lookup_properties,
3711 doc: /* Non-nil means `forward-sexp', etc., obey `syntax-table' property.
3712 Otherwise, that text property is simply ignored.
3713 See the info node `(elisp)Syntax Properties' for a description of the
3714 `syntax-table' property. */);
3716 DEFVAR_INT ("syntax-propertize--done", syntax_propertize__done,
3717 doc: /* Position up to which syntax-table properties have been set. */);
3718 syntax_propertize__done = -1;
3719 DEFSYM (Qinternal__syntax_propertize, "internal--syntax-propertize");
3720 Fmake_variable_buffer_local (intern ("syntax-propertize--done"));
3722 words_include_escapes = 0;
3723 DEFVAR_BOOL ("words-include-escapes", words_include_escapes,
3724 doc: /* Non-nil means `forward-word', etc., should treat escape chars part of words. */);
3726 DEFVAR_BOOL ("multibyte-syntax-as-symbol", multibyte_syntax_as_symbol,
3727 doc: /* Non-nil means `scan-sexps' treats all multibyte characters as symbol. */);
3728 multibyte_syntax_as_symbol = 0;
3730 DEFVAR_BOOL ("open-paren-in-column-0-is-defun-start",
3731 open_paren_in_column_0_is_defun_start,
3732 doc: /* Non-nil means an open paren in column 0 denotes the start of a defun. */);
3733 open_paren_in_column_0_is_defun_start = 1;
3736 DEFVAR_LISP ("find-word-boundary-function-table",
3737 Vfind_word_boundary_function_table,
3738 doc: /*
3739 Char table of functions to search for the word boundary.
3740 Each function is called with two arguments; POS and LIMIT.
3741 POS and LIMIT are character positions in the current buffer.
3743 If POS is less than LIMIT, POS is at the first character of a word,
3744 and the return value of a function should be a position after the
3745 last character of that word.
3747 If POS is not less than LIMIT, POS is at the last character of a word,
3748 and the return value of a function should be a position at the first
3749 character of that word.
3751 In both cases, LIMIT bounds the search. */);
3752 Vfind_word_boundary_function_table = Fmake_char_table (Qnil, Qnil);
3754 DEFVAR_BOOL ("comment-end-can-be-escaped", Vcomment_end_can_be_escaped,
3755 doc: /* Non-nil means an escaped ender inside a comment doesn't end the comment. */);
3756 Vcomment_end_can_be_escaped = 0;
3757 DEFSYM (Qcomment_end_can_be_escaped, "comment-end-can-be-escaped");
3758 Fmake_variable_buffer_local (Qcomment_end_can_be_escaped);
3760 defsubr (&Ssyntax_table_p);
3761 defsubr (&Ssyntax_table);
3762 defsubr (&Sstandard_syntax_table);
3763 defsubr (&Scopy_syntax_table);
3764 defsubr (&Sset_syntax_table);
3765 defsubr (&Schar_syntax);
3766 defsubr (&Smatching_paren);
3767 defsubr (&Sstring_to_syntax);
3768 defsubr (&Smodify_syntax_entry);
3769 defsubr (&Sinternal_describe_syntax_value);
3771 defsubr (&Sforward_word);
3773 defsubr (&Sskip_chars_forward);
3774 defsubr (&Sskip_chars_backward);
3775 defsubr (&Sskip_syntax_forward);
3776 defsubr (&Sskip_syntax_backward);
3778 defsubr (&Sforward_comment);
3779 defsubr (&Sscan_lists);
3780 defsubr (&Sscan_sexps);
3781 defsubr (&Sbackward_prefix_chars);
3782 defsubr (&Sparse_partial_sexp);