* admin/release-process: Improve wording.
[emacs.git] / src / syntax.c
blobf939a76a2e64b76f6066bb86335d355532452190
1 /* GNU Emacs routines to deal with syntax tables; also word and list parsing.
2 Copyright (C) 1985, 1987, 1993-1995, 1997-1999, 2001-2015 Free
3 Software Foundation, Inc.
5 This file is part of GNU Emacs.
7 GNU Emacs is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
12 GNU Emacs is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
21 #include <config.h>
23 #include <sys/types.h>
25 #include "lisp.h"
26 #include "character.h"
27 #include "buffer.h"
28 #include "regex.h"
29 #include "syntax.h"
30 #include "intervals.h"
31 #include "category.h"
33 /* Make syntax table lookup grant data in gl_state. */
34 #define SYNTAX(c) syntax_property (c, 1)
35 #define SYNTAX_ENTRY(c) syntax_property_entry (c, 1)
36 #define SYNTAX_WITH_FLAGS(c) syntax_property_with_flags (c, 1)
38 /* Eight single-bit flags have the following meanings:
39 1. This character is the first of a two-character comment-start sequence.
40 2. This character is the second of a two-character comment-start sequence.
41 3. This character is the first of a two-character comment-end sequence.
42 4. This character is the second of a two-character comment-end sequence.
43 5. This character is a prefix, for backward-prefix-chars.
44 6. The char is part of a delimiter for comments of style "b".
45 7. This character is part of a nestable comment sequence.
46 8. The char is part of a delimiter for comments of style "c".
47 Note that any two-character sequence whose first character has flag 1
48 and whose second character has flag 2 will be interpreted as a comment start.
50 Bits 6 and 8 discriminate among different comment styles.
51 Languages such as C++ allow two orthogonal syntax start/end pairs
52 and bit 6 determines whether a comment-end or Scommentend
53 ends style a or b. Comment markers can start style a, b, c, or bc.
54 Style a is always the default.
55 For 2-char comment markers, the style b flag is looked up only on the second
56 char of the comment marker and on the first char of the comment ender.
57 For style c (like the nested flag), the flag can be placed on any of
58 the chars. */
60 /* These functions extract specific flags from an integer
61 that holds the syntax code and the flags. */
63 static bool
64 SYNTAX_FLAGS_COMSTART_FIRST (int flags)
66 return (flags >> 16) & 1;
68 static bool
69 SYNTAX_FLAGS_COMSTART_SECOND (int flags)
71 return (flags >> 17) & 1;
73 static bool
74 SYNTAX_FLAGS_COMEND_FIRST (int flags)
76 return (flags >> 18) & 1;
78 static bool
79 SYNTAX_FLAGS_COMEND_SECOND (int flags)
81 return (flags >> 19) & 1;
83 static bool
84 SYNTAX_FLAGS_PREFIX (int flags)
86 return (flags >> 20) & 1;
88 static bool
89 SYNTAX_FLAGS_COMMENT_STYLEB (int flags)
91 return (flags >> 21) & 1;
93 static bool
94 SYNTAX_FLAGS_COMMENT_STYLEC (int flags)
96 return (flags >> 23) & 1;
98 static int
99 SYNTAX_FLAGS_COMMENT_STYLEC2 (int flags)
101 return (flags >> 22) & 2; /* SYNTAX_FLAGS_COMMENT_STYLEC (flags) * 2 */
103 static bool
104 SYNTAX_FLAGS_COMMENT_NESTED (int flags)
106 return (flags >> 22) & 1;
109 /* FLAGS should be the flags of the main char of the comment marker, e.g.
110 the second for comstart and the first for comend. */
111 static int
112 SYNTAX_FLAGS_COMMENT_STYLE (int flags, int other_flags)
114 return (SYNTAX_FLAGS_COMMENT_STYLEB (flags)
115 | SYNTAX_FLAGS_COMMENT_STYLEC2 (flags)
116 | SYNTAX_FLAGS_COMMENT_STYLEC2 (other_flags));
119 /* Extract a particular flag for a given character. */
121 static bool
122 SYNTAX_COMEND_FIRST (int c)
124 return SYNTAX_FLAGS_COMEND_FIRST (SYNTAX_WITH_FLAGS (c));
127 /* We use these constants in place for comment-style and
128 string-ender-char to distinguish comments/strings started by
129 comment_fence and string_fence codes. */
131 enum
133 ST_COMMENT_STYLE = 256 + 1,
134 ST_STRING_STYLE = 256 + 2
137 /* This is the internal form of the parse state used in parse-partial-sexp. */
139 struct lisp_parse_state
141 EMACS_INT depth; /* Depth at end of parsing. */
142 int instring; /* -1 if not within string, else desired terminator. */
143 EMACS_INT incomment; /* -1 if in unnestable comment else comment nesting */
144 int comstyle; /* comment style a=0, or b=1, or ST_COMMENT_STYLE. */
145 bool quoted; /* True if just after an escape char at end of parsing. */
146 EMACS_INT mindepth; /* Minimum depth seen while scanning. */
147 /* Char number of most recent start-of-expression at current level */
148 ptrdiff_t thislevelstart;
149 /* Char number of start of containing expression */
150 ptrdiff_t prevlevelstart;
151 ptrdiff_t location; /* Char number at which parsing stopped. */
152 ptrdiff_t location_byte; /* Corresponding byte position. */
153 ptrdiff_t comstr_start; /* Position of last comment/string starter. */
154 Lisp_Object levelstarts; /* Char numbers of starts-of-expression
155 of levels (starting from outermost). */
158 /* These variables are a cache for finding the start of a defun.
159 find_start_pos is the place for which the defun start was found.
160 find_start_value is the defun start position found for it.
161 find_start_value_byte is the corresponding byte position.
162 find_start_buffer is the buffer it was found in.
163 find_start_begv is the BEGV value when it was found.
164 find_start_modiff is the value of MODIFF when it was found. */
166 static ptrdiff_t find_start_pos;
167 static ptrdiff_t find_start_value;
168 static ptrdiff_t find_start_value_byte;
169 static struct buffer *find_start_buffer;
170 static ptrdiff_t find_start_begv;
171 static EMACS_INT find_start_modiff;
174 static Lisp_Object skip_chars (bool, Lisp_Object, Lisp_Object, bool);
175 static Lisp_Object skip_syntaxes (bool, Lisp_Object, Lisp_Object);
176 static Lisp_Object scan_lists (EMACS_INT, EMACS_INT, EMACS_INT, bool);
177 static void scan_sexps_forward (struct lisp_parse_state *,
178 ptrdiff_t, ptrdiff_t, ptrdiff_t, EMACS_INT,
179 bool, Lisp_Object, int);
180 static bool in_classes (int, Lisp_Object);
181 static void parse_sexp_propertize (ptrdiff_t charpos);
183 /* This setter is used only in this file, so it can be private. */
184 static void
185 bset_syntax_table (struct buffer *b, Lisp_Object val)
187 b->syntax_table_ = val;
190 /* Whether the syntax of the character C has the prefix flag set. */
191 bool
192 syntax_prefix_flag_p (int c)
194 return SYNTAX_FLAGS_PREFIX (SYNTAX_WITH_FLAGS (c));
197 struct gl_state_s gl_state; /* Global state of syntax parser. */
199 enum { INTERVALS_AT_ONCE = 10 }; /* 1 + max-number of intervals
200 to scan to property-change. */
202 /* Set the syntax entry VAL for char C in table TABLE. */
204 static void
205 SET_RAW_SYNTAX_ENTRY (Lisp_Object table, int c, Lisp_Object val)
207 CHAR_TABLE_SET (table, c, val);
210 /* Set the syntax entry VAL for char-range RANGE in table TABLE.
211 RANGE is a cons (FROM . TO) specifying the range of characters. */
213 static void
214 SET_RAW_SYNTAX_ENTRY_RANGE (Lisp_Object table, Lisp_Object range,
215 Lisp_Object val)
217 Fset_char_table_range (table, range, val);
220 /* Extract the information from the entry for character C
221 in the current syntax table. */
223 static Lisp_Object
224 SYNTAX_MATCH (int c)
226 Lisp_Object ent = SYNTAX_ENTRY (c);
227 return CONSP (ent) ? XCDR (ent) : Qnil;
230 /* This should be called with FROM at the start of forward
231 search, or after the last position of the backward search. It
232 makes sure that the first char is picked up with correct table, so
233 one does not need to call UPDATE_SYNTAX_TABLE immediately after the
234 call.
235 Sign of COUNT gives the direction of the search.
238 static void
239 SETUP_SYNTAX_TABLE (ptrdiff_t from, ptrdiff_t count)
241 SETUP_BUFFER_SYNTAX_TABLE ();
242 gl_state.b_property = BEGV;
243 gl_state.e_property = ZV + 1;
244 gl_state.object = Qnil;
245 gl_state.offset = 0;
246 if (parse_sexp_lookup_properties)
248 if (count > 0)
249 update_syntax_table_forward (from, true, Qnil);
250 else if (from > BEGV)
252 update_syntax_table (from - 1, count, true, Qnil);
253 parse_sexp_propertize (from - 1);
258 /* Same as above, but in OBJECT. If OBJECT is nil, use current buffer.
259 If it is t (which is only used in fast_c_string_match_ignore_case),
260 ignore properties altogether.
262 This is meant for regex.c to use. For buffers, regex.c passes arguments
263 to the UPDATE_SYNTAX_TABLE functions which are relative to BEGV.
264 So if it is a buffer, we set the offset field to BEGV. */
266 void
267 SETUP_SYNTAX_TABLE_FOR_OBJECT (Lisp_Object object,
268 ptrdiff_t from, ptrdiff_t count)
270 SETUP_BUFFER_SYNTAX_TABLE ();
271 gl_state.object = object;
272 if (BUFFERP (gl_state.object))
274 struct buffer *buf = XBUFFER (gl_state.object);
275 gl_state.b_property = 1;
276 gl_state.e_property = BUF_ZV (buf) - BUF_BEGV (buf) + 1;
277 gl_state.offset = BUF_BEGV (buf) - 1;
279 else if (NILP (gl_state.object))
281 gl_state.b_property = 1;
282 gl_state.e_property = ZV - BEGV + 1;
283 gl_state.offset = BEGV - 1;
285 else if (EQ (gl_state.object, Qt))
287 gl_state.b_property = 0;
288 gl_state.e_property = PTRDIFF_MAX;
289 gl_state.offset = 0;
291 else
293 gl_state.b_property = 0;
294 gl_state.e_property = 1 + SCHARS (gl_state.object);
295 gl_state.offset = 0;
297 if (parse_sexp_lookup_properties)
298 update_syntax_table (from + gl_state.offset - (count <= 0),
299 count, 1, gl_state.object);
302 /* Update gl_state to an appropriate interval which contains CHARPOS. The
303 sign of COUNT give the relative position of CHARPOS wrt the previously
304 valid interval. If INIT, only [be]_property fields of gl_state are
305 valid at start, the rest is filled basing on OBJECT.
307 `gl_state.*_i' are the intervals, and CHARPOS is further in the search
308 direction than the intervals - or in an interval. We update the
309 current syntax-table basing on the property of this interval, and
310 update the interval to start further than CHARPOS - or be
311 NULL. We also update lim_property to be the next value of
312 charpos to call this subroutine again - or be before/after the
313 start/end of OBJECT. */
315 void
316 update_syntax_table (ptrdiff_t charpos, EMACS_INT count, bool init,
317 Lisp_Object object)
319 Lisp_Object tmp_table;
320 int cnt = 0;
321 bool invalidate = true;
322 INTERVAL i;
324 if (init)
326 gl_state.old_prop = Qnil;
327 gl_state.start = gl_state.b_property;
328 gl_state.stop = gl_state.e_property;
329 i = interval_of (charpos, object);
330 gl_state.backward_i = gl_state.forward_i = i;
331 invalidate = false;
332 if (!i)
333 return;
334 /* interval_of updates only ->position of the return value, so
335 update the parents manually to speed up update_interval. */
336 while (!NULL_PARENT (i))
338 if (AM_RIGHT_CHILD (i))
339 INTERVAL_PARENT (i)->position = i->position
340 - LEFT_TOTAL_LENGTH (i) + TOTAL_LENGTH (i) /* right end */
341 - TOTAL_LENGTH (INTERVAL_PARENT (i))
342 + LEFT_TOTAL_LENGTH (INTERVAL_PARENT (i));
343 else
344 INTERVAL_PARENT (i)->position = i->position - LEFT_TOTAL_LENGTH (i)
345 + TOTAL_LENGTH (i);
346 i = INTERVAL_PARENT (i);
348 i = gl_state.forward_i;
349 gl_state.b_property = i->position - gl_state.offset;
350 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
351 goto update;
353 i = count > 0 ? gl_state.forward_i : gl_state.backward_i;
355 /* We are guaranteed to be called with CHARPOS either in i,
356 or further off. */
357 if (!i)
358 error ("Error in syntax_table logic for to-the-end intervals");
359 else if (charpos < i->position) /* Move left. */
361 if (count > 0)
362 error ("Error in syntax_table logic for intervals <-");
363 /* Update the interval. */
364 i = update_interval (i, charpos);
365 if (INTERVAL_LAST_POS (i) != gl_state.b_property)
367 invalidate = false;
368 gl_state.forward_i = i;
369 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
372 else if (charpos >= INTERVAL_LAST_POS (i)) /* Move right. */
374 if (count < 0)
375 error ("Error in syntax_table logic for intervals ->");
376 /* Update the interval. */
377 i = update_interval (i, charpos);
378 if (i->position != gl_state.e_property)
380 invalidate = false;
381 gl_state.backward_i = i;
382 gl_state.b_property = i->position - gl_state.offset;
386 update:
387 tmp_table = textget (i->plist, Qsyntax_table);
389 if (invalidate)
390 invalidate = !EQ (tmp_table, gl_state.old_prop); /* Need to invalidate? */
392 if (invalidate) /* Did not get to adjacent interval. */
393 { /* with the same table => */
394 /* invalidate the old range. */
395 if (count > 0)
397 gl_state.backward_i = i;
398 gl_state.b_property = i->position - gl_state.offset;
400 else
402 gl_state.forward_i = i;
403 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
407 if (!EQ (tmp_table, gl_state.old_prop))
409 gl_state.current_syntax_table = tmp_table;
410 gl_state.old_prop = tmp_table;
411 if (EQ (Fsyntax_table_p (tmp_table), Qt))
413 gl_state.use_global = 0;
415 else if (CONSP (tmp_table))
417 gl_state.use_global = 1;
418 gl_state.global_code = tmp_table;
420 else
422 gl_state.use_global = 0;
423 gl_state.current_syntax_table = BVAR (current_buffer, syntax_table);
427 while (i)
429 if (cnt && !EQ (tmp_table, textget (i->plist, Qsyntax_table)))
431 if (count > 0)
433 gl_state.e_property = i->position - gl_state.offset;
434 gl_state.forward_i = i;
436 else
438 gl_state.b_property
439 = i->position + LENGTH (i) - gl_state.offset;
440 gl_state.backward_i = i;
442 return;
444 else if (cnt == INTERVALS_AT_ONCE)
446 if (count > 0)
448 gl_state.e_property
449 = i->position + LENGTH (i) - gl_state.offset
450 /* e_property at EOB is not set to ZV but to ZV+1, so that
451 we can do INC(from);UPDATE_SYNTAX_TABLE_FORWARD without
452 having to check eob between the two. */
453 + (next_interval (i) ? 0 : 1);
454 gl_state.forward_i = i;
456 else
458 gl_state.b_property = i->position - gl_state.offset;
459 gl_state.backward_i = i;
461 return;
463 cnt++;
464 i = count > 0 ? next_interval (i) : previous_interval (i);
466 eassert (i == NULL); /* This property goes to the end. */
467 if (count > 0)
469 gl_state.e_property = gl_state.stop;
470 gl_state.forward_i = i;
472 else
473 gl_state.b_property = gl_state.start;
476 static void
477 parse_sexp_propertize (ptrdiff_t charpos)
479 EMACS_INT zv = ZV;
480 if (syntax_propertize__done <= charpos
481 && syntax_propertize__done < zv)
483 EMACS_INT modiffs = CHARS_MODIFF;
484 safe_call1 (Qinternal__syntax_propertize,
485 make_number (min (zv, 1 + charpos)));
486 if (modiffs != CHARS_MODIFF)
487 error ("parse-sexp-propertize-function modified the buffer!");
488 if (syntax_propertize__done <= charpos
489 && syntax_propertize__done < zv)
490 error ("parse-sexp-propertize-function did not move"
491 " syntax-propertize--done");
492 SETUP_SYNTAX_TABLE (charpos, 1);
494 else if (gl_state.e_property > syntax_propertize__done)
496 gl_state.e_property = syntax_propertize__done;
497 gl_state.e_property_truncated = true;
499 else if (gl_state.e_property_truncated
500 && gl_state.e_property < syntax_propertize__done)
501 { /* When moving backward, e_property might be set without resetting
502 e_property_truncated, so the e_property_truncated flag may
503 occasionally be left raised spuriously. This should be rare. */
504 gl_state.e_property_truncated = false;
505 update_syntax_table_forward (charpos, false, Qnil);
509 void
510 update_syntax_table_forward (ptrdiff_t charpos, bool init,
511 Lisp_Object object)
513 if (gl_state.e_property_truncated)
515 eassert (NILP (object));
516 eassert (charpos >= gl_state.e_property);
518 else
520 update_syntax_table (charpos, 1, init, object);
521 if (NILP (object) && gl_state.e_property > syntax_propertize__done)
522 parse_sexp_propertize (charpos);
526 /* Returns true if char at CHARPOS is quoted.
527 Global syntax-table data should be set up already to be good at CHARPOS
528 or after. On return global syntax data is good for lookup at CHARPOS. */
530 static bool
531 char_quoted (ptrdiff_t charpos, ptrdiff_t bytepos)
533 enum syntaxcode code;
534 ptrdiff_t beg = BEGV;
535 bool quoted = 0;
536 ptrdiff_t orig = charpos;
538 while (charpos > beg)
540 int c;
541 DEC_BOTH (charpos, bytepos);
543 UPDATE_SYNTAX_TABLE_BACKWARD (charpos);
544 c = FETCH_CHAR_AS_MULTIBYTE (bytepos);
545 code = SYNTAX (c);
546 if (! (code == Scharquote || code == Sescape))
547 break;
549 quoted = !quoted;
552 UPDATE_SYNTAX_TABLE (orig);
553 return quoted;
556 /* Return the bytepos one character before BYTEPOS.
557 We assume that BYTEPOS is not at the start of the buffer. */
559 static ptrdiff_t
560 dec_bytepos (ptrdiff_t bytepos)
562 if (NILP (BVAR (current_buffer, enable_multibyte_characters)))
563 return bytepos - 1;
565 DEC_POS (bytepos);
566 return bytepos;
569 /* Return a defun-start position before POS and not too far before.
570 It should be the last one before POS, or nearly the last.
572 When open_paren_in_column_0_is_defun_start is nonzero,
573 only the beginning of the buffer is treated as a defun-start.
575 We record the information about where the scan started
576 and what its result was, so that another call in the same area
577 can return the same value very quickly.
579 There is no promise at which position the global syntax data is
580 valid on return from the subroutine, so the caller should explicitly
581 update the global data. */
583 static ptrdiff_t
584 find_defun_start (ptrdiff_t pos, ptrdiff_t pos_byte)
586 ptrdiff_t opoint = PT, opoint_byte = PT_BYTE;
588 /* Use previous finding, if it's valid and applies to this inquiry. */
589 if (current_buffer == find_start_buffer
590 /* Reuse the defun-start even if POS is a little farther on.
591 POS might be in the next defun, but that's ok.
592 Our value may not be the best possible, but will still be usable. */
593 && pos <= find_start_pos + 1000
594 && pos >= find_start_value
595 && BEGV == find_start_begv
596 && MODIFF == find_start_modiff)
597 return find_start_value;
599 if (!open_paren_in_column_0_is_defun_start)
601 find_start_value = BEGV;
602 find_start_value_byte = BEGV_BYTE;
603 goto found;
606 /* Back up to start of line. */
607 scan_newline (pos, pos_byte, BEGV, BEGV_BYTE, -1, 1);
609 /* We optimize syntax-table lookup for rare updates. Thus we accept
610 only those `^\s(' which are good in global _and_ text-property
611 syntax-tables. */
612 SETUP_BUFFER_SYNTAX_TABLE ();
613 while (PT > BEGV)
615 int c;
617 /* Open-paren at start of line means we may have found our
618 defun-start. */
619 c = FETCH_CHAR_AS_MULTIBYTE (PT_BYTE);
620 if (SYNTAX (c) == Sopen)
622 SETUP_SYNTAX_TABLE (PT + 1, -1); /* Try again... */
623 c = FETCH_CHAR_AS_MULTIBYTE (PT_BYTE);
624 if (SYNTAX (c) == Sopen)
625 break;
626 /* Now fallback to the default value. */
627 SETUP_BUFFER_SYNTAX_TABLE ();
629 /* Move to beg of previous line. */
630 scan_newline (PT, PT_BYTE, BEGV, BEGV_BYTE, -2, 1);
633 /* Record what we found, for the next try. */
634 find_start_value = PT;
635 find_start_value_byte = PT_BYTE;
636 TEMP_SET_PT_BOTH (opoint, opoint_byte);
638 found:
639 find_start_buffer = current_buffer;
640 find_start_modiff = MODIFF;
641 find_start_begv = BEGV;
642 find_start_pos = pos;
644 return find_start_value;
647 /* Return the SYNTAX_COMEND_FIRST of the character before POS, POS_BYTE. */
649 static bool
650 prev_char_comend_first (ptrdiff_t pos, ptrdiff_t pos_byte)
652 int c;
653 bool val;
655 DEC_BOTH (pos, pos_byte);
656 UPDATE_SYNTAX_TABLE_BACKWARD (pos);
657 c = FETCH_CHAR (pos_byte);
658 val = SYNTAX_COMEND_FIRST (c);
659 UPDATE_SYNTAX_TABLE_FORWARD (pos + 1);
660 return val;
663 /* Check whether charpos FROM is at the end of a comment.
664 FROM_BYTE is the bytepos corresponding to FROM.
665 Do not move back before STOP.
667 Return true if we find a comment ending at FROM/FROM_BYTE.
669 If successful, store the charpos of the comment's beginning
670 into *CHARPOS_PTR, and the bytepos into *BYTEPOS_PTR.
672 Global syntax data remains valid for backward search starting at
673 the returned value (or at FROM, if the search was not successful). */
675 static bool
676 back_comment (ptrdiff_t from, ptrdiff_t from_byte, ptrdiff_t stop,
677 bool comnested, int comstyle, ptrdiff_t *charpos_ptr,
678 ptrdiff_t *bytepos_ptr)
680 /* Look back, counting the parity of string-quotes,
681 and recording the comment-starters seen.
682 When we reach a safe place, assume that's not in a string;
683 then step the main scan to the earliest comment-starter seen
684 an even number of string quotes away from the safe place.
686 OFROM[I] is position of the earliest comment-starter seen
687 which is I+2X quotes from the comment-end.
688 PARITY is current parity of quotes from the comment end. */
689 int string_style = -1; /* Presumed outside of any string. */
690 bool string_lossage = 0;
691 /* Not a real lossage: indicates that we have passed a matching comment
692 starter plus a non-matching comment-ender, meaning that any matching
693 comment-starter we might see later could be a false positive (hidden
694 inside another comment).
695 Test case: { a (* b } c (* d *) */
696 bool comment_lossage = 0;
697 ptrdiff_t comment_end = from;
698 ptrdiff_t comment_end_byte = from_byte;
699 ptrdiff_t comstart_pos = 0;
700 ptrdiff_t comstart_byte IF_LINT (= 0);
701 /* Place where the containing defun starts,
702 or 0 if we didn't come across it yet. */
703 ptrdiff_t defun_start = 0;
704 ptrdiff_t defun_start_byte = 0;
705 enum syntaxcode code;
706 ptrdiff_t nesting = 1; /* Current comment nesting. */
707 int c;
708 int syntax = 0;
710 /* FIXME: A }} comment-ender style leads to incorrect behavior
711 in the case of {{ c }}} because we ignore the last two chars which are
712 assumed to be comment-enders although they aren't. */
714 /* At beginning of range to scan, we're outside of strings;
715 that determines quote parity to the comment-end. */
716 while (from != stop)
718 ptrdiff_t temp_byte;
719 int prev_syntax;
720 bool com2start, com2end, comstart;
722 /* Move back and examine a character. */
723 DEC_BOTH (from, from_byte);
724 UPDATE_SYNTAX_TABLE_BACKWARD (from);
726 prev_syntax = syntax;
727 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
728 syntax = SYNTAX_WITH_FLAGS (c);
729 code = SYNTAX (c);
731 /* Check for 2-char comment markers. */
732 com2start = (SYNTAX_FLAGS_COMSTART_FIRST (syntax)
733 && SYNTAX_FLAGS_COMSTART_SECOND (prev_syntax)
734 && (comstyle
735 == SYNTAX_FLAGS_COMMENT_STYLE (prev_syntax, syntax))
736 && (SYNTAX_FLAGS_COMMENT_NESTED (prev_syntax)
737 || SYNTAX_FLAGS_COMMENT_NESTED (syntax)) == comnested);
738 com2end = (SYNTAX_FLAGS_COMEND_FIRST (syntax)
739 && SYNTAX_FLAGS_COMEND_SECOND (prev_syntax));
740 comstart = (com2start || code == Scomment);
742 /* Nasty cases with overlapping 2-char comment markers:
743 - snmp-mode: -- c -- foo -- c --
744 --- c --
745 ------ c --
746 - c-mode: *||*
747 |* *|* *|
748 |*| |* |*|
749 /// */
751 /* If a 2-char comment sequence partly overlaps with another,
752 we don't try to be clever. E.g. |*| in C, or }% in modes that
753 have %..\n and %{..}%. */
754 if (from > stop && (com2end || comstart))
756 ptrdiff_t next = from, next_byte = from_byte;
757 int next_c, next_syntax;
758 DEC_BOTH (next, next_byte);
759 UPDATE_SYNTAX_TABLE_BACKWARD (next);
760 next_c = FETCH_CHAR_AS_MULTIBYTE (next_byte);
761 next_syntax = SYNTAX_WITH_FLAGS (next_c);
762 if (((comstart || comnested)
763 && SYNTAX_FLAGS_COMEND_SECOND (syntax)
764 && SYNTAX_FLAGS_COMEND_FIRST (next_syntax))
765 || ((com2end || comnested)
766 && SYNTAX_FLAGS_COMSTART_SECOND (syntax)
767 && (comstyle
768 == SYNTAX_FLAGS_COMMENT_STYLE (syntax, prev_syntax))
769 && SYNTAX_FLAGS_COMSTART_FIRST (next_syntax)))
770 goto lossage;
771 /* UPDATE_SYNTAX_TABLE_FORWARD (next + 1); */
774 if (com2start && comstart_pos == 0)
775 /* We're looking at a comment starter. But it might be a comment
776 ender as well (see snmp-mode). The first time we see one, we
777 need to consider it as a comment starter,
778 and the subsequent times as a comment ender. */
779 com2end = 0;
781 /* Turn a 2-char comment sequences into the appropriate syntax. */
782 if (com2end)
783 code = Sendcomment;
784 else if (com2start)
785 code = Scomment;
786 /* Ignore comment starters of a different style. */
787 else if (code == Scomment
788 && (comstyle != SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0)
789 || SYNTAX_FLAGS_COMMENT_NESTED (syntax) != comnested))
790 continue;
792 /* Ignore escaped characters, except comment-enders. */
793 if (code != Sendcomment && char_quoted (from, from_byte))
794 continue;
796 switch (code)
798 case Sstring_fence:
799 case Scomment_fence:
800 c = (code == Sstring_fence ? ST_STRING_STYLE : ST_COMMENT_STYLE);
801 case Sstring:
802 /* Track parity of quotes. */
803 if (string_style == -1)
804 /* Entering a string. */
805 string_style = c;
806 else if (string_style == c)
807 /* Leaving the string. */
808 string_style = -1;
809 else
810 /* If we have two kinds of string delimiters.
811 There's no way to grok this scanning backwards. */
812 string_lossage = 1;
813 break;
815 case Scomment:
816 /* We've already checked that it is the relevant comstyle. */
817 if (string_style != -1 || comment_lossage || string_lossage)
818 /* There are odd string quotes involved, so let's be careful.
819 Test case in Pascal: " { " a { " } */
820 goto lossage;
822 if (!comnested)
824 /* Record best comment-starter so far. */
825 comstart_pos = from;
826 comstart_byte = from_byte;
828 else if (--nesting <= 0)
829 /* nested comments have to be balanced, so we don't need to
830 keep looking for earlier ones. We use here the same (slightly
831 incorrect) reasoning as below: since it is followed by uniform
832 paired string quotes, this comment-start has to be outside of
833 strings, else the comment-end itself would be inside a string. */
834 goto done;
835 break;
837 case Sendcomment:
838 if (SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0) == comstyle
839 && ((com2end && SYNTAX_FLAGS_COMMENT_NESTED (prev_syntax))
840 || SYNTAX_FLAGS_COMMENT_NESTED (syntax)) == comnested)
841 /* This is the same style of comment ender as ours. */
843 if (comnested)
844 nesting++;
845 else
846 /* Anything before that can't count because it would match
847 this comment-ender rather than ours. */
848 from = stop; /* Break out of the loop. */
850 else if (comstart_pos != 0 || c != '\n')
851 /* We're mixing comment styles here, so we'd better be careful.
852 The (comstart_pos != 0 || c != '\n') check is not quite correct
853 (we should just always set comment_lossage), but removing it
854 would imply that any multiline comment in C would go through
855 lossage, which seems overkill.
856 The failure should only happen in the rare cases such as
857 { (* } *) */
858 comment_lossage = 1;
859 break;
861 case Sopen:
862 /* Assume a defun-start point is outside of strings. */
863 if (open_paren_in_column_0_is_defun_start
864 && (from == stop
865 || (temp_byte = dec_bytepos (from_byte),
866 FETCH_CHAR (temp_byte) == '\n')))
868 defun_start = from;
869 defun_start_byte = from_byte;
870 from = stop; /* Break out of the loop. */
872 break;
874 default:
875 break;
879 if (comstart_pos == 0)
881 from = comment_end;
882 from_byte = comment_end_byte;
883 UPDATE_SYNTAX_TABLE_FORWARD (comment_end);
885 /* If comstart_pos is set and we get here (ie. didn't jump to `lossage'
886 or `done'), then we've found the beginning of the non-nested comment. */
887 else if (1) /* !comnested */
889 from = comstart_pos;
890 from_byte = comstart_byte;
891 UPDATE_SYNTAX_TABLE_FORWARD (from - 1);
893 else lossage:
895 struct lisp_parse_state state;
896 bool adjusted = true;
897 /* We had two kinds of string delimiters mixed up
898 together. Decode this going forwards.
899 Scan fwd from a known safe place (beginning-of-defun)
900 to the one in question; this records where we
901 last passed a comment starter. */
902 /* If we did not already find the defun start, find it now. */
903 if (defun_start == 0)
905 defun_start = find_defun_start (comment_end, comment_end_byte);
906 defun_start_byte = find_start_value_byte;
907 adjusted = (defun_start > BEGV);
911 scan_sexps_forward (&state,
912 defun_start, defun_start_byte,
913 comment_end, TYPE_MINIMUM (EMACS_INT),
914 0, Qnil, 0);
915 defun_start = comment_end;
916 if (!adjusted)
918 adjusted = true;
919 find_start_value
920 = CONSP (state.levelstarts) ? XINT (XCAR (state.levelstarts))
921 : state.thislevelstart >= 0 ? state.thislevelstart
922 : find_start_value;
923 find_start_value_byte = CHAR_TO_BYTE (find_start_value);
926 if (state.incomment == (comnested ? 1 : -1)
927 && state.comstyle == comstyle)
928 from = state.comstr_start;
929 else
931 from = comment_end;
932 if (state.incomment)
933 /* If comment_end is inside some other comment, maybe ours
934 is nested, so we need to try again from within the
935 surrounding comment. Example: { a (* " *) */
937 /* FIXME: We should advance by one or two chars. */
938 defun_start = state.comstr_start + 2;
939 defun_start_byte = CHAR_TO_BYTE (defun_start);
942 } while (defun_start < comment_end);
944 from_byte = CHAR_TO_BYTE (from);
945 UPDATE_SYNTAX_TABLE_FORWARD (from - 1);
948 done:
949 *charpos_ptr = from;
950 *bytepos_ptr = from_byte;
952 return from != comment_end;
955 DEFUN ("syntax-table-p", Fsyntax_table_p, Ssyntax_table_p, 1, 1, 0,
956 doc: /* Return t if OBJECT is a syntax table.
957 Currently, any char-table counts as a syntax table. */)
958 (Lisp_Object object)
960 if (CHAR_TABLE_P (object)
961 && EQ (XCHAR_TABLE (object)->purpose, Qsyntax_table))
962 return Qt;
963 return Qnil;
966 static void
967 check_syntax_table (Lisp_Object obj)
969 CHECK_TYPE (CHAR_TABLE_P (obj) && EQ (XCHAR_TABLE (obj)->purpose, Qsyntax_table),
970 Qsyntax_table_p, obj);
973 DEFUN ("syntax-table", Fsyntax_table, Ssyntax_table, 0, 0, 0,
974 doc: /* Return the current syntax table.
975 This is the one specified by the current buffer. */)
976 (void)
978 return BVAR (current_buffer, syntax_table);
981 DEFUN ("standard-syntax-table", Fstandard_syntax_table,
982 Sstandard_syntax_table, 0, 0, 0,
983 doc: /* Return the standard syntax table.
984 This is the one used for new buffers. */)
985 (void)
987 return Vstandard_syntax_table;
990 DEFUN ("copy-syntax-table", Fcopy_syntax_table, Scopy_syntax_table, 0, 1, 0,
991 doc: /* Construct a new syntax table and return it.
992 It is a copy of the TABLE, which defaults to the standard syntax table. */)
993 (Lisp_Object table)
995 Lisp_Object copy;
997 if (!NILP (table))
998 check_syntax_table (table);
999 else
1000 table = Vstandard_syntax_table;
1002 copy = Fcopy_sequence (table);
1004 /* Only the standard syntax table should have a default element.
1005 Other syntax tables should inherit from parents instead. */
1006 set_char_table_defalt (copy, Qnil);
1008 /* Copied syntax tables should all have parents.
1009 If we copied one with no parent, such as the standard syntax table,
1010 use the standard syntax table as the copy's parent. */
1011 if (NILP (XCHAR_TABLE (copy)->parent))
1012 Fset_char_table_parent (copy, Vstandard_syntax_table);
1013 return copy;
1016 DEFUN ("set-syntax-table", Fset_syntax_table, Sset_syntax_table, 1, 1, 0,
1017 doc: /* Select a new syntax table for the current buffer.
1018 One argument, a syntax table. */)
1019 (Lisp_Object table)
1021 int idx;
1022 check_syntax_table (table);
1023 bset_syntax_table (current_buffer, table);
1024 /* Indicate that this buffer now has a specified syntax table. */
1025 idx = PER_BUFFER_VAR_IDX (syntax_table);
1026 SET_PER_BUFFER_VALUE_P (current_buffer, idx, 1);
1027 return table;
1030 /* Convert a letter which signifies a syntax code
1031 into the code it signifies.
1032 This is used by modify-syntax-entry, and other things. */
1034 unsigned char const syntax_spec_code[0400] =
1035 { 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1036 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1037 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1038 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1039 Swhitespace, Scomment_fence, Sstring, 0377, Smath, 0377, 0377, Squote,
1040 Sopen, Sclose, 0377, 0377, 0377, Swhitespace, Spunct, Scharquote,
1041 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1042 0377, 0377, 0377, 0377, Scomment, 0377, Sendcomment, 0377,
1043 Sinherit, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* @, A ... */
1044 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1045 0377, 0377, 0377, 0377, 0377, 0377, 0377, Sword,
1046 0377, 0377, 0377, 0377, Sescape, 0377, 0377, Ssymbol,
1047 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* `, a, ... */
1048 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1049 0377, 0377, 0377, 0377, 0377, 0377, 0377, Sword,
1050 0377, 0377, 0377, 0377, Sstring_fence, 0377, 0377, 0377
1053 /* Indexed by syntax code, give the letter that describes it. */
1055 char const syntax_code_spec[16] =
1057 ' ', '.', 'w', '_', '(', ')', '\'', '\"', '$', '\\', '/', '<', '>', '@',
1058 '!', '|'
1061 /* Indexed by syntax code, give the object (cons of syntax code and
1062 nil) to be stored in syntax table. Since these objects can be
1063 shared among syntax tables, we generate them in advance. By
1064 sharing objects, the function `describe-syntax' can give a more
1065 compact listing. */
1066 static Lisp_Object Vsyntax_code_object;
1069 DEFUN ("char-syntax", Fchar_syntax, Schar_syntax, 1, 1, 0,
1070 doc: /* Return the syntax code of CHARACTER, described by a character.
1071 For example, if CHARACTER is a word constituent, the
1072 character `w' (119) is returned.
1073 The characters that correspond to various syntax codes
1074 are listed in the documentation of `modify-syntax-entry'. */)
1075 (Lisp_Object character)
1077 int char_int;
1078 CHECK_CHARACTER (character);
1079 char_int = XINT (character);
1080 SETUP_BUFFER_SYNTAX_TABLE ();
1081 return make_number (syntax_code_spec[SYNTAX (char_int)]);
1084 DEFUN ("matching-paren", Fmatching_paren, Smatching_paren, 1, 1, 0,
1085 doc: /* Return the matching parenthesis of CHARACTER, or nil if none. */)
1086 (Lisp_Object character)
1088 int char_int;
1089 enum syntaxcode code;
1090 CHECK_CHARACTER (character);
1091 char_int = XINT (character);
1092 SETUP_BUFFER_SYNTAX_TABLE ();
1093 code = SYNTAX (char_int);
1094 if (code == Sopen || code == Sclose)
1095 return SYNTAX_MATCH (char_int);
1096 return Qnil;
1099 DEFUN ("string-to-syntax", Fstring_to_syntax, Sstring_to_syntax, 1, 1, 0,
1100 doc: /* Convert a syntax descriptor STRING into a raw syntax descriptor.
1101 STRING should be a string of the form allowed as argument of
1102 `modify-syntax-entry'. The return value is a raw syntax descriptor: a
1103 cons cell (CODE . MATCHING-CHAR) which can be used, for example, as
1104 the value of a `syntax-table' text property. */)
1105 (Lisp_Object string)
1107 const unsigned char *p;
1108 int val;
1109 Lisp_Object match;
1111 CHECK_STRING (string);
1113 p = SDATA (string);
1114 val = syntax_spec_code[*p++];
1115 if (val == 0377)
1116 error ("Invalid syntax description letter: %c", p[-1]);
1118 if (val == Sinherit)
1119 return Qnil;
1121 if (*p)
1123 int len;
1124 int character = STRING_CHAR_AND_LENGTH (p, len);
1125 XSETINT (match, character);
1126 if (XFASTINT (match) == ' ')
1127 match = Qnil;
1128 p += len;
1130 else
1131 match = Qnil;
1133 while (*p)
1134 switch (*p++)
1136 case '1':
1137 val |= 1 << 16;
1138 break;
1140 case '2':
1141 val |= 1 << 17;
1142 break;
1144 case '3':
1145 val |= 1 << 18;
1146 break;
1148 case '4':
1149 val |= 1 << 19;
1150 break;
1152 case 'p':
1153 val |= 1 << 20;
1154 break;
1156 case 'b':
1157 val |= 1 << 21;
1158 break;
1160 case 'n':
1161 val |= 1 << 22;
1162 break;
1164 case 'c':
1165 val |= 1 << 23;
1166 break;
1169 if (val < ASIZE (Vsyntax_code_object) && NILP (match))
1170 return AREF (Vsyntax_code_object, val);
1171 else
1172 /* Since we can't use a shared object, let's make a new one. */
1173 return Fcons (make_number (val), match);
1176 /* I really don't know why this is interactive
1177 help-form should at least be made useful whilst reading the second arg. */
1178 DEFUN ("modify-syntax-entry", Fmodify_syntax_entry, Smodify_syntax_entry, 2, 3,
1179 "cSet syntax for character: \nsSet syntax for %s to: ",
1180 doc: /* Set syntax for character CHAR according to string NEWENTRY.
1181 The syntax is changed only for table SYNTAX-TABLE, which defaults to
1182 the current buffer's syntax table.
1183 CHAR may be a cons (MIN . MAX), in which case, syntaxes of all characters
1184 in the range MIN to MAX are changed.
1185 The first character of NEWENTRY should be one of the following:
1186 Space or - whitespace syntax. w word constituent.
1187 _ symbol constituent. . punctuation.
1188 ( open-parenthesis. ) close-parenthesis.
1189 " string quote. \\ escape.
1190 $ paired delimiter. \\=' expression quote or prefix operator.
1191 < comment starter. > comment ender.
1192 / character-quote. @ inherit from parent table.
1193 | generic string fence. ! generic comment fence.
1195 Only single-character comment start and end sequences are represented thus.
1196 Two-character sequences are represented as described below.
1197 The second character of NEWENTRY is the matching parenthesis,
1198 used only if the first character is `(' or `)'.
1199 Any additional characters are flags.
1200 Defined flags are the characters 1, 2, 3, 4, b, p, and n.
1201 1 means CHAR is the start of a two-char comment start sequence.
1202 2 means CHAR is the second character of such a sequence.
1203 3 means CHAR is the start of a two-char comment end sequence.
1204 4 means CHAR is the second character of such a sequence.
1206 There can be several orthogonal comment sequences. This is to support
1207 language modes such as C++. By default, all comment sequences are of style
1208 a, but you can set the comment sequence style to b (on the second character
1209 of a comment-start, and the first character of a comment-end sequence) and/or
1210 c (on any of its chars) using this flag:
1211 b means CHAR is part of comment sequence b.
1212 c means CHAR is part of comment sequence c.
1213 n means CHAR is part of a nestable comment sequence.
1215 p means CHAR is a prefix character for `backward-prefix-chars';
1216 such characters are treated as whitespace when they occur
1217 between expressions.
1218 usage: (modify-syntax-entry CHAR NEWENTRY &optional SYNTAX-TABLE) */)
1219 (Lisp_Object c, Lisp_Object newentry, Lisp_Object syntax_table)
1221 if (CONSP (c))
1223 CHECK_CHARACTER_CAR (c);
1224 CHECK_CHARACTER_CDR (c);
1226 else
1227 CHECK_CHARACTER (c);
1229 if (NILP (syntax_table))
1230 syntax_table = BVAR (current_buffer, syntax_table);
1231 else
1232 check_syntax_table (syntax_table);
1234 newentry = Fstring_to_syntax (newentry);
1235 if (CONSP (c))
1236 SET_RAW_SYNTAX_ENTRY_RANGE (syntax_table, c, newentry);
1237 else
1238 SET_RAW_SYNTAX_ENTRY (syntax_table, XINT (c), newentry);
1240 /* We clear the regexp cache, since character classes can now have
1241 different values from those in the compiled regexps.*/
1242 clear_regexp_cache ();
1244 return Qnil;
1247 /* Dump syntax table to buffer in human-readable format */
1249 DEFUN ("internal-describe-syntax-value", Finternal_describe_syntax_value,
1250 Sinternal_describe_syntax_value, 1, 1, 0,
1251 doc: /* Insert a description of the internal syntax description SYNTAX at point. */)
1252 (Lisp_Object syntax)
1254 int code, syntax_code;
1255 bool start1, start2, end1, end2, prefix, comstyleb, comstylec, comnested;
1256 char str[2];
1257 Lisp_Object first, match_lisp, value = syntax;
1259 if (NILP (value))
1261 insert_string ("default");
1262 return syntax;
1265 if (CHAR_TABLE_P (value))
1267 insert_string ("deeper char-table ...");
1268 return syntax;
1271 if (!CONSP (value))
1273 insert_string ("invalid");
1274 return syntax;
1277 first = XCAR (value);
1278 match_lisp = XCDR (value);
1280 if (!INTEGERP (first) || !(NILP (match_lisp) || CHARACTERP (match_lisp)))
1282 insert_string ("invalid");
1283 return syntax;
1286 syntax_code = XINT (first) & INT_MAX;
1287 code = syntax_code & 0377;
1288 start1 = SYNTAX_FLAGS_COMSTART_FIRST (syntax_code);
1289 start2 = SYNTAX_FLAGS_COMSTART_SECOND (syntax_code);
1290 end1 = SYNTAX_FLAGS_COMEND_FIRST (syntax_code);
1291 end2 = SYNTAX_FLAGS_COMEND_SECOND (syntax_code);
1292 prefix = SYNTAX_FLAGS_PREFIX (syntax_code);
1293 comstyleb = SYNTAX_FLAGS_COMMENT_STYLEB (syntax_code);
1294 comstylec = SYNTAX_FLAGS_COMMENT_STYLEC (syntax_code);
1295 comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax_code);
1297 if (Smax <= code)
1299 insert_string ("invalid");
1300 return syntax;
1303 str[0] = syntax_code_spec[code], str[1] = 0;
1304 insert (str, 1);
1306 if (NILP (match_lisp))
1307 insert (" ", 1);
1308 else
1309 insert_char (XINT (match_lisp));
1311 if (start1)
1312 insert ("1", 1);
1313 if (start2)
1314 insert ("2", 1);
1316 if (end1)
1317 insert ("3", 1);
1318 if (end2)
1319 insert ("4", 1);
1321 if (prefix)
1322 insert ("p", 1);
1323 if (comstyleb)
1324 insert ("b", 1);
1325 if (comstylec)
1326 insert ("c", 1);
1327 if (comnested)
1328 insert ("n", 1);
1330 insert_string ("\twhich means: ");
1332 switch (code)
1334 case Swhitespace:
1335 insert_string ("whitespace"); break;
1336 case Spunct:
1337 insert_string ("punctuation"); break;
1338 case Sword:
1339 insert_string ("word"); break;
1340 case Ssymbol:
1341 insert_string ("symbol"); break;
1342 case Sopen:
1343 insert_string ("open"); break;
1344 case Sclose:
1345 insert_string ("close"); break;
1346 case Squote:
1347 insert_string ("prefix"); break;
1348 case Sstring:
1349 insert_string ("string"); break;
1350 case Smath:
1351 insert_string ("math"); break;
1352 case Sescape:
1353 insert_string ("escape"); break;
1354 case Scharquote:
1355 insert_string ("charquote"); break;
1356 case Scomment:
1357 insert_string ("comment"); break;
1358 case Sendcomment:
1359 insert_string ("endcomment"); break;
1360 case Sinherit:
1361 insert_string ("inherit"); break;
1362 case Scomment_fence:
1363 insert_string ("comment fence"); break;
1364 case Sstring_fence:
1365 insert_string ("string fence"); break;
1366 default:
1367 insert_string ("invalid");
1368 return syntax;
1371 if (!NILP (match_lisp))
1373 insert_string (", matches ");
1374 insert_char (XINT (match_lisp));
1377 if (start1)
1378 insert_string (",\n\t is the first character of a comment-start sequence");
1379 if (start2)
1380 insert_string (",\n\t is the second character of a comment-start sequence");
1382 if (end1)
1383 insert_string (",\n\t is the first character of a comment-end sequence");
1384 if (end2)
1385 insert_string (",\n\t is the second character of a comment-end sequence");
1386 if (comstyleb)
1387 insert_string (" (comment style b)");
1388 if (comstylec)
1389 insert_string (" (comment style c)");
1390 if (comnested)
1391 insert_string (" (nestable)");
1393 if (prefix)
1395 AUTO_STRING (prefixdoc,
1396 ",\n\t is a prefix character for `backward-prefix-chars'");
1397 insert1 (Fsubstitute_command_keys (prefixdoc));
1400 return syntax;
1403 /* Return the position across COUNT words from FROM.
1404 If that many words cannot be found before the end of the buffer, return 0.
1405 COUNT negative means scan backward and stop at word beginning. */
1407 ptrdiff_t
1408 scan_words (register ptrdiff_t from, register EMACS_INT count)
1410 register ptrdiff_t beg = BEGV;
1411 register ptrdiff_t end = ZV;
1412 register ptrdiff_t from_byte = CHAR_TO_BYTE (from);
1413 register enum syntaxcode code;
1414 int ch0, ch1;
1415 Lisp_Object func, pos;
1417 immediate_quit = 1;
1418 QUIT;
1420 SETUP_SYNTAX_TABLE (from, count);
1422 while (count > 0)
1424 while (1)
1426 if (from == end)
1428 immediate_quit = 0;
1429 return 0;
1431 UPDATE_SYNTAX_TABLE_FORWARD (from);
1432 ch0 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
1433 code = SYNTAX (ch0);
1434 INC_BOTH (from, from_byte);
1435 if (words_include_escapes
1436 && (code == Sescape || code == Scharquote))
1437 break;
1438 if (code == Sword)
1439 break;
1441 /* Now CH0 is a character which begins a word and FROM is the
1442 position of the next character. */
1443 func = CHAR_TABLE_REF (Vfind_word_boundary_function_table, ch0);
1444 if (! NILP (Ffboundp (func)))
1446 pos = call2 (func, make_number (from - 1), make_number (end));
1447 if (INTEGERP (pos) && from < XINT (pos) && XINT (pos) <= ZV)
1449 from = XINT (pos);
1450 from_byte = CHAR_TO_BYTE (from);
1453 else
1455 while (1)
1457 if (from == end) break;
1458 UPDATE_SYNTAX_TABLE_FORWARD (from);
1459 ch1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
1460 code = SYNTAX (ch1);
1461 if ((code != Sword
1462 && (! words_include_escapes
1463 || (code != Sescape && code != Scharquote)))
1464 || word_boundary_p (ch0, ch1))
1465 break;
1466 INC_BOTH (from, from_byte);
1467 ch0 = ch1;
1470 count--;
1472 while (count < 0)
1474 while (1)
1476 if (from == beg)
1478 immediate_quit = 0;
1479 return 0;
1481 DEC_BOTH (from, from_byte);
1482 UPDATE_SYNTAX_TABLE_BACKWARD (from);
1483 ch1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
1484 code = SYNTAX (ch1);
1485 if (words_include_escapes
1486 && (code == Sescape || code == Scharquote))
1487 break;
1488 if (code == Sword)
1489 break;
1491 /* Now CH1 is a character which ends a word and FROM is the
1492 position of it. */
1493 func = CHAR_TABLE_REF (Vfind_word_boundary_function_table, ch1);
1494 if (! NILP (Ffboundp (func)))
1496 pos = call2 (func, make_number (from), make_number (beg));
1497 if (INTEGERP (pos) && BEGV <= XINT (pos) && XINT (pos) < from)
1499 from = XINT (pos);
1500 from_byte = CHAR_TO_BYTE (from);
1503 else
1505 while (1)
1507 if (from == beg)
1508 break;
1509 DEC_BOTH (from, from_byte);
1510 UPDATE_SYNTAX_TABLE_BACKWARD (from);
1511 ch0 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
1512 code = SYNTAX (ch0);
1513 if ((code != Sword
1514 && (! words_include_escapes
1515 || (code != Sescape && code != Scharquote)))
1516 || word_boundary_p (ch0, ch1))
1518 INC_BOTH (from, from_byte);
1519 break;
1521 ch1 = ch0;
1524 count++;
1527 immediate_quit = 0;
1529 return from;
1532 DEFUN ("forward-word", Fforward_word, Sforward_word, 0, 1, "^p",
1533 doc: /* Move point forward ARG words (backward if ARG is negative).
1534 If ARG is omitted or nil, move point forward one word.
1535 Normally returns t.
1536 If an edge of the buffer or a field boundary is reached, point is left there
1537 and the function returns nil. Field boundaries are not noticed if
1538 `inhibit-field-text-motion' is non-nil. */)
1539 (Lisp_Object arg)
1541 Lisp_Object tmp;
1542 ptrdiff_t orig_val, val;
1544 if (NILP (arg))
1545 XSETFASTINT (arg, 1);
1546 else
1547 CHECK_NUMBER (arg);
1549 val = orig_val = scan_words (PT, XINT (arg));
1550 if (! orig_val)
1551 val = XINT (arg) > 0 ? ZV : BEGV;
1553 /* Avoid jumping out of an input field. */
1554 tmp = Fconstrain_to_field (make_number (val), make_number (PT),
1555 Qnil, Qnil, Qnil);
1556 val = XFASTINT (tmp);
1558 SET_PT (val);
1559 return val == orig_val ? Qt : Qnil;
1562 DEFUN ("skip-chars-forward", Fskip_chars_forward, Sskip_chars_forward, 1, 2, 0,
1563 doc: /* Move point forward, stopping before a char not in STRING, or at pos LIM.
1564 STRING is like the inside of a `[...]' in a regular expression
1565 except that `]' is never special and `\\' quotes `^', `-' or `\\'
1566 (but not at the end of a range; quoting is never needed there).
1567 Thus, with arg "a-zA-Z", this skips letters stopping before first nonletter.
1568 With arg "^a-zA-Z", skips nonletters stopping before first letter.
1569 Char classes, e.g. `[:alpha:]', are supported.
1571 Returns the distance traveled, either zero or positive. */)
1572 (Lisp_Object string, Lisp_Object lim)
1574 return skip_chars (1, string, lim, 1);
1577 DEFUN ("skip-chars-backward", Fskip_chars_backward, Sskip_chars_backward, 1, 2, 0,
1578 doc: /* Move point backward, stopping after a char not in STRING, or at pos LIM.
1579 See `skip-chars-forward' for details.
1580 Returns the distance traveled, either zero or negative. */)
1581 (Lisp_Object string, Lisp_Object lim)
1583 return skip_chars (0, string, lim, 1);
1586 DEFUN ("skip-syntax-forward", Fskip_syntax_forward, Sskip_syntax_forward, 1, 2, 0,
1587 doc: /* Move point forward across chars in specified syntax classes.
1588 SYNTAX is a string of syntax code characters.
1589 Stop before a char whose syntax is not in SYNTAX, or at position LIM.
1590 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1591 This function returns the distance traveled, either zero or positive. */)
1592 (Lisp_Object syntax, Lisp_Object lim)
1594 return skip_syntaxes (1, syntax, lim);
1597 DEFUN ("skip-syntax-backward", Fskip_syntax_backward, Sskip_syntax_backward, 1, 2, 0,
1598 doc: /* Move point backward across chars in specified syntax classes.
1599 SYNTAX is a string of syntax code characters.
1600 Stop on reaching a char whose syntax is not in SYNTAX, or at position LIM.
1601 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1602 This function returns either zero or a negative number, and the absolute value
1603 of this is the distance traveled. */)
1604 (Lisp_Object syntax, Lisp_Object lim)
1606 return skip_syntaxes (0, syntax, lim);
1609 static Lisp_Object
1610 skip_chars (bool forwardp, Lisp_Object string, Lisp_Object lim,
1611 bool handle_iso_classes)
1613 int c;
1614 char fastmap[0400];
1615 /* Store the ranges of non-ASCII characters. */
1616 int *char_ranges IF_LINT (= NULL);
1617 int n_char_ranges = 0;
1618 bool negate = 0;
1619 ptrdiff_t i, i_byte;
1620 /* True if the current buffer is multibyte and the region contains
1621 non-ASCII chars. */
1622 bool multibyte;
1623 /* True if STRING is multibyte and it contains non-ASCII chars. */
1624 bool string_multibyte;
1625 ptrdiff_t size_byte;
1626 const unsigned char *str;
1627 int len;
1628 Lisp_Object iso_classes;
1629 USE_SAFE_ALLOCA;
1631 CHECK_STRING (string);
1632 iso_classes = Qnil;
1634 if (NILP (lim))
1635 XSETINT (lim, forwardp ? ZV : BEGV);
1636 else
1637 CHECK_NUMBER_COERCE_MARKER (lim);
1639 /* In any case, don't allow scan outside bounds of buffer. */
1640 if (XINT (lim) > ZV)
1641 XSETFASTINT (lim, ZV);
1642 if (XINT (lim) < BEGV)
1643 XSETFASTINT (lim, BEGV);
1645 multibyte = (!NILP (BVAR (current_buffer, enable_multibyte_characters))
1646 && (XINT (lim) - PT != CHAR_TO_BYTE (XINT (lim)) - PT_BYTE));
1647 string_multibyte = SBYTES (string) > SCHARS (string);
1649 memset (fastmap, 0, sizeof fastmap);
1651 str = SDATA (string);
1652 size_byte = SBYTES (string);
1654 i_byte = 0;
1655 if (i_byte < size_byte
1656 && SREF (string, 0) == '^')
1658 negate = 1; i_byte++;
1661 /* Find the characters specified and set their elements of fastmap.
1662 Handle backslashes and ranges specially.
1664 If STRING contains non-ASCII characters, setup char_ranges for
1665 them and use fastmap only for their leading codes. */
1667 if (! string_multibyte)
1669 bool string_has_eight_bit = 0;
1671 /* At first setup fastmap. */
1672 while (i_byte < size_byte)
1674 c = str[i_byte++];
1676 if (handle_iso_classes && c == '['
1677 && i_byte < size_byte
1678 && str[i_byte] == ':')
1680 const unsigned char *class_beg = str + i_byte + 1;
1681 const unsigned char *class_end = class_beg;
1682 const unsigned char *class_limit = str + size_byte - 2;
1683 /* Leave room for the null. */
1684 unsigned char class_name[CHAR_CLASS_MAX_LENGTH + 1];
1685 re_wctype_t cc;
1687 if (class_limit - class_beg > CHAR_CLASS_MAX_LENGTH)
1688 class_limit = class_beg + CHAR_CLASS_MAX_LENGTH;
1690 while (class_end < class_limit
1691 && *class_end >= 'a' && *class_end <= 'z')
1692 class_end++;
1694 if (class_end == class_beg
1695 || *class_end != ':' || class_end[1] != ']')
1696 goto not_a_class_name;
1698 memcpy (class_name, class_beg, class_end - class_beg);
1699 class_name[class_end - class_beg] = 0;
1701 cc = re_wctype (class_name);
1702 if (cc == 0)
1703 error ("Invalid ISO C character class");
1705 iso_classes = Fcons (make_number (cc), iso_classes);
1707 i_byte = class_end + 2 - str;
1708 continue;
1711 not_a_class_name:
1712 if (c == '\\')
1714 if (i_byte == size_byte)
1715 break;
1717 c = str[i_byte++];
1719 /* Treat `-' as range character only if another character
1720 follows. */
1721 if (i_byte + 1 < size_byte
1722 && str[i_byte] == '-')
1724 int c2;
1726 /* Skip over the dash. */
1727 i_byte++;
1729 /* Get the end of the range. */
1730 c2 = str[i_byte++];
1731 if (c2 == '\\'
1732 && i_byte < size_byte)
1733 c2 = str[i_byte++];
1735 if (c <= c2)
1737 int lim2 = c2 + 1;
1738 while (c < lim2)
1739 fastmap[c++] = 1;
1740 if (! ASCII_CHAR_P (c2))
1741 string_has_eight_bit = 1;
1744 else
1746 fastmap[c] = 1;
1747 if (! ASCII_CHAR_P (c))
1748 string_has_eight_bit = 1;
1752 /* If the current range is multibyte and STRING contains
1753 eight-bit chars, arrange fastmap and setup char_ranges for
1754 the corresponding multibyte chars. */
1755 if (multibyte && string_has_eight_bit)
1757 char *p1;
1758 char himap[0200 + 1];
1759 memcpy (himap, fastmap + 0200, 0200);
1760 himap[0200] = 0;
1761 memset (fastmap + 0200, 0, 0200);
1762 SAFE_NALLOCA (char_ranges, 2, 128);
1763 i = 0;
1765 while ((p1 = memchr (himap + i, 1, 0200 - i)))
1767 /* Deduce the next range C..C2 from the next clump of 1s
1768 in HIMAP starting with &HIMAP[I]. HIMAP is the high
1769 order half of the old FASTMAP. */
1770 int c2, leading_code;
1771 i = p1 - himap;
1772 c = BYTE8_TO_CHAR (i + 0200);
1773 i += strlen (p1);
1774 c2 = BYTE8_TO_CHAR (i + 0200 - 1);
1776 char_ranges[n_char_ranges++] = c;
1777 char_ranges[n_char_ranges++] = c2;
1778 leading_code = CHAR_LEADING_CODE (c);
1779 memset (fastmap + leading_code, 1,
1780 CHAR_LEADING_CODE (c2) - leading_code + 1);
1784 else /* STRING is multibyte */
1786 SAFE_NALLOCA (char_ranges, 2, SCHARS (string));
1788 while (i_byte < size_byte)
1790 int leading_code = str[i_byte];
1791 c = STRING_CHAR_AND_LENGTH (str + i_byte, len);
1792 i_byte += len;
1794 if (handle_iso_classes && c == '['
1795 && i_byte < size_byte
1796 && STRING_CHAR (str + i_byte) == ':')
1798 const unsigned char *class_beg = str + i_byte + 1;
1799 const unsigned char *class_end = class_beg;
1800 const unsigned char *class_limit = str + size_byte - 2;
1801 /* Leave room for the null. */
1802 unsigned char class_name[CHAR_CLASS_MAX_LENGTH + 1];
1803 re_wctype_t cc;
1805 if (class_limit - class_beg > CHAR_CLASS_MAX_LENGTH)
1806 class_limit = class_beg + CHAR_CLASS_MAX_LENGTH;
1808 while (class_end < class_limit
1809 && *class_end >= 'a' && *class_end <= 'z')
1810 class_end++;
1812 if (class_end == class_beg
1813 || *class_end != ':' || class_end[1] != ']')
1814 goto not_a_class_name_multibyte;
1816 memcpy (class_name, class_beg, class_end - class_beg);
1817 class_name[class_end - class_beg] = 0;
1819 cc = re_wctype (class_name);
1820 if (cc == 0)
1821 error ("Invalid ISO C character class");
1823 iso_classes = Fcons (make_number (cc), iso_classes);
1825 i_byte = class_end + 2 - str;
1826 continue;
1829 not_a_class_name_multibyte:
1830 if (c == '\\')
1832 if (i_byte == size_byte)
1833 break;
1835 leading_code = str[i_byte];
1836 c = STRING_CHAR_AND_LENGTH (str + i_byte, len);
1837 i_byte += len;
1839 /* Treat `-' as range character only if another character
1840 follows. */
1841 if (i_byte + 1 < size_byte
1842 && str[i_byte] == '-')
1844 int c2, leading_code2;
1846 /* Skip over the dash. */
1847 i_byte++;
1849 /* Get the end of the range. */
1850 leading_code2 = str[i_byte];
1851 c2 = STRING_CHAR_AND_LENGTH (str + i_byte, len);
1852 i_byte += len;
1854 if (c2 == '\\'
1855 && i_byte < size_byte)
1857 leading_code2 = str[i_byte];
1858 c2 = STRING_CHAR_AND_LENGTH (str + i_byte, len);
1859 i_byte += len;
1862 if (c > c2)
1863 continue;
1864 if (ASCII_CHAR_P (c))
1866 while (c <= c2 && c < 0x80)
1867 fastmap[c++] = 1;
1868 leading_code = CHAR_LEADING_CODE (c);
1870 if (! ASCII_CHAR_P (c))
1872 int lim2 = leading_code2 + 1;
1873 while (leading_code < lim2)
1874 fastmap[leading_code++] = 1;
1875 if (c <= c2)
1877 char_ranges[n_char_ranges++] = c;
1878 char_ranges[n_char_ranges++] = c2;
1882 else
1884 if (ASCII_CHAR_P (c))
1885 fastmap[c] = 1;
1886 else
1888 fastmap[leading_code] = 1;
1889 char_ranges[n_char_ranges++] = c;
1890 char_ranges[n_char_ranges++] = c;
1895 /* If the current range is unibyte and STRING contains non-ASCII
1896 chars, arrange fastmap for the corresponding unibyte
1897 chars. */
1899 if (! multibyte && n_char_ranges > 0)
1901 memset (fastmap + 0200, 0, 0200);
1902 for (i = 0; i < n_char_ranges; i += 2)
1904 int c1 = char_ranges[i];
1905 int lim2 = char_ranges[i + 1] + 1;
1907 for (; c1 < lim2; c1++)
1909 int b = CHAR_TO_BYTE_SAFE (c1);
1910 if (b >= 0)
1911 fastmap[b] = 1;
1917 /* If ^ was the first character, complement the fastmap. */
1918 if (negate)
1920 if (! multibyte)
1921 for (i = 0; i < sizeof fastmap; i++)
1922 fastmap[i] ^= 1;
1923 else
1925 for (i = 0; i < 0200; i++)
1926 fastmap[i] ^= 1;
1927 /* All non-ASCII chars possibly match. */
1928 for (; i < sizeof fastmap; i++)
1929 fastmap[i] = 1;
1934 ptrdiff_t start_point = PT;
1935 ptrdiff_t pos = PT;
1936 ptrdiff_t pos_byte = PT_BYTE;
1937 unsigned char *p = PT_ADDR, *endp, *stop;
1939 if (forwardp)
1941 endp = (XINT (lim) == GPT) ? GPT_ADDR : CHAR_POS_ADDR (XINT (lim));
1942 stop = (pos < GPT && GPT < XINT (lim)) ? GPT_ADDR : endp;
1944 else
1946 endp = CHAR_POS_ADDR (XINT (lim));
1947 stop = (pos >= GPT && GPT > XINT (lim)) ? GAP_END_ADDR : endp;
1950 immediate_quit = 1;
1951 /* This code may look up syntax tables using functions that rely on the
1952 gl_state object. To make sure this object is not out of date,
1953 let's initialize it manually.
1954 We ignore syntax-table text-properties for now, since that's
1955 what we've done in the past. */
1956 SETUP_BUFFER_SYNTAX_TABLE ();
1957 if (forwardp)
1959 if (multibyte)
1960 while (1)
1962 int nbytes;
1964 if (p >= stop)
1966 if (p >= endp)
1967 break;
1968 p = GAP_END_ADDR;
1969 stop = endp;
1971 c = STRING_CHAR_AND_LENGTH (p, nbytes);
1972 if (! NILP (iso_classes) && in_classes (c, iso_classes))
1974 if (negate)
1975 break;
1976 else
1977 goto fwd_ok;
1980 if (! fastmap[*p])
1981 break;
1982 if (! ASCII_CHAR_P (c))
1984 /* As we are looking at a multibyte character, we
1985 must look up the character in the table
1986 CHAR_RANGES. If there's no data in the table,
1987 that character is not what we want to skip. */
1989 /* The following code do the right thing even if
1990 n_char_ranges is zero (i.e. no data in
1991 CHAR_RANGES). */
1992 for (i = 0; i < n_char_ranges; i += 2)
1993 if (c >= char_ranges[i] && c <= char_ranges[i + 1])
1994 break;
1995 if (!(negate ^ (i < n_char_ranges)))
1996 break;
1998 fwd_ok:
1999 p += nbytes, pos++, pos_byte += nbytes;
2001 else
2002 while (1)
2004 if (p >= stop)
2006 if (p >= endp)
2007 break;
2008 p = GAP_END_ADDR;
2009 stop = endp;
2012 if (!NILP (iso_classes) && in_classes (*p, iso_classes))
2014 if (negate)
2015 break;
2016 else
2017 goto fwd_unibyte_ok;
2020 if (!fastmap[*p])
2021 break;
2022 fwd_unibyte_ok:
2023 p++, pos++, pos_byte++;
2026 else
2028 if (multibyte)
2029 while (1)
2031 unsigned char *prev_p;
2033 if (p <= stop)
2035 if (p <= endp)
2036 break;
2037 p = GPT_ADDR;
2038 stop = endp;
2040 prev_p = p;
2041 while (--p >= stop && ! CHAR_HEAD_P (*p));
2042 c = STRING_CHAR (p);
2044 if (! NILP (iso_classes) && in_classes (c, iso_classes))
2046 if (negate)
2047 break;
2048 else
2049 goto back_ok;
2052 if (! fastmap[*p])
2053 break;
2054 if (! ASCII_CHAR_P (c))
2056 /* See the comment in the previous similar code. */
2057 for (i = 0; i < n_char_ranges; i += 2)
2058 if (c >= char_ranges[i] && c <= char_ranges[i + 1])
2059 break;
2060 if (!(negate ^ (i < n_char_ranges)))
2061 break;
2063 back_ok:
2064 pos--, pos_byte -= prev_p - p;
2066 else
2067 while (1)
2069 if (p <= stop)
2071 if (p <= endp)
2072 break;
2073 p = GPT_ADDR;
2074 stop = endp;
2077 if (! NILP (iso_classes) && in_classes (p[-1], iso_classes))
2079 if (negate)
2080 break;
2081 else
2082 goto back_unibyte_ok;
2085 if (!fastmap[p[-1]])
2086 break;
2087 back_unibyte_ok:
2088 p--, pos--, pos_byte--;
2092 SET_PT_BOTH (pos, pos_byte);
2093 immediate_quit = 0;
2095 SAFE_FREE ();
2096 return make_number (PT - start_point);
2101 static Lisp_Object
2102 skip_syntaxes (bool forwardp, Lisp_Object string, Lisp_Object lim)
2104 int c;
2105 unsigned char fastmap[0400];
2106 bool negate = 0;
2107 ptrdiff_t i, i_byte;
2108 bool multibyte;
2109 ptrdiff_t size_byte;
2110 unsigned char *str;
2112 CHECK_STRING (string);
2114 if (NILP (lim))
2115 XSETINT (lim, forwardp ? ZV : BEGV);
2116 else
2117 CHECK_NUMBER_COERCE_MARKER (lim);
2119 /* In any case, don't allow scan outside bounds of buffer. */
2120 if (XINT (lim) > ZV)
2121 XSETFASTINT (lim, ZV);
2122 if (XINT (lim) < BEGV)
2123 XSETFASTINT (lim, BEGV);
2125 if (forwardp ? (PT >= XFASTINT (lim)) : (PT <= XFASTINT (lim)))
2126 return make_number (0);
2128 multibyte = (!NILP (BVAR (current_buffer, enable_multibyte_characters))
2129 && (XINT (lim) - PT != CHAR_TO_BYTE (XINT (lim)) - PT_BYTE));
2131 memset (fastmap, 0, sizeof fastmap);
2133 if (SBYTES (string) > SCHARS (string))
2134 /* As this is very rare case (syntax spec is ASCII only), don't
2135 consider efficiency. */
2136 string = string_make_unibyte (string);
2138 str = SDATA (string);
2139 size_byte = SBYTES (string);
2141 i_byte = 0;
2142 if (i_byte < size_byte
2143 && SREF (string, 0) == '^')
2145 negate = 1; i_byte++;
2148 /* Find the syntaxes specified and set their elements of fastmap. */
2150 while (i_byte < size_byte)
2152 c = str[i_byte++];
2153 fastmap[syntax_spec_code[c]] = 1;
2156 /* If ^ was the first character, complement the fastmap. */
2157 if (negate)
2158 for (i = 0; i < sizeof fastmap; i++)
2159 fastmap[i] ^= 1;
2162 ptrdiff_t start_point = PT;
2163 ptrdiff_t pos = PT;
2164 ptrdiff_t pos_byte = PT_BYTE;
2165 unsigned char *p = PT_ADDR, *endp, *stop;
2167 if (forwardp)
2169 endp = (XINT (lim) == GPT) ? GPT_ADDR : CHAR_POS_ADDR (XINT (lim));
2170 stop = (pos < GPT && GPT < XINT (lim)) ? GPT_ADDR : endp;
2172 else
2174 endp = CHAR_POS_ADDR (XINT (lim));
2175 stop = (pos >= GPT && GPT > XINT (lim)) ? GAP_END_ADDR : endp;
2178 immediate_quit = 1;
2179 SETUP_SYNTAX_TABLE (pos, forwardp ? 1 : -1);
2180 if (forwardp)
2182 if (multibyte)
2184 while (1)
2186 int nbytes;
2188 if (p >= stop)
2190 if (p >= endp)
2191 break;
2192 p = GAP_END_ADDR;
2193 stop = endp;
2195 c = STRING_CHAR_AND_LENGTH (p, nbytes);
2196 if (! fastmap[SYNTAX (c)])
2197 break;
2198 p += nbytes, pos++, pos_byte += nbytes;
2199 UPDATE_SYNTAX_TABLE_FORWARD (pos);
2202 else
2204 while (1)
2206 if (p >= stop)
2208 if (p >= endp)
2209 break;
2210 p = GAP_END_ADDR;
2211 stop = endp;
2213 if (! fastmap[SYNTAX (*p)])
2214 break;
2215 p++, pos++, pos_byte++;
2216 UPDATE_SYNTAX_TABLE_FORWARD (pos);
2220 else
2222 if (multibyte)
2224 while (1)
2226 unsigned char *prev_p;
2228 if (p <= stop)
2230 if (p <= endp)
2231 break;
2232 p = GPT_ADDR;
2233 stop = endp;
2235 UPDATE_SYNTAX_TABLE_BACKWARD (pos - 1);
2236 prev_p = p;
2237 while (--p >= stop && ! CHAR_HEAD_P (*p));
2238 c = STRING_CHAR (p);
2239 if (! fastmap[SYNTAX (c)])
2240 break;
2241 pos--, pos_byte -= prev_p - p;
2244 else
2246 while (1)
2248 if (p <= stop)
2250 if (p <= endp)
2251 break;
2252 p = GPT_ADDR;
2253 stop = endp;
2255 UPDATE_SYNTAX_TABLE_BACKWARD (pos - 1);
2256 if (! fastmap[SYNTAX (p[-1])])
2257 break;
2258 p--, pos--, pos_byte--;
2263 SET_PT_BOTH (pos, pos_byte);
2264 immediate_quit = 0;
2266 return make_number (PT - start_point);
2270 /* Return true if character C belongs to one of the ISO classes
2271 in the list ISO_CLASSES. Each class is represented by an
2272 integer which is its type according to re_wctype. */
2274 static bool
2275 in_classes (int c, Lisp_Object iso_classes)
2277 bool fits_class = 0;
2279 while (CONSP (iso_classes))
2281 Lisp_Object elt;
2282 elt = XCAR (iso_classes);
2283 iso_classes = XCDR (iso_classes);
2285 if (re_iswctype (c, XFASTINT (elt)))
2286 fits_class = 1;
2289 return fits_class;
2292 /* Jump over a comment, assuming we are at the beginning of one.
2293 FROM is the current position.
2294 FROM_BYTE is the bytepos corresponding to FROM.
2295 Do not move past STOP (a charpos).
2296 The comment over which we have to jump is of style STYLE
2297 (either SYNTAX_FLAGS_COMMENT_STYLE (foo) or ST_COMMENT_STYLE).
2298 NESTING should be positive to indicate the nesting at the beginning
2299 for nested comments and should be zero or negative else.
2300 ST_COMMENT_STYLE cannot be nested.
2301 PREV_SYNTAX is the SYNTAX_WITH_FLAGS of the previous character
2302 (or 0 If the search cannot start in the middle of a two-character).
2304 If successful, return true and store the charpos of the comment's end
2305 into *CHARPOS_PTR and the corresponding bytepos into *BYTEPOS_PTR.
2306 Else, return false and store the charpos STOP into *CHARPOS_PTR, the
2307 corresponding bytepos into *BYTEPOS_PTR and the current nesting
2308 (as defined for state.incomment) in *INCOMMENT_PTR.
2310 The comment end is the last character of the comment rather than the
2311 character just after the comment.
2313 Global syntax data is assumed to initially be valid for FROM and
2314 remains valid for forward search starting at the returned position. */
2316 static bool
2317 forw_comment (ptrdiff_t from, ptrdiff_t from_byte, ptrdiff_t stop,
2318 EMACS_INT nesting, int style, int prev_syntax,
2319 ptrdiff_t *charpos_ptr, ptrdiff_t *bytepos_ptr,
2320 EMACS_INT *incomment_ptr)
2322 register int c, c1;
2323 register enum syntaxcode code;
2324 register int syntax, other_syntax;
2326 if (nesting <= 0) nesting = -1;
2328 /* Enter the loop in the middle so that we find
2329 a 2-char comment ender if we start in the middle of it. */
2330 syntax = prev_syntax;
2331 if (syntax != 0) goto forw_incomment;
2333 while (1)
2335 if (from == stop)
2337 *incomment_ptr = nesting;
2338 *charpos_ptr = from;
2339 *bytepos_ptr = from_byte;
2340 return 0;
2342 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2343 syntax = SYNTAX_WITH_FLAGS (c);
2344 code = syntax & 0xff;
2345 if (code == Sendcomment
2346 && SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0) == style
2347 && (SYNTAX_FLAGS_COMMENT_NESTED (syntax) ?
2348 (nesting > 0 && --nesting == 0) : nesting < 0))
2349 /* We have encountered a comment end of the same style
2350 as the comment sequence which began this comment
2351 section. */
2352 break;
2353 if (code == Scomment_fence
2354 && style == ST_COMMENT_STYLE)
2355 /* We have encountered a comment end of the same style
2356 as the comment sequence which began this comment
2357 section. */
2358 break;
2359 if (nesting > 0
2360 && code == Scomment
2361 && SYNTAX_FLAGS_COMMENT_NESTED (syntax)
2362 && SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0) == style)
2363 /* We have encountered a nested comment of the same style
2364 as the comment sequence which began this comment section. */
2365 nesting++;
2366 INC_BOTH (from, from_byte);
2367 UPDATE_SYNTAX_TABLE_FORWARD (from);
2369 forw_incomment:
2370 if (from < stop && SYNTAX_FLAGS_COMEND_FIRST (syntax)
2371 && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
2372 other_syntax = SYNTAX_WITH_FLAGS (c1),
2373 SYNTAX_FLAGS_COMEND_SECOND (other_syntax))
2374 && SYNTAX_FLAGS_COMMENT_STYLE (syntax, other_syntax) == style
2375 && ((SYNTAX_FLAGS_COMMENT_NESTED (syntax) ||
2376 SYNTAX_FLAGS_COMMENT_NESTED (other_syntax))
2377 ? nesting > 0 : nesting < 0))
2379 if (--nesting <= 0)
2380 /* We have encountered a comment end of the same style
2381 as the comment sequence which began this comment section. */
2382 break;
2383 else
2385 INC_BOTH (from, from_byte);
2386 UPDATE_SYNTAX_TABLE_FORWARD (from);
2389 if (nesting > 0
2390 && from < stop
2391 && SYNTAX_FLAGS_COMSTART_FIRST (syntax)
2392 && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
2393 other_syntax = SYNTAX_WITH_FLAGS (c1),
2394 SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax) == style
2395 && SYNTAX_FLAGS_COMSTART_SECOND (other_syntax))
2396 && (SYNTAX_FLAGS_COMMENT_NESTED (syntax) ||
2397 SYNTAX_FLAGS_COMMENT_NESTED (other_syntax)))
2398 /* We have encountered a nested comment of the same style
2399 as the comment sequence which began this comment section. */
2401 INC_BOTH (from, from_byte);
2402 UPDATE_SYNTAX_TABLE_FORWARD (from);
2403 nesting++;
2406 *charpos_ptr = from;
2407 *bytepos_ptr = from_byte;
2408 return 1;
2411 DEFUN ("forward-comment", Fforward_comment, Sforward_comment, 1, 1, 0,
2412 doc: /*
2413 Move forward across up to COUNT comments. If COUNT is negative, move backward.
2414 Stop scanning if we find something other than a comment or whitespace.
2415 Set point to where scanning stops.
2416 If COUNT comments are found as expected, with nothing except whitespace
2417 between them, return t; otherwise return nil. */)
2418 (Lisp_Object count)
2420 ptrdiff_t from, from_byte, stop;
2421 int c, c1;
2422 enum syntaxcode code;
2423 int comstyle = 0; /* style of comment encountered */
2424 bool comnested = 0; /* whether the comment is nestable or not */
2425 bool found;
2426 EMACS_INT count1;
2427 ptrdiff_t out_charpos, out_bytepos;
2428 EMACS_INT dummy;
2430 CHECK_NUMBER (count);
2431 count1 = XINT (count);
2432 stop = count1 > 0 ? ZV : BEGV;
2434 immediate_quit = 1;
2435 QUIT;
2437 from = PT;
2438 from_byte = PT_BYTE;
2440 SETUP_SYNTAX_TABLE (from, count1);
2441 while (count1 > 0)
2445 bool comstart_first;
2446 int syntax, other_syntax;
2448 if (from == stop)
2450 SET_PT_BOTH (from, from_byte);
2451 immediate_quit = 0;
2452 return Qnil;
2454 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2455 syntax = SYNTAX_WITH_FLAGS (c);
2456 code = SYNTAX (c);
2457 comstart_first = SYNTAX_FLAGS_COMSTART_FIRST (syntax);
2458 comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax);
2459 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0);
2460 INC_BOTH (from, from_byte);
2461 UPDATE_SYNTAX_TABLE_FORWARD (from);
2462 if (from < stop && comstart_first
2463 && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
2464 other_syntax = SYNTAX_WITH_FLAGS (c1),
2465 SYNTAX_FLAGS_COMSTART_SECOND (other_syntax)))
2467 /* We have encountered a comment start sequence and we
2468 are ignoring all text inside comments. We must record
2469 the comment style this sequence begins so that later,
2470 only a comment end of the same style actually ends
2471 the comment section. */
2472 code = Scomment;
2473 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax);
2474 comnested |= SYNTAX_FLAGS_COMMENT_NESTED (other_syntax);
2475 INC_BOTH (from, from_byte);
2476 UPDATE_SYNTAX_TABLE_FORWARD (from);
2479 while (code == Swhitespace || (code == Sendcomment && c == '\n'));
2481 if (code == Scomment_fence)
2482 comstyle = ST_COMMENT_STYLE;
2483 else if (code != Scomment)
2485 immediate_quit = 0;
2486 DEC_BOTH (from, from_byte);
2487 SET_PT_BOTH (from, from_byte);
2488 return Qnil;
2490 /* We're at the start of a comment. */
2491 found = forw_comment (from, from_byte, stop, comnested, comstyle, 0,
2492 &out_charpos, &out_bytepos, &dummy);
2493 from = out_charpos; from_byte = out_bytepos;
2494 if (!found)
2496 immediate_quit = 0;
2497 SET_PT_BOTH (from, from_byte);
2498 return Qnil;
2500 INC_BOTH (from, from_byte);
2501 UPDATE_SYNTAX_TABLE_FORWARD (from);
2502 /* We have skipped one comment. */
2503 count1--;
2506 while (count1 < 0)
2508 while (1)
2510 bool quoted;
2511 int syntax;
2513 if (from <= stop)
2515 SET_PT_BOTH (BEGV, BEGV_BYTE);
2516 immediate_quit = 0;
2517 return Qnil;
2520 DEC_BOTH (from, from_byte);
2521 /* char_quoted does UPDATE_SYNTAX_TABLE_BACKWARD (from). */
2522 quoted = char_quoted (from, from_byte);
2523 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2524 syntax = SYNTAX_WITH_FLAGS (c);
2525 code = SYNTAX (c);
2526 comstyle = 0;
2527 comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax);
2528 if (code == Sendcomment)
2529 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0);
2530 if (from > stop && SYNTAX_FLAGS_COMEND_SECOND (syntax)
2531 && prev_char_comend_first (from, from_byte)
2532 && !char_quoted (from - 1, dec_bytepos (from_byte)))
2534 int other_syntax;
2535 /* We must record the comment style encountered so that
2536 later, we can match only the proper comment begin
2537 sequence of the same style. */
2538 DEC_BOTH (from, from_byte);
2539 code = Sendcomment;
2540 /* Calling char_quoted, above, set up global syntax position
2541 at the new value of FROM. */
2542 c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2543 other_syntax = SYNTAX_WITH_FLAGS (c1);
2544 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax);
2545 comnested |= SYNTAX_FLAGS_COMMENT_NESTED (other_syntax);
2548 if (code == Scomment_fence)
2550 /* Skip until first preceding unquoted comment_fence. */
2551 bool fence_found = 0;
2552 ptrdiff_t ini = from, ini_byte = from_byte;
2554 while (1)
2556 DEC_BOTH (from, from_byte);
2557 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2558 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2559 if (SYNTAX (c) == Scomment_fence
2560 && !char_quoted (from, from_byte))
2562 fence_found = 1;
2563 break;
2565 else if (from == stop)
2566 break;
2568 if (fence_found == 0)
2570 from = ini; /* Set point to ini + 1. */
2571 from_byte = ini_byte;
2572 goto leave;
2574 else
2575 /* We have skipped one comment. */
2576 break;
2578 else if (code == Sendcomment)
2580 found = back_comment (from, from_byte, stop, comnested, comstyle,
2581 &out_charpos, &out_bytepos);
2582 if (!found)
2584 if (c == '\n')
2585 /* This end-of-line is not an end-of-comment.
2586 Treat it like a whitespace.
2587 CC-mode (and maybe others) relies on this behavior. */
2589 else
2591 /* Failure: we should go back to the end of this
2592 not-quite-endcomment. */
2593 if (SYNTAX (c) != code)
2594 /* It was a two-char Sendcomment. */
2595 INC_BOTH (from, from_byte);
2596 goto leave;
2599 else
2601 /* We have skipped one comment. */
2602 from = out_charpos, from_byte = out_bytepos;
2603 break;
2606 else if (code != Swhitespace || quoted)
2608 leave:
2609 immediate_quit = 0;
2610 INC_BOTH (from, from_byte);
2611 SET_PT_BOTH (from, from_byte);
2612 return Qnil;
2616 count1++;
2619 SET_PT_BOTH (from, from_byte);
2620 immediate_quit = 0;
2621 return Qt;
2624 /* Return syntax code of character C if C is an ASCII character
2625 or if MULTIBYTE_SYMBOL_P is false. Otherwise, return Ssymbol. */
2627 static enum syntaxcode
2628 syntax_multibyte (int c, bool multibyte_symbol_p)
2630 return ASCII_CHAR_P (c) || !multibyte_symbol_p ? SYNTAX (c) : Ssymbol;
2633 static Lisp_Object
2634 scan_lists (EMACS_INT from, EMACS_INT count, EMACS_INT depth, bool sexpflag)
2636 Lisp_Object val;
2637 ptrdiff_t stop = count > 0 ? ZV : BEGV;
2638 int c, c1;
2639 int stringterm;
2640 bool quoted;
2641 bool mathexit = 0;
2642 enum syntaxcode code;
2643 EMACS_INT min_depth = depth; /* Err out if depth gets less than this. */
2644 int comstyle = 0; /* Style of comment encountered. */
2645 bool comnested = 0; /* Whether the comment is nestable or not. */
2646 ptrdiff_t temp_pos;
2647 EMACS_INT last_good = from;
2648 bool found;
2649 ptrdiff_t from_byte;
2650 ptrdiff_t out_bytepos, out_charpos;
2651 EMACS_INT dummy;
2652 bool multibyte_symbol_p = sexpflag && multibyte_syntax_as_symbol;
2654 if (depth > 0) min_depth = 0;
2656 if (from > ZV) from = ZV;
2657 if (from < BEGV) from = BEGV;
2659 from_byte = CHAR_TO_BYTE (from);
2661 immediate_quit = 1;
2662 QUIT;
2664 SETUP_SYNTAX_TABLE (from, count);
2665 while (count > 0)
2667 while (from < stop)
2669 bool comstart_first, prefix;
2670 int syntax, other_syntax;
2671 UPDATE_SYNTAX_TABLE_FORWARD (from);
2672 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2673 syntax = SYNTAX_WITH_FLAGS (c);
2674 code = syntax_multibyte (c, multibyte_symbol_p);
2675 comstart_first = SYNTAX_FLAGS_COMSTART_FIRST (syntax);
2676 comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax);
2677 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0);
2678 prefix = SYNTAX_FLAGS_PREFIX (syntax);
2679 if (depth == min_depth)
2680 last_good = from;
2681 INC_BOTH (from, from_byte);
2682 UPDATE_SYNTAX_TABLE_FORWARD (from);
2683 if (from < stop && comstart_first
2684 && (c = FETCH_CHAR_AS_MULTIBYTE (from_byte),
2685 other_syntax = SYNTAX_WITH_FLAGS (c),
2686 SYNTAX_FLAGS_COMSTART_SECOND (other_syntax))
2687 && parse_sexp_ignore_comments)
2689 /* We have encountered a comment start sequence and we
2690 are ignoring all text inside comments. We must record
2691 the comment style this sequence begins so that later,
2692 only a comment end of the same style actually ends
2693 the comment section. */
2694 code = Scomment;
2695 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax);
2696 comnested |= SYNTAX_FLAGS_COMMENT_NESTED (other_syntax);
2697 INC_BOTH (from, from_byte);
2698 UPDATE_SYNTAX_TABLE_FORWARD (from);
2701 if (prefix)
2702 continue;
2704 switch (code)
2706 case Sescape:
2707 case Scharquote:
2708 if (from == stop)
2709 goto lose;
2710 INC_BOTH (from, from_byte);
2711 /* Treat following character as a word constituent. */
2712 case Sword:
2713 case Ssymbol:
2714 if (depth || !sexpflag) break;
2715 /* This word counts as a sexp; return at end of it. */
2716 while (from < stop)
2718 UPDATE_SYNTAX_TABLE_FORWARD (from);
2720 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2721 switch (syntax_multibyte (c, multibyte_symbol_p))
2723 case Scharquote:
2724 case Sescape:
2725 INC_BOTH (from, from_byte);
2726 if (from == stop)
2727 goto lose;
2728 break;
2729 case Sword:
2730 case Ssymbol:
2731 case Squote:
2732 break;
2733 default:
2734 goto done;
2736 INC_BOTH (from, from_byte);
2738 goto done;
2740 case Scomment_fence:
2741 comstyle = ST_COMMENT_STYLE;
2742 /* FALLTHROUGH */
2743 case Scomment:
2744 if (!parse_sexp_ignore_comments) break;
2745 UPDATE_SYNTAX_TABLE_FORWARD (from);
2746 found = forw_comment (from, from_byte, stop,
2747 comnested, comstyle, 0,
2748 &out_charpos, &out_bytepos, &dummy);
2749 from = out_charpos, from_byte = out_bytepos;
2750 if (!found)
2752 if (depth == 0)
2753 goto done;
2754 goto lose;
2756 INC_BOTH (from, from_byte);
2757 UPDATE_SYNTAX_TABLE_FORWARD (from);
2758 break;
2760 case Smath:
2761 if (!sexpflag)
2762 break;
2763 if (from != stop && c == FETCH_CHAR_AS_MULTIBYTE (from_byte))
2765 INC_BOTH (from, from_byte);
2767 if (mathexit)
2769 mathexit = 0;
2770 goto close1;
2772 mathexit = 1;
2774 case Sopen:
2775 if (!++depth) goto done;
2776 break;
2778 case Sclose:
2779 close1:
2780 if (!--depth) goto done;
2781 if (depth < min_depth)
2782 xsignal3 (Qscan_error,
2783 build_string ("Containing expression ends prematurely"),
2784 make_number (last_good), make_number (from));
2785 break;
2787 case Sstring:
2788 case Sstring_fence:
2789 temp_pos = dec_bytepos (from_byte);
2790 stringterm = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
2791 while (1)
2793 enum syntaxcode c_code;
2794 if (from >= stop)
2795 goto lose;
2796 UPDATE_SYNTAX_TABLE_FORWARD (from);
2797 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2798 c_code = syntax_multibyte (c, multibyte_symbol_p);
2799 if (code == Sstring
2800 ? c == stringterm && c_code == Sstring
2801 : c_code == Sstring_fence)
2802 break;
2804 if (c_code == Scharquote || c_code == Sescape)
2805 INC_BOTH (from, from_byte);
2806 INC_BOTH (from, from_byte);
2808 INC_BOTH (from, from_byte);
2809 if (!depth && sexpflag) goto done;
2810 break;
2811 default:
2812 /* Ignore whitespace, punctuation, quote, endcomment. */
2813 break;
2817 /* Reached end of buffer. Error if within object, return nil if between */
2818 if (depth)
2819 goto lose;
2821 immediate_quit = 0;
2822 return Qnil;
2824 /* End of object reached */
2825 done:
2826 count--;
2829 while (count < 0)
2831 while (from > stop)
2833 int syntax;
2834 DEC_BOTH (from, from_byte);
2835 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2836 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2837 syntax= SYNTAX_WITH_FLAGS (c);
2838 code = syntax_multibyte (c, multibyte_symbol_p);
2839 if (depth == min_depth)
2840 last_good = from;
2841 comstyle = 0;
2842 comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax);
2843 if (code == Sendcomment)
2844 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0);
2845 if (from > stop && SYNTAX_FLAGS_COMEND_SECOND (syntax)
2846 && prev_char_comend_first (from, from_byte)
2847 && parse_sexp_ignore_comments)
2849 /* We must record the comment style encountered so that
2850 later, we can match only the proper comment begin
2851 sequence of the same style. */
2852 int c2, other_syntax;
2853 DEC_BOTH (from, from_byte);
2854 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2855 code = Sendcomment;
2856 c2 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2857 other_syntax = SYNTAX_WITH_FLAGS (c2);
2858 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax);
2859 comnested |= SYNTAX_FLAGS_COMMENT_NESTED (other_syntax);
2862 /* Quoting turns anything except a comment-ender
2863 into a word character. Note that this cannot be true
2864 if we decremented FROM in the if-statement above. */
2865 if (code != Sendcomment && char_quoted (from, from_byte))
2867 DEC_BOTH (from, from_byte);
2868 code = Sword;
2870 else if (SYNTAX_FLAGS_PREFIX (syntax))
2871 continue;
2873 switch (code)
2875 case Sword:
2876 case Ssymbol:
2877 case Sescape:
2878 case Scharquote:
2879 if (depth || !sexpflag) break;
2880 /* This word counts as a sexp; count object finished
2881 after passing it. */
2882 while (from > stop)
2884 temp_pos = from_byte;
2885 if (! NILP (BVAR (current_buffer, enable_multibyte_characters)))
2886 DEC_POS (temp_pos);
2887 else
2888 temp_pos--;
2889 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
2890 c1 = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
2891 /* Don't allow comment-end to be quoted. */
2892 if (syntax_multibyte (c1, multibyte_symbol_p) == Sendcomment)
2893 goto done2;
2894 quoted = char_quoted (from - 1, temp_pos);
2895 if (quoted)
2897 DEC_BOTH (from, from_byte);
2898 temp_pos = dec_bytepos (temp_pos);
2899 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
2901 c1 = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
2902 if (! quoted)
2903 switch (syntax_multibyte (c1, multibyte_symbol_p))
2905 case Sword: case Ssymbol: case Squote: break;
2906 default: goto done2;
2908 DEC_BOTH (from, from_byte);
2910 goto done2;
2912 case Smath:
2913 if (!sexpflag)
2914 break;
2915 if (from > BEGV)
2917 temp_pos = dec_bytepos (from_byte);
2918 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
2919 if (from != stop && c == FETCH_CHAR_AS_MULTIBYTE (temp_pos))
2920 DEC_BOTH (from, from_byte);
2922 if (mathexit)
2924 mathexit = 0;
2925 goto open2;
2927 mathexit = 1;
2929 case Sclose:
2930 if (!++depth) goto done2;
2931 break;
2933 case Sopen:
2934 open2:
2935 if (!--depth) goto done2;
2936 if (depth < min_depth)
2937 xsignal3 (Qscan_error,
2938 build_string ("Containing expression ends prematurely"),
2939 make_number (last_good), make_number (from));
2940 break;
2942 case Sendcomment:
2943 if (!parse_sexp_ignore_comments)
2944 break;
2945 found = back_comment (from, from_byte, stop, comnested, comstyle,
2946 &out_charpos, &out_bytepos);
2947 /* FIXME: if !found, it really wasn't a comment-end.
2948 For single-char Sendcomment, we can't do much about it apart
2949 from skipping the char.
2950 For 2-char endcomments, we could try again, taking both
2951 chars as separate entities, but it's a lot of trouble
2952 for very little gain, so we don't bother either. -sm */
2953 if (found)
2954 from = out_charpos, from_byte = out_bytepos;
2955 break;
2957 case Scomment_fence:
2958 case Sstring_fence:
2959 while (1)
2961 if (from == stop)
2962 goto lose;
2963 DEC_BOTH (from, from_byte);
2964 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2965 if (!char_quoted (from, from_byte))
2967 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2968 if (syntax_multibyte (c, multibyte_symbol_p) == code)
2969 break;
2972 if (code == Sstring_fence && !depth && sexpflag) goto done2;
2973 break;
2975 case Sstring:
2976 stringterm = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2977 while (1)
2979 if (from == stop)
2980 goto lose;
2981 DEC_BOTH (from, from_byte);
2982 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2983 if (!char_quoted (from, from_byte))
2985 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2986 if (c == stringterm
2987 && (syntax_multibyte (c, multibyte_symbol_p)
2988 == Sstring))
2989 break;
2992 if (!depth && sexpflag) goto done2;
2993 break;
2994 default:
2995 /* Ignore whitespace, punctuation, quote, endcomment. */
2996 break;
3000 /* Reached start of buffer. Error if within object, return nil if between */
3001 if (depth)
3002 goto lose;
3004 immediate_quit = 0;
3005 return Qnil;
3007 done2:
3008 count++;
3012 immediate_quit = 0;
3013 XSETFASTINT (val, from);
3014 return val;
3016 lose:
3017 xsignal3 (Qscan_error,
3018 build_string ("Unbalanced parentheses"),
3019 make_number (last_good), make_number (from));
3022 DEFUN ("scan-lists", Fscan_lists, Sscan_lists, 3, 3, 0,
3023 doc: /* Scan from character number FROM by COUNT lists.
3024 Scan forward if COUNT is positive, backward if COUNT is negative.
3025 Return the character number of the position thus found.
3027 A \"list", in this context, refers to a balanced parenthetical
3028 grouping, as determined by the syntax table.
3030 If DEPTH is nonzero, treat that as the nesting depth of the starting
3031 point (i.e. the starting point is DEPTH parentheses deep). This
3032 function scans over parentheses until the depth goes to zero COUNT
3033 times. Hence, positive DEPTH moves out that number of levels of
3034 parentheses, while negative DEPTH moves to a deeper level.
3036 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
3038 If we reach the beginning or end of the accessible part of the buffer
3039 before we have scanned over COUNT lists, return nil if the depth at
3040 that point is zero, and signal a error if the depth is nonzero. */)
3041 (Lisp_Object from, Lisp_Object count, Lisp_Object depth)
3043 CHECK_NUMBER (from);
3044 CHECK_NUMBER (count);
3045 CHECK_NUMBER (depth);
3047 return scan_lists (XINT (from), XINT (count), XINT (depth), 0);
3050 DEFUN ("scan-sexps", Fscan_sexps, Sscan_sexps, 2, 2, 0,
3051 doc: /* Scan from character number FROM by COUNT balanced expressions.
3052 If COUNT is negative, scan backwards.
3053 Returns the character number of the position thus found.
3055 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
3057 If the beginning or end of (the accessible part of) the buffer is reached
3058 in the middle of a parenthetical grouping, an error is signaled.
3059 If the beginning or end is reached between groupings
3060 but before count is used up, nil is returned. */)
3061 (Lisp_Object from, Lisp_Object count)
3063 CHECK_NUMBER (from);
3064 CHECK_NUMBER (count);
3066 return scan_lists (XINT (from), XINT (count), 0, 1);
3069 DEFUN ("backward-prefix-chars", Fbackward_prefix_chars, Sbackward_prefix_chars,
3070 0, 0, 0,
3071 doc: /* Move point backward over any number of chars with prefix syntax.
3072 This includes chars with expression prefix syntax class (') and those with
3073 the prefix syntax flag (p). */)
3074 (void)
3076 ptrdiff_t beg = BEGV;
3077 ptrdiff_t opoint = PT;
3078 ptrdiff_t opoint_byte = PT_BYTE;
3079 ptrdiff_t pos = PT;
3080 ptrdiff_t pos_byte = PT_BYTE;
3081 int c;
3083 if (pos <= beg)
3085 SET_PT_BOTH (opoint, opoint_byte);
3087 return Qnil;
3090 SETUP_SYNTAX_TABLE (pos, -1);
3092 DEC_BOTH (pos, pos_byte);
3094 while (!char_quoted (pos, pos_byte)
3095 /* Previous statement updates syntax table. */
3096 && ((c = FETCH_CHAR_AS_MULTIBYTE (pos_byte), SYNTAX (c) == Squote)
3097 || syntax_prefix_flag_p (c)))
3099 opoint = pos;
3100 opoint_byte = pos_byte;
3102 if (pos + 1 > beg)
3103 DEC_BOTH (pos, pos_byte);
3106 SET_PT_BOTH (opoint, opoint_byte);
3108 return Qnil;
3111 /* Parse forward from FROM / FROM_BYTE to END,
3112 assuming that FROM has state OLDSTATE (nil means FROM is start of function),
3113 and return a description of the state of the parse at END.
3114 If STOPBEFORE, stop at the start of an atom.
3115 If COMMENTSTOP is 1, stop at the start of a comment.
3116 If COMMENTSTOP is -1, stop at the start or end of a comment,
3117 after the beginning of a string, or after the end of a string. */
3119 static void
3120 scan_sexps_forward (struct lisp_parse_state *stateptr,
3121 ptrdiff_t from, ptrdiff_t from_byte, ptrdiff_t end,
3122 EMACS_INT targetdepth, bool stopbefore,
3123 Lisp_Object oldstate, int commentstop)
3125 struct lisp_parse_state state;
3126 enum syntaxcode code;
3127 int c1;
3128 bool comnested;
3129 struct level { ptrdiff_t last, prev; };
3130 struct level levelstart[100];
3131 struct level *curlevel = levelstart;
3132 struct level *endlevel = levelstart + 100;
3133 EMACS_INT depth; /* Paren depth of current scanning location.
3134 level - levelstart equals this except
3135 when the depth becomes negative. */
3136 EMACS_INT mindepth; /* Lowest DEPTH value seen. */
3137 bool start_quoted = 0; /* True means starting after a char quote. */
3138 Lisp_Object tem;
3139 ptrdiff_t prev_from; /* Keep one character before FROM. */
3140 ptrdiff_t prev_from_byte;
3141 int prev_from_syntax;
3142 bool boundary_stop = commentstop == -1;
3143 bool nofence;
3144 bool found;
3145 ptrdiff_t out_bytepos, out_charpos;
3146 int temp;
3148 prev_from = from;
3149 prev_from_byte = from_byte;
3150 if (from != BEGV)
3151 DEC_BOTH (prev_from, prev_from_byte);
3153 /* Use this macro instead of `from++'. */
3154 #define INC_FROM \
3155 do { prev_from = from; \
3156 prev_from_byte = from_byte; \
3157 temp = FETCH_CHAR_AS_MULTIBYTE (prev_from_byte); \
3158 prev_from_syntax = SYNTAX_WITH_FLAGS (temp); \
3159 INC_BOTH (from, from_byte); \
3160 if (from < end) \
3161 UPDATE_SYNTAX_TABLE_FORWARD (from); \
3162 } while (0)
3164 immediate_quit = 1;
3165 QUIT;
3167 if (NILP (oldstate))
3169 depth = 0;
3170 state.instring = -1;
3171 state.incomment = 0;
3172 state.comstyle = 0; /* comment style a by default. */
3173 state.comstr_start = -1; /* no comment/string seen. */
3175 else
3177 tem = Fcar (oldstate);
3178 if (!NILP (tem))
3179 depth = XINT (tem);
3180 else
3181 depth = 0;
3183 oldstate = Fcdr (oldstate);
3184 oldstate = Fcdr (oldstate);
3185 oldstate = Fcdr (oldstate);
3186 tem = Fcar (oldstate);
3187 /* Check whether we are inside string_fence-style string: */
3188 state.instring = (!NILP (tem)
3189 ? (CHARACTERP (tem) ? XFASTINT (tem) : ST_STRING_STYLE)
3190 : -1);
3192 oldstate = Fcdr (oldstate);
3193 tem = Fcar (oldstate);
3194 state.incomment = (!NILP (tem)
3195 ? (INTEGERP (tem) ? XINT (tem) : -1)
3196 : 0);
3198 oldstate = Fcdr (oldstate);
3199 tem = Fcar (oldstate);
3200 start_quoted = !NILP (tem);
3202 /* if the eighth element of the list is nil, we are in comment
3203 style a. If it is non-nil, we are in comment style b */
3204 oldstate = Fcdr (oldstate);
3205 oldstate = Fcdr (oldstate);
3206 tem = Fcar (oldstate);
3207 state.comstyle = (NILP (tem)
3209 : (RANGED_INTEGERP (0, tem, ST_COMMENT_STYLE)
3210 ? XINT (tem)
3211 : ST_COMMENT_STYLE));
3213 oldstate = Fcdr (oldstate);
3214 tem = Fcar (oldstate);
3215 state.comstr_start =
3216 RANGED_INTEGERP (PTRDIFF_MIN, tem, PTRDIFF_MAX) ? XINT (tem) : -1;
3217 oldstate = Fcdr (oldstate);
3218 tem = Fcar (oldstate);
3219 while (!NILP (tem)) /* >= second enclosing sexps. */
3221 Lisp_Object temhd = Fcar (tem);
3222 if (RANGED_INTEGERP (PTRDIFF_MIN, temhd, PTRDIFF_MAX))
3223 curlevel->last = XINT (temhd);
3224 if (++curlevel == endlevel)
3225 curlevel--; /* error ("Nesting too deep for parser"); */
3226 curlevel->prev = -1;
3227 curlevel->last = -1;
3228 tem = Fcdr (tem);
3231 state.quoted = 0;
3232 mindepth = depth;
3234 curlevel->prev = -1;
3235 curlevel->last = -1;
3237 SETUP_SYNTAX_TABLE (prev_from, 1);
3238 temp = FETCH_CHAR (prev_from_byte);
3239 prev_from_syntax = SYNTAX_WITH_FLAGS (temp);
3240 UPDATE_SYNTAX_TABLE_FORWARD (from);
3242 /* Enter the loop at a place appropriate for initial state. */
3244 if (state.incomment)
3245 goto startincomment;
3246 if (state.instring >= 0)
3248 nofence = state.instring != ST_STRING_STYLE;
3249 if (start_quoted)
3250 goto startquotedinstring;
3251 goto startinstring;
3253 else if (start_quoted)
3254 goto startquoted;
3256 while (from < end)
3258 int syntax;
3259 INC_FROM;
3260 code = prev_from_syntax & 0xff;
3262 if (from < end
3263 && SYNTAX_FLAGS_COMSTART_FIRST (prev_from_syntax)
3264 && (c1 = FETCH_CHAR (from_byte),
3265 syntax = SYNTAX_WITH_FLAGS (c1),
3266 SYNTAX_FLAGS_COMSTART_SECOND (syntax)))
3267 /* Duplicate code to avoid a complex if-expression
3268 which causes trouble for the SGI compiler. */
3270 /* Record the comment style we have entered so that only
3271 the comment-end sequence of the same style actually
3272 terminates the comment section. */
3273 state.comstyle
3274 = SYNTAX_FLAGS_COMMENT_STYLE (syntax, prev_from_syntax);
3275 comnested = (SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax)
3276 | SYNTAX_FLAGS_COMMENT_NESTED (syntax));
3277 state.incomment = comnested ? 1 : -1;
3278 state.comstr_start = prev_from;
3279 INC_FROM;
3280 code = Scomment;
3282 else if (code == Scomment_fence)
3284 /* Record the comment style we have entered so that only
3285 the comment-end sequence of the same style actually
3286 terminates the comment section. */
3287 state.comstyle = ST_COMMENT_STYLE;
3288 state.incomment = -1;
3289 state.comstr_start = prev_from;
3290 code = Scomment;
3292 else if (code == Scomment)
3294 state.comstyle = SYNTAX_FLAGS_COMMENT_STYLE (prev_from_syntax, 0);
3295 state.incomment = (SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax) ?
3296 1 : -1);
3297 state.comstr_start = prev_from;
3300 if (SYNTAX_FLAGS_PREFIX (prev_from_syntax))
3301 continue;
3302 switch (code)
3304 case Sescape:
3305 case Scharquote:
3306 if (stopbefore) goto stop; /* this arg means stop at sexp start */
3307 curlevel->last = prev_from;
3308 startquoted:
3309 if (from == end) goto endquoted;
3310 INC_FROM;
3311 goto symstarted;
3312 /* treat following character as a word constituent */
3313 case Sword:
3314 case Ssymbol:
3315 if (stopbefore) goto stop; /* this arg means stop at sexp start */
3316 curlevel->last = prev_from;
3317 symstarted:
3318 while (from < end)
3320 int symchar = FETCH_CHAR_AS_MULTIBYTE (from_byte);
3321 switch (SYNTAX (symchar))
3323 case Scharquote:
3324 case Sescape:
3325 INC_FROM;
3326 if (from == end) goto endquoted;
3327 break;
3328 case Sword:
3329 case Ssymbol:
3330 case Squote:
3331 break;
3332 default:
3333 goto symdone;
3335 INC_FROM;
3337 symdone:
3338 curlevel->prev = curlevel->last;
3339 break;
3341 case Scomment_fence: /* Can't happen because it's handled above. */
3342 case Scomment:
3343 if (commentstop || boundary_stop) goto done;
3344 startincomment:
3345 /* The (from == BEGV) test was to enter the loop in the middle so
3346 that we find a 2-char comment ender even if we start in the
3347 middle of it. We don't want to do that if we're just at the
3348 beginning of the comment (think of (*) ... (*)). */
3349 found = forw_comment (from, from_byte, end,
3350 state.incomment, state.comstyle,
3351 (from == BEGV || from < state.comstr_start + 3)
3352 ? 0 : prev_from_syntax,
3353 &out_charpos, &out_bytepos, &state.incomment);
3354 from = out_charpos; from_byte = out_bytepos;
3355 /* Beware! prev_from and friends are invalid now.
3356 Luckily, the `done' doesn't use them and the INC_FROM
3357 sets them to a sane value without looking at them. */
3358 if (!found) goto done;
3359 INC_FROM;
3360 state.incomment = 0;
3361 state.comstyle = 0; /* reset the comment style */
3362 if (boundary_stop) goto done;
3363 break;
3365 case Sopen:
3366 if (stopbefore) goto stop; /* this arg means stop at sexp start */
3367 depth++;
3368 /* curlevel++->last ran into compiler bug on Apollo */
3369 curlevel->last = prev_from;
3370 if (++curlevel == endlevel)
3371 curlevel--; /* error ("Nesting too deep for parser"); */
3372 curlevel->prev = -1;
3373 curlevel->last = -1;
3374 if (targetdepth == depth) goto done;
3375 break;
3377 case Sclose:
3378 depth--;
3379 if (depth < mindepth)
3380 mindepth = depth;
3381 if (curlevel != levelstart)
3382 curlevel--;
3383 curlevel->prev = curlevel->last;
3384 if (targetdepth == depth) goto done;
3385 break;
3387 case Sstring:
3388 case Sstring_fence:
3389 state.comstr_start = from - 1;
3390 if (stopbefore) goto stop; /* this arg means stop at sexp start */
3391 curlevel->last = prev_from;
3392 state.instring = (code == Sstring
3393 ? (FETCH_CHAR_AS_MULTIBYTE (prev_from_byte))
3394 : ST_STRING_STYLE);
3395 if (boundary_stop) goto done;
3396 startinstring:
3398 nofence = state.instring != ST_STRING_STYLE;
3400 while (1)
3402 int c;
3403 enum syntaxcode c_code;
3405 if (from >= end) goto done;
3406 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
3407 c_code = SYNTAX (c);
3409 /* Check C_CODE here so that if the char has
3410 a syntax-table property which says it is NOT
3411 a string character, it does not end the string. */
3412 if (nofence && c == state.instring && c_code == Sstring)
3413 break;
3415 switch (c_code)
3417 case Sstring_fence:
3418 if (!nofence) goto string_end;
3419 break;
3421 case Scharquote:
3422 case Sescape:
3423 INC_FROM;
3424 startquotedinstring:
3425 if (from >= end) goto endquoted;
3426 break;
3428 default:
3429 break;
3431 INC_FROM;
3434 string_end:
3435 state.instring = -1;
3436 curlevel->prev = curlevel->last;
3437 INC_FROM;
3438 if (boundary_stop) goto done;
3439 break;
3441 case Smath:
3442 /* FIXME: We should do something with it. */
3443 break;
3444 default:
3445 /* Ignore whitespace, punctuation, quote, endcomment. */
3446 break;
3449 goto done;
3451 stop: /* Here if stopping before start of sexp. */
3452 from = prev_from; /* We have just fetched the char that starts it; */
3453 from_byte = prev_from_byte;
3454 goto done; /* but return the position before it. */
3456 endquoted:
3457 state.quoted = 1;
3458 done:
3459 state.depth = depth;
3460 state.mindepth = mindepth;
3461 state.thislevelstart = curlevel->prev;
3462 state.prevlevelstart
3463 = (curlevel == levelstart) ? -1 : (curlevel - 1)->last;
3464 state.location = from;
3465 state.location_byte = from_byte;
3466 state.levelstarts = Qnil;
3467 while (curlevel > levelstart)
3468 state.levelstarts = Fcons (make_number ((--curlevel)->last),
3469 state.levelstarts);
3470 immediate_quit = 0;
3472 *stateptr = state;
3475 DEFUN ("parse-partial-sexp", Fparse_partial_sexp, Sparse_partial_sexp, 2, 6, 0,
3476 doc: /* Parse Lisp syntax starting at FROM until TO; return status of parse at TO.
3477 Parsing stops at TO or when certain criteria are met;
3478 point is set to where parsing stops.
3479 If fifth arg OLDSTATE is omitted or nil,
3480 parsing assumes that FROM is the beginning of a function.
3481 Value is a list of elements describing final state of parsing:
3482 0. depth in parens.
3483 1. character address of start of innermost containing list; nil if none.
3484 2. character address of start of last complete sexp terminated.
3485 3. non-nil if inside a string.
3486 (it is the character that will terminate the string,
3487 or t if the string should be terminated by a generic string delimiter.)
3488 4. nil if outside a comment, t if inside a non-nestable comment,
3489 else an integer (the current comment nesting).
3490 5. t if following a quote character.
3491 6. the minimum paren-depth encountered during this scan.
3492 7. style of comment, if any.
3493 8. character address of start of comment or string; nil if not in one.
3494 9. Intermediate data for continuation of parsing (subject to change).
3495 If third arg TARGETDEPTH is non-nil, parsing stops if the depth
3496 in parentheses becomes equal to TARGETDEPTH.
3497 Fourth arg STOPBEFORE non-nil means stop when come to
3498 any character that starts a sexp.
3499 Fifth arg OLDSTATE is a list like what this function returns.
3500 It is used to initialize the state of the parse. Elements number 1, 2, 6
3501 are ignored.
3502 Sixth arg COMMENTSTOP non-nil means stop at the start of a comment.
3503 If it is symbol `syntax-table', stop after the start of a comment or a
3504 string, or after end of a comment or a string. */)
3505 (Lisp_Object from, Lisp_Object to, Lisp_Object targetdepth,
3506 Lisp_Object stopbefore, Lisp_Object oldstate, Lisp_Object commentstop)
3508 struct lisp_parse_state state;
3509 EMACS_INT target;
3511 if (!NILP (targetdepth))
3513 CHECK_NUMBER (targetdepth);
3514 target = XINT (targetdepth);
3516 else
3517 target = TYPE_MINIMUM (EMACS_INT); /* We won't reach this depth. */
3519 validate_region (&from, &to);
3520 scan_sexps_forward (&state, XINT (from), CHAR_TO_BYTE (XINT (from)),
3521 XINT (to),
3522 target, !NILP (stopbefore), oldstate,
3523 (NILP (commentstop)
3524 ? 0 : (EQ (commentstop, Qsyntax_table) ? -1 : 1)));
3526 SET_PT_BOTH (state.location, state.location_byte);
3528 return Fcons (make_number (state.depth),
3529 Fcons (state.prevlevelstart < 0
3530 ? Qnil : make_number (state.prevlevelstart),
3531 Fcons (state.thislevelstart < 0
3532 ? Qnil : make_number (state.thislevelstart),
3533 Fcons (state.instring >= 0
3534 ? (state.instring == ST_STRING_STYLE
3535 ? Qt : make_number (state.instring)) : Qnil,
3536 Fcons (state.incomment < 0 ? Qt :
3537 (state.incomment == 0 ? Qnil :
3538 make_number (state.incomment)),
3539 Fcons (state.quoted ? Qt : Qnil,
3540 Fcons (make_number (state.mindepth),
3541 Fcons ((state.comstyle
3542 ? (state.comstyle == ST_COMMENT_STYLE
3543 ? Qsyntax_table
3544 : make_number (state.comstyle))
3545 : Qnil),
3546 Fcons (((state.incomment
3547 || (state.instring >= 0))
3548 ? make_number (state.comstr_start)
3549 : Qnil),
3550 Fcons (state.levelstarts, Qnil))))))))));
3553 void
3554 init_syntax_once (void)
3556 register int i, c;
3557 Lisp_Object temp;
3559 /* This has to be done here, before we call Fmake_char_table. */
3560 DEFSYM (Qsyntax_table, "syntax-table");
3562 /* Create objects which can be shared among syntax tables. */
3563 Vsyntax_code_object = make_uninit_vector (Smax);
3564 for (i = 0; i < Smax; i++)
3565 ASET (Vsyntax_code_object, i, Fcons (make_number (i), Qnil));
3567 /* Now we are ready to set up this property, so we can
3568 create syntax tables. */
3569 Fput (Qsyntax_table, Qchar_table_extra_slots, make_number (0));
3571 temp = AREF (Vsyntax_code_object, Swhitespace);
3573 Vstandard_syntax_table = Fmake_char_table (Qsyntax_table, temp);
3575 /* Control characters should not be whitespace. */
3576 temp = AREF (Vsyntax_code_object, Spunct);
3577 for (i = 0; i <= ' ' - 1; i++)
3578 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3579 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, 0177, temp);
3581 /* Except that a few really are whitespace. */
3582 temp = AREF (Vsyntax_code_object, Swhitespace);
3583 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ' ', temp);
3584 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\t', temp);
3585 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\n', temp);
3586 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, 015, temp);
3587 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, 014, temp);
3589 temp = AREF (Vsyntax_code_object, Sword);
3590 for (i = 'a'; i <= 'z'; i++)
3591 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3592 for (i = 'A'; i <= 'Z'; i++)
3593 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3594 for (i = '0'; i <= '9'; i++)
3595 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3597 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '$', temp);
3598 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '%', temp);
3600 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '(',
3601 Fcons (make_number (Sopen), make_number (')')));
3602 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ')',
3603 Fcons (make_number (Sclose), make_number ('(')));
3604 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '[',
3605 Fcons (make_number (Sopen), make_number (']')));
3606 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ']',
3607 Fcons (make_number (Sclose), make_number ('[')));
3608 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '{',
3609 Fcons (make_number (Sopen), make_number ('}')));
3610 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '}',
3611 Fcons (make_number (Sclose), make_number ('{')));
3612 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '"',
3613 Fcons (make_number (Sstring), Qnil));
3614 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\\',
3615 Fcons (make_number (Sescape), Qnil));
3617 temp = AREF (Vsyntax_code_object, Ssymbol);
3618 for (i = 0; i < 10; i++)
3620 c = "_-+*/&|<>="[i];
3621 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
3624 temp = AREF (Vsyntax_code_object, Spunct);
3625 for (i = 0; i < 12; i++)
3627 c = ".,;:?!#@~^'`"[i];
3628 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
3631 /* All multibyte characters have syntax `word' by default. */
3632 temp = AREF (Vsyntax_code_object, Sword);
3633 char_table_set_range (Vstandard_syntax_table, 0x80, MAX_CHAR, temp);
3636 void
3637 syms_of_syntax (void)
3639 DEFSYM (Qsyntax_table_p, "syntax-table-p");
3641 staticpro (&Vsyntax_code_object);
3643 staticpro (&gl_state.object);
3644 staticpro (&gl_state.global_code);
3645 staticpro (&gl_state.current_syntax_table);
3646 staticpro (&gl_state.old_prop);
3648 /* Defined in regex.c. */
3649 staticpro (&re_match_object);
3651 DEFSYM (Qscan_error, "scan-error");
3652 Fput (Qscan_error, Qerror_conditions,
3653 listn (CONSTYPE_PURE, 2, Qscan_error, Qerror));
3654 Fput (Qscan_error, Qerror_message,
3655 build_pure_c_string ("Scan error"));
3657 DEFVAR_BOOL ("parse-sexp-ignore-comments", parse_sexp_ignore_comments,
3658 doc: /* Non-nil means `forward-sexp', etc., should treat comments as whitespace. */);
3660 DEFVAR_BOOL ("parse-sexp-lookup-properties", parse_sexp_lookup_properties,
3661 doc: /* Non-nil means `forward-sexp', etc., obey `syntax-table' property.
3662 Otherwise, that text property is simply ignored.
3663 See the info node `(elisp)Syntax Properties' for a description of the
3664 `syntax-table' property. */);
3666 DEFVAR_INT ("syntax-propertize--done", syntax_propertize__done,
3667 doc: /* Position up to which syntax-table properties have been set. */);
3668 syntax_propertize__done = -1;
3669 DEFSYM (Qinternal__syntax_propertize, "internal--syntax-propertize");
3670 Fmake_variable_buffer_local (intern ("syntax-propertize--done"));
3672 words_include_escapes = 0;
3673 DEFVAR_BOOL ("words-include-escapes", words_include_escapes,
3674 doc: /* Non-nil means `forward-word', etc., should treat escape chars part of words. */);
3676 DEFVAR_BOOL ("multibyte-syntax-as-symbol", multibyte_syntax_as_symbol,
3677 doc: /* Non-nil means `scan-sexps' treats all multibyte characters as symbol. */);
3678 multibyte_syntax_as_symbol = 0;
3680 DEFVAR_BOOL ("open-paren-in-column-0-is-defun-start",
3681 open_paren_in_column_0_is_defun_start,
3682 doc: /* Non-nil means an open paren in column 0 denotes the start of a defun. */);
3683 open_paren_in_column_0_is_defun_start = 1;
3686 DEFVAR_LISP ("find-word-boundary-function-table",
3687 Vfind_word_boundary_function_table,
3688 doc: /*
3689 Char table of functions to search for the word boundary.
3690 Each function is called with two arguments; POS and LIMIT.
3691 POS and LIMIT are character positions in the current buffer.
3693 If POS is less than LIMIT, POS is at the first character of a word,
3694 and the return value of a function is a position after the last
3695 character of that word.
3697 If POS is not less than LIMIT, POS is at the last character of a word,
3698 and the return value of a function is a position at the first
3699 character of that word.
3701 In both cases, LIMIT bounds the search. */);
3702 Vfind_word_boundary_function_table = Fmake_char_table (Qnil, Qnil);
3704 defsubr (&Ssyntax_table_p);
3705 defsubr (&Ssyntax_table);
3706 defsubr (&Sstandard_syntax_table);
3707 defsubr (&Scopy_syntax_table);
3708 defsubr (&Sset_syntax_table);
3709 defsubr (&Schar_syntax);
3710 defsubr (&Smatching_paren);
3711 defsubr (&Sstring_to_syntax);
3712 defsubr (&Smodify_syntax_entry);
3713 defsubr (&Sinternal_describe_syntax_value);
3715 defsubr (&Sforward_word);
3717 defsubr (&Sskip_chars_forward);
3718 defsubr (&Sskip_chars_backward);
3719 defsubr (&Sskip_syntax_forward);
3720 defsubr (&Sskip_syntax_backward);
3722 defsubr (&Sforward_comment);
3723 defsubr (&Sscan_lists);
3724 defsubr (&Sscan_sexps);
3725 defsubr (&Sbackward_prefix_chars);
3726 defsubr (&Sparse_partial_sexp);