Update copyright year to 2014 by running admin/update-copyright.
[emacs.git] / src / syntax.c
blob65d6fe9f4f74bc5305cb6e5fc3c223003e2320c5
1 /* GNU Emacs routines to deal with syntax tables; also word and list parsing.
2 Copyright (C) 1985, 1987, 1993-1995, 1997-1999, 2001-2014 Free
3 Software Foundation, Inc.
5 This file is part of GNU Emacs.
7 GNU Emacs is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
12 GNU Emacs is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
21 #include <config.h>
23 #include <sys/types.h>
25 #include "lisp.h"
26 #include "commands.h"
27 #include "character.h"
28 #include "buffer.h"
29 #include "keymap.h"
30 #include "regex.h"
32 #include "syntax.h"
33 #include "intervals.h"
34 #include "category.h"
36 /* Make syntax table lookup grant data in gl_state. */
37 #define SYNTAX(c) syntax_property (c, 1)
38 #define SYNTAX_ENTRY(c) syntax_property_entry (c, 1)
39 #define SYNTAX_WITH_FLAGS(c) syntax_property_with_flags (c, 1)
41 /* Eight single-bit flags have the following meanings:
42 1. This character is the first of a two-character comment-start sequence.
43 2. This character is the second of a two-character comment-start sequence.
44 3. This character is the first of a two-character comment-end sequence.
45 4. This character is the second of a two-character comment-end sequence.
46 5. This character is a prefix, for backward-prefix-chars.
47 6. The char is part of a delimiter for comments of style "b".
48 7. This character is part of a nestable comment sequence.
49 8. The char is part of a delimiter for comments of style "c".
50 Note that any two-character sequence whose first character has flag 1
51 and whose second character has flag 2 will be interpreted as a comment start.
53 Bits 6 and 8 discriminate among different comment styles.
54 Languages such as C++ allow two orthogonal syntax start/end pairs
55 and bit 6 determines whether a comment-end or Scommentend
56 ends style a or b. Comment markers can start style a, b, c, or bc.
57 Style a is always the default.
58 For 2-char comment markers, the style b flag is looked up only on the second
59 char of the comment marker and on the first char of the comment ender.
60 For style c (like the nested flag), the flag can be placed on any of
61 the chars. */
63 /* These functions extract specific flags from an integer
64 that holds the syntax code and the flags. */
66 static bool
67 SYNTAX_FLAGS_COMSTART_FIRST (int flags)
69 return (flags >> 16) & 1;
71 static bool
72 SYNTAX_FLAGS_COMSTART_SECOND (int flags)
74 return (flags >> 17) & 1;
76 static bool
77 SYNTAX_FLAGS_COMEND_FIRST (int flags)
79 return (flags >> 18) & 1;
81 static bool
82 SYNTAX_FLAGS_COMEND_SECOND (int flags)
84 return (flags >> 19) & 1;
86 static bool
87 SYNTAX_FLAGS_PREFIX (int flags)
89 return (flags >> 20) & 1;
91 static bool
92 SYNTAX_FLAGS_COMMENT_STYLEB (int flags)
94 return (flags >> 21) & 1;
96 static bool
97 SYNTAX_FLAGS_COMMENT_STYLEC (int flags)
99 return (flags >> 23) & 1;
101 static int
102 SYNTAX_FLAGS_COMMENT_STYLEC2 (int flags)
104 return (flags >> 22) & 2; /* SYNTAX_FLAGS_COMMENT_STYLEC (flags) * 2 */
106 static bool
107 SYNTAX_FLAGS_COMMENT_NESTED (int flags)
109 return (flags >> 22) & 1;
112 /* FLAGS should be the flags of the main char of the comment marker, e.g.
113 the second for comstart and the first for comend. */
114 static int
115 SYNTAX_FLAGS_COMMENT_STYLE (int flags, int other_flags)
117 return (SYNTAX_FLAGS_COMMENT_STYLEB (flags)
118 | SYNTAX_FLAGS_COMMENT_STYLEC2 (flags)
119 | SYNTAX_FLAGS_COMMENT_STYLEC2 (other_flags));
122 /* Extract a particular flag for a given character. */
124 static bool
125 SYNTAX_COMEND_FIRST (int c)
127 return SYNTAX_FLAGS_COMEND_FIRST (SYNTAX_WITH_FLAGS (c));
130 /* We use these constants in place for comment-style and
131 string-ender-char to distinguish comments/strings started by
132 comment_fence and string_fence codes. */
134 enum
136 ST_COMMENT_STYLE = 256 + 1,
137 ST_STRING_STYLE = 256 + 2
140 static Lisp_Object Qsyntax_table_p;
141 static Lisp_Object Qsyntax_table, Qscan_error;
143 /* This is the internal form of the parse state used in parse-partial-sexp. */
145 struct lisp_parse_state
147 EMACS_INT depth; /* Depth at end of parsing. */
148 int instring; /* -1 if not within string, else desired terminator. */
149 EMACS_INT incomment; /* -1 if in unnestable comment else comment nesting */
150 int comstyle; /* comment style a=0, or b=1, or ST_COMMENT_STYLE. */
151 bool quoted; /* True if just after an escape char at end of parsing. */
152 EMACS_INT mindepth; /* Minimum depth seen while scanning. */
153 /* Char number of most recent start-of-expression at current level */
154 ptrdiff_t thislevelstart;
155 /* Char number of start of containing expression */
156 ptrdiff_t prevlevelstart;
157 ptrdiff_t location; /* Char number at which parsing stopped. */
158 ptrdiff_t location_byte; /* Corresponding byte position. */
159 ptrdiff_t comstr_start; /* Position of last comment/string starter. */
160 Lisp_Object levelstarts; /* Char numbers of starts-of-expression
161 of levels (starting from outermost). */
164 /* These variables are a cache for finding the start of a defun.
165 find_start_pos is the place for which the defun start was found.
166 find_start_value is the defun start position found for it.
167 find_start_value_byte is the corresponding byte position.
168 find_start_buffer is the buffer it was found in.
169 find_start_begv is the BEGV value when it was found.
170 find_start_modiff is the value of MODIFF when it was found. */
172 static ptrdiff_t find_start_pos;
173 static ptrdiff_t find_start_value;
174 static ptrdiff_t find_start_value_byte;
175 static struct buffer *find_start_buffer;
176 static ptrdiff_t find_start_begv;
177 static EMACS_INT find_start_modiff;
180 static Lisp_Object skip_chars (bool, Lisp_Object, Lisp_Object, bool);
181 static Lisp_Object skip_syntaxes (bool, Lisp_Object, Lisp_Object);
182 static Lisp_Object scan_lists (EMACS_INT, EMACS_INT, EMACS_INT, bool);
183 static void scan_sexps_forward (struct lisp_parse_state *,
184 ptrdiff_t, ptrdiff_t, ptrdiff_t, EMACS_INT,
185 bool, Lisp_Object, int);
186 static bool in_classes (int, Lisp_Object);
188 /* This setter is used only in this file, so it can be private. */
189 static void
190 bset_syntax_table (struct buffer *b, Lisp_Object val)
192 b->INTERNAL_FIELD (syntax_table) = val;
195 /* Whether the syntax of the character C has the prefix flag set. */
196 bool
197 syntax_prefix_flag_p (int c)
199 return SYNTAX_FLAGS_PREFIX (SYNTAX_WITH_FLAGS (c));
202 struct gl_state_s gl_state; /* Global state of syntax parser. */
204 enum { INTERVALS_AT_ONCE = 10 }; /* 1 + max-number of intervals
205 to scan to property-change. */
207 /* Set the syntax entry VAL for char C in table TABLE. */
209 static void
210 SET_RAW_SYNTAX_ENTRY (Lisp_Object table, int c, Lisp_Object val)
212 CHAR_TABLE_SET (table, c, val);
215 /* Set the syntax entry VAL for char-range RANGE in table TABLE.
216 RANGE is a cons (FROM . TO) specifying the range of characters. */
218 static void
219 SET_RAW_SYNTAX_ENTRY_RANGE (Lisp_Object table, Lisp_Object range,
220 Lisp_Object val)
222 Fset_char_table_range (table, range, val);
225 /* Extract the information from the entry for character C
226 in the current syntax table. */
228 static Lisp_Object
229 SYNTAX_MATCH (int c)
231 Lisp_Object ent = SYNTAX_ENTRY (c);
232 return CONSP (ent) ? XCDR (ent) : Qnil;
235 /* This should be called with FROM at the start of forward
236 search, or after the last position of the backward search. It
237 makes sure that the first char is picked up with correct table, so
238 one does not need to call UPDATE_SYNTAX_TABLE immediately after the
239 call.
240 Sign of COUNT gives the direction of the search.
243 static void
244 SETUP_SYNTAX_TABLE (ptrdiff_t from, ptrdiff_t count)
246 SETUP_BUFFER_SYNTAX_TABLE ();
247 gl_state.b_property = BEGV;
248 gl_state.e_property = ZV + 1;
249 gl_state.object = Qnil;
250 gl_state.offset = 0;
251 if (parse_sexp_lookup_properties)
252 if (count > 0 || from > BEGV)
253 update_syntax_table (count > 0 ? from : from - 1, count, 1, Qnil);
256 /* Same as above, but in OBJECT. If OBJECT is nil, use current buffer.
257 If it is t (which is only used in fast_c_string_match_ignore_case),
258 ignore properties altogether.
260 This is meant for regex.c to use. For buffers, regex.c passes arguments
261 to the UPDATE_SYNTAX_TABLE functions which are relative to BEGV.
262 So if it is a buffer, we set the offset field to BEGV. */
264 void
265 SETUP_SYNTAX_TABLE_FOR_OBJECT (Lisp_Object object,
266 ptrdiff_t from, ptrdiff_t count)
268 SETUP_BUFFER_SYNTAX_TABLE ();
269 gl_state.object = object;
270 if (BUFFERP (gl_state.object))
272 struct buffer *buf = XBUFFER (gl_state.object);
273 gl_state.b_property = 1;
274 gl_state.e_property = BUF_ZV (buf) - BUF_BEGV (buf) + 1;
275 gl_state.offset = BUF_BEGV (buf) - 1;
277 else if (NILP (gl_state.object))
279 gl_state.b_property = 1;
280 gl_state.e_property = ZV - BEGV + 1;
281 gl_state.offset = BEGV - 1;
283 else if (EQ (gl_state.object, Qt))
285 gl_state.b_property = 0;
286 gl_state.e_property = PTRDIFF_MAX;
287 gl_state.offset = 0;
289 else
291 gl_state.b_property = 0;
292 gl_state.e_property = 1 + SCHARS (gl_state.object);
293 gl_state.offset = 0;
295 if (parse_sexp_lookup_properties)
296 update_syntax_table (from + gl_state.offset - (count <= 0),
297 count, 1, gl_state.object);
300 /* Update gl_state to an appropriate interval which contains CHARPOS. The
301 sign of COUNT give the relative position of CHARPOS wrt the previously
302 valid interval. If INIT, only [be]_property fields of gl_state are
303 valid at start, the rest is filled basing on OBJECT.
305 `gl_state.*_i' are the intervals, and CHARPOS is further in the search
306 direction than the intervals - or in an interval. We update the
307 current syntax-table basing on the property of this interval, and
308 update the interval to start further than CHARPOS - or be
309 NULL. We also update lim_property to be the next value of
310 charpos to call this subroutine again - or be before/after the
311 start/end of OBJECT. */
313 void
314 update_syntax_table (ptrdiff_t charpos, EMACS_INT count, bool init,
315 Lisp_Object object)
317 Lisp_Object tmp_table;
318 int cnt = 0;
319 bool invalidate = 1;
320 INTERVAL i;
322 if (init)
324 gl_state.old_prop = Qnil;
325 gl_state.start = gl_state.b_property;
326 gl_state.stop = gl_state.e_property;
327 i = interval_of (charpos, object);
328 gl_state.backward_i = gl_state.forward_i = i;
329 invalidate = 0;
330 if (!i)
331 return;
332 /* interval_of updates only ->position of the return value, so
333 update the parents manually to speed up update_interval. */
334 while (!NULL_PARENT (i))
336 if (AM_RIGHT_CHILD (i))
337 INTERVAL_PARENT (i)->position = i->position
338 - LEFT_TOTAL_LENGTH (i) + TOTAL_LENGTH (i) /* right end */
339 - TOTAL_LENGTH (INTERVAL_PARENT (i))
340 + LEFT_TOTAL_LENGTH (INTERVAL_PARENT (i));
341 else
342 INTERVAL_PARENT (i)->position = i->position - LEFT_TOTAL_LENGTH (i)
343 + TOTAL_LENGTH (i);
344 i = INTERVAL_PARENT (i);
346 i = gl_state.forward_i;
347 gl_state.b_property = i->position - gl_state.offset;
348 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
349 goto update;
351 i = count > 0 ? gl_state.forward_i : gl_state.backward_i;
353 /* We are guaranteed to be called with CHARPOS either in i,
354 or further off. */
355 if (!i)
356 error ("Error in syntax_table logic for to-the-end intervals");
357 else if (charpos < i->position) /* Move left. */
359 if (count > 0)
360 error ("Error in syntax_table logic for intervals <-");
361 /* Update the interval. */
362 i = update_interval (i, charpos);
363 if (INTERVAL_LAST_POS (i) != gl_state.b_property)
365 invalidate = 0;
366 gl_state.forward_i = i;
367 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
370 else if (charpos >= INTERVAL_LAST_POS (i)) /* Move right. */
372 if (count < 0)
373 error ("Error in syntax_table logic for intervals ->");
374 /* Update the interval. */
375 i = update_interval (i, charpos);
376 if (i->position != gl_state.e_property)
378 invalidate = 0;
379 gl_state.backward_i = i;
380 gl_state.b_property = i->position - gl_state.offset;
384 update:
385 tmp_table = textget (i->plist, Qsyntax_table);
387 if (invalidate)
388 invalidate = !EQ (tmp_table, gl_state.old_prop); /* Need to invalidate? */
390 if (invalidate) /* Did not get to adjacent interval. */
391 { /* with the same table => */
392 /* invalidate the old range. */
393 if (count > 0)
395 gl_state.backward_i = i;
396 gl_state.b_property = i->position - gl_state.offset;
398 else
400 gl_state.forward_i = i;
401 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
405 if (!EQ (tmp_table, gl_state.old_prop))
407 gl_state.current_syntax_table = tmp_table;
408 gl_state.old_prop = tmp_table;
409 if (EQ (Fsyntax_table_p (tmp_table), Qt))
411 gl_state.use_global = 0;
413 else if (CONSP (tmp_table))
415 gl_state.use_global = 1;
416 gl_state.global_code = tmp_table;
418 else
420 gl_state.use_global = 0;
421 gl_state.current_syntax_table = BVAR (current_buffer, syntax_table);
425 while (i)
427 if (cnt && !EQ (tmp_table, textget (i->plist, Qsyntax_table)))
429 if (count > 0)
431 gl_state.e_property = i->position - gl_state.offset;
432 gl_state.forward_i = i;
434 else
436 gl_state.b_property
437 = i->position + LENGTH (i) - gl_state.offset;
438 gl_state.backward_i = i;
440 return;
442 else if (cnt == INTERVALS_AT_ONCE)
444 if (count > 0)
446 gl_state.e_property
447 = i->position + LENGTH (i) - gl_state.offset
448 /* e_property at EOB is not set to ZV but to ZV+1, so that
449 we can do INC(from);UPDATE_SYNTAX_TABLE_FORWARD without
450 having to check eob between the two. */
451 + (next_interval (i) ? 0 : 1);
452 gl_state.forward_i = i;
454 else
456 gl_state.b_property = i->position - gl_state.offset;
457 gl_state.backward_i = i;
459 return;
461 cnt++;
462 i = count > 0 ? next_interval (i) : previous_interval (i);
464 eassert (i == NULL); /* This property goes to the end. */
465 if (count > 0)
466 gl_state.e_property = gl_state.stop;
467 else
468 gl_state.b_property = gl_state.start;
471 /* Returns true if char at CHARPOS is quoted.
472 Global syntax-table data should be set up already to be good at CHARPOS
473 or after. On return global syntax data is good for lookup at CHARPOS. */
475 static bool
476 char_quoted (ptrdiff_t charpos, ptrdiff_t bytepos)
478 enum syntaxcode code;
479 ptrdiff_t beg = BEGV;
480 bool quoted = 0;
481 ptrdiff_t orig = charpos;
483 while (charpos > beg)
485 int c;
486 DEC_BOTH (charpos, bytepos);
488 UPDATE_SYNTAX_TABLE_BACKWARD (charpos);
489 c = FETCH_CHAR_AS_MULTIBYTE (bytepos);
490 code = SYNTAX (c);
491 if (! (code == Scharquote || code == Sescape))
492 break;
494 quoted = !quoted;
497 UPDATE_SYNTAX_TABLE (orig);
498 return quoted;
501 /* Return the bytepos one character before BYTEPOS.
502 We assume that BYTEPOS is not at the start of the buffer. */
504 static ptrdiff_t
505 dec_bytepos (ptrdiff_t bytepos)
507 if (NILP (BVAR (current_buffer, enable_multibyte_characters)))
508 return bytepos - 1;
510 DEC_POS (bytepos);
511 return bytepos;
514 /* Return a defun-start position before POS and not too far before.
515 It should be the last one before POS, or nearly the last.
517 When open_paren_in_column_0_is_defun_start is nonzero,
518 only the beginning of the buffer is treated as a defun-start.
520 We record the information about where the scan started
521 and what its result was, so that another call in the same area
522 can return the same value very quickly.
524 There is no promise at which position the global syntax data is
525 valid on return from the subroutine, so the caller should explicitly
526 update the global data. */
528 static ptrdiff_t
529 find_defun_start (ptrdiff_t pos, ptrdiff_t pos_byte)
531 ptrdiff_t opoint = PT, opoint_byte = PT_BYTE;
533 if (!open_paren_in_column_0_is_defun_start)
535 find_start_value = BEGV;
536 find_start_value_byte = BEGV_BYTE;
537 find_start_buffer = current_buffer;
538 find_start_modiff = MODIFF;
539 find_start_begv = BEGV;
540 find_start_pos = pos;
541 return BEGV;
544 /* Use previous finding, if it's valid and applies to this inquiry. */
545 if (current_buffer == find_start_buffer
546 /* Reuse the defun-start even if POS is a little farther on.
547 POS might be in the next defun, but that's ok.
548 Our value may not be the best possible, but will still be usable. */
549 && pos <= find_start_pos + 1000
550 && pos >= find_start_value
551 && BEGV == find_start_begv
552 && MODIFF == find_start_modiff)
553 return find_start_value;
555 /* Back up to start of line. */
556 scan_newline (pos, pos_byte, BEGV, BEGV_BYTE, -1, 1);
558 /* We optimize syntax-table lookup for rare updates. Thus we accept
559 only those `^\s(' which are good in global _and_ text-property
560 syntax-tables. */
561 SETUP_BUFFER_SYNTAX_TABLE ();
562 while (PT > BEGV)
564 int c;
566 /* Open-paren at start of line means we may have found our
567 defun-start. */
568 c = FETCH_CHAR_AS_MULTIBYTE (PT_BYTE);
569 if (SYNTAX (c) == Sopen)
571 SETUP_SYNTAX_TABLE (PT + 1, -1); /* Try again... */
572 c = FETCH_CHAR_AS_MULTIBYTE (PT_BYTE);
573 if (SYNTAX (c) == Sopen)
574 break;
575 /* Now fallback to the default value. */
576 SETUP_BUFFER_SYNTAX_TABLE ();
578 /* Move to beg of previous line. */
579 scan_newline (PT, PT_BYTE, BEGV, BEGV_BYTE, -2, 1);
582 /* Record what we found, for the next try. */
583 find_start_value = PT;
584 find_start_value_byte = PT_BYTE;
585 find_start_buffer = current_buffer;
586 find_start_modiff = MODIFF;
587 find_start_begv = BEGV;
588 find_start_pos = pos;
590 TEMP_SET_PT_BOTH (opoint, opoint_byte);
592 return find_start_value;
595 /* Return the SYNTAX_COMEND_FIRST of the character before POS, POS_BYTE. */
597 static bool
598 prev_char_comend_first (ptrdiff_t pos, ptrdiff_t pos_byte)
600 int c;
601 bool val;
603 DEC_BOTH (pos, pos_byte);
604 UPDATE_SYNTAX_TABLE_BACKWARD (pos);
605 c = FETCH_CHAR (pos_byte);
606 val = SYNTAX_COMEND_FIRST (c);
607 UPDATE_SYNTAX_TABLE_FORWARD (pos + 1);
608 return val;
611 /* Check whether charpos FROM is at the end of a comment.
612 FROM_BYTE is the bytepos corresponding to FROM.
613 Do not move back before STOP.
615 Return true if we find a comment ending at FROM/FROM_BYTE.
617 If successful, store the charpos of the comment's beginning
618 into *CHARPOS_PTR, and the bytepos into *BYTEPOS_PTR.
620 Global syntax data remains valid for backward search starting at
621 the returned value (or at FROM, if the search was not successful). */
623 static bool
624 back_comment (ptrdiff_t from, ptrdiff_t from_byte, ptrdiff_t stop,
625 bool comnested, int comstyle, ptrdiff_t *charpos_ptr,
626 ptrdiff_t *bytepos_ptr)
628 /* Look back, counting the parity of string-quotes,
629 and recording the comment-starters seen.
630 When we reach a safe place, assume that's not in a string;
631 then step the main scan to the earliest comment-starter seen
632 an even number of string quotes away from the safe place.
634 OFROM[I] is position of the earliest comment-starter seen
635 which is I+2X quotes from the comment-end.
636 PARITY is current parity of quotes from the comment end. */
637 int string_style = -1; /* Presumed outside of any string. */
638 bool string_lossage = 0;
639 /* Not a real lossage: indicates that we have passed a matching comment
640 starter plus a non-matching comment-ender, meaning that any matching
641 comment-starter we might see later could be a false positive (hidden
642 inside another comment).
643 Test case: { a (* b } c (* d *) */
644 bool comment_lossage = 0;
645 ptrdiff_t comment_end = from;
646 ptrdiff_t comment_end_byte = from_byte;
647 ptrdiff_t comstart_pos = 0;
648 ptrdiff_t comstart_byte IF_LINT (= 0);
649 /* Place where the containing defun starts,
650 or 0 if we didn't come across it yet. */
651 ptrdiff_t defun_start = 0;
652 ptrdiff_t defun_start_byte = 0;
653 enum syntaxcode code;
654 ptrdiff_t nesting = 1; /* current comment nesting */
655 int c;
656 int syntax = 0;
658 /* FIXME: A }} comment-ender style leads to incorrect behavior
659 in the case of {{ c }}} because we ignore the last two chars which are
660 assumed to be comment-enders although they aren't. */
662 /* At beginning of range to scan, we're outside of strings;
663 that determines quote parity to the comment-end. */
664 while (from != stop)
666 ptrdiff_t temp_byte;
667 int prev_syntax;
668 bool com2start, com2end, comstart;
670 /* Move back and examine a character. */
671 DEC_BOTH (from, from_byte);
672 UPDATE_SYNTAX_TABLE_BACKWARD (from);
674 prev_syntax = syntax;
675 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
676 syntax = SYNTAX_WITH_FLAGS (c);
677 code = SYNTAX (c);
679 /* Check for 2-char comment markers. */
680 com2start = (SYNTAX_FLAGS_COMSTART_FIRST (syntax)
681 && SYNTAX_FLAGS_COMSTART_SECOND (prev_syntax)
682 && (comstyle
683 == SYNTAX_FLAGS_COMMENT_STYLE (prev_syntax, syntax))
684 && (SYNTAX_FLAGS_COMMENT_NESTED (prev_syntax)
685 || SYNTAX_FLAGS_COMMENT_NESTED (syntax)) == comnested);
686 com2end = (SYNTAX_FLAGS_COMEND_FIRST (syntax)
687 && SYNTAX_FLAGS_COMEND_SECOND (prev_syntax));
688 comstart = (com2start || code == Scomment);
690 /* Nasty cases with overlapping 2-char comment markers:
691 - snmp-mode: -- c -- foo -- c --
692 --- c --
693 ------ c --
694 - c-mode: *||*
695 |* *|* *|
696 |*| |* |*|
697 /// */
699 /* If a 2-char comment sequence partly overlaps with another,
700 we don't try to be clever. E.g. |*| in C, or }% in modes that
701 have %..\n and %{..}%. */
702 if (from > stop && (com2end || comstart))
704 ptrdiff_t next = from, next_byte = from_byte;
705 int next_c, next_syntax;
706 DEC_BOTH (next, next_byte);
707 UPDATE_SYNTAX_TABLE_BACKWARD (next);
708 next_c = FETCH_CHAR_AS_MULTIBYTE (next_byte);
709 next_syntax = SYNTAX_WITH_FLAGS (next_c);
710 if (((comstart || comnested)
711 && SYNTAX_FLAGS_COMEND_SECOND (syntax)
712 && SYNTAX_FLAGS_COMEND_FIRST (next_syntax))
713 || ((com2end || comnested)
714 && SYNTAX_FLAGS_COMSTART_SECOND (syntax)
715 && (comstyle
716 == SYNTAX_FLAGS_COMMENT_STYLE (syntax, prev_syntax))
717 && SYNTAX_FLAGS_COMSTART_FIRST (next_syntax)))
718 goto lossage;
719 /* UPDATE_SYNTAX_TABLE_FORWARD (next + 1); */
722 if (com2start && comstart_pos == 0)
723 /* We're looking at a comment starter. But it might be a comment
724 ender as well (see snmp-mode). The first time we see one, we
725 need to consider it as a comment starter,
726 and the subsequent times as a comment ender. */
727 com2end = 0;
729 /* Turn a 2-char comment sequences into the appropriate syntax. */
730 if (com2end)
731 code = Sendcomment;
732 else if (com2start)
733 code = Scomment;
734 /* Ignore comment starters of a different style. */
735 else if (code == Scomment
736 && (comstyle != SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0)
737 || SYNTAX_FLAGS_COMMENT_NESTED (syntax) != comnested))
738 continue;
740 /* Ignore escaped characters, except comment-enders. */
741 if (code != Sendcomment && char_quoted (from, from_byte))
742 continue;
744 switch (code)
746 case Sstring_fence:
747 case Scomment_fence:
748 c = (code == Sstring_fence ? ST_STRING_STYLE : ST_COMMENT_STYLE);
749 case Sstring:
750 /* Track parity of quotes. */
751 if (string_style == -1)
752 /* Entering a string. */
753 string_style = c;
754 else if (string_style == c)
755 /* Leaving the string. */
756 string_style = -1;
757 else
758 /* If we have two kinds of string delimiters.
759 There's no way to grok this scanning backwards. */
760 string_lossage = 1;
761 break;
763 case Scomment:
764 /* We've already checked that it is the relevant comstyle. */
765 if (string_style != -1 || comment_lossage || string_lossage)
766 /* There are odd string quotes involved, so let's be careful.
767 Test case in Pascal: " { " a { " } */
768 goto lossage;
770 if (!comnested)
772 /* Record best comment-starter so far. */
773 comstart_pos = from;
774 comstart_byte = from_byte;
776 else if (--nesting <= 0)
777 /* nested comments have to be balanced, so we don't need to
778 keep looking for earlier ones. We use here the same (slightly
779 incorrect) reasoning as below: since it is followed by uniform
780 paired string quotes, this comment-start has to be outside of
781 strings, else the comment-end itself would be inside a string. */
782 goto done;
783 break;
785 case Sendcomment:
786 if (SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0) == comstyle
787 && ((com2end && SYNTAX_FLAGS_COMMENT_NESTED (prev_syntax))
788 || SYNTAX_FLAGS_COMMENT_NESTED (syntax)) == comnested)
789 /* This is the same style of comment ender as ours. */
791 if (comnested)
792 nesting++;
793 else
794 /* Anything before that can't count because it would match
795 this comment-ender rather than ours. */
796 from = stop; /* Break out of the loop. */
798 else if (comstart_pos != 0 || c != '\n')
799 /* We're mixing comment styles here, so we'd better be careful.
800 The (comstart_pos != 0 || c != '\n') check is not quite correct
801 (we should just always set comment_lossage), but removing it
802 would imply that any multiline comment in C would go through
803 lossage, which seems overkill.
804 The failure should only happen in the rare cases such as
805 { (* } *) */
806 comment_lossage = 1;
807 break;
809 case Sopen:
810 /* Assume a defun-start point is outside of strings. */
811 if (open_paren_in_column_0_is_defun_start
812 && (from == stop
813 || (temp_byte = dec_bytepos (from_byte),
814 FETCH_CHAR (temp_byte) == '\n')))
816 defun_start = from;
817 defun_start_byte = from_byte;
818 from = stop; /* Break out of the loop. */
820 break;
822 default:
823 break;
827 if (comstart_pos == 0)
829 from = comment_end;
830 from_byte = comment_end_byte;
831 UPDATE_SYNTAX_TABLE_FORWARD (comment_end - 1);
833 /* If comstart_pos is set and we get here (ie. didn't jump to `lossage'
834 or `done'), then we've found the beginning of the non-nested comment. */
835 else if (1) /* !comnested */
837 from = comstart_pos;
838 from_byte = comstart_byte;
839 UPDATE_SYNTAX_TABLE_FORWARD (from - 1);
841 else
843 struct lisp_parse_state state;
844 lossage:
845 /* We had two kinds of string delimiters mixed up
846 together. Decode this going forwards.
847 Scan fwd from a known safe place (beginning-of-defun)
848 to the one in question; this records where we
849 last passed a comment starter. */
850 /* If we did not already find the defun start, find it now. */
851 if (defun_start == 0)
853 defun_start = find_defun_start (comment_end, comment_end_byte);
854 defun_start_byte = find_start_value_byte;
858 scan_sexps_forward (&state,
859 defun_start, defun_start_byte,
860 comment_end, TYPE_MINIMUM (EMACS_INT),
861 0, Qnil, 0);
862 defun_start = comment_end;
863 if (state.incomment == (comnested ? 1 : -1)
864 && state.comstyle == comstyle)
865 from = state.comstr_start;
866 else
868 from = comment_end;
869 if (state.incomment)
870 /* If comment_end is inside some other comment, maybe ours
871 is nested, so we need to try again from within the
872 surrounding comment. Example: { a (* " *) */
874 /* FIXME: We should advance by one or two chars. */
875 defun_start = state.comstr_start + 2;
876 defun_start_byte = CHAR_TO_BYTE (defun_start);
879 } while (defun_start < comment_end);
881 from_byte = CHAR_TO_BYTE (from);
882 UPDATE_SYNTAX_TABLE_FORWARD (from - 1);
885 done:
886 *charpos_ptr = from;
887 *bytepos_ptr = from_byte;
889 return from != comment_end;
892 DEFUN ("syntax-table-p", Fsyntax_table_p, Ssyntax_table_p, 1, 1, 0,
893 doc: /* Return t if OBJECT is a syntax table.
894 Currently, any char-table counts as a syntax table. */)
895 (Lisp_Object object)
897 if (CHAR_TABLE_P (object)
898 && EQ (XCHAR_TABLE (object)->purpose, Qsyntax_table))
899 return Qt;
900 return Qnil;
903 static void
904 check_syntax_table (Lisp_Object obj)
906 CHECK_TYPE (CHAR_TABLE_P (obj) && EQ (XCHAR_TABLE (obj)->purpose, Qsyntax_table),
907 Qsyntax_table_p, obj);
910 DEFUN ("syntax-table", Fsyntax_table, Ssyntax_table, 0, 0, 0,
911 doc: /* Return the current syntax table.
912 This is the one specified by the current buffer. */)
913 (void)
915 return BVAR (current_buffer, syntax_table);
918 DEFUN ("standard-syntax-table", Fstandard_syntax_table,
919 Sstandard_syntax_table, 0, 0, 0,
920 doc: /* Return the standard syntax table.
921 This is the one used for new buffers. */)
922 (void)
924 return Vstandard_syntax_table;
927 DEFUN ("copy-syntax-table", Fcopy_syntax_table, Scopy_syntax_table, 0, 1, 0,
928 doc: /* Construct a new syntax table and return it.
929 It is a copy of the TABLE, which defaults to the standard syntax table. */)
930 (Lisp_Object table)
932 Lisp_Object copy;
934 if (!NILP (table))
935 check_syntax_table (table);
936 else
937 table = Vstandard_syntax_table;
939 copy = Fcopy_sequence (table);
941 /* Only the standard syntax table should have a default element.
942 Other syntax tables should inherit from parents instead. */
943 set_char_table_defalt (copy, Qnil);
945 /* Copied syntax tables should all have parents.
946 If we copied one with no parent, such as the standard syntax table,
947 use the standard syntax table as the copy's parent. */
948 if (NILP (XCHAR_TABLE (copy)->parent))
949 Fset_char_table_parent (copy, Vstandard_syntax_table);
950 return copy;
953 DEFUN ("set-syntax-table", Fset_syntax_table, Sset_syntax_table, 1, 1, 0,
954 doc: /* Select a new syntax table for the current buffer.
955 One argument, a syntax table. */)
956 (Lisp_Object table)
958 int idx;
959 check_syntax_table (table);
960 bset_syntax_table (current_buffer, table);
961 /* Indicate that this buffer now has a specified syntax table. */
962 idx = PER_BUFFER_VAR_IDX (syntax_table);
963 SET_PER_BUFFER_VALUE_P (current_buffer, idx, 1);
964 return table;
967 /* Convert a letter which signifies a syntax code
968 into the code it signifies.
969 This is used by modify-syntax-entry, and other things. */
971 unsigned char const syntax_spec_code[0400] =
972 { 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
973 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
974 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
975 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
976 Swhitespace, Scomment_fence, Sstring, 0377, Smath, 0377, 0377, Squote,
977 Sopen, Sclose, 0377, 0377, 0377, Swhitespace, Spunct, Scharquote,
978 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
979 0377, 0377, 0377, 0377, Scomment, 0377, Sendcomment, 0377,
980 Sinherit, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* @, A ... */
981 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
982 0377, 0377, 0377, 0377, 0377, 0377, 0377, Sword,
983 0377, 0377, 0377, 0377, Sescape, 0377, 0377, Ssymbol,
984 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* `, a, ... */
985 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
986 0377, 0377, 0377, 0377, 0377, 0377, 0377, Sword,
987 0377, 0377, 0377, 0377, Sstring_fence, 0377, 0377, 0377
990 /* Indexed by syntax code, give the letter that describes it. */
992 char const syntax_code_spec[16] =
994 ' ', '.', 'w', '_', '(', ')', '\'', '\"', '$', '\\', '/', '<', '>', '@',
995 '!', '|'
998 /* Indexed by syntax code, give the object (cons of syntax code and
999 nil) to be stored in syntax table. Since these objects can be
1000 shared among syntax tables, we generate them in advance. By
1001 sharing objects, the function `describe-syntax' can give a more
1002 compact listing. */
1003 static Lisp_Object Vsyntax_code_object;
1006 DEFUN ("char-syntax", Fchar_syntax, Schar_syntax, 1, 1, 0,
1007 doc: /* Return the syntax code of CHARACTER, described by a character.
1008 For example, if CHARACTER is a word constituent, the
1009 character `w' (119) is returned.
1010 The characters that correspond to various syntax codes
1011 are listed in the documentation of `modify-syntax-entry'. */)
1012 (Lisp_Object character)
1014 int char_int;
1015 CHECK_CHARACTER (character);
1016 char_int = XINT (character);
1017 SETUP_BUFFER_SYNTAX_TABLE ();
1018 return make_number (syntax_code_spec[SYNTAX (char_int)]);
1021 DEFUN ("matching-paren", Fmatching_paren, Smatching_paren, 1, 1, 0,
1022 doc: /* Return the matching parenthesis of CHARACTER, or nil if none. */)
1023 (Lisp_Object character)
1025 int char_int;
1026 enum syntaxcode code;
1027 CHECK_CHARACTER (character);
1028 char_int = XINT (character);
1029 SETUP_BUFFER_SYNTAX_TABLE ();
1030 code = SYNTAX (char_int);
1031 if (code == Sopen || code == Sclose)
1032 return SYNTAX_MATCH (char_int);
1033 return Qnil;
1036 DEFUN ("string-to-syntax", Fstring_to_syntax, Sstring_to_syntax, 1, 1, 0,
1037 doc: /* Convert a syntax descriptor STRING into a raw syntax descriptor.
1038 STRING should be a string of the form allowed as argument of
1039 `modify-syntax-entry'. The return value is a raw syntax descriptor: a
1040 cons cell \(CODE . MATCHING-CHAR) which can be used, for example, as
1041 the value of a `syntax-table' text property. */)
1042 (Lisp_Object string)
1044 const unsigned char *p;
1045 int val;
1046 Lisp_Object match;
1048 CHECK_STRING (string);
1050 p = SDATA (string);
1051 val = syntax_spec_code[*p++];
1052 if (val == 0377)
1053 error ("Invalid syntax description letter: %c", p[-1]);
1055 if (val == Sinherit)
1056 return Qnil;
1058 if (*p)
1060 int len;
1061 int character = STRING_CHAR_AND_LENGTH (p, len);
1062 XSETINT (match, character);
1063 if (XFASTINT (match) == ' ')
1064 match = Qnil;
1065 p += len;
1067 else
1068 match = Qnil;
1070 while (*p)
1071 switch (*p++)
1073 case '1':
1074 val |= 1 << 16;
1075 break;
1077 case '2':
1078 val |= 1 << 17;
1079 break;
1081 case '3':
1082 val |= 1 << 18;
1083 break;
1085 case '4':
1086 val |= 1 << 19;
1087 break;
1089 case 'p':
1090 val |= 1 << 20;
1091 break;
1093 case 'b':
1094 val |= 1 << 21;
1095 break;
1097 case 'n':
1098 val |= 1 << 22;
1099 break;
1101 case 'c':
1102 val |= 1 << 23;
1103 break;
1106 if (val < ASIZE (Vsyntax_code_object) && NILP (match))
1107 return AREF (Vsyntax_code_object, val);
1108 else
1109 /* Since we can't use a shared object, let's make a new one. */
1110 return Fcons (make_number (val), match);
1113 /* I really don't know why this is interactive
1114 help-form should at least be made useful whilst reading the second arg. */
1115 DEFUN ("modify-syntax-entry", Fmodify_syntax_entry, Smodify_syntax_entry, 2, 3,
1116 "cSet syntax for character: \nsSet syntax for %s to: ",
1117 doc: /* Set syntax for character CHAR according to string NEWENTRY.
1118 The syntax is changed only for table SYNTAX-TABLE, which defaults to
1119 the current buffer's syntax table.
1120 CHAR may be a cons (MIN . MAX), in which case, syntaxes of all characters
1121 in the range MIN to MAX are changed.
1122 The first character of NEWENTRY should be one of the following:
1123 Space or - whitespace syntax. w word constituent.
1124 _ symbol constituent. . punctuation.
1125 ( open-parenthesis. ) close-parenthesis.
1126 " string quote. \\ escape.
1127 $ paired delimiter. ' expression quote or prefix operator.
1128 < comment starter. > comment ender.
1129 / character-quote. @ inherit from parent table.
1130 | generic string fence. ! generic comment fence.
1132 Only single-character comment start and end sequences are represented thus.
1133 Two-character sequences are represented as described below.
1134 The second character of NEWENTRY is the matching parenthesis,
1135 used only if the first character is `(' or `)'.
1136 Any additional characters are flags.
1137 Defined flags are the characters 1, 2, 3, 4, b, p, and n.
1138 1 means CHAR is the start of a two-char comment start sequence.
1139 2 means CHAR is the second character of such a sequence.
1140 3 means CHAR is the start of a two-char comment end sequence.
1141 4 means CHAR is the second character of such a sequence.
1143 There can be several orthogonal comment sequences. This is to support
1144 language modes such as C++. By default, all comment sequences are of style
1145 a, but you can set the comment sequence style to b (on the second character
1146 of a comment-start, and the first character of a comment-end sequence) and/or
1147 c (on any of its chars) using this flag:
1148 b means CHAR is part of comment sequence b.
1149 c means CHAR is part of comment sequence c.
1150 n means CHAR is part of a nestable comment sequence.
1152 p means CHAR is a prefix character for `backward-prefix-chars';
1153 such characters are treated as whitespace when they occur
1154 between expressions.
1155 usage: (modify-syntax-entry CHAR NEWENTRY &optional SYNTAX-TABLE) */)
1156 (Lisp_Object c, Lisp_Object newentry, Lisp_Object syntax_table)
1158 if (CONSP (c))
1160 CHECK_CHARACTER_CAR (c);
1161 CHECK_CHARACTER_CDR (c);
1163 else
1164 CHECK_CHARACTER (c);
1166 if (NILP (syntax_table))
1167 syntax_table = BVAR (current_buffer, syntax_table);
1168 else
1169 check_syntax_table (syntax_table);
1171 newentry = Fstring_to_syntax (newentry);
1172 if (CONSP (c))
1173 SET_RAW_SYNTAX_ENTRY_RANGE (syntax_table, c, newentry);
1174 else
1175 SET_RAW_SYNTAX_ENTRY (syntax_table, XINT (c), newentry);
1177 /* We clear the regexp cache, since character classes can now have
1178 different values from those in the compiled regexps.*/
1179 clear_regexp_cache ();
1181 return Qnil;
1184 /* Dump syntax table to buffer in human-readable format */
1186 DEFUN ("internal-describe-syntax-value", Finternal_describe_syntax_value,
1187 Sinternal_describe_syntax_value, 1, 1, 0,
1188 doc: /* Insert a description of the internal syntax description SYNTAX at point. */)
1189 (Lisp_Object syntax)
1191 int code, syntax_code;
1192 bool start1, start2, end1, end2, prefix, comstyleb, comstylec, comnested;
1193 char str[2];
1194 Lisp_Object first, match_lisp, value = syntax;
1196 if (NILP (value))
1198 insert_string ("default");
1199 return syntax;
1202 if (CHAR_TABLE_P (value))
1204 insert_string ("deeper char-table ...");
1205 return syntax;
1208 if (!CONSP (value))
1210 insert_string ("invalid");
1211 return syntax;
1214 first = XCAR (value);
1215 match_lisp = XCDR (value);
1217 if (!INTEGERP (first) || !(NILP (match_lisp) || CHARACTERP (match_lisp)))
1219 insert_string ("invalid");
1220 return syntax;
1223 syntax_code = XINT (first) & INT_MAX;
1224 code = syntax_code & 0377;
1225 start1 = SYNTAX_FLAGS_COMSTART_FIRST (syntax_code);
1226 start2 = SYNTAX_FLAGS_COMSTART_SECOND (syntax_code);;
1227 end1 = SYNTAX_FLAGS_COMEND_FIRST (syntax_code);
1228 end2 = SYNTAX_FLAGS_COMEND_SECOND (syntax_code);
1229 prefix = SYNTAX_FLAGS_PREFIX (syntax_code);
1230 comstyleb = SYNTAX_FLAGS_COMMENT_STYLEB (syntax_code);
1231 comstylec = SYNTAX_FLAGS_COMMENT_STYLEC (syntax_code);
1232 comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax_code);
1234 if (Smax <= code)
1236 insert_string ("invalid");
1237 return syntax;
1240 str[0] = syntax_code_spec[code], str[1] = 0;
1241 insert (str, 1);
1243 if (NILP (match_lisp))
1244 insert (" ", 1);
1245 else
1246 insert_char (XINT (match_lisp));
1248 if (start1)
1249 insert ("1", 1);
1250 if (start2)
1251 insert ("2", 1);
1253 if (end1)
1254 insert ("3", 1);
1255 if (end2)
1256 insert ("4", 1);
1258 if (prefix)
1259 insert ("p", 1);
1260 if (comstyleb)
1261 insert ("b", 1);
1262 if (comstylec)
1263 insert ("c", 1);
1264 if (comnested)
1265 insert ("n", 1);
1267 insert_string ("\twhich means: ");
1269 switch (code)
1271 case Swhitespace:
1272 insert_string ("whitespace"); break;
1273 case Spunct:
1274 insert_string ("punctuation"); break;
1275 case Sword:
1276 insert_string ("word"); break;
1277 case Ssymbol:
1278 insert_string ("symbol"); break;
1279 case Sopen:
1280 insert_string ("open"); break;
1281 case Sclose:
1282 insert_string ("close"); break;
1283 case Squote:
1284 insert_string ("prefix"); break;
1285 case Sstring:
1286 insert_string ("string"); break;
1287 case Smath:
1288 insert_string ("math"); break;
1289 case Sescape:
1290 insert_string ("escape"); break;
1291 case Scharquote:
1292 insert_string ("charquote"); break;
1293 case Scomment:
1294 insert_string ("comment"); break;
1295 case Sendcomment:
1296 insert_string ("endcomment"); break;
1297 case Sinherit:
1298 insert_string ("inherit"); break;
1299 case Scomment_fence:
1300 insert_string ("comment fence"); break;
1301 case Sstring_fence:
1302 insert_string ("string fence"); break;
1303 default:
1304 insert_string ("invalid");
1305 return syntax;
1308 if (!NILP (match_lisp))
1310 insert_string (", matches ");
1311 insert_char (XINT (match_lisp));
1314 if (start1)
1315 insert_string (",\n\t is the first character of a comment-start sequence");
1316 if (start2)
1317 insert_string (",\n\t is the second character of a comment-start sequence");
1319 if (end1)
1320 insert_string (",\n\t is the first character of a comment-end sequence");
1321 if (end2)
1322 insert_string (",\n\t is the second character of a comment-end sequence");
1323 if (comstyleb)
1324 insert_string (" (comment style b)");
1325 if (comstylec)
1326 insert_string (" (comment style c)");
1327 if (comnested)
1328 insert_string (" (nestable)");
1330 if (prefix)
1331 insert_string (",\n\t is a prefix character for `backward-prefix-chars'");
1333 return syntax;
1336 /* Return the position across COUNT words from FROM.
1337 If that many words cannot be found before the end of the buffer, return 0.
1338 COUNT negative means scan backward and stop at word beginning. */
1340 ptrdiff_t
1341 scan_words (register ptrdiff_t from, register EMACS_INT count)
1343 register ptrdiff_t beg = BEGV;
1344 register ptrdiff_t end = ZV;
1345 register ptrdiff_t from_byte = CHAR_TO_BYTE (from);
1346 register enum syntaxcode code;
1347 int ch0, ch1;
1348 Lisp_Object func, pos;
1350 immediate_quit = 1;
1351 QUIT;
1353 SETUP_SYNTAX_TABLE (from, count);
1355 while (count > 0)
1357 while (1)
1359 if (from == end)
1361 immediate_quit = 0;
1362 return 0;
1364 UPDATE_SYNTAX_TABLE_FORWARD (from);
1365 ch0 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
1366 code = SYNTAX (ch0);
1367 INC_BOTH (from, from_byte);
1368 if (words_include_escapes
1369 && (code == Sescape || code == Scharquote))
1370 break;
1371 if (code == Sword)
1372 break;
1374 /* Now CH0 is a character which begins a word and FROM is the
1375 position of the next character. */
1376 func = CHAR_TABLE_REF (Vfind_word_boundary_function_table, ch0);
1377 if (! NILP (Ffboundp (func)))
1379 pos = call2 (func, make_number (from - 1), make_number (end));
1380 if (INTEGERP (pos) && from < XINT (pos) && XINT (pos) <= ZV)
1382 from = XINT (pos);
1383 from_byte = CHAR_TO_BYTE (from);
1386 else
1388 while (1)
1390 if (from == end) break;
1391 UPDATE_SYNTAX_TABLE_FORWARD (from);
1392 ch1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
1393 code = SYNTAX (ch1);
1394 if ((code != Sword
1395 && (! words_include_escapes
1396 || (code != Sescape && code != Scharquote)))
1397 || word_boundary_p (ch0, ch1))
1398 break;
1399 INC_BOTH (from, from_byte);
1400 ch0 = ch1;
1403 count--;
1405 while (count < 0)
1407 while (1)
1409 if (from == beg)
1411 immediate_quit = 0;
1412 return 0;
1414 DEC_BOTH (from, from_byte);
1415 UPDATE_SYNTAX_TABLE_BACKWARD (from);
1416 ch1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
1417 code = SYNTAX (ch1);
1418 if (words_include_escapes
1419 && (code == Sescape || code == Scharquote))
1420 break;
1421 if (code == Sword)
1422 break;
1424 /* Now CH1 is a character which ends a word and FROM is the
1425 position of it. */
1426 func = CHAR_TABLE_REF (Vfind_word_boundary_function_table, ch1);
1427 if (! NILP (Ffboundp (func)))
1429 pos = call2 (func, make_number (from), make_number (beg));
1430 if (INTEGERP (pos) && BEGV <= XINT (pos) && XINT (pos) < from)
1432 from = XINT (pos);
1433 from_byte = CHAR_TO_BYTE (from);
1436 else
1438 while (1)
1440 if (from == beg)
1441 break;
1442 DEC_BOTH (from, from_byte);
1443 UPDATE_SYNTAX_TABLE_BACKWARD (from);
1444 ch0 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
1445 code = SYNTAX (ch0);
1446 if ((code != Sword
1447 && (! words_include_escapes
1448 || (code != Sescape && code != Scharquote)))
1449 || word_boundary_p (ch0, ch1))
1451 INC_BOTH (from, from_byte);
1452 break;
1454 ch1 = ch0;
1457 count++;
1460 immediate_quit = 0;
1462 return from;
1465 DEFUN ("forward-word", Fforward_word, Sforward_word, 0, 1, "^p",
1466 doc: /* Move point forward ARG words (backward if ARG is negative).
1467 If ARG is omitted or nil, move point forward one word.
1468 Normally returns t.
1469 If an edge of the buffer or a field boundary is reached, point is left there
1470 and the function returns nil. Field boundaries are not noticed if
1471 `inhibit-field-text-motion' is non-nil. */)
1472 (Lisp_Object arg)
1474 Lisp_Object tmp;
1475 ptrdiff_t orig_val, val;
1477 if (NILP (arg))
1478 XSETFASTINT (arg, 1);
1479 else
1480 CHECK_NUMBER (arg);
1482 val = orig_val = scan_words (PT, XINT (arg));
1483 if (! orig_val)
1484 val = XINT (arg) > 0 ? ZV : BEGV;
1486 /* Avoid jumping out of an input field. */
1487 tmp = Fconstrain_to_field (make_number (val), make_number (PT),
1488 Qt, Qnil, Qnil);
1489 val = XFASTINT (tmp);
1491 SET_PT (val);
1492 return val == orig_val ? Qt : Qnil;
1495 DEFUN ("skip-chars-forward", Fskip_chars_forward, Sskip_chars_forward, 1, 2, 0,
1496 doc: /* Move point forward, stopping before a char not in STRING, or at pos LIM.
1497 STRING is like the inside of a `[...]' in a regular expression
1498 except that `]' is never special and `\\' quotes `^', `-' or `\\'
1499 (but not at the end of a range; quoting is never needed there).
1500 Thus, with arg "a-zA-Z", this skips letters stopping before first nonletter.
1501 With arg "^a-zA-Z", skips nonletters stopping before first letter.
1502 Char classes, e.g. `[:alpha:]', are supported.
1504 Returns the distance traveled, either zero or positive. */)
1505 (Lisp_Object string, Lisp_Object lim)
1507 return skip_chars (1, string, lim, 1);
1510 DEFUN ("skip-chars-backward", Fskip_chars_backward, Sskip_chars_backward, 1, 2, 0,
1511 doc: /* Move point backward, stopping after a char not in STRING, or at pos LIM.
1512 See `skip-chars-forward' for details.
1513 Returns the distance traveled, either zero or negative. */)
1514 (Lisp_Object string, Lisp_Object lim)
1516 return skip_chars (0, string, lim, 1);
1519 DEFUN ("skip-syntax-forward", Fskip_syntax_forward, Sskip_syntax_forward, 1, 2, 0,
1520 doc: /* Move point forward across chars in specified syntax classes.
1521 SYNTAX is a string of syntax code characters.
1522 Stop before a char whose syntax is not in SYNTAX, or at position LIM.
1523 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1524 This function returns the distance traveled, either zero or positive. */)
1525 (Lisp_Object syntax, Lisp_Object lim)
1527 return skip_syntaxes (1, syntax, lim);
1530 DEFUN ("skip-syntax-backward", Fskip_syntax_backward, Sskip_syntax_backward, 1, 2, 0,
1531 doc: /* Move point backward across chars in specified syntax classes.
1532 SYNTAX is a string of syntax code characters.
1533 Stop on reaching a char whose syntax is not in SYNTAX, or at position LIM.
1534 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1535 This function returns the distance traveled, either zero or negative. */)
1536 (Lisp_Object syntax, Lisp_Object lim)
1538 return skip_syntaxes (0, syntax, lim);
1541 static Lisp_Object
1542 skip_chars (bool forwardp, Lisp_Object string, Lisp_Object lim,
1543 bool handle_iso_classes)
1545 int c;
1546 char fastmap[0400];
1547 /* Store the ranges of non-ASCII characters. */
1548 int *char_ranges IF_LINT (= NULL);
1549 int n_char_ranges = 0;
1550 bool negate = 0;
1551 ptrdiff_t i, i_byte;
1552 /* True if the current buffer is multibyte and the region contains
1553 non-ASCII chars. */
1554 bool multibyte;
1555 /* True if STRING is multibyte and it contains non-ASCII chars. */
1556 bool string_multibyte;
1557 ptrdiff_t size_byte;
1558 const unsigned char *str;
1559 int len;
1560 Lisp_Object iso_classes;
1562 CHECK_STRING (string);
1563 iso_classes = Qnil;
1565 if (NILP (lim))
1566 XSETINT (lim, forwardp ? ZV : BEGV);
1567 else
1568 CHECK_NUMBER_COERCE_MARKER (lim);
1570 /* In any case, don't allow scan outside bounds of buffer. */
1571 if (XINT (lim) > ZV)
1572 XSETFASTINT (lim, ZV);
1573 if (XINT (lim) < BEGV)
1574 XSETFASTINT (lim, BEGV);
1576 multibyte = (!NILP (BVAR (current_buffer, enable_multibyte_characters))
1577 && (XINT (lim) - PT != CHAR_TO_BYTE (XINT (lim)) - PT_BYTE));
1578 string_multibyte = SBYTES (string) > SCHARS (string);
1580 memset (fastmap, 0, sizeof fastmap);
1582 str = SDATA (string);
1583 size_byte = SBYTES (string);
1585 i_byte = 0;
1586 if (i_byte < size_byte
1587 && SREF (string, 0) == '^')
1589 negate = 1; i_byte++;
1592 /* Find the characters specified and set their elements of fastmap.
1593 Handle backslashes and ranges specially.
1595 If STRING contains non-ASCII characters, setup char_ranges for
1596 them and use fastmap only for their leading codes. */
1598 if (! string_multibyte)
1600 bool string_has_eight_bit = 0;
1602 /* At first setup fastmap. */
1603 while (i_byte < size_byte)
1605 c = str[i_byte++];
1607 if (handle_iso_classes && c == '['
1608 && i_byte < size_byte
1609 && str[i_byte] == ':')
1611 const unsigned char *class_beg = str + i_byte + 1;
1612 const unsigned char *class_end = class_beg;
1613 const unsigned char *class_limit = str + size_byte - 2;
1614 /* Leave room for the null. */
1615 unsigned char class_name[CHAR_CLASS_MAX_LENGTH + 1];
1616 re_wctype_t cc;
1618 if (class_limit - class_beg > CHAR_CLASS_MAX_LENGTH)
1619 class_limit = class_beg + CHAR_CLASS_MAX_LENGTH;
1621 while (class_end < class_limit
1622 && *class_end >= 'a' && *class_end <= 'z')
1623 class_end++;
1625 if (class_end == class_beg
1626 || *class_end != ':' || class_end[1] != ']')
1627 goto not_a_class_name;
1629 memcpy (class_name, class_beg, class_end - class_beg);
1630 class_name[class_end - class_beg] = 0;
1632 cc = re_wctype (class_name);
1633 if (cc == 0)
1634 error ("Invalid ISO C character class");
1636 iso_classes = Fcons (make_number (cc), iso_classes);
1638 i_byte = class_end + 2 - str;
1639 continue;
1642 not_a_class_name:
1643 if (c == '\\')
1645 if (i_byte == size_byte)
1646 break;
1648 c = str[i_byte++];
1650 /* Treat `-' as range character only if another character
1651 follows. */
1652 if (i_byte + 1 < size_byte
1653 && str[i_byte] == '-')
1655 int c2;
1657 /* Skip over the dash. */
1658 i_byte++;
1660 /* Get the end of the range. */
1661 c2 = str[i_byte++];
1662 if (c2 == '\\'
1663 && i_byte < size_byte)
1664 c2 = str[i_byte++];
1666 if (c <= c2)
1668 int lim2 = c2 + 1;
1669 while (c < lim2)
1670 fastmap[c++] = 1;
1671 if (! ASCII_CHAR_P (c2))
1672 string_has_eight_bit = 1;
1675 else
1677 fastmap[c] = 1;
1678 if (! ASCII_CHAR_P (c))
1679 string_has_eight_bit = 1;
1683 /* If the current range is multibyte and STRING contains
1684 eight-bit chars, arrange fastmap and setup char_ranges for
1685 the corresponding multibyte chars. */
1686 if (multibyte && string_has_eight_bit)
1688 char *p1;
1689 char himap[0200 + 1];
1690 memcpy (himap, fastmap + 0200, 0200);
1691 himap[0200] = 0;
1692 memset (fastmap + 0200, 0, 0200);
1693 char_ranges = alloca (sizeof *char_ranges * 128 * 2);
1694 i = 0;
1696 while ((p1 = memchr (himap + i, 1, 0200 - i)))
1698 /* Deduce the next range C..C2 from the next clump of 1s
1699 in HIMAP starting with &HIMAP[I]. HIMAP is the high
1700 order half of the old FASTMAP. */
1701 int c2, leading_code;
1702 i = p1 - himap;
1703 c = BYTE8_TO_CHAR (i + 0200);
1704 i += strlen (p1);
1705 c2 = BYTE8_TO_CHAR (i + 0200 - 1);
1707 char_ranges[n_char_ranges++] = c;
1708 char_ranges[n_char_ranges++] = c2;
1709 leading_code = CHAR_LEADING_CODE (c);
1710 memset (fastmap + leading_code, 1,
1711 CHAR_LEADING_CODE (c2) - leading_code + 1);
1715 else /* STRING is multibyte */
1717 char_ranges = alloca (sizeof *char_ranges * SCHARS (string) * 2);
1719 while (i_byte < size_byte)
1721 int leading_code = str[i_byte];
1722 c = STRING_CHAR_AND_LENGTH (str + i_byte, len);
1723 i_byte += len;
1725 if (handle_iso_classes && c == '['
1726 && i_byte < size_byte
1727 && STRING_CHAR (str + i_byte) == ':')
1729 const unsigned char *class_beg = str + i_byte + 1;
1730 const unsigned char *class_end = class_beg;
1731 const unsigned char *class_limit = str + size_byte - 2;
1732 /* Leave room for the null. */
1733 unsigned char class_name[CHAR_CLASS_MAX_LENGTH + 1];
1734 re_wctype_t cc;
1736 if (class_limit - class_beg > CHAR_CLASS_MAX_LENGTH)
1737 class_limit = class_beg + CHAR_CLASS_MAX_LENGTH;
1739 while (class_end < class_limit
1740 && *class_end >= 'a' && *class_end <= 'z')
1741 class_end++;
1743 if (class_end == class_beg
1744 || *class_end != ':' || class_end[1] != ']')
1745 goto not_a_class_name_multibyte;
1747 memcpy (class_name, class_beg, class_end - class_beg);
1748 class_name[class_end - class_beg] = 0;
1750 cc = re_wctype (class_name);
1751 if (cc == 0)
1752 error ("Invalid ISO C character class");
1754 iso_classes = Fcons (make_number (cc), iso_classes);
1756 i_byte = class_end + 2 - str;
1757 continue;
1760 not_a_class_name_multibyte:
1761 if (c == '\\')
1763 if (i_byte == size_byte)
1764 break;
1766 leading_code = str[i_byte];
1767 c = STRING_CHAR_AND_LENGTH (str + i_byte, len);
1768 i_byte += len;
1770 /* Treat `-' as range character only if another character
1771 follows. */
1772 if (i_byte + 1 < size_byte
1773 && str[i_byte] == '-')
1775 int c2, leading_code2;
1777 /* Skip over the dash. */
1778 i_byte++;
1780 /* Get the end of the range. */
1781 leading_code2 = str[i_byte];
1782 c2 = STRING_CHAR_AND_LENGTH (str + i_byte, len);
1783 i_byte += len;
1785 if (c2 == '\\'
1786 && i_byte < size_byte)
1788 leading_code2 = str[i_byte];
1789 c2 = STRING_CHAR_AND_LENGTH (str + i_byte, len);
1790 i_byte += len;
1793 if (c > c2)
1794 continue;
1795 if (ASCII_CHAR_P (c))
1797 while (c <= c2 && c < 0x80)
1798 fastmap[c++] = 1;
1799 leading_code = CHAR_LEADING_CODE (c);
1801 if (! ASCII_CHAR_P (c))
1803 int lim2 = leading_code2 + 1;
1804 while (leading_code < lim2)
1805 fastmap[leading_code++] = 1;
1806 if (c <= c2)
1808 char_ranges[n_char_ranges++] = c;
1809 char_ranges[n_char_ranges++] = c2;
1813 else
1815 if (ASCII_CHAR_P (c))
1816 fastmap[c] = 1;
1817 else
1819 fastmap[leading_code] = 1;
1820 char_ranges[n_char_ranges++] = c;
1821 char_ranges[n_char_ranges++] = c;
1826 /* If the current range is unibyte and STRING contains non-ASCII
1827 chars, arrange fastmap for the corresponding unibyte
1828 chars. */
1830 if (! multibyte && n_char_ranges > 0)
1832 memset (fastmap + 0200, 0, 0200);
1833 for (i = 0; i < n_char_ranges; i += 2)
1835 int c1 = char_ranges[i];
1836 int lim2 = char_ranges[i + 1] + 1;
1838 for (; c1 < lim2; c1++)
1840 int b = CHAR_TO_BYTE_SAFE (c1);
1841 if (b >= 0)
1842 fastmap[b] = 1;
1848 /* If ^ was the first character, complement the fastmap. */
1849 if (negate)
1851 if (! multibyte)
1852 for (i = 0; i < sizeof fastmap; i++)
1853 fastmap[i] ^= 1;
1854 else
1856 for (i = 0; i < 0200; i++)
1857 fastmap[i] ^= 1;
1858 /* All non-ASCII chars possibly match. */
1859 for (; i < sizeof fastmap; i++)
1860 fastmap[i] = 1;
1865 ptrdiff_t start_point = PT;
1866 ptrdiff_t pos = PT;
1867 ptrdiff_t pos_byte = PT_BYTE;
1868 unsigned char *p = PT_ADDR, *endp, *stop;
1870 if (forwardp)
1872 endp = (XINT (lim) == GPT) ? GPT_ADDR : CHAR_POS_ADDR (XINT (lim));
1873 stop = (pos < GPT && GPT < XINT (lim)) ? GPT_ADDR : endp;
1875 else
1877 endp = CHAR_POS_ADDR (XINT (lim));
1878 stop = (pos >= GPT && GPT > XINT (lim)) ? GAP_END_ADDR : endp;
1881 immediate_quit = 1;
1882 /* This code may look up syntax tables using functions that rely on the
1883 gl_state object. To make sure this object is not out of date,
1884 let's initialize it manually.
1885 We ignore syntax-table text-properties for now, since that's
1886 what we've done in the past. */
1887 SETUP_BUFFER_SYNTAX_TABLE ();
1888 if (forwardp)
1890 if (multibyte)
1891 while (1)
1893 int nbytes;
1895 if (p >= stop)
1897 if (p >= endp)
1898 break;
1899 p = GAP_END_ADDR;
1900 stop = endp;
1902 c = STRING_CHAR_AND_LENGTH (p, nbytes);
1903 if (! NILP (iso_classes) && in_classes (c, iso_classes))
1905 if (negate)
1906 break;
1907 else
1908 goto fwd_ok;
1911 if (! fastmap[*p])
1912 break;
1913 if (! ASCII_CHAR_P (c))
1915 /* As we are looking at a multibyte character, we
1916 must look up the character in the table
1917 CHAR_RANGES. If there's no data in the table,
1918 that character is not what we want to skip. */
1920 /* The following code do the right thing even if
1921 n_char_ranges is zero (i.e. no data in
1922 CHAR_RANGES). */
1923 for (i = 0; i < n_char_ranges; i += 2)
1924 if (c >= char_ranges[i] && c <= char_ranges[i + 1])
1925 break;
1926 if (!(negate ^ (i < n_char_ranges)))
1927 break;
1929 fwd_ok:
1930 p += nbytes, pos++, pos_byte += nbytes;
1932 else
1933 while (1)
1935 if (p >= stop)
1937 if (p >= endp)
1938 break;
1939 p = GAP_END_ADDR;
1940 stop = endp;
1943 if (!NILP (iso_classes) && in_classes (*p, iso_classes))
1945 if (negate)
1946 break;
1947 else
1948 goto fwd_unibyte_ok;
1951 if (!fastmap[*p])
1952 break;
1953 fwd_unibyte_ok:
1954 p++, pos++, pos_byte++;
1957 else
1959 if (multibyte)
1960 while (1)
1962 unsigned char *prev_p;
1964 if (p <= stop)
1966 if (p <= endp)
1967 break;
1968 p = GPT_ADDR;
1969 stop = endp;
1971 prev_p = p;
1972 while (--p >= stop && ! CHAR_HEAD_P (*p));
1973 c = STRING_CHAR (p);
1975 if (! NILP (iso_classes) && in_classes (c, iso_classes))
1977 if (negate)
1978 break;
1979 else
1980 goto back_ok;
1983 if (! fastmap[*p])
1984 break;
1985 if (! ASCII_CHAR_P (c))
1987 /* See the comment in the previous similar code. */
1988 for (i = 0; i < n_char_ranges; i += 2)
1989 if (c >= char_ranges[i] && c <= char_ranges[i + 1])
1990 break;
1991 if (!(negate ^ (i < n_char_ranges)))
1992 break;
1994 back_ok:
1995 pos--, pos_byte -= prev_p - p;
1997 else
1998 while (1)
2000 if (p <= stop)
2002 if (p <= endp)
2003 break;
2004 p = GPT_ADDR;
2005 stop = endp;
2008 if (! NILP (iso_classes) && in_classes (p[-1], iso_classes))
2010 if (negate)
2011 break;
2012 else
2013 goto back_unibyte_ok;
2016 if (!fastmap[p[-1]])
2017 break;
2018 back_unibyte_ok:
2019 p--, pos--, pos_byte--;
2023 SET_PT_BOTH (pos, pos_byte);
2024 immediate_quit = 0;
2026 return make_number (PT - start_point);
2031 static Lisp_Object
2032 skip_syntaxes (bool forwardp, Lisp_Object string, Lisp_Object lim)
2034 int c;
2035 unsigned char fastmap[0400];
2036 bool negate = 0;
2037 ptrdiff_t i, i_byte;
2038 bool multibyte;
2039 ptrdiff_t size_byte;
2040 unsigned char *str;
2042 CHECK_STRING (string);
2044 if (NILP (lim))
2045 XSETINT (lim, forwardp ? ZV : BEGV);
2046 else
2047 CHECK_NUMBER_COERCE_MARKER (lim);
2049 /* In any case, don't allow scan outside bounds of buffer. */
2050 if (XINT (lim) > ZV)
2051 XSETFASTINT (lim, ZV);
2052 if (XINT (lim) < BEGV)
2053 XSETFASTINT (lim, BEGV);
2055 if (forwardp ? (PT >= XFASTINT (lim)) : (PT <= XFASTINT (lim)))
2056 return make_number (0);
2058 multibyte = (!NILP (BVAR (current_buffer, enable_multibyte_characters))
2059 && (XINT (lim) - PT != CHAR_TO_BYTE (XINT (lim)) - PT_BYTE));
2061 memset (fastmap, 0, sizeof fastmap);
2063 if (SBYTES (string) > SCHARS (string))
2064 /* As this is very rare case (syntax spec is ASCII only), don't
2065 consider efficiency. */
2066 string = string_make_unibyte (string);
2068 str = SDATA (string);
2069 size_byte = SBYTES (string);
2071 i_byte = 0;
2072 if (i_byte < size_byte
2073 && SREF (string, 0) == '^')
2075 negate = 1; i_byte++;
2078 /* Find the syntaxes specified and set their elements of fastmap. */
2080 while (i_byte < size_byte)
2082 c = str[i_byte++];
2083 fastmap[syntax_spec_code[c]] = 1;
2086 /* If ^ was the first character, complement the fastmap. */
2087 if (negate)
2088 for (i = 0; i < sizeof fastmap; i++)
2089 fastmap[i] ^= 1;
2092 ptrdiff_t start_point = PT;
2093 ptrdiff_t pos = PT;
2094 ptrdiff_t pos_byte = PT_BYTE;
2095 unsigned char *p = PT_ADDR, *endp, *stop;
2097 if (forwardp)
2099 endp = (XINT (lim) == GPT) ? GPT_ADDR : CHAR_POS_ADDR (XINT (lim));
2100 stop = (pos < GPT && GPT < XINT (lim)) ? GPT_ADDR : endp;
2102 else
2104 endp = CHAR_POS_ADDR (XINT (lim));
2105 stop = (pos >= GPT && GPT > XINT (lim)) ? GAP_END_ADDR : endp;
2108 immediate_quit = 1;
2109 SETUP_SYNTAX_TABLE (pos, forwardp ? 1 : -1);
2110 if (forwardp)
2112 if (multibyte)
2114 while (1)
2116 int nbytes;
2118 if (p >= stop)
2120 if (p >= endp)
2121 break;
2122 p = GAP_END_ADDR;
2123 stop = endp;
2125 c = STRING_CHAR_AND_LENGTH (p, nbytes);
2126 if (! fastmap[SYNTAX (c)])
2127 break;
2128 p += nbytes, pos++, pos_byte += nbytes;
2129 UPDATE_SYNTAX_TABLE_FORWARD (pos);
2132 else
2134 while (1)
2136 if (p >= stop)
2138 if (p >= endp)
2139 break;
2140 p = GAP_END_ADDR;
2141 stop = endp;
2143 if (! fastmap[SYNTAX (*p)])
2144 break;
2145 p++, pos++, pos_byte++;
2146 UPDATE_SYNTAX_TABLE_FORWARD (pos);
2150 else
2152 if (multibyte)
2154 while (1)
2156 unsigned char *prev_p;
2158 if (p <= stop)
2160 if (p <= endp)
2161 break;
2162 p = GPT_ADDR;
2163 stop = endp;
2165 UPDATE_SYNTAX_TABLE_BACKWARD (pos - 1);
2166 prev_p = p;
2167 while (--p >= stop && ! CHAR_HEAD_P (*p));
2168 c = STRING_CHAR (p);
2169 if (! fastmap[SYNTAX (c)])
2170 break;
2171 pos--, pos_byte -= prev_p - p;
2174 else
2176 while (1)
2178 if (p <= stop)
2180 if (p <= endp)
2181 break;
2182 p = GPT_ADDR;
2183 stop = endp;
2185 UPDATE_SYNTAX_TABLE_BACKWARD (pos - 1);
2186 if (! fastmap[SYNTAX (p[-1])])
2187 break;
2188 p--, pos--, pos_byte--;
2193 SET_PT_BOTH (pos, pos_byte);
2194 immediate_quit = 0;
2196 return make_number (PT - start_point);
2200 /* Return true if character C belongs to one of the ISO classes
2201 in the list ISO_CLASSES. Each class is represented by an
2202 integer which is its type according to re_wctype. */
2204 static bool
2205 in_classes (int c, Lisp_Object iso_classes)
2207 bool fits_class = 0;
2209 while (CONSP (iso_classes))
2211 Lisp_Object elt;
2212 elt = XCAR (iso_classes);
2213 iso_classes = XCDR (iso_classes);
2215 if (re_iswctype (c, XFASTINT (elt)))
2216 fits_class = 1;
2219 return fits_class;
2222 /* Jump over a comment, assuming we are at the beginning of one.
2223 FROM is the current position.
2224 FROM_BYTE is the bytepos corresponding to FROM.
2225 Do not move past STOP (a charpos).
2226 The comment over which we have to jump is of style STYLE
2227 (either SYNTAX_FLAGS_COMMENT_STYLE (foo) or ST_COMMENT_STYLE).
2228 NESTING should be positive to indicate the nesting at the beginning
2229 for nested comments and should be zero or negative else.
2230 ST_COMMENT_STYLE cannot be nested.
2231 PREV_SYNTAX is the SYNTAX_WITH_FLAGS of the previous character
2232 (or 0 If the search cannot start in the middle of a two-character).
2234 If successful, return true and store the charpos of the comment's end
2235 into *CHARPOS_PTR and the corresponding bytepos into *BYTEPOS_PTR.
2236 Else, return false and store the charpos STOP into *CHARPOS_PTR, the
2237 corresponding bytepos into *BYTEPOS_PTR and the current nesting
2238 (as defined for state.incomment) in *INCOMMENT_PTR.
2240 The comment end is the last character of the comment rather than the
2241 character just after the comment.
2243 Global syntax data is assumed to initially be valid for FROM and
2244 remains valid for forward search starting at the returned position. */
2246 static bool
2247 forw_comment (ptrdiff_t from, ptrdiff_t from_byte, ptrdiff_t stop,
2248 EMACS_INT nesting, int style, int prev_syntax,
2249 ptrdiff_t *charpos_ptr, ptrdiff_t *bytepos_ptr,
2250 EMACS_INT *incomment_ptr)
2252 register int c, c1;
2253 register enum syntaxcode code;
2254 register int syntax, other_syntax;
2256 if (nesting <= 0) nesting = -1;
2258 /* Enter the loop in the middle so that we find
2259 a 2-char comment ender if we start in the middle of it. */
2260 syntax = prev_syntax;
2261 if (syntax != 0) goto forw_incomment;
2263 while (1)
2265 if (from == stop)
2267 *incomment_ptr = nesting;
2268 *charpos_ptr = from;
2269 *bytepos_ptr = from_byte;
2270 return 0;
2272 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2273 syntax = SYNTAX_WITH_FLAGS (c);
2274 code = syntax & 0xff;
2275 if (code == Sendcomment
2276 && SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0) == style
2277 && (SYNTAX_FLAGS_COMMENT_NESTED (syntax) ?
2278 (nesting > 0 && --nesting == 0) : nesting < 0))
2279 /* we have encountered a comment end of the same style
2280 as the comment sequence which began this comment
2281 section */
2282 break;
2283 if (code == Scomment_fence
2284 && style == ST_COMMENT_STYLE)
2285 /* we have encountered a comment end of the same style
2286 as the comment sequence which began this comment
2287 section. */
2288 break;
2289 if (nesting > 0
2290 && code == Scomment
2291 && SYNTAX_FLAGS_COMMENT_NESTED (syntax)
2292 && SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0) == style)
2293 /* we have encountered a nested comment of the same style
2294 as the comment sequence which began this comment section */
2295 nesting++;
2296 INC_BOTH (from, from_byte);
2297 UPDATE_SYNTAX_TABLE_FORWARD (from);
2299 forw_incomment:
2300 if (from < stop && SYNTAX_FLAGS_COMEND_FIRST (syntax)
2301 && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
2302 other_syntax = SYNTAX_WITH_FLAGS (c1),
2303 SYNTAX_FLAGS_COMEND_SECOND (other_syntax))
2304 && SYNTAX_FLAGS_COMMENT_STYLE (syntax, other_syntax) == style
2305 && ((SYNTAX_FLAGS_COMMENT_NESTED (syntax) ||
2306 SYNTAX_FLAGS_COMMENT_NESTED (other_syntax))
2307 ? nesting > 0 : nesting < 0))
2309 if (--nesting <= 0)
2310 /* we have encountered a comment end of the same style
2311 as the comment sequence which began this comment
2312 section */
2313 break;
2314 else
2316 INC_BOTH (from, from_byte);
2317 UPDATE_SYNTAX_TABLE_FORWARD (from);
2320 if (nesting > 0
2321 && from < stop
2322 && SYNTAX_FLAGS_COMSTART_FIRST (syntax)
2323 && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
2324 other_syntax = SYNTAX_WITH_FLAGS (c1),
2325 SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax) == style
2326 && SYNTAX_FLAGS_COMSTART_SECOND (other_syntax))
2327 && (SYNTAX_FLAGS_COMMENT_NESTED (syntax) ||
2328 SYNTAX_FLAGS_COMMENT_NESTED (other_syntax)))
2329 /* we have encountered a nested comment of the same style
2330 as the comment sequence which began this comment
2331 section */
2333 INC_BOTH (from, from_byte);
2334 UPDATE_SYNTAX_TABLE_FORWARD (from);
2335 nesting++;
2338 *charpos_ptr = from;
2339 *bytepos_ptr = from_byte;
2340 return 1;
2343 DEFUN ("forward-comment", Fforward_comment, Sforward_comment, 1, 1, 0,
2344 doc: /*
2345 Move forward across up to COUNT comments. If COUNT is negative, move backward.
2346 Stop scanning if we find something other than a comment or whitespace.
2347 Set point to where scanning stops.
2348 If COUNT comments are found as expected, with nothing except whitespace
2349 between them, return t; otherwise return nil. */)
2350 (Lisp_Object count)
2352 ptrdiff_t from, from_byte, stop;
2353 int c, c1;
2354 enum syntaxcode code;
2355 int comstyle = 0; /* style of comment encountered */
2356 bool comnested = 0; /* whether the comment is nestable or not */
2357 bool found;
2358 EMACS_INT count1;
2359 ptrdiff_t out_charpos, out_bytepos;
2360 EMACS_INT dummy;
2362 CHECK_NUMBER (count);
2363 count1 = XINT (count);
2364 stop = count1 > 0 ? ZV : BEGV;
2366 immediate_quit = 1;
2367 QUIT;
2369 from = PT;
2370 from_byte = PT_BYTE;
2372 SETUP_SYNTAX_TABLE (from, count1);
2373 while (count1 > 0)
2377 bool comstart_first;
2378 int syntax, other_syntax;
2380 if (from == stop)
2382 SET_PT_BOTH (from, from_byte);
2383 immediate_quit = 0;
2384 return Qnil;
2386 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2387 syntax = SYNTAX_WITH_FLAGS (c);
2388 code = SYNTAX (c);
2389 comstart_first = SYNTAX_FLAGS_COMSTART_FIRST (syntax);
2390 comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax);
2391 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0);
2392 INC_BOTH (from, from_byte);
2393 UPDATE_SYNTAX_TABLE_FORWARD (from);
2394 if (from < stop && comstart_first
2395 && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
2396 other_syntax = SYNTAX_WITH_FLAGS (c1),
2397 SYNTAX_FLAGS_COMSTART_SECOND (other_syntax)))
2399 /* We have encountered a comment start sequence and we
2400 are ignoring all text inside comments. We must record
2401 the comment style this sequence begins so that later,
2402 only a comment end of the same style actually ends
2403 the comment section. */
2404 code = Scomment;
2405 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax);
2406 comnested |= SYNTAX_FLAGS_COMMENT_NESTED (other_syntax);
2407 INC_BOTH (from, from_byte);
2408 UPDATE_SYNTAX_TABLE_FORWARD (from);
2411 while (code == Swhitespace || (code == Sendcomment && c == '\n'));
2413 if (code == Scomment_fence)
2414 comstyle = ST_COMMENT_STYLE;
2415 else if (code != Scomment)
2417 immediate_quit = 0;
2418 DEC_BOTH (from, from_byte);
2419 SET_PT_BOTH (from, from_byte);
2420 return Qnil;
2422 /* We're at the start of a comment. */
2423 found = forw_comment (from, from_byte, stop, comnested, comstyle, 0,
2424 &out_charpos, &out_bytepos, &dummy);
2425 from = out_charpos; from_byte = out_bytepos;
2426 if (!found)
2428 immediate_quit = 0;
2429 SET_PT_BOTH (from, from_byte);
2430 return Qnil;
2432 INC_BOTH (from, from_byte);
2433 UPDATE_SYNTAX_TABLE_FORWARD (from);
2434 /* We have skipped one comment. */
2435 count1--;
2438 while (count1 < 0)
2440 while (1)
2442 bool quoted;
2443 int syntax;
2445 if (from <= stop)
2447 SET_PT_BOTH (BEGV, BEGV_BYTE);
2448 immediate_quit = 0;
2449 return Qnil;
2452 DEC_BOTH (from, from_byte);
2453 /* char_quoted does UPDATE_SYNTAX_TABLE_BACKWARD (from). */
2454 quoted = char_quoted (from, from_byte);
2455 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2456 syntax = SYNTAX_WITH_FLAGS (c);
2457 code = SYNTAX (c);
2458 comstyle = 0;
2459 comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax);
2460 if (code == Sendcomment)
2461 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0);
2462 if (from > stop && SYNTAX_FLAGS_COMEND_SECOND (syntax)
2463 && prev_char_comend_first (from, from_byte)
2464 && !char_quoted (from - 1, dec_bytepos (from_byte)))
2466 int other_syntax;
2467 /* We must record the comment style encountered so that
2468 later, we can match only the proper comment begin
2469 sequence of the same style. */
2470 DEC_BOTH (from, from_byte);
2471 code = Sendcomment;
2472 /* Calling char_quoted, above, set up global syntax position
2473 at the new value of FROM. */
2474 c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2475 other_syntax = SYNTAX_WITH_FLAGS (c1);
2476 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax);
2477 comnested |= SYNTAX_FLAGS_COMMENT_NESTED (other_syntax);
2480 if (code == Scomment_fence)
2482 /* Skip until first preceding unquoted comment_fence. */
2483 bool fence_found = 0;
2484 ptrdiff_t ini = from, ini_byte = from_byte;
2486 while (1)
2488 DEC_BOTH (from, from_byte);
2489 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2490 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2491 if (SYNTAX (c) == Scomment_fence
2492 && !char_quoted (from, from_byte))
2494 fence_found = 1;
2495 break;
2497 else if (from == stop)
2498 break;
2500 if (fence_found == 0)
2502 from = ini; /* Set point to ini + 1. */
2503 from_byte = ini_byte;
2504 goto leave;
2506 else
2507 /* We have skipped one comment. */
2508 break;
2510 else if (code == Sendcomment)
2512 found = back_comment (from, from_byte, stop, comnested, comstyle,
2513 &out_charpos, &out_bytepos);
2514 if (!found)
2516 if (c == '\n')
2517 /* This end-of-line is not an end-of-comment.
2518 Treat it like a whitespace.
2519 CC-mode (and maybe others) relies on this behavior. */
2521 else
2523 /* Failure: we should go back to the end of this
2524 not-quite-endcomment. */
2525 if (SYNTAX (c) != code)
2526 /* It was a two-char Sendcomment. */
2527 INC_BOTH (from, from_byte);
2528 goto leave;
2531 else
2533 /* We have skipped one comment. */
2534 from = out_charpos, from_byte = out_bytepos;
2535 break;
2538 else if (code != Swhitespace || quoted)
2540 leave:
2541 immediate_quit = 0;
2542 INC_BOTH (from, from_byte);
2543 SET_PT_BOTH (from, from_byte);
2544 return Qnil;
2548 count1++;
2551 SET_PT_BOTH (from, from_byte);
2552 immediate_quit = 0;
2553 return Qt;
2556 /* Return syntax code of character C if C is an ASCII character
2557 or if MULTIBYTE_SYMBOL_P is false. Otherwise, return Ssymbol. */
2559 static enum syntaxcode
2560 syntax_multibyte (int c, bool multibyte_symbol_p)
2562 return ASCII_CHAR_P (c) || !multibyte_symbol_p ? SYNTAX (c) : Ssymbol;
2565 static Lisp_Object
2566 scan_lists (EMACS_INT from, EMACS_INT count, EMACS_INT depth, bool sexpflag)
2568 Lisp_Object val;
2569 ptrdiff_t stop = count > 0 ? ZV : BEGV;
2570 int c, c1;
2571 int stringterm;
2572 bool quoted;
2573 bool mathexit = 0;
2574 enum syntaxcode code;
2575 EMACS_INT min_depth = depth; /* Err out if depth gets less than this. */
2576 int comstyle = 0; /* style of comment encountered */
2577 bool comnested = 0; /* whether the comment is nestable or not */
2578 ptrdiff_t temp_pos;
2579 EMACS_INT last_good = from;
2580 bool found;
2581 ptrdiff_t from_byte;
2582 ptrdiff_t out_bytepos, out_charpos;
2583 EMACS_INT dummy;
2584 bool multibyte_symbol_p = sexpflag && multibyte_syntax_as_symbol;
2586 if (depth > 0) min_depth = 0;
2588 if (from > ZV) from = ZV;
2589 if (from < BEGV) from = BEGV;
2591 from_byte = CHAR_TO_BYTE (from);
2593 immediate_quit = 1;
2594 QUIT;
2596 SETUP_SYNTAX_TABLE (from, count);
2597 while (count > 0)
2599 while (from < stop)
2601 bool comstart_first, prefix;
2602 int syntax, other_syntax;
2603 UPDATE_SYNTAX_TABLE_FORWARD (from);
2604 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2605 syntax = SYNTAX_WITH_FLAGS (c);
2606 code = syntax_multibyte (c, multibyte_symbol_p);
2607 comstart_first = SYNTAX_FLAGS_COMSTART_FIRST (syntax);
2608 comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax);
2609 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0);
2610 prefix = SYNTAX_FLAGS_PREFIX (syntax);
2611 if (depth == min_depth)
2612 last_good = from;
2613 INC_BOTH (from, from_byte);
2614 UPDATE_SYNTAX_TABLE_FORWARD (from);
2615 if (from < stop && comstart_first
2616 && (c = FETCH_CHAR_AS_MULTIBYTE (from_byte),
2617 other_syntax = SYNTAX_WITH_FLAGS (c),
2618 SYNTAX_FLAGS_COMSTART_SECOND (other_syntax))
2619 && parse_sexp_ignore_comments)
2621 /* we have encountered a comment start sequence and we
2622 are ignoring all text inside comments. We must record
2623 the comment style this sequence begins so that later,
2624 only a comment end of the same style actually ends
2625 the comment section */
2626 code = Scomment;
2627 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax);
2628 comnested |= SYNTAX_FLAGS_COMMENT_NESTED (other_syntax);
2629 INC_BOTH (from, from_byte);
2630 UPDATE_SYNTAX_TABLE_FORWARD (from);
2633 if (prefix)
2634 continue;
2636 switch (code)
2638 case Sescape:
2639 case Scharquote:
2640 if (from == stop)
2641 goto lose;
2642 INC_BOTH (from, from_byte);
2643 /* treat following character as a word constituent */
2644 case Sword:
2645 case Ssymbol:
2646 if (depth || !sexpflag) break;
2647 /* This word counts as a sexp; return at end of it. */
2648 while (from < stop)
2650 UPDATE_SYNTAX_TABLE_FORWARD (from);
2652 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2653 switch (syntax_multibyte (c, multibyte_symbol_p))
2655 case Scharquote:
2656 case Sescape:
2657 INC_BOTH (from, from_byte);
2658 if (from == stop)
2659 goto lose;
2660 break;
2661 case Sword:
2662 case Ssymbol:
2663 case Squote:
2664 break;
2665 default:
2666 goto done;
2668 INC_BOTH (from, from_byte);
2670 goto done;
2672 case Scomment_fence:
2673 comstyle = ST_COMMENT_STYLE;
2674 /* FALLTHROUGH */
2675 case Scomment:
2676 if (!parse_sexp_ignore_comments) break;
2677 UPDATE_SYNTAX_TABLE_FORWARD (from);
2678 found = forw_comment (from, from_byte, stop,
2679 comnested, comstyle, 0,
2680 &out_charpos, &out_bytepos, &dummy);
2681 from = out_charpos, from_byte = out_bytepos;
2682 if (!found)
2684 if (depth == 0)
2685 goto done;
2686 goto lose;
2688 INC_BOTH (from, from_byte);
2689 UPDATE_SYNTAX_TABLE_FORWARD (from);
2690 break;
2692 case Smath:
2693 if (!sexpflag)
2694 break;
2695 if (from != stop && c == FETCH_CHAR_AS_MULTIBYTE (from_byte))
2697 INC_BOTH (from, from_byte);
2699 if (mathexit)
2701 mathexit = 0;
2702 goto close1;
2704 mathexit = 1;
2706 case Sopen:
2707 if (!++depth) goto done;
2708 break;
2710 case Sclose:
2711 close1:
2712 if (!--depth) goto done;
2713 if (depth < min_depth)
2714 xsignal3 (Qscan_error,
2715 build_string ("Containing expression ends prematurely"),
2716 make_number (last_good), make_number (from));
2717 break;
2719 case Sstring:
2720 case Sstring_fence:
2721 temp_pos = dec_bytepos (from_byte);
2722 stringterm = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
2723 while (1)
2725 enum syntaxcode c_code;
2726 if (from >= stop)
2727 goto lose;
2728 UPDATE_SYNTAX_TABLE_FORWARD (from);
2729 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2730 c_code = syntax_multibyte (c, multibyte_symbol_p);
2731 if (code == Sstring
2732 ? c == stringterm && c_code == Sstring
2733 : c_code == Sstring_fence)
2734 break;
2736 switch (c_code)
2738 case Scharquote:
2739 case Sescape:
2740 INC_BOTH (from, from_byte);
2742 INC_BOTH (from, from_byte);
2744 INC_BOTH (from, from_byte);
2745 if (!depth && sexpflag) goto done;
2746 break;
2747 default:
2748 /* Ignore whitespace, punctuation, quote, endcomment. */
2749 break;
2753 /* Reached end of buffer. Error if within object, return nil if between */
2754 if (depth)
2755 goto lose;
2757 immediate_quit = 0;
2758 return Qnil;
2760 /* End of object reached */
2761 done:
2762 count--;
2765 while (count < 0)
2767 while (from > stop)
2769 int syntax;
2770 DEC_BOTH (from, from_byte);
2771 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2772 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2773 syntax= SYNTAX_WITH_FLAGS (c);
2774 code = syntax_multibyte (c, multibyte_symbol_p);
2775 if (depth == min_depth)
2776 last_good = from;
2777 comstyle = 0;
2778 comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax);
2779 if (code == Sendcomment)
2780 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0);
2781 if (from > stop && SYNTAX_FLAGS_COMEND_SECOND (syntax)
2782 && prev_char_comend_first (from, from_byte)
2783 && parse_sexp_ignore_comments)
2785 /* We must record the comment style encountered so that
2786 later, we can match only the proper comment begin
2787 sequence of the same style. */
2788 int c2, other_syntax;
2789 DEC_BOTH (from, from_byte);
2790 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2791 code = Sendcomment;
2792 c2 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2793 other_syntax = SYNTAX_WITH_FLAGS (c2);
2794 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax);
2795 comnested |= SYNTAX_FLAGS_COMMENT_NESTED (other_syntax);
2798 /* Quoting turns anything except a comment-ender
2799 into a word character. Note that this cannot be true
2800 if we decremented FROM in the if-statement above. */
2801 if (code != Sendcomment && char_quoted (from, from_byte))
2803 DEC_BOTH (from, from_byte);
2804 code = Sword;
2806 else if (SYNTAX_FLAGS_PREFIX (syntax))
2807 continue;
2809 switch (code)
2811 case Sword:
2812 case Ssymbol:
2813 case Sescape:
2814 case Scharquote:
2815 if (depth || !sexpflag) break;
2816 /* This word counts as a sexp; count object finished
2817 after passing it. */
2818 while (from > stop)
2820 temp_pos = from_byte;
2821 if (! NILP (BVAR (current_buffer, enable_multibyte_characters)))
2822 DEC_POS (temp_pos);
2823 else
2824 temp_pos--;
2825 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
2826 c1 = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
2827 /* Don't allow comment-end to be quoted. */
2828 if (syntax_multibyte (c1, multibyte_symbol_p) == Sendcomment)
2829 goto done2;
2830 quoted = char_quoted (from - 1, temp_pos);
2831 if (quoted)
2833 DEC_BOTH (from, from_byte);
2834 temp_pos = dec_bytepos (temp_pos);
2835 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
2837 c1 = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
2838 if (! quoted)
2839 switch (syntax_multibyte (c1, multibyte_symbol_p))
2841 case Sword: case Ssymbol: case Squote: break;
2842 default: goto done2;
2844 DEC_BOTH (from, from_byte);
2846 goto done2;
2848 case Smath:
2849 if (!sexpflag)
2850 break;
2851 temp_pos = dec_bytepos (from_byte);
2852 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
2853 if (from != stop && c == FETCH_CHAR_AS_MULTIBYTE (temp_pos))
2854 DEC_BOTH (from, from_byte);
2855 if (mathexit)
2857 mathexit = 0;
2858 goto open2;
2860 mathexit = 1;
2862 case Sclose:
2863 if (!++depth) goto done2;
2864 break;
2866 case Sopen:
2867 open2:
2868 if (!--depth) goto done2;
2869 if (depth < min_depth)
2870 xsignal3 (Qscan_error,
2871 build_string ("Containing expression ends prematurely"),
2872 make_number (last_good), make_number (from));
2873 break;
2875 case Sendcomment:
2876 if (!parse_sexp_ignore_comments)
2877 break;
2878 found = back_comment (from, from_byte, stop, comnested, comstyle,
2879 &out_charpos, &out_bytepos);
2880 /* FIXME: if !found, it really wasn't a comment-end.
2881 For single-char Sendcomment, we can't do much about it apart
2882 from skipping the char.
2883 For 2-char endcomments, we could try again, taking both
2884 chars as separate entities, but it's a lot of trouble
2885 for very little gain, so we don't bother either. -sm */
2886 if (found)
2887 from = out_charpos, from_byte = out_bytepos;
2888 break;
2890 case Scomment_fence:
2891 case Sstring_fence:
2892 while (1)
2894 if (from == stop)
2895 goto lose;
2896 DEC_BOTH (from, from_byte);
2897 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2898 if (!char_quoted (from, from_byte))
2900 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2901 if (syntax_multibyte (c, multibyte_symbol_p) == code)
2902 break;
2905 if (code == Sstring_fence && !depth && sexpflag) goto done2;
2906 break;
2908 case Sstring:
2909 stringterm = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2910 while (1)
2912 if (from == stop)
2913 goto lose;
2914 DEC_BOTH (from, from_byte);
2915 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2916 if (!char_quoted (from, from_byte))
2918 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2919 if (c == stringterm
2920 && (syntax_multibyte (c, multibyte_symbol_p)
2921 == Sstring))
2922 break;
2925 if (!depth && sexpflag) goto done2;
2926 break;
2927 default:
2928 /* Ignore whitespace, punctuation, quote, endcomment. */
2929 break;
2933 /* Reached start of buffer. Error if within object, return nil if between */
2934 if (depth)
2935 goto lose;
2937 immediate_quit = 0;
2938 return Qnil;
2940 done2:
2941 count++;
2945 immediate_quit = 0;
2946 XSETFASTINT (val, from);
2947 return val;
2949 lose:
2950 xsignal3 (Qscan_error,
2951 build_string ("Unbalanced parentheses"),
2952 make_number (last_good), make_number (from));
2955 DEFUN ("scan-lists", Fscan_lists, Sscan_lists, 3, 3, 0,
2956 doc: /* Scan from character number FROM by COUNT lists.
2957 Scan forward if COUNT is positive, backward if COUNT is negative.
2958 Return the character number of the position thus found.
2960 A \"list", in this context, refers to a balanced parenthetical
2961 grouping, as determined by the syntax table.
2963 If DEPTH is nonzero, treat that as the nesting depth of the starting
2964 point (i.e. the starting point is DEPTH parentheses deep). This
2965 function scans over parentheses until the depth goes to zero COUNT
2966 times. Hence, positive DEPTH moves out that number of levels of
2967 parentheses, while negative DEPTH moves to a deeper level.
2969 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
2971 If we reach the beginning or end of the accessible part of the buffer
2972 before we have scanned over COUNT lists, return nil if the depth at
2973 that point is zero, and signal a error if the depth is nonzero. */)
2974 (Lisp_Object from, Lisp_Object count, Lisp_Object depth)
2976 CHECK_NUMBER (from);
2977 CHECK_NUMBER (count);
2978 CHECK_NUMBER (depth);
2980 return scan_lists (XINT (from), XINT (count), XINT (depth), 0);
2983 DEFUN ("scan-sexps", Fscan_sexps, Sscan_sexps, 2, 2, 0,
2984 doc: /* Scan from character number FROM by COUNT balanced expressions.
2985 If COUNT is negative, scan backwards.
2986 Returns the character number of the position thus found.
2988 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
2990 If the beginning or end of (the accessible part of) the buffer is reached
2991 in the middle of a parenthetical grouping, an error is signaled.
2992 If the beginning or end is reached between groupings
2993 but before count is used up, nil is returned. */)
2994 (Lisp_Object from, Lisp_Object count)
2996 CHECK_NUMBER (from);
2997 CHECK_NUMBER (count);
2999 return scan_lists (XINT (from), XINT (count), 0, 1);
3002 DEFUN ("backward-prefix-chars", Fbackward_prefix_chars, Sbackward_prefix_chars,
3003 0, 0, 0,
3004 doc: /* Move point backward over any number of chars with prefix syntax.
3005 This includes chars with "quote" or "prefix" syntax (' or p). */)
3006 (void)
3008 ptrdiff_t beg = BEGV;
3009 ptrdiff_t opoint = PT;
3010 ptrdiff_t opoint_byte = PT_BYTE;
3011 ptrdiff_t pos = PT;
3012 ptrdiff_t pos_byte = PT_BYTE;
3013 int c;
3015 if (pos <= beg)
3017 SET_PT_BOTH (opoint, opoint_byte);
3019 return Qnil;
3022 SETUP_SYNTAX_TABLE (pos, -1);
3024 DEC_BOTH (pos, pos_byte);
3026 while (!char_quoted (pos, pos_byte)
3027 /* Previous statement updates syntax table. */
3028 && ((c = FETCH_CHAR_AS_MULTIBYTE (pos_byte), SYNTAX (c) == Squote)
3029 || syntax_prefix_flag_p (c)))
3031 opoint = pos;
3032 opoint_byte = pos_byte;
3034 if (pos + 1 > beg)
3035 DEC_BOTH (pos, pos_byte);
3038 SET_PT_BOTH (opoint, opoint_byte);
3040 return Qnil;
3043 /* Parse forward from FROM / FROM_BYTE to END,
3044 assuming that FROM has state OLDSTATE (nil means FROM is start of function),
3045 and return a description of the state of the parse at END.
3046 If STOPBEFORE, stop at the start of an atom.
3047 If COMMENTSTOP is 1, stop at the start of a comment.
3048 If COMMENTSTOP is -1, stop at the start or end of a comment,
3049 after the beginning of a string, or after the end of a string. */
3051 static void
3052 scan_sexps_forward (struct lisp_parse_state *stateptr,
3053 ptrdiff_t from, ptrdiff_t from_byte, ptrdiff_t end,
3054 EMACS_INT targetdepth, bool stopbefore,
3055 Lisp_Object oldstate, int commentstop)
3057 struct lisp_parse_state state;
3058 enum syntaxcode code;
3059 int c1;
3060 bool comnested;
3061 struct level { ptrdiff_t last, prev; };
3062 struct level levelstart[100];
3063 struct level *curlevel = levelstart;
3064 struct level *endlevel = levelstart + 100;
3065 EMACS_INT depth; /* Paren depth of current scanning location.
3066 level - levelstart equals this except
3067 when the depth becomes negative. */
3068 EMACS_INT mindepth; /* Lowest DEPTH value seen. */
3069 bool start_quoted = 0; /* True means starting after a char quote. */
3070 Lisp_Object tem;
3071 ptrdiff_t prev_from; /* Keep one character before FROM. */
3072 ptrdiff_t prev_from_byte;
3073 int prev_from_syntax;
3074 bool boundary_stop = commentstop == -1;
3075 bool nofence;
3076 bool found;
3077 ptrdiff_t out_bytepos, out_charpos;
3078 int temp;
3080 prev_from = from;
3081 prev_from_byte = from_byte;
3082 if (from != BEGV)
3083 DEC_BOTH (prev_from, prev_from_byte);
3085 /* Use this macro instead of `from++'. */
3086 #define INC_FROM \
3087 do { prev_from = from; \
3088 prev_from_byte = from_byte; \
3089 temp = FETCH_CHAR_AS_MULTIBYTE (prev_from_byte); \
3090 prev_from_syntax = SYNTAX_WITH_FLAGS (temp); \
3091 INC_BOTH (from, from_byte); \
3092 if (from < end) \
3093 UPDATE_SYNTAX_TABLE_FORWARD (from); \
3094 } while (0)
3096 immediate_quit = 1;
3097 QUIT;
3099 if (NILP (oldstate))
3101 depth = 0;
3102 state.instring = -1;
3103 state.incomment = 0;
3104 state.comstyle = 0; /* comment style a by default. */
3105 state.comstr_start = -1; /* no comment/string seen. */
3107 else
3109 tem = Fcar (oldstate);
3110 if (!NILP (tem))
3111 depth = XINT (tem);
3112 else
3113 depth = 0;
3115 oldstate = Fcdr (oldstate);
3116 oldstate = Fcdr (oldstate);
3117 oldstate = Fcdr (oldstate);
3118 tem = Fcar (oldstate);
3119 /* Check whether we are inside string_fence-style string: */
3120 state.instring = (!NILP (tem)
3121 ? (CHARACTERP (tem) ? XFASTINT (tem) : ST_STRING_STYLE)
3122 : -1);
3124 oldstate = Fcdr (oldstate);
3125 tem = Fcar (oldstate);
3126 state.incomment = (!NILP (tem)
3127 ? (INTEGERP (tem) ? XINT (tem) : -1)
3128 : 0);
3130 oldstate = Fcdr (oldstate);
3131 tem = Fcar (oldstate);
3132 start_quoted = !NILP (tem);
3134 /* if the eighth element of the list is nil, we are in comment
3135 style a. If it is non-nil, we are in comment style b */
3136 oldstate = Fcdr (oldstate);
3137 oldstate = Fcdr (oldstate);
3138 tem = Fcar (oldstate);
3139 state.comstyle = (NILP (tem)
3141 : (RANGED_INTEGERP (0, tem, ST_COMMENT_STYLE)
3142 ? XINT (tem)
3143 : ST_COMMENT_STYLE));
3145 oldstate = Fcdr (oldstate);
3146 tem = Fcar (oldstate);
3147 state.comstr_start =
3148 RANGED_INTEGERP (PTRDIFF_MIN, tem, PTRDIFF_MAX) ? XINT (tem) : -1;
3149 oldstate = Fcdr (oldstate);
3150 tem = Fcar (oldstate);
3151 while (!NILP (tem)) /* >= second enclosing sexps. */
3153 Lisp_Object temhd = Fcar (tem);
3154 if (RANGED_INTEGERP (PTRDIFF_MIN, temhd, PTRDIFF_MAX))
3155 curlevel->last = XINT (temhd);
3156 if (++curlevel == endlevel)
3157 curlevel--; /* error ("Nesting too deep for parser"); */
3158 curlevel->prev = -1;
3159 curlevel->last = -1;
3160 tem = Fcdr (tem);
3163 state.quoted = 0;
3164 mindepth = depth;
3166 curlevel->prev = -1;
3167 curlevel->last = -1;
3169 SETUP_SYNTAX_TABLE (prev_from, 1);
3170 temp = FETCH_CHAR (prev_from_byte);
3171 prev_from_syntax = SYNTAX_WITH_FLAGS (temp);
3172 UPDATE_SYNTAX_TABLE_FORWARD (from);
3174 /* Enter the loop at a place appropriate for initial state. */
3176 if (state.incomment)
3177 goto startincomment;
3178 if (state.instring >= 0)
3180 nofence = state.instring != ST_STRING_STYLE;
3181 if (start_quoted)
3182 goto startquotedinstring;
3183 goto startinstring;
3185 else if (start_quoted)
3186 goto startquoted;
3188 while (from < end)
3190 int syntax;
3191 INC_FROM;
3192 code = prev_from_syntax & 0xff;
3194 if (from < end
3195 && SYNTAX_FLAGS_COMSTART_FIRST (prev_from_syntax)
3196 && (c1 = FETCH_CHAR (from_byte),
3197 syntax = SYNTAX_WITH_FLAGS (c1),
3198 SYNTAX_FLAGS_COMSTART_SECOND (syntax)))
3199 /* Duplicate code to avoid a complex if-expression
3200 which causes trouble for the SGI compiler. */
3202 /* Record the comment style we have entered so that only
3203 the comment-end sequence of the same style actually
3204 terminates the comment section. */
3205 state.comstyle
3206 = SYNTAX_FLAGS_COMMENT_STYLE (syntax, prev_from_syntax);
3207 comnested = (SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax)
3208 | SYNTAX_FLAGS_COMMENT_NESTED (syntax));
3209 state.incomment = comnested ? 1 : -1;
3210 state.comstr_start = prev_from;
3211 INC_FROM;
3212 code = Scomment;
3214 else if (code == Scomment_fence)
3216 /* Record the comment style we have entered so that only
3217 the comment-end sequence of the same style actually
3218 terminates the comment section. */
3219 state.comstyle = ST_COMMENT_STYLE;
3220 state.incomment = -1;
3221 state.comstr_start = prev_from;
3222 code = Scomment;
3224 else if (code == Scomment)
3226 state.comstyle = SYNTAX_FLAGS_COMMENT_STYLE (prev_from_syntax, 0);
3227 state.incomment = (SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax) ?
3228 1 : -1);
3229 state.comstr_start = prev_from;
3232 if (SYNTAX_FLAGS_PREFIX (prev_from_syntax))
3233 continue;
3234 switch (code)
3236 case Sescape:
3237 case Scharquote:
3238 if (stopbefore) goto stop; /* this arg means stop at sexp start */
3239 curlevel->last = prev_from;
3240 startquoted:
3241 if (from == end) goto endquoted;
3242 INC_FROM;
3243 goto symstarted;
3244 /* treat following character as a word constituent */
3245 case Sword:
3246 case Ssymbol:
3247 if (stopbefore) goto stop; /* this arg means stop at sexp start */
3248 curlevel->last = prev_from;
3249 symstarted:
3250 while (from < end)
3252 int symchar = FETCH_CHAR_AS_MULTIBYTE (from_byte);
3253 switch (SYNTAX (symchar))
3255 case Scharquote:
3256 case Sescape:
3257 INC_FROM;
3258 if (from == end) goto endquoted;
3259 break;
3260 case Sword:
3261 case Ssymbol:
3262 case Squote:
3263 break;
3264 default:
3265 goto symdone;
3267 INC_FROM;
3269 symdone:
3270 curlevel->prev = curlevel->last;
3271 break;
3273 case Scomment_fence: /* Can't happen because it's handled above. */
3274 case Scomment:
3275 if (commentstop || boundary_stop) goto done;
3276 startincomment:
3277 /* The (from == BEGV) test was to enter the loop in the middle so
3278 that we find a 2-char comment ender even if we start in the
3279 middle of it. We don't want to do that if we're just at the
3280 beginning of the comment (think of (*) ... (*)). */
3281 found = forw_comment (from, from_byte, end,
3282 state.incomment, state.comstyle,
3283 (from == BEGV || from < state.comstr_start + 3)
3284 ? 0 : prev_from_syntax,
3285 &out_charpos, &out_bytepos, &state.incomment);
3286 from = out_charpos; from_byte = out_bytepos;
3287 /* Beware! prev_from and friends are invalid now.
3288 Luckily, the `done' doesn't use them and the INC_FROM
3289 sets them to a sane value without looking at them. */
3290 if (!found) goto done;
3291 INC_FROM;
3292 state.incomment = 0;
3293 state.comstyle = 0; /* reset the comment style */
3294 if (boundary_stop) goto done;
3295 break;
3297 case Sopen:
3298 if (stopbefore) goto stop; /* this arg means stop at sexp start */
3299 depth++;
3300 /* curlevel++->last ran into compiler bug on Apollo */
3301 curlevel->last = prev_from;
3302 if (++curlevel == endlevel)
3303 curlevel--; /* error ("Nesting too deep for parser"); */
3304 curlevel->prev = -1;
3305 curlevel->last = -1;
3306 if (targetdepth == depth) goto done;
3307 break;
3309 case Sclose:
3310 depth--;
3311 if (depth < mindepth)
3312 mindepth = depth;
3313 if (curlevel != levelstart)
3314 curlevel--;
3315 curlevel->prev = curlevel->last;
3316 if (targetdepth == depth) goto done;
3317 break;
3319 case Sstring:
3320 case Sstring_fence:
3321 state.comstr_start = from - 1;
3322 if (stopbefore) goto stop; /* this arg means stop at sexp start */
3323 curlevel->last = prev_from;
3324 state.instring = (code == Sstring
3325 ? (FETCH_CHAR_AS_MULTIBYTE (prev_from_byte))
3326 : ST_STRING_STYLE);
3327 if (boundary_stop) goto done;
3328 startinstring:
3330 nofence = state.instring != ST_STRING_STYLE;
3332 while (1)
3334 int c;
3335 enum syntaxcode c_code;
3337 if (from >= end) goto done;
3338 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
3339 c_code = SYNTAX (c);
3341 /* Check C_CODE here so that if the char has
3342 a syntax-table property which says it is NOT
3343 a string character, it does not end the string. */
3344 if (nofence && c == state.instring && c_code == Sstring)
3345 break;
3347 switch (c_code)
3349 case Sstring_fence:
3350 if (!nofence) goto string_end;
3351 break;
3352 case Scharquote:
3353 case Sescape:
3354 INC_FROM;
3355 startquotedinstring:
3356 if (from >= end) goto endquoted;
3358 INC_FROM;
3361 string_end:
3362 state.instring = -1;
3363 curlevel->prev = curlevel->last;
3364 INC_FROM;
3365 if (boundary_stop) goto done;
3366 break;
3368 case Smath:
3369 /* FIXME: We should do something with it. */
3370 break;
3371 default:
3372 /* Ignore whitespace, punctuation, quote, endcomment. */
3373 break;
3376 goto done;
3378 stop: /* Here if stopping before start of sexp. */
3379 from = prev_from; /* We have just fetched the char that starts it; */
3380 from_byte = prev_from_byte;
3381 goto done; /* but return the position before it. */
3383 endquoted:
3384 state.quoted = 1;
3385 done:
3386 state.depth = depth;
3387 state.mindepth = mindepth;
3388 state.thislevelstart = curlevel->prev;
3389 state.prevlevelstart
3390 = (curlevel == levelstart) ? -1 : (curlevel - 1)->last;
3391 state.location = from;
3392 state.location_byte = from_byte;
3393 state.levelstarts = Qnil;
3394 while (curlevel > levelstart)
3395 state.levelstarts = Fcons (make_number ((--curlevel)->last),
3396 state.levelstarts);
3397 immediate_quit = 0;
3399 *stateptr = state;
3402 DEFUN ("parse-partial-sexp", Fparse_partial_sexp, Sparse_partial_sexp, 2, 6, 0,
3403 doc: /* Parse Lisp syntax starting at FROM until TO; return status of parse at TO.
3404 Parsing stops at TO or when certain criteria are met;
3405 point is set to where parsing stops.
3406 If fifth arg OLDSTATE is omitted or nil,
3407 parsing assumes that FROM is the beginning of a function.
3408 Value is a list of elements describing final state of parsing:
3409 0. depth in parens.
3410 1. character address of start of innermost containing list; nil if none.
3411 2. character address of start of last complete sexp terminated.
3412 3. non-nil if inside a string.
3413 (it is the character that will terminate the string,
3414 or t if the string should be terminated by a generic string delimiter.)
3415 4. nil if outside a comment, t if inside a non-nestable comment,
3416 else an integer (the current comment nesting).
3417 5. t if following a quote character.
3418 6. the minimum paren-depth encountered during this scan.
3419 7. style of comment, if any.
3420 8. character address of start of comment or string; nil if not in one.
3421 9. Intermediate data for continuation of parsing (subject to change).
3422 If third arg TARGETDEPTH is non-nil, parsing stops if the depth
3423 in parentheses becomes equal to TARGETDEPTH.
3424 Fourth arg STOPBEFORE non-nil means stop when come to
3425 any character that starts a sexp.
3426 Fifth arg OLDSTATE is a list like what this function returns.
3427 It is used to initialize the state of the parse. Elements number 1, 2, 6
3428 are ignored.
3429 Sixth arg COMMENTSTOP non-nil means stop at the start of a comment.
3430 If it is symbol `syntax-table', stop after the start of a comment or a
3431 string, or after end of a comment or a string. */)
3432 (Lisp_Object from, Lisp_Object to, Lisp_Object targetdepth,
3433 Lisp_Object stopbefore, Lisp_Object oldstate, Lisp_Object commentstop)
3435 struct lisp_parse_state state;
3436 EMACS_INT target;
3438 if (!NILP (targetdepth))
3440 CHECK_NUMBER (targetdepth);
3441 target = XINT (targetdepth);
3443 else
3444 target = TYPE_MINIMUM (EMACS_INT); /* We won't reach this depth */
3446 validate_region (&from, &to);
3447 scan_sexps_forward (&state, XINT (from), CHAR_TO_BYTE (XINT (from)),
3448 XINT (to),
3449 target, !NILP (stopbefore), oldstate,
3450 (NILP (commentstop)
3451 ? 0 : (EQ (commentstop, Qsyntax_table) ? -1 : 1)));
3453 SET_PT_BOTH (state.location, state.location_byte);
3455 return Fcons (make_number (state.depth),
3456 Fcons (state.prevlevelstart < 0
3457 ? Qnil : make_number (state.prevlevelstart),
3458 Fcons (state.thislevelstart < 0
3459 ? Qnil : make_number (state.thislevelstart),
3460 Fcons (state.instring >= 0
3461 ? (state.instring == ST_STRING_STYLE
3462 ? Qt : make_number (state.instring)) : Qnil,
3463 Fcons (state.incomment < 0 ? Qt :
3464 (state.incomment == 0 ? Qnil :
3465 make_number (state.incomment)),
3466 Fcons (state.quoted ? Qt : Qnil,
3467 Fcons (make_number (state.mindepth),
3468 Fcons ((state.comstyle
3469 ? (state.comstyle == ST_COMMENT_STYLE
3470 ? Qsyntax_table
3471 : make_number (state.comstyle))
3472 : Qnil),
3473 Fcons (((state.incomment
3474 || (state.instring >= 0))
3475 ? make_number (state.comstr_start)
3476 : Qnil),
3477 Fcons (state.levelstarts, Qnil))))))))));
3480 void
3481 init_syntax_once (void)
3483 register int i, c;
3484 Lisp_Object temp;
3486 /* This has to be done here, before we call Fmake_char_table. */
3487 DEFSYM (Qsyntax_table, "syntax-table");
3489 /* This variable is DEFSYMed in alloc.c and not initialized yet, so
3490 intern it here. NOTE: you must guarantee that init_syntax_once
3491 is called before all other users of this variable. */
3492 Qchar_table_extra_slots = intern_c_string ("char-table-extra-slots");
3494 /* Create objects which can be shared among syntax tables. */
3495 Vsyntax_code_object = make_uninit_vector (Smax);
3496 for (i = 0; i < Smax; i++)
3497 ASET (Vsyntax_code_object, i, Fcons (make_number (i), Qnil));
3499 /* Now we are ready to set up this property, so we can
3500 create syntax tables. */
3501 Fput (Qsyntax_table, Qchar_table_extra_slots, make_number (0));
3503 temp = AREF (Vsyntax_code_object, Swhitespace);
3505 Vstandard_syntax_table = Fmake_char_table (Qsyntax_table, temp);
3507 /* Control characters should not be whitespace. */
3508 temp = AREF (Vsyntax_code_object, Spunct);
3509 for (i = 0; i <= ' ' - 1; i++)
3510 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3511 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, 0177, temp);
3513 /* Except that a few really are whitespace. */
3514 temp = AREF (Vsyntax_code_object, Swhitespace);
3515 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ' ', temp);
3516 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\t', temp);
3517 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\n', temp);
3518 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, 015, temp);
3519 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, 014, temp);
3521 temp = AREF (Vsyntax_code_object, Sword);
3522 for (i = 'a'; i <= 'z'; i++)
3523 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3524 for (i = 'A'; i <= 'Z'; i++)
3525 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3526 for (i = '0'; i <= '9'; i++)
3527 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3529 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '$', temp);
3530 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '%', temp);
3532 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '(',
3533 Fcons (make_number (Sopen), make_number (')')));
3534 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ')',
3535 Fcons (make_number (Sclose), make_number ('(')));
3536 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '[',
3537 Fcons (make_number (Sopen), make_number (']')));
3538 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ']',
3539 Fcons (make_number (Sclose), make_number ('[')));
3540 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '{',
3541 Fcons (make_number (Sopen), make_number ('}')));
3542 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '}',
3543 Fcons (make_number (Sclose), make_number ('{')));
3544 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '"',
3545 Fcons (make_number (Sstring), Qnil));
3546 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\\',
3547 Fcons (make_number (Sescape), Qnil));
3549 temp = AREF (Vsyntax_code_object, Ssymbol);
3550 for (i = 0; i < 10; i++)
3552 c = "_-+*/&|<>="[i];
3553 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
3556 temp = AREF (Vsyntax_code_object, Spunct);
3557 for (i = 0; i < 12; i++)
3559 c = ".,;:?!#@~^'`"[i];
3560 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
3563 /* All multibyte characters have syntax `word' by default. */
3564 temp = AREF (Vsyntax_code_object, Sword);
3565 char_table_set_range (Vstandard_syntax_table, 0x80, MAX_CHAR, temp);
3568 void
3569 syms_of_syntax (void)
3571 DEFSYM (Qsyntax_table_p, "syntax-table-p");
3573 staticpro (&Vsyntax_code_object);
3575 staticpro (&gl_state.object);
3576 staticpro (&gl_state.global_code);
3577 staticpro (&gl_state.current_syntax_table);
3578 staticpro (&gl_state.old_prop);
3580 /* Defined in regex.c */
3581 staticpro (&re_match_object);
3583 DEFSYM (Qscan_error, "scan-error");
3584 Fput (Qscan_error, Qerror_conditions,
3585 listn (CONSTYPE_PURE, 2, Qscan_error, Qerror));
3586 Fput (Qscan_error, Qerror_message,
3587 build_pure_c_string ("Scan error"));
3589 DEFVAR_BOOL ("parse-sexp-ignore-comments", parse_sexp_ignore_comments,
3590 doc: /* Non-nil means `forward-sexp', etc., should treat comments as whitespace. */);
3592 DEFVAR_BOOL ("parse-sexp-lookup-properties", parse_sexp_lookup_properties,
3593 doc: /* Non-nil means `forward-sexp', etc., obey `syntax-table' property.
3594 Otherwise, that text property is simply ignored.
3595 See the info node `(elisp)Syntax Properties' for a description of the
3596 `syntax-table' property. */);
3598 words_include_escapes = 0;
3599 DEFVAR_BOOL ("words-include-escapes", words_include_escapes,
3600 doc: /* Non-nil means `forward-word', etc., should treat escape chars part of words. */);
3602 DEFVAR_BOOL ("multibyte-syntax-as-symbol", multibyte_syntax_as_symbol,
3603 doc: /* Non-nil means `scan-sexps' treats all multibyte characters as symbol. */);
3604 multibyte_syntax_as_symbol = 0;
3606 DEFVAR_BOOL ("open-paren-in-column-0-is-defun-start",
3607 open_paren_in_column_0_is_defun_start,
3608 doc: /* Non-nil means an open paren in column 0 denotes the start of a defun. */);
3609 open_paren_in_column_0_is_defun_start = 1;
3612 DEFVAR_LISP ("find-word-boundary-function-table",
3613 Vfind_word_boundary_function_table,
3614 doc: /*
3615 Char table of functions to search for the word boundary.
3616 Each function is called with two arguments; POS and LIMIT.
3617 POS and LIMIT are character positions in the current buffer.
3619 If POS is less than LIMIT, POS is at the first character of a word,
3620 and the return value of a function is a position after the last
3621 character of that word.
3623 If POS is not less than LIMIT, POS is at the last character of a word,
3624 and the return value of a function is a position at the first
3625 character of that word.
3627 In both cases, LIMIT bounds the search. */);
3628 Vfind_word_boundary_function_table = Fmake_char_table (Qnil, Qnil);
3630 defsubr (&Ssyntax_table_p);
3631 defsubr (&Ssyntax_table);
3632 defsubr (&Sstandard_syntax_table);
3633 defsubr (&Scopy_syntax_table);
3634 defsubr (&Sset_syntax_table);
3635 defsubr (&Schar_syntax);
3636 defsubr (&Smatching_paren);
3637 defsubr (&Sstring_to_syntax);
3638 defsubr (&Smodify_syntax_entry);
3639 defsubr (&Sinternal_describe_syntax_value);
3641 defsubr (&Sforward_word);
3643 defsubr (&Sskip_chars_forward);
3644 defsubr (&Sskip_chars_backward);
3645 defsubr (&Sskip_syntax_forward);
3646 defsubr (&Sskip_syntax_backward);
3648 defsubr (&Sforward_comment);
3649 defsubr (&Sscan_lists);
3650 defsubr (&Sscan_sexps);
3651 defsubr (&Sbackward_prefix_chars);
3652 defsubr (&Sparse_partial_sexp);