1 /* GNU Emacs routines to deal with syntax tables; also word and list parsing.
2 Copyright (C) 1985, 1987, 1993-1995, 1997-1999, 2001-2013 Free
3 Software Foundation, Inc.
5 This file is part of GNU Emacs.
7 GNU Emacs is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
12 GNU Emacs is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
23 #define SYNTAX_INLINE EXTERN_INLINE
25 #include <sys/types.h>
29 #include "character.h"
34 /* Make syntax table lookup grant data in gl_state. */
35 #define SYNTAX_ENTRY_VIA_PROPERTY
38 #include "intervals.h"
41 /* Eight single-bit flags have the following meanings:
42 1. This character is the first of a two-character comment-start sequence.
43 2. This character is the second of a two-character comment-start sequence.
44 3. This character is the first of a two-character comment-end sequence.
45 4. This character is the second of a two-character comment-end sequence.
46 5. This character is a prefix, for backward-prefix-chars.
47 6. The char is part of a delimiter for comments of style "b".
48 7. This character is part of a nestable comment sequence.
49 8. The char is part of a delimiter for comments of style "c".
50 Note that any two-character sequence whose first character has flag 1
51 and whose second character has flag 2 will be interpreted as a comment start.
53 Bits 6 and 8 discriminate among different comment styles.
54 Languages such as C++ allow two orthogonal syntax start/end pairs
55 and bit 6 determines whether a comment-end or Scommentend
56 ends style a or b. Comment markers can start style a, b, c, or bc.
57 Style a is always the default.
58 For 2-char comment markers, the style b flag is looked up only on the second
59 char of the comment marker and on the first char of the comment ender.
60 For style c (like the nested flag), the flag can be placed on any of
63 /* These functions extract specific flags from an integer
64 that holds the syntax code and the flags. */
67 SYNTAX_FLAGS_COMSTART_FIRST (int flags
)
69 return (flags
>> 16) & 1;
72 SYNTAX_FLAGS_COMSTART_SECOND (int flags
)
74 return (flags
>> 17) & 1;
77 SYNTAX_FLAGS_COMEND_FIRST (int flags
)
79 return (flags
>> 18) & 1;
82 SYNTAX_FLAGS_COMEND_SECOND (int flags
)
84 return (flags
>> 19) & 1;
87 SYNTAX_FLAGS_PREFIX (int flags
)
89 return (flags
>> 20) & 1;
92 SYNTAX_FLAGS_COMMENT_STYLEB (int flags
)
94 return (flags
>> 21) & 1;
97 SYNTAX_FLAGS_COMMENT_STYLEC (int flags
)
99 return (flags
>> 23) & 1;
102 SYNTAX_FLAGS_COMMENT_STYLEC2 (int flags
)
104 return (flags
>> 22) & 2; /* SYNTAX_FLAGS_COMMENT_STYLEC (flags) * 2 */
107 SYNTAX_FLAGS_COMMENT_NESTED (int flags
)
109 return (flags
>> 22) & 1;
112 /* FLAGS should be the flags of the main char of the comment marker, e.g.
113 the second for comstart and the first for comend. */
115 SYNTAX_FLAGS_COMMENT_STYLE (int flags
, int other_flags
)
117 return (SYNTAX_FLAGS_COMMENT_STYLEB (flags
)
118 | SYNTAX_FLAGS_COMMENT_STYLEC2 (flags
)
119 | SYNTAX_FLAGS_COMMENT_STYLEC2 (other_flags
));
122 /* Extract a particular flag for a given character. */
125 SYNTAX_COMEND_FIRST (int c
)
127 return SYNTAX_FLAGS_COMEND_FIRST (SYNTAX_WITH_FLAGS (c
));
130 /* We use these constants in place for comment-style and
131 string-ender-char to distinguish comments/strings started by
132 comment_fence and string_fence codes. */
136 ST_COMMENT_STYLE
= 256 + 1,
137 ST_STRING_STYLE
= 256 + 2
140 static Lisp_Object Qsyntax_table_p
;
141 static Lisp_Object Qsyntax_table
, Qscan_error
;
143 /* This is the internal form of the parse state used in parse-partial-sexp. */
145 struct lisp_parse_state
147 EMACS_INT depth
; /* Depth at end of parsing. */
148 int instring
; /* -1 if not within string, else desired terminator. */
149 EMACS_INT incomment
; /* -1 if in unnestable comment else comment nesting */
150 int comstyle
; /* comment style a=0, or b=1, or ST_COMMENT_STYLE. */
151 bool quoted
; /* True if just after an escape char at end of parsing. */
152 EMACS_INT mindepth
; /* Minimum depth seen while scanning. */
153 /* Char number of most recent start-of-expression at current level */
154 ptrdiff_t thislevelstart
;
155 /* Char number of start of containing expression */
156 ptrdiff_t prevlevelstart
;
157 ptrdiff_t location
; /* Char number at which parsing stopped. */
158 ptrdiff_t location_byte
; /* Corresponding byte position. */
159 ptrdiff_t comstr_start
; /* Position of last comment/string starter. */
160 Lisp_Object levelstarts
; /* Char numbers of starts-of-expression
161 of levels (starting from outermost). */
164 /* These variables are a cache for finding the start of a defun.
165 find_start_pos is the place for which the defun start was found.
166 find_start_value is the defun start position found for it.
167 find_start_value_byte is the corresponding byte position.
168 find_start_buffer is the buffer it was found in.
169 find_start_begv is the BEGV value when it was found.
170 find_start_modiff is the value of MODIFF when it was found. */
172 static ptrdiff_t find_start_pos
;
173 static ptrdiff_t find_start_value
;
174 static ptrdiff_t find_start_value_byte
;
175 static struct buffer
*find_start_buffer
;
176 static ptrdiff_t find_start_begv
;
177 static EMACS_INT find_start_modiff
;
180 static Lisp_Object
skip_chars (bool, Lisp_Object
, Lisp_Object
, bool);
181 static Lisp_Object
skip_syntaxes (bool, Lisp_Object
, Lisp_Object
);
182 static Lisp_Object
scan_lists (EMACS_INT
, EMACS_INT
, EMACS_INT
, bool);
183 static void scan_sexps_forward (struct lisp_parse_state
*,
184 ptrdiff_t, ptrdiff_t, ptrdiff_t, EMACS_INT
,
185 bool, Lisp_Object
, int);
186 static bool in_classes (int, Lisp_Object
);
188 /* This setter is used only in this file, so it can be private. */
190 bset_syntax_table (struct buffer
*b
, Lisp_Object val
)
192 b
->INTERNAL_FIELD (syntax_table
) = val
;
195 /* Whether the syntax of the character C has the prefix flag set. */
197 syntax_prefix_flag_p (int c
)
199 return SYNTAX_FLAGS_PREFIX (SYNTAX_WITH_FLAGS (c
));
202 struct gl_state_s gl_state
; /* Global state of syntax parser. */
204 enum { INTERVALS_AT_ONCE
= 10 }; /* 1 + max-number of intervals
205 to scan to property-change. */
207 /* Set the syntax entry VAL for char C in table TABLE. */
210 SET_RAW_SYNTAX_ENTRY (Lisp_Object table
, int c
, Lisp_Object val
)
212 CHAR_TABLE_SET (table
, c
, val
);
215 /* Set the syntax entry VAL for char-range RANGE in table TABLE.
216 RANGE is a cons (FROM . TO) specifying the range of characters. */
219 SET_RAW_SYNTAX_ENTRY_RANGE (Lisp_Object table
, Lisp_Object range
,
222 Fset_char_table_range (table
, range
, val
);
225 /* Extract the information from the entry for character C
226 in the current syntax table. */
231 Lisp_Object ent
= SYNTAX_ENTRY (c
);
232 return CONSP (ent
) ? XCDR (ent
) : Qnil
;
235 /* This should be called with FROM at the start of forward
236 search, or after the last position of the backward search. It
237 makes sure that the first char is picked up with correct table, so
238 one does not need to call UPDATE_SYNTAX_TABLE immediately after the
240 Sign of COUNT gives the direction of the search.
244 SETUP_SYNTAX_TABLE (ptrdiff_t from
, ptrdiff_t count
)
246 SETUP_BUFFER_SYNTAX_TABLE ();
247 gl_state
.b_property
= BEGV
;
248 gl_state
.e_property
= ZV
+ 1;
249 gl_state
.object
= Qnil
;
251 if (parse_sexp_lookup_properties
)
252 if (count
> 0 || from
> BEGV
)
253 update_syntax_table (count
> 0 ? from
: from
- 1, count
, 1, Qnil
);
256 /* Same as above, but in OBJECT. If OBJECT is nil, use current buffer.
257 If it is t (which is only used in fast_c_string_match_ignore_case),
258 ignore properties altogether.
260 This is meant for regex.c to use. For buffers, regex.c passes arguments
261 to the UPDATE_SYNTAX_TABLE functions which are relative to BEGV.
262 So if it is a buffer, we set the offset field to BEGV. */
265 SETUP_SYNTAX_TABLE_FOR_OBJECT (Lisp_Object object
,
266 ptrdiff_t from
, ptrdiff_t count
)
268 SETUP_BUFFER_SYNTAX_TABLE ();
269 gl_state
.object
= object
;
270 if (BUFFERP (gl_state
.object
))
272 struct buffer
*buf
= XBUFFER (gl_state
.object
);
273 gl_state
.b_property
= 1;
274 gl_state
.e_property
= BUF_ZV (buf
) - BUF_BEGV (buf
) + 1;
275 gl_state
.offset
= BUF_BEGV (buf
) - 1;
277 else if (NILP (gl_state
.object
))
279 gl_state
.b_property
= 1;
280 gl_state
.e_property
= ZV
- BEGV
+ 1;
281 gl_state
.offset
= BEGV
- 1;
283 else if (EQ (gl_state
.object
, Qt
))
285 gl_state
.b_property
= 0;
286 gl_state
.e_property
= PTRDIFF_MAX
;
291 gl_state
.b_property
= 0;
292 gl_state
.e_property
= 1 + SCHARS (gl_state
.object
);
295 if (parse_sexp_lookup_properties
)
296 update_syntax_table (from
+ gl_state
.offset
- (count
<= 0),
297 count
, 1, gl_state
.object
);
300 /* Update gl_state to an appropriate interval which contains CHARPOS. The
301 sign of COUNT give the relative position of CHARPOS wrt the previously
302 valid interval. If INIT, only [be]_property fields of gl_state are
303 valid at start, the rest is filled basing on OBJECT.
305 `gl_state.*_i' are the intervals, and CHARPOS is further in the search
306 direction than the intervals - or in an interval. We update the
307 current syntax-table basing on the property of this interval, and
308 update the interval to start further than CHARPOS - or be
309 NULL. We also update lim_property to be the next value of
310 charpos to call this subroutine again - or be before/after the
311 start/end of OBJECT. */
314 update_syntax_table (ptrdiff_t charpos
, EMACS_INT count
, bool init
,
317 Lisp_Object tmp_table
;
324 gl_state
.old_prop
= Qnil
;
325 gl_state
.start
= gl_state
.b_property
;
326 gl_state
.stop
= gl_state
.e_property
;
327 i
= interval_of (charpos
, object
);
328 gl_state
.backward_i
= gl_state
.forward_i
= i
;
332 /* interval_of updates only ->position of the return value, so
333 update the parents manually to speed up update_interval. */
334 while (!NULL_PARENT (i
))
336 if (AM_RIGHT_CHILD (i
))
337 INTERVAL_PARENT (i
)->position
= i
->position
338 - LEFT_TOTAL_LENGTH (i
) + TOTAL_LENGTH (i
) /* right end */
339 - TOTAL_LENGTH (INTERVAL_PARENT (i
))
340 + LEFT_TOTAL_LENGTH (INTERVAL_PARENT (i
));
342 INTERVAL_PARENT (i
)->position
= i
->position
- LEFT_TOTAL_LENGTH (i
)
344 i
= INTERVAL_PARENT (i
);
346 i
= gl_state
.forward_i
;
347 gl_state
.b_property
= i
->position
- gl_state
.offset
;
348 gl_state
.e_property
= INTERVAL_LAST_POS (i
) - gl_state
.offset
;
351 i
= count
> 0 ? gl_state
.forward_i
: gl_state
.backward_i
;
353 /* We are guaranteed to be called with CHARPOS either in i,
356 error ("Error in syntax_table logic for to-the-end intervals");
357 else if (charpos
< i
->position
) /* Move left. */
360 error ("Error in syntax_table logic for intervals <-");
361 /* Update the interval. */
362 i
= update_interval (i
, charpos
);
363 if (INTERVAL_LAST_POS (i
) != gl_state
.b_property
)
366 gl_state
.forward_i
= i
;
367 gl_state
.e_property
= INTERVAL_LAST_POS (i
) - gl_state
.offset
;
370 else if (charpos
>= INTERVAL_LAST_POS (i
)) /* Move right. */
373 error ("Error in syntax_table logic for intervals ->");
374 /* Update the interval. */
375 i
= update_interval (i
, charpos
);
376 if (i
->position
!= gl_state
.e_property
)
379 gl_state
.backward_i
= i
;
380 gl_state
.b_property
= i
->position
- gl_state
.offset
;
385 tmp_table
= textget (i
->plist
, Qsyntax_table
);
388 invalidate
= !EQ (tmp_table
, gl_state
.old_prop
); /* Need to invalidate? */
390 if (invalidate
) /* Did not get to adjacent interval. */
391 { /* with the same table => */
392 /* invalidate the old range. */
395 gl_state
.backward_i
= i
;
396 gl_state
.b_property
= i
->position
- gl_state
.offset
;
400 gl_state
.forward_i
= i
;
401 gl_state
.e_property
= INTERVAL_LAST_POS (i
) - gl_state
.offset
;
405 if (!EQ (tmp_table
, gl_state
.old_prop
))
407 gl_state
.current_syntax_table
= tmp_table
;
408 gl_state
.old_prop
= tmp_table
;
409 if (EQ (Fsyntax_table_p (tmp_table
), Qt
))
411 gl_state
.use_global
= 0;
413 else if (CONSP (tmp_table
))
415 gl_state
.use_global
= 1;
416 gl_state
.global_code
= tmp_table
;
420 gl_state
.use_global
= 0;
421 gl_state
.current_syntax_table
= BVAR (current_buffer
, syntax_table
);
427 if (cnt
&& !EQ (tmp_table
, textget (i
->plist
, Qsyntax_table
)))
431 gl_state
.e_property
= i
->position
- gl_state
.offset
;
432 gl_state
.forward_i
= i
;
437 = i
->position
+ LENGTH (i
) - gl_state
.offset
;
438 gl_state
.backward_i
= i
;
442 else if (cnt
== INTERVALS_AT_ONCE
)
447 = i
->position
+ LENGTH (i
) - gl_state
.offset
448 /* e_property at EOB is not set to ZV but to ZV+1, so that
449 we can do INC(from);UPDATE_SYNTAX_TABLE_FORWARD without
450 having to check eob between the two. */
451 + (next_interval (i
) ? 0 : 1);
452 gl_state
.forward_i
= i
;
456 gl_state
.b_property
= i
->position
- gl_state
.offset
;
457 gl_state
.backward_i
= i
;
462 i
= count
> 0 ? next_interval (i
) : previous_interval (i
);
464 eassert (i
== NULL
); /* This property goes to the end. */
466 gl_state
.e_property
= gl_state
.stop
;
468 gl_state
.b_property
= gl_state
.start
;
471 /* Returns true if char at CHARPOS is quoted.
472 Global syntax-table data should be set up already to be good at CHARPOS
473 or after. On return global syntax data is good for lookup at CHARPOS. */
476 char_quoted (ptrdiff_t charpos
, ptrdiff_t bytepos
)
478 enum syntaxcode code
;
479 ptrdiff_t beg
= BEGV
;
481 ptrdiff_t orig
= charpos
;
483 while (charpos
> beg
)
486 DEC_BOTH (charpos
, bytepos
);
488 UPDATE_SYNTAX_TABLE_BACKWARD (charpos
);
489 c
= FETCH_CHAR_AS_MULTIBYTE (bytepos
);
491 if (! (code
== Scharquote
|| code
== Sescape
))
497 UPDATE_SYNTAX_TABLE (orig
);
501 /* Return the bytepos one character before BYTEPOS.
502 We assume that BYTEPOS is not at the start of the buffer. */
505 dec_bytepos (ptrdiff_t bytepos
)
507 if (NILP (BVAR (current_buffer
, enable_multibyte_characters
)))
514 /* Return a defun-start position before POS and not too far before.
515 It should be the last one before POS, or nearly the last.
517 When open_paren_in_column_0_is_defun_start is nonzero,
518 only the beginning of the buffer is treated as a defun-start.
520 We record the information about where the scan started
521 and what its result was, so that another call in the same area
522 can return the same value very quickly.
524 There is no promise at which position the global syntax data is
525 valid on return from the subroutine, so the caller should explicitly
526 update the global data. */
529 find_defun_start (ptrdiff_t pos
, ptrdiff_t pos_byte
)
531 ptrdiff_t opoint
= PT
, opoint_byte
= PT_BYTE
;
533 if (!open_paren_in_column_0_is_defun_start
)
535 find_start_value
= BEGV
;
536 find_start_value_byte
= BEGV_BYTE
;
537 find_start_buffer
= current_buffer
;
538 find_start_modiff
= MODIFF
;
539 find_start_begv
= BEGV
;
540 find_start_pos
= pos
;
544 /* Use previous finding, if it's valid and applies to this inquiry. */
545 if (current_buffer
== find_start_buffer
546 /* Reuse the defun-start even if POS is a little farther on.
547 POS might be in the next defun, but that's ok.
548 Our value may not be the best possible, but will still be usable. */
549 && pos
<= find_start_pos
+ 1000
550 && pos
>= find_start_value
551 && BEGV
== find_start_begv
552 && MODIFF
== find_start_modiff
)
553 return find_start_value
;
555 /* Back up to start of line. */
556 scan_newline (pos
, pos_byte
, BEGV
, BEGV_BYTE
, -1, 1);
558 /* We optimize syntax-table lookup for rare updates. Thus we accept
559 only those `^\s(' which are good in global _and_ text-property
561 SETUP_BUFFER_SYNTAX_TABLE ();
566 /* Open-paren at start of line means we may have found our
568 c
= FETCH_CHAR_AS_MULTIBYTE (PT_BYTE
);
569 if (SYNTAX (c
) == Sopen
)
571 SETUP_SYNTAX_TABLE (PT
+ 1, -1); /* Try again... */
572 c
= FETCH_CHAR_AS_MULTIBYTE (PT_BYTE
);
573 if (SYNTAX (c
) == Sopen
)
575 /* Now fallback to the default value. */
576 SETUP_BUFFER_SYNTAX_TABLE ();
578 /* Move to beg of previous line. */
579 scan_newline (PT
, PT_BYTE
, BEGV
, BEGV_BYTE
, -2, 1);
582 /* Record what we found, for the next try. */
583 find_start_value
= PT
;
584 find_start_value_byte
= PT_BYTE
;
585 find_start_buffer
= current_buffer
;
586 find_start_modiff
= MODIFF
;
587 find_start_begv
= BEGV
;
588 find_start_pos
= pos
;
590 TEMP_SET_PT_BOTH (opoint
, opoint_byte
);
592 return find_start_value
;
595 /* Return the SYNTAX_COMEND_FIRST of the character before POS, POS_BYTE. */
598 prev_char_comend_first (ptrdiff_t pos
, ptrdiff_t pos_byte
)
603 DEC_BOTH (pos
, pos_byte
);
604 UPDATE_SYNTAX_TABLE_BACKWARD (pos
);
605 c
= FETCH_CHAR (pos_byte
);
606 val
= SYNTAX_COMEND_FIRST (c
);
607 UPDATE_SYNTAX_TABLE_FORWARD (pos
+ 1);
611 /* Check whether charpos FROM is at the end of a comment.
612 FROM_BYTE is the bytepos corresponding to FROM.
613 Do not move back before STOP.
615 Return true if we find a comment ending at FROM/FROM_BYTE.
617 If successful, store the charpos of the comment's beginning
618 into *CHARPOS_PTR, and the bytepos into *BYTEPOS_PTR.
620 Global syntax data remains valid for backward search starting at
621 the returned value (or at FROM, if the search was not successful). */
624 back_comment (ptrdiff_t from
, ptrdiff_t from_byte
, ptrdiff_t stop
,
625 bool comnested
, int comstyle
, ptrdiff_t *charpos_ptr
,
626 ptrdiff_t *bytepos_ptr
)
628 /* Look back, counting the parity of string-quotes,
629 and recording the comment-starters seen.
630 When we reach a safe place, assume that's not in a string;
631 then step the main scan to the earliest comment-starter seen
632 an even number of string quotes away from the safe place.
634 OFROM[I] is position of the earliest comment-starter seen
635 which is I+2X quotes from the comment-end.
636 PARITY is current parity of quotes from the comment end. */
637 int string_style
= -1; /* Presumed outside of any string. */
638 bool string_lossage
= 0;
639 /* Not a real lossage: indicates that we have passed a matching comment
640 starter plus a non-matching comment-ender, meaning that any matching
641 comment-starter we might see later could be a false positive (hidden
642 inside another comment).
643 Test case: { a (* b } c (* d *) */
644 bool comment_lossage
= 0;
645 ptrdiff_t comment_end
= from
;
646 ptrdiff_t comment_end_byte
= from_byte
;
647 ptrdiff_t comstart_pos
= 0;
648 ptrdiff_t comstart_byte
IF_LINT (= 0);
649 /* Place where the containing defun starts,
650 or 0 if we didn't come across it yet. */
651 ptrdiff_t defun_start
= 0;
652 ptrdiff_t defun_start_byte
= 0;
653 enum syntaxcode code
;
654 ptrdiff_t nesting
= 1; /* current comment nesting */
658 /* FIXME: A }} comment-ender style leads to incorrect behavior
659 in the case of {{ c }}} because we ignore the last two chars which are
660 assumed to be comment-enders although they aren't. */
662 /* At beginning of range to scan, we're outside of strings;
663 that determines quote parity to the comment-end. */
668 bool com2start
, com2end
, comstart
;
670 /* Move back and examine a character. */
671 DEC_BOTH (from
, from_byte
);
672 UPDATE_SYNTAX_TABLE_BACKWARD (from
);
674 prev_syntax
= syntax
;
675 c
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
676 syntax
= SYNTAX_WITH_FLAGS (c
);
679 /* Check for 2-char comment markers. */
680 com2start
= (SYNTAX_FLAGS_COMSTART_FIRST (syntax
)
681 && SYNTAX_FLAGS_COMSTART_SECOND (prev_syntax
)
683 == SYNTAX_FLAGS_COMMENT_STYLE (prev_syntax
, syntax
))
684 && (SYNTAX_FLAGS_COMMENT_NESTED (prev_syntax
)
685 || SYNTAX_FLAGS_COMMENT_NESTED (syntax
)) == comnested
);
686 com2end
= (SYNTAX_FLAGS_COMEND_FIRST (syntax
)
687 && SYNTAX_FLAGS_COMEND_SECOND (prev_syntax
));
688 comstart
= (com2start
|| code
== Scomment
);
690 /* Nasty cases with overlapping 2-char comment markers:
691 - snmp-mode: -- c -- foo -- c --
699 /* If a 2-char comment sequence partly overlaps with another,
700 we don't try to be clever. E.g. |*| in C, or }% in modes that
701 have %..\n and %{..}%. */
702 if (from
> stop
&& (com2end
|| comstart
))
704 ptrdiff_t next
= from
, next_byte
= from_byte
;
705 int next_c
, next_syntax
;
706 DEC_BOTH (next
, next_byte
);
707 UPDATE_SYNTAX_TABLE_BACKWARD (next
);
708 next_c
= FETCH_CHAR_AS_MULTIBYTE (next_byte
);
709 next_syntax
= SYNTAX_WITH_FLAGS (next_c
);
710 if (((comstart
|| comnested
)
711 && SYNTAX_FLAGS_COMEND_SECOND (syntax
)
712 && SYNTAX_FLAGS_COMEND_FIRST (next_syntax
))
713 || ((com2end
|| comnested
)
714 && SYNTAX_FLAGS_COMSTART_SECOND (syntax
)
716 == SYNTAX_FLAGS_COMMENT_STYLE (syntax
, prev_syntax
))
717 && SYNTAX_FLAGS_COMSTART_FIRST (next_syntax
)))
719 /* UPDATE_SYNTAX_TABLE_FORWARD (next + 1); */
722 if (com2start
&& comstart_pos
== 0)
723 /* We're looking at a comment starter. But it might be a comment
724 ender as well (see snmp-mode). The first time we see one, we
725 need to consider it as a comment starter,
726 and the subsequent times as a comment ender. */
729 /* Turn a 2-char comment sequences into the appropriate syntax. */
734 /* Ignore comment starters of a different style. */
735 else if (code
== Scomment
736 && (comstyle
!= SYNTAX_FLAGS_COMMENT_STYLE (syntax
, 0)
737 || SYNTAX_FLAGS_COMMENT_NESTED (syntax
) != comnested
))
740 /* Ignore escaped characters, except comment-enders. */
741 if (code
!= Sendcomment
&& char_quoted (from
, from_byte
))
748 c
= (code
== Sstring_fence
? ST_STRING_STYLE
: ST_COMMENT_STYLE
);
750 /* Track parity of quotes. */
751 if (string_style
== -1)
752 /* Entering a string. */
754 else if (string_style
== c
)
755 /* Leaving the string. */
758 /* If we have two kinds of string delimiters.
759 There's no way to grok this scanning backwards. */
764 /* We've already checked that it is the relevant comstyle. */
765 if (string_style
!= -1 || comment_lossage
|| string_lossage
)
766 /* There are odd string quotes involved, so let's be careful.
767 Test case in Pascal: " { " a { " } */
772 /* Record best comment-starter so far. */
774 comstart_byte
= from_byte
;
776 else if (--nesting
<= 0)
777 /* nested comments have to be balanced, so we don't need to
778 keep looking for earlier ones. We use here the same (slightly
779 incorrect) reasoning as below: since it is followed by uniform
780 paired string quotes, this comment-start has to be outside of
781 strings, else the comment-end itself would be inside a string. */
786 if (SYNTAX_FLAGS_COMMENT_STYLE (syntax
, 0) == comstyle
787 && ((com2end
&& SYNTAX_FLAGS_COMMENT_NESTED (prev_syntax
))
788 || SYNTAX_FLAGS_COMMENT_NESTED (syntax
)) == comnested
)
789 /* This is the same style of comment ender as ours. */
794 /* Anything before that can't count because it would match
795 this comment-ender rather than ours. */
796 from
= stop
; /* Break out of the loop. */
798 else if (comstart_pos
!= 0 || c
!= '\n')
799 /* We're mixing comment styles here, so we'd better be careful.
800 The (comstart_pos != 0 || c != '\n') check is not quite correct
801 (we should just always set comment_lossage), but removing it
802 would imply that any multiline comment in C would go through
803 lossage, which seems overkill.
804 The failure should only happen in the rare cases such as
810 /* Assume a defun-start point is outside of strings. */
811 if (open_paren_in_column_0_is_defun_start
813 || (temp_byte
= dec_bytepos (from_byte
),
814 FETCH_CHAR (temp_byte
) == '\n')))
817 defun_start_byte
= from_byte
;
818 from
= stop
; /* Break out of the loop. */
827 if (comstart_pos
== 0)
830 from_byte
= comment_end_byte
;
831 UPDATE_SYNTAX_TABLE_FORWARD (comment_end
- 1);
833 /* If comstart_pos is set and we get here (ie. didn't jump to `lossage'
834 or `done'), then we've found the beginning of the non-nested comment. */
835 else if (1) /* !comnested */
838 from_byte
= comstart_byte
;
839 UPDATE_SYNTAX_TABLE_FORWARD (from
- 1);
843 struct lisp_parse_state state
;
845 /* We had two kinds of string delimiters mixed up
846 together. Decode this going forwards.
847 Scan fwd from a known safe place (beginning-of-defun)
848 to the one in question; this records where we
849 last passed a comment starter. */
850 /* If we did not already find the defun start, find it now. */
851 if (defun_start
== 0)
853 defun_start
= find_defun_start (comment_end
, comment_end_byte
);
854 defun_start_byte
= find_start_value_byte
;
858 scan_sexps_forward (&state
,
859 defun_start
, defun_start_byte
,
860 comment_end
, TYPE_MINIMUM (EMACS_INT
),
862 defun_start
= comment_end
;
863 if (state
.incomment
== (comnested
? 1 : -1)
864 && state
.comstyle
== comstyle
)
865 from
= state
.comstr_start
;
870 /* If comment_end is inside some other comment, maybe ours
871 is nested, so we need to try again from within the
872 surrounding comment. Example: { a (* " *) */
874 /* FIXME: We should advance by one or two chars. */
875 defun_start
= state
.comstr_start
+ 2;
876 defun_start_byte
= CHAR_TO_BYTE (defun_start
);
879 } while (defun_start
< comment_end
);
881 from_byte
= CHAR_TO_BYTE (from
);
882 UPDATE_SYNTAX_TABLE_FORWARD (from
- 1);
887 *bytepos_ptr
= from_byte
;
889 return from
!= comment_end
;
892 DEFUN ("syntax-table-p", Fsyntax_table_p
, Ssyntax_table_p
, 1, 1, 0,
893 doc
: /* Return t if OBJECT is a syntax table.
894 Currently, any char-table counts as a syntax table. */)
897 if (CHAR_TABLE_P (object
)
898 && EQ (XCHAR_TABLE (object
)->purpose
, Qsyntax_table
))
904 check_syntax_table (Lisp_Object obj
)
906 CHECK_TYPE (CHAR_TABLE_P (obj
) && EQ (XCHAR_TABLE (obj
)->purpose
, Qsyntax_table
),
907 Qsyntax_table_p
, obj
);
910 DEFUN ("syntax-table", Fsyntax_table
, Ssyntax_table
, 0, 0, 0,
911 doc
: /* Return the current syntax table.
912 This is the one specified by the current buffer. */)
915 return BVAR (current_buffer
, syntax_table
);
918 DEFUN ("standard-syntax-table", Fstandard_syntax_table
,
919 Sstandard_syntax_table
, 0, 0, 0,
920 doc
: /* Return the standard syntax table.
921 This is the one used for new buffers. */)
924 return Vstandard_syntax_table
;
927 DEFUN ("copy-syntax-table", Fcopy_syntax_table
, Scopy_syntax_table
, 0, 1, 0,
928 doc
: /* Construct a new syntax table and return it.
929 It is a copy of the TABLE, which defaults to the standard syntax table. */)
935 check_syntax_table (table
);
937 table
= Vstandard_syntax_table
;
939 copy
= Fcopy_sequence (table
);
941 /* Only the standard syntax table should have a default element.
942 Other syntax tables should inherit from parents instead. */
943 set_char_table_defalt (copy
, Qnil
);
945 /* Copied syntax tables should all have parents.
946 If we copied one with no parent, such as the standard syntax table,
947 use the standard syntax table as the copy's parent. */
948 if (NILP (XCHAR_TABLE (copy
)->parent
))
949 Fset_char_table_parent (copy
, Vstandard_syntax_table
);
953 DEFUN ("set-syntax-table", Fset_syntax_table
, Sset_syntax_table
, 1, 1, 0,
954 doc
: /* Select a new syntax table for the current buffer.
955 One argument, a syntax table. */)
959 check_syntax_table (table
);
960 bset_syntax_table (current_buffer
, table
);
961 /* Indicate that this buffer now has a specified syntax table. */
962 idx
= PER_BUFFER_VAR_IDX (syntax_table
);
963 SET_PER_BUFFER_VALUE_P (current_buffer
, idx
, 1);
967 /* Convert a letter which signifies a syntax code
968 into the code it signifies.
969 This is used by modify-syntax-entry, and other things. */
971 unsigned char const syntax_spec_code
[0400] =
972 { 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
973 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
974 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
975 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
976 Swhitespace
, Scomment_fence
, Sstring
, 0377, Smath
, 0377, 0377, Squote
,
977 Sopen
, Sclose
, 0377, 0377, 0377, Swhitespace
, Spunct
, Scharquote
,
978 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
979 0377, 0377, 0377, 0377, Scomment
, 0377, Sendcomment
, 0377,
980 Sinherit
, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* @, A ... */
981 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
982 0377, 0377, 0377, 0377, 0377, 0377, 0377, Sword
,
983 0377, 0377, 0377, 0377, Sescape
, 0377, 0377, Ssymbol
,
984 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* `, a, ... */
985 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
986 0377, 0377, 0377, 0377, 0377, 0377, 0377, Sword
,
987 0377, 0377, 0377, 0377, Sstring_fence
, 0377, 0377, 0377
990 /* Indexed by syntax code, give the letter that describes it. */
992 char const syntax_code_spec
[16] =
994 ' ', '.', 'w', '_', '(', ')', '\'', '\"', '$', '\\', '/', '<', '>', '@',
998 /* Indexed by syntax code, give the object (cons of syntax code and
999 nil) to be stored in syntax table. Since these objects can be
1000 shared among syntax tables, we generate them in advance. By
1001 sharing objects, the function `describe-syntax' can give a more
1003 static Lisp_Object Vsyntax_code_object
;
1006 DEFUN ("char-syntax", Fchar_syntax
, Schar_syntax
, 1, 1, 0,
1007 doc
: /* Return the syntax code of CHARACTER, described by a character.
1008 For example, if CHARACTER is a word constituent, the
1009 character `w' (119) is returned.
1010 The characters that correspond to various syntax codes
1011 are listed in the documentation of `modify-syntax-entry'. */)
1012 (Lisp_Object character
)
1015 CHECK_CHARACTER (character
);
1016 char_int
= XINT (character
);
1017 SETUP_BUFFER_SYNTAX_TABLE ();
1018 return make_number (syntax_code_spec
[SYNTAX (char_int
)]);
1021 DEFUN ("matching-paren", Fmatching_paren
, Smatching_paren
, 1, 1, 0,
1022 doc
: /* Return the matching parenthesis of CHARACTER, or nil if none. */)
1023 (Lisp_Object character
)
1026 enum syntaxcode code
;
1027 CHECK_CHARACTER (character
);
1028 char_int
= XINT (character
);
1029 SETUP_BUFFER_SYNTAX_TABLE ();
1030 code
= SYNTAX (char_int
);
1031 if (code
== Sopen
|| code
== Sclose
)
1032 return SYNTAX_MATCH (char_int
);
1036 DEFUN ("string-to-syntax", Fstring_to_syntax
, Sstring_to_syntax
, 1, 1, 0,
1037 doc
: /* Convert a syntax descriptor STRING into a raw syntax descriptor.
1038 STRING should be a string of the form allowed as argument of
1039 `modify-syntax-entry'. The return value is a raw syntax descriptor: a
1040 cons cell \(CODE . MATCHING-CHAR) which can be used, for example, as
1041 the value of a `syntax-table' text property. */)
1042 (Lisp_Object string
)
1044 const unsigned char *p
;
1048 CHECK_STRING (string
);
1051 val
= syntax_spec_code
[*p
++];
1053 error ("Invalid syntax description letter: %c", p
[-1]);
1055 if (val
== Sinherit
)
1061 int character
= STRING_CHAR_AND_LENGTH (p
, len
);
1062 XSETINT (match
, character
);
1063 if (XFASTINT (match
) == ' ')
1106 if (val
< ASIZE (Vsyntax_code_object
) && NILP (match
))
1107 return AREF (Vsyntax_code_object
, val
);
1109 /* Since we can't use a shared object, let's make a new one. */
1110 return Fcons (make_number (val
), match
);
1113 /* I really don't know why this is interactive
1114 help-form should at least be made useful whilst reading the second arg. */
1115 DEFUN ("modify-syntax-entry", Fmodify_syntax_entry
, Smodify_syntax_entry
, 2, 3,
1116 "cSet syntax for character: \nsSet syntax for %s to: ",
1117 doc
: /* Set syntax for character CHAR according to string NEWENTRY.
1118 The syntax is changed only for table SYNTAX-TABLE, which defaults to
1119 the current buffer's syntax table.
1120 CHAR may be a cons (MIN . MAX), in which case, syntaxes of all characters
1121 in the range MIN to MAX are changed.
1122 The first character of NEWENTRY should be one of the following:
1123 Space or - whitespace syntax. w word constituent.
1124 _ symbol constituent. . punctuation.
1125 ( open-parenthesis. ) close-parenthesis.
1126 " string quote. \\ escape.
1127 $ paired delimiter. ' expression quote or prefix operator.
1128 < comment starter. > comment ender.
1129 / character-quote. @ inherit from parent table.
1130 | generic string fence. ! generic comment fence.
1132 Only single-character comment start and end sequences are represented thus.
1133 Two-character sequences are represented as described below.
1134 The second character of NEWENTRY is the matching parenthesis,
1135 used only if the first character is `(' or `)'.
1136 Any additional characters are flags.
1137 Defined flags are the characters 1, 2, 3, 4, b, p, and n.
1138 1 means CHAR is the start of a two-char comment start sequence.
1139 2 means CHAR is the second character of such a sequence.
1140 3 means CHAR is the start of a two-char comment end sequence.
1141 4 means CHAR is the second character of such a sequence.
1143 There can be several orthogonal comment sequences. This is to support
1144 language modes such as C++. By default, all comment sequences are of style
1145 a, but you can set the comment sequence style to b (on the second character
1146 of a comment-start, and the first character of a comment-end sequence) and/or
1147 c (on any of its chars) using this flag:
1148 b means CHAR is part of comment sequence b.
1149 c means CHAR is part of comment sequence c.
1150 n means CHAR is part of a nestable comment sequence.
1152 p means CHAR is a prefix character for `backward-prefix-chars';
1153 such characters are treated as whitespace when they occur
1154 between expressions.
1155 usage: (modify-syntax-entry CHAR NEWENTRY &optional SYNTAX-TABLE) */)
1156 (Lisp_Object c
, Lisp_Object newentry
, Lisp_Object syntax_table
)
1160 CHECK_CHARACTER_CAR (c
);
1161 CHECK_CHARACTER_CDR (c
);
1164 CHECK_CHARACTER (c
);
1166 if (NILP (syntax_table
))
1167 syntax_table
= BVAR (current_buffer
, syntax_table
);
1169 check_syntax_table (syntax_table
);
1171 newentry
= Fstring_to_syntax (newentry
);
1173 SET_RAW_SYNTAX_ENTRY_RANGE (syntax_table
, c
, newentry
);
1175 SET_RAW_SYNTAX_ENTRY (syntax_table
, XINT (c
), newentry
);
1177 /* We clear the regexp cache, since character classes can now have
1178 different values from those in the compiled regexps.*/
1179 clear_regexp_cache ();
1184 /* Dump syntax table to buffer in human-readable format */
1186 DEFUN ("internal-describe-syntax-value", Finternal_describe_syntax_value
,
1187 Sinternal_describe_syntax_value
, 1, 1, 0,
1188 doc
: /* Insert a description of the internal syntax description SYNTAX at point. */)
1189 (Lisp_Object syntax
)
1191 int code
, syntax_code
;
1192 bool start1
, start2
, end1
, end2
, prefix
, comstyleb
, comstylec
, comnested
;
1194 Lisp_Object first
, match_lisp
, value
= syntax
;
1198 insert_string ("default");
1202 if (CHAR_TABLE_P (value
))
1204 insert_string ("deeper char-table ...");
1210 insert_string ("invalid");
1214 first
= XCAR (value
);
1215 match_lisp
= XCDR (value
);
1217 if (!INTEGERP (first
) || !(NILP (match_lisp
) || CHARACTERP (match_lisp
)))
1219 insert_string ("invalid");
1223 syntax_code
= XINT (first
) & INT_MAX
;
1224 code
= syntax_code
& 0377;
1225 start1
= SYNTAX_FLAGS_COMSTART_FIRST (syntax_code
);
1226 start2
= SYNTAX_FLAGS_COMSTART_SECOND (syntax_code
);;
1227 end1
= SYNTAX_FLAGS_COMEND_FIRST (syntax_code
);
1228 end2
= SYNTAX_FLAGS_COMEND_SECOND (syntax_code
);
1229 prefix
= SYNTAX_FLAGS_PREFIX (syntax_code
);
1230 comstyleb
= SYNTAX_FLAGS_COMMENT_STYLEB (syntax_code
);
1231 comstylec
= SYNTAX_FLAGS_COMMENT_STYLEC (syntax_code
);
1232 comnested
= SYNTAX_FLAGS_COMMENT_NESTED (syntax_code
);
1236 insert_string ("invalid");
1240 str
[0] = syntax_code_spec
[code
], str
[1] = 0;
1243 if (NILP (match_lisp
))
1246 insert_char (XINT (match_lisp
));
1267 insert_string ("\twhich means: ");
1272 insert_string ("whitespace"); break;
1274 insert_string ("punctuation"); break;
1276 insert_string ("word"); break;
1278 insert_string ("symbol"); break;
1280 insert_string ("open"); break;
1282 insert_string ("close"); break;
1284 insert_string ("prefix"); break;
1286 insert_string ("string"); break;
1288 insert_string ("math"); break;
1290 insert_string ("escape"); break;
1292 insert_string ("charquote"); break;
1294 insert_string ("comment"); break;
1296 insert_string ("endcomment"); break;
1298 insert_string ("inherit"); break;
1299 case Scomment_fence
:
1300 insert_string ("comment fence"); break;
1302 insert_string ("string fence"); break;
1304 insert_string ("invalid");
1308 if (!NILP (match_lisp
))
1310 insert_string (", matches ");
1311 insert_char (XINT (match_lisp
));
1315 insert_string (",\n\t is the first character of a comment-start sequence");
1317 insert_string (",\n\t is the second character of a comment-start sequence");
1320 insert_string (",\n\t is the first character of a comment-end sequence");
1322 insert_string (",\n\t is the second character of a comment-end sequence");
1324 insert_string (" (comment style b)");
1326 insert_string (" (comment style c)");
1328 insert_string (" (nestable)");
1331 insert_string (",\n\t is a prefix character for `backward-prefix-chars'");
1336 /* Return the position across COUNT words from FROM.
1337 If that many words cannot be found before the end of the buffer, return 0.
1338 COUNT negative means scan backward and stop at word beginning. */
1341 scan_words (register ptrdiff_t from
, register EMACS_INT count
)
1343 register ptrdiff_t beg
= BEGV
;
1344 register ptrdiff_t end
= ZV
;
1345 register ptrdiff_t from_byte
= CHAR_TO_BYTE (from
);
1346 register enum syntaxcode code
;
1348 Lisp_Object func
, pos
;
1353 SETUP_SYNTAX_TABLE (from
, count
);
1364 UPDATE_SYNTAX_TABLE_FORWARD (from
);
1365 ch0
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
1366 code
= SYNTAX (ch0
);
1367 INC_BOTH (from
, from_byte
);
1368 if (words_include_escapes
1369 && (code
== Sescape
|| code
== Scharquote
))
1374 /* Now CH0 is a character which begins a word and FROM is the
1375 position of the next character. */
1376 func
= CHAR_TABLE_REF (Vfind_word_boundary_function_table
, ch0
);
1377 if (! NILP (Ffboundp (func
)))
1379 pos
= call2 (func
, make_number (from
- 1), make_number (end
));
1380 if (INTEGERP (pos
) && from
< XINT (pos
) && XINT (pos
) <= ZV
)
1383 from_byte
= CHAR_TO_BYTE (from
);
1390 if (from
== end
) break;
1391 UPDATE_SYNTAX_TABLE_FORWARD (from
);
1392 ch1
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
1393 code
= SYNTAX (ch1
);
1395 && (! words_include_escapes
1396 || (code
!= Sescape
&& code
!= Scharquote
)))
1397 || word_boundary_p (ch0
, ch1
))
1399 INC_BOTH (from
, from_byte
);
1414 DEC_BOTH (from
, from_byte
);
1415 UPDATE_SYNTAX_TABLE_BACKWARD (from
);
1416 ch1
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
1417 code
= SYNTAX (ch1
);
1418 if (words_include_escapes
1419 && (code
== Sescape
|| code
== Scharquote
))
1424 /* Now CH1 is a character which ends a word and FROM is the
1426 func
= CHAR_TABLE_REF (Vfind_word_boundary_function_table
, ch1
);
1427 if (! NILP (Ffboundp (func
)))
1429 pos
= call2 (func
, make_number (from
), make_number (beg
));
1430 if (INTEGERP (pos
) && BEGV
<= XINT (pos
) && XINT (pos
) < from
)
1433 from_byte
= CHAR_TO_BYTE (from
);
1442 DEC_BOTH (from
, from_byte
);
1443 UPDATE_SYNTAX_TABLE_BACKWARD (from
);
1444 ch0
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
1445 code
= SYNTAX (ch0
);
1447 && (! words_include_escapes
1448 || (code
!= Sescape
&& code
!= Scharquote
)))
1449 || word_boundary_p (ch0
, ch1
))
1451 INC_BOTH (from
, from_byte
);
1465 DEFUN ("forward-word", Fforward_word
, Sforward_word
, 0, 1, "^p",
1466 doc
: /* Move point forward ARG words (backward if ARG is negative).
1468 If an edge of the buffer or a field boundary is reached, point is left there
1469 and the function returns nil. Field boundaries are not noticed if
1470 `inhibit-field-text-motion' is non-nil. */)
1474 ptrdiff_t orig_val
, val
;
1477 XSETFASTINT (arg
, 1);
1481 val
= orig_val
= scan_words (PT
, XINT (arg
));
1483 val
= XINT (arg
) > 0 ? ZV
: BEGV
;
1485 /* Avoid jumping out of an input field. */
1486 tmp
= Fconstrain_to_field (make_number (val
), make_number (PT
),
1488 val
= XFASTINT (tmp
);
1491 return val
== orig_val
? Qt
: Qnil
;
1494 DEFUN ("skip-chars-forward", Fskip_chars_forward
, Sskip_chars_forward
, 1, 2, 0,
1495 doc
: /* Move point forward, stopping before a char not in STRING, or at pos LIM.
1496 STRING is like the inside of a `[...]' in a regular expression
1497 except that `]' is never special and `\\' quotes `^', `-' or `\\'
1498 (but not at the end of a range; quoting is never needed there).
1499 Thus, with arg "a-zA-Z", this skips letters stopping before first nonletter.
1500 With arg "^a-zA-Z", skips nonletters stopping before first letter.
1501 Char classes, e.g. `[:alpha:]', are supported.
1503 Returns the distance traveled, either zero or positive. */)
1504 (Lisp_Object string
, Lisp_Object lim
)
1506 return skip_chars (1, string
, lim
, 1);
1509 DEFUN ("skip-chars-backward", Fskip_chars_backward
, Sskip_chars_backward
, 1, 2, 0,
1510 doc
: /* Move point backward, stopping after a char not in STRING, or at pos LIM.
1511 See `skip-chars-forward' for details.
1512 Returns the distance traveled, either zero or negative. */)
1513 (Lisp_Object string
, Lisp_Object lim
)
1515 return skip_chars (0, string
, lim
, 1);
1518 DEFUN ("skip-syntax-forward", Fskip_syntax_forward
, Sskip_syntax_forward
, 1, 2, 0,
1519 doc
: /* Move point forward across chars in specified syntax classes.
1520 SYNTAX is a string of syntax code characters.
1521 Stop before a char whose syntax is not in SYNTAX, or at position LIM.
1522 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1523 This function returns the distance traveled, either zero or positive. */)
1524 (Lisp_Object syntax
, Lisp_Object lim
)
1526 return skip_syntaxes (1, syntax
, lim
);
1529 DEFUN ("skip-syntax-backward", Fskip_syntax_backward
, Sskip_syntax_backward
, 1, 2, 0,
1530 doc
: /* Move point backward across chars in specified syntax classes.
1531 SYNTAX is a string of syntax code characters.
1532 Stop on reaching a char whose syntax is not in SYNTAX, or at position LIM.
1533 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1534 This function returns the distance traveled, either zero or negative. */)
1535 (Lisp_Object syntax
, Lisp_Object lim
)
1537 return skip_syntaxes (0, syntax
, lim
);
1541 skip_chars (bool forwardp
, Lisp_Object string
, Lisp_Object lim
,
1542 bool handle_iso_classes
)
1546 /* Store the ranges of non-ASCII characters. */
1547 int *char_ranges
IF_LINT (= NULL
);
1548 int n_char_ranges
= 0;
1550 ptrdiff_t i
, i_byte
;
1551 /* True if the current buffer is multibyte and the region contains
1554 /* True if STRING is multibyte and it contains non-ASCII chars. */
1555 bool string_multibyte
;
1556 ptrdiff_t size_byte
;
1557 const unsigned char *str
;
1559 Lisp_Object iso_classes
;
1561 CHECK_STRING (string
);
1565 XSETINT (lim
, forwardp
? ZV
: BEGV
);
1567 CHECK_NUMBER_COERCE_MARKER (lim
);
1569 /* In any case, don't allow scan outside bounds of buffer. */
1570 if (XINT (lim
) > ZV
)
1571 XSETFASTINT (lim
, ZV
);
1572 if (XINT (lim
) < BEGV
)
1573 XSETFASTINT (lim
, BEGV
);
1575 multibyte
= (!NILP (BVAR (current_buffer
, enable_multibyte_characters
))
1576 && (XINT (lim
) - PT
!= CHAR_TO_BYTE (XINT (lim
)) - PT_BYTE
));
1577 string_multibyte
= SBYTES (string
) > SCHARS (string
);
1579 memset (fastmap
, 0, sizeof fastmap
);
1581 str
= SDATA (string
);
1582 size_byte
= SBYTES (string
);
1585 if (i_byte
< size_byte
1586 && SREF (string
, 0) == '^')
1588 negate
= 1; i_byte
++;
1591 /* Find the characters specified and set their elements of fastmap.
1592 Handle backslashes and ranges specially.
1594 If STRING contains non-ASCII characters, setup char_ranges for
1595 them and use fastmap only for their leading codes. */
1597 if (! string_multibyte
)
1599 bool string_has_eight_bit
= 0;
1601 /* At first setup fastmap. */
1602 while (i_byte
< size_byte
)
1606 if (handle_iso_classes
&& c
== '['
1607 && i_byte
< size_byte
1608 && str
[i_byte
] == ':')
1610 const unsigned char *class_beg
= str
+ i_byte
+ 1;
1611 const unsigned char *class_end
= class_beg
;
1612 const unsigned char *class_limit
= str
+ size_byte
- 2;
1613 /* Leave room for the null. */
1614 unsigned char class_name
[CHAR_CLASS_MAX_LENGTH
+ 1];
1617 if (class_limit
- class_beg
> CHAR_CLASS_MAX_LENGTH
)
1618 class_limit
= class_beg
+ CHAR_CLASS_MAX_LENGTH
;
1620 while (class_end
< class_limit
1621 && *class_end
>= 'a' && *class_end
<= 'z')
1624 if (class_end
== class_beg
1625 || *class_end
!= ':' || class_end
[1] != ']')
1626 goto not_a_class_name
;
1628 memcpy (class_name
, class_beg
, class_end
- class_beg
);
1629 class_name
[class_end
- class_beg
] = 0;
1631 cc
= re_wctype (class_name
);
1633 error ("Invalid ISO C character class");
1635 iso_classes
= Fcons (make_number (cc
), iso_classes
);
1637 i_byte
= class_end
+ 2 - str
;
1644 if (i_byte
== size_byte
)
1649 /* Treat `-' as range character only if another character
1651 if (i_byte
+ 1 < size_byte
1652 && str
[i_byte
] == '-')
1656 /* Skip over the dash. */
1659 /* Get the end of the range. */
1662 && i_byte
< size_byte
)
1670 if (! ASCII_CHAR_P (c2
))
1671 string_has_eight_bit
= 1;
1677 if (! ASCII_CHAR_P (c
))
1678 string_has_eight_bit
= 1;
1682 /* If the current range is multibyte and STRING contains
1683 eight-bit chars, arrange fastmap and setup char_ranges for
1684 the corresponding multibyte chars. */
1685 if (multibyte
&& string_has_eight_bit
)
1688 char himap
[0200 + 1];
1689 memcpy (himap
, fastmap
+ 0200, 0200);
1691 memset (fastmap
+ 0200, 0, 0200);
1692 char_ranges
= alloca (sizeof *char_ranges
* 128 * 2);
1695 while ((p1
= memchr (himap
+ i
, 1, 0200 - i
)))
1697 /* Deduce the next range C..C2 from the next clump of 1s
1698 in HIMAP starting with &HIMAP[I]. HIMAP is the high
1699 order half of the old FASTMAP. */
1700 int c2
, leading_code
;
1702 c
= BYTE8_TO_CHAR (i
+ 0200);
1704 c2
= BYTE8_TO_CHAR (i
+ 0200 - 1);
1706 char_ranges
[n_char_ranges
++] = c
;
1707 char_ranges
[n_char_ranges
++] = c2
;
1708 leading_code
= CHAR_LEADING_CODE (c
);
1709 memset (fastmap
+ leading_code
, 1,
1710 CHAR_LEADING_CODE (c2
) - leading_code
+ 1);
1714 else /* STRING is multibyte */
1716 char_ranges
= alloca (sizeof *char_ranges
* SCHARS (string
) * 2);
1718 while (i_byte
< size_byte
)
1720 int leading_code
= str
[i_byte
];
1721 c
= STRING_CHAR_AND_LENGTH (str
+ i_byte
, len
);
1724 if (handle_iso_classes
&& c
== '['
1725 && i_byte
< size_byte
1726 && STRING_CHAR (str
+ i_byte
) == ':')
1728 const unsigned char *class_beg
= str
+ i_byte
+ 1;
1729 const unsigned char *class_end
= class_beg
;
1730 const unsigned char *class_limit
= str
+ size_byte
- 2;
1731 /* Leave room for the null. */
1732 unsigned char class_name
[CHAR_CLASS_MAX_LENGTH
+ 1];
1735 if (class_limit
- class_beg
> CHAR_CLASS_MAX_LENGTH
)
1736 class_limit
= class_beg
+ CHAR_CLASS_MAX_LENGTH
;
1738 while (class_end
< class_limit
1739 && *class_end
>= 'a' && *class_end
<= 'z')
1742 if (class_end
== class_beg
1743 || *class_end
!= ':' || class_end
[1] != ']')
1744 goto not_a_class_name_multibyte
;
1746 memcpy (class_name
, class_beg
, class_end
- class_beg
);
1747 class_name
[class_end
- class_beg
] = 0;
1749 cc
= re_wctype (class_name
);
1751 error ("Invalid ISO C character class");
1753 iso_classes
= Fcons (make_number (cc
), iso_classes
);
1755 i_byte
= class_end
+ 2 - str
;
1759 not_a_class_name_multibyte
:
1762 if (i_byte
== size_byte
)
1765 leading_code
= str
[i_byte
];
1766 c
= STRING_CHAR_AND_LENGTH (str
+ i_byte
, len
);
1769 /* Treat `-' as range character only if another character
1771 if (i_byte
+ 1 < size_byte
1772 && str
[i_byte
] == '-')
1774 int c2
, leading_code2
;
1776 /* Skip over the dash. */
1779 /* Get the end of the range. */
1780 leading_code2
= str
[i_byte
];
1781 c2
= STRING_CHAR_AND_LENGTH (str
+ i_byte
, len
);
1785 && i_byte
< size_byte
)
1787 leading_code2
= str
[i_byte
];
1788 c2
= STRING_CHAR_AND_LENGTH (str
+ i_byte
, len
);
1794 if (ASCII_CHAR_P (c
))
1796 while (c
<= c2
&& c
< 0x80)
1798 leading_code
= CHAR_LEADING_CODE (c
);
1800 if (! ASCII_CHAR_P (c
))
1802 int lim2
= leading_code2
+ 1;
1803 while (leading_code
< lim2
)
1804 fastmap
[leading_code
++] = 1;
1807 char_ranges
[n_char_ranges
++] = c
;
1808 char_ranges
[n_char_ranges
++] = c2
;
1814 if (ASCII_CHAR_P (c
))
1818 fastmap
[leading_code
] = 1;
1819 char_ranges
[n_char_ranges
++] = c
;
1820 char_ranges
[n_char_ranges
++] = c
;
1825 /* If the current range is unibyte and STRING contains non-ASCII
1826 chars, arrange fastmap for the corresponding unibyte
1829 if (! multibyte
&& n_char_ranges
> 0)
1831 memset (fastmap
+ 0200, 0, 0200);
1832 for (i
= 0; i
< n_char_ranges
; i
+= 2)
1834 int c1
= char_ranges
[i
];
1835 int lim2
= char_ranges
[i
+ 1] + 1;
1837 for (; c1
< lim2
; c1
++)
1839 int b
= CHAR_TO_BYTE_SAFE (c1
);
1847 /* If ^ was the first character, complement the fastmap. */
1851 for (i
= 0; i
< sizeof fastmap
; i
++)
1855 for (i
= 0; i
< 0200; i
++)
1857 /* All non-ASCII chars possibly match. */
1858 for (; i
< sizeof fastmap
; i
++)
1864 ptrdiff_t start_point
= PT
;
1866 ptrdiff_t pos_byte
= PT_BYTE
;
1867 unsigned char *p
= PT_ADDR
, *endp
, *stop
;
1871 endp
= (XINT (lim
) == GPT
) ? GPT_ADDR
: CHAR_POS_ADDR (XINT (lim
));
1872 stop
= (pos
< GPT
&& GPT
< XINT (lim
)) ? GPT_ADDR
: endp
;
1876 endp
= CHAR_POS_ADDR (XINT (lim
));
1877 stop
= (pos
>= GPT
&& GPT
> XINT (lim
)) ? GAP_END_ADDR
: endp
;
1881 /* This code may look up syntax tables using functions that rely on the
1882 gl_state object. To make sure this object is not out of date,
1883 let's initialize it manually.
1884 We ignore syntax-table text-properties for now, since that's
1885 what we've done in the past. */
1886 SETUP_BUFFER_SYNTAX_TABLE ();
1901 c
= STRING_CHAR_AND_LENGTH (p
, nbytes
);
1902 if (! NILP (iso_classes
) && in_classes (c
, iso_classes
))
1912 if (! ASCII_CHAR_P (c
))
1914 /* As we are looking at a multibyte character, we
1915 must look up the character in the table
1916 CHAR_RANGES. If there's no data in the table,
1917 that character is not what we want to skip. */
1919 /* The following code do the right thing even if
1920 n_char_ranges is zero (i.e. no data in
1922 for (i
= 0; i
< n_char_ranges
; i
+= 2)
1923 if (c
>= char_ranges
[i
] && c
<= char_ranges
[i
+ 1])
1925 if (!(negate
^ (i
< n_char_ranges
)))
1929 p
+= nbytes
, pos
++, pos_byte
+= nbytes
;
1942 if (!NILP (iso_classes
) && in_classes (*p
, iso_classes
))
1947 goto fwd_unibyte_ok
;
1953 p
++, pos
++, pos_byte
++;
1961 unsigned char *prev_p
;
1971 while (--p
>= stop
&& ! CHAR_HEAD_P (*p
));
1972 c
= STRING_CHAR (p
);
1974 if (! NILP (iso_classes
) && in_classes (c
, iso_classes
))
1984 if (! ASCII_CHAR_P (c
))
1986 /* See the comment in the previous similar code. */
1987 for (i
= 0; i
< n_char_ranges
; i
+= 2)
1988 if (c
>= char_ranges
[i
] && c
<= char_ranges
[i
+ 1])
1990 if (!(negate
^ (i
< n_char_ranges
)))
1994 pos
--, pos_byte
-= prev_p
- p
;
2007 if (! NILP (iso_classes
) && in_classes (p
[-1], iso_classes
))
2012 goto back_unibyte_ok
;
2015 if (!fastmap
[p
[-1]])
2018 p
--, pos
--, pos_byte
--;
2022 SET_PT_BOTH (pos
, pos_byte
);
2025 return make_number (PT
- start_point
);
2031 skip_syntaxes (bool forwardp
, Lisp_Object string
, Lisp_Object lim
)
2034 unsigned char fastmap
[0400];
2036 ptrdiff_t i
, i_byte
;
2038 ptrdiff_t size_byte
;
2041 CHECK_STRING (string
);
2044 XSETINT (lim
, forwardp
? ZV
: BEGV
);
2046 CHECK_NUMBER_COERCE_MARKER (lim
);
2048 /* In any case, don't allow scan outside bounds of buffer. */
2049 if (XINT (lim
) > ZV
)
2050 XSETFASTINT (lim
, ZV
);
2051 if (XINT (lim
) < BEGV
)
2052 XSETFASTINT (lim
, BEGV
);
2054 if (forwardp
? (PT
>= XFASTINT (lim
)) : (PT
<= XFASTINT (lim
)))
2055 return make_number (0);
2057 multibyte
= (!NILP (BVAR (current_buffer
, enable_multibyte_characters
))
2058 && (XINT (lim
) - PT
!= CHAR_TO_BYTE (XINT (lim
)) - PT_BYTE
));
2060 memset (fastmap
, 0, sizeof fastmap
);
2062 if (SBYTES (string
) > SCHARS (string
))
2063 /* As this is very rare case (syntax spec is ASCII only), don't
2064 consider efficiency. */
2065 string
= string_make_unibyte (string
);
2067 str
= SDATA (string
);
2068 size_byte
= SBYTES (string
);
2071 if (i_byte
< size_byte
2072 && SREF (string
, 0) == '^')
2074 negate
= 1; i_byte
++;
2077 /* Find the syntaxes specified and set their elements of fastmap. */
2079 while (i_byte
< size_byte
)
2082 fastmap
[syntax_spec_code
[c
]] = 1;
2085 /* If ^ was the first character, complement the fastmap. */
2087 for (i
= 0; i
< sizeof fastmap
; i
++)
2091 ptrdiff_t start_point
= PT
;
2093 ptrdiff_t pos_byte
= PT_BYTE
;
2094 unsigned char *p
= PT_ADDR
, *endp
, *stop
;
2098 endp
= (XINT (lim
) == GPT
) ? GPT_ADDR
: CHAR_POS_ADDR (XINT (lim
));
2099 stop
= (pos
< GPT
&& GPT
< XINT (lim
)) ? GPT_ADDR
: endp
;
2103 endp
= CHAR_POS_ADDR (XINT (lim
));
2104 stop
= (pos
>= GPT
&& GPT
> XINT (lim
)) ? GAP_END_ADDR
: endp
;
2108 SETUP_SYNTAX_TABLE (pos
, forwardp
? 1 : -1);
2124 c
= STRING_CHAR_AND_LENGTH (p
, nbytes
);
2125 if (! fastmap
[SYNTAX (c
)])
2127 p
+= nbytes
, pos
++, pos_byte
+= nbytes
;
2128 UPDATE_SYNTAX_TABLE_FORWARD (pos
);
2142 if (! fastmap
[SYNTAX (*p
)])
2144 p
++, pos
++, pos_byte
++;
2145 UPDATE_SYNTAX_TABLE_FORWARD (pos
);
2155 unsigned char *prev_p
;
2164 UPDATE_SYNTAX_TABLE_BACKWARD (pos
- 1);
2166 while (--p
>= stop
&& ! CHAR_HEAD_P (*p
));
2167 c
= STRING_CHAR (p
);
2168 if (! fastmap
[SYNTAX (c
)])
2170 pos
--, pos_byte
-= prev_p
- p
;
2184 UPDATE_SYNTAX_TABLE_BACKWARD (pos
- 1);
2185 if (! fastmap
[SYNTAX (p
[-1])])
2187 p
--, pos
--, pos_byte
--;
2192 SET_PT_BOTH (pos
, pos_byte
);
2195 return make_number (PT
- start_point
);
2199 /* Return true if character C belongs to one of the ISO classes
2200 in the list ISO_CLASSES. Each class is represented by an
2201 integer which is its type according to re_wctype. */
2204 in_classes (int c
, Lisp_Object iso_classes
)
2206 bool fits_class
= 0;
2208 while (CONSP (iso_classes
))
2211 elt
= XCAR (iso_classes
);
2212 iso_classes
= XCDR (iso_classes
);
2214 if (re_iswctype (c
, XFASTINT (elt
)))
2221 /* Jump over a comment, assuming we are at the beginning of one.
2222 FROM is the current position.
2223 FROM_BYTE is the bytepos corresponding to FROM.
2224 Do not move past STOP (a charpos).
2225 The comment over which we have to jump is of style STYLE
2226 (either SYNTAX_FLAGS_COMMENT_STYLE (foo) or ST_COMMENT_STYLE).
2227 NESTING should be positive to indicate the nesting at the beginning
2228 for nested comments and should be zero or negative else.
2229 ST_COMMENT_STYLE cannot be nested.
2230 PREV_SYNTAX is the SYNTAX_WITH_FLAGS of the previous character
2231 (or 0 If the search cannot start in the middle of a two-character).
2233 If successful, return true and store the charpos of the comment's end
2234 into *CHARPOS_PTR and the corresponding bytepos into *BYTEPOS_PTR.
2235 Else, return false and store the charpos STOP into *CHARPOS_PTR, the
2236 corresponding bytepos into *BYTEPOS_PTR and the current nesting
2237 (as defined for state.incomment) in *INCOMMENT_PTR.
2239 The comment end is the last character of the comment rather than the
2240 character just after the comment.
2242 Global syntax data is assumed to initially be valid for FROM and
2243 remains valid for forward search starting at the returned position. */
2246 forw_comment (ptrdiff_t from
, ptrdiff_t from_byte
, ptrdiff_t stop
,
2247 EMACS_INT nesting
, int style
, int prev_syntax
,
2248 ptrdiff_t *charpos_ptr
, ptrdiff_t *bytepos_ptr
,
2249 EMACS_INT
*incomment_ptr
)
2252 register enum syntaxcode code
;
2253 register int syntax
, other_syntax
;
2255 if (nesting
<= 0) nesting
= -1;
2257 /* Enter the loop in the middle so that we find
2258 a 2-char comment ender if we start in the middle of it. */
2259 syntax
= prev_syntax
;
2260 if (syntax
!= 0) goto forw_incomment
;
2266 *incomment_ptr
= nesting
;
2267 *charpos_ptr
= from
;
2268 *bytepos_ptr
= from_byte
;
2271 c
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
2272 syntax
= SYNTAX_WITH_FLAGS (c
);
2273 code
= syntax
& 0xff;
2274 if (code
== Sendcomment
2275 && SYNTAX_FLAGS_COMMENT_STYLE (syntax
, 0) == style
2276 && (SYNTAX_FLAGS_COMMENT_NESTED (syntax
) ?
2277 (nesting
> 0 && --nesting
== 0) : nesting
< 0))
2278 /* we have encountered a comment end of the same style
2279 as the comment sequence which began this comment
2282 if (code
== Scomment_fence
2283 && style
== ST_COMMENT_STYLE
)
2284 /* we have encountered a comment end of the same style
2285 as the comment sequence which began this comment
2290 && SYNTAX_FLAGS_COMMENT_NESTED (syntax
)
2291 && SYNTAX_FLAGS_COMMENT_STYLE (syntax
, 0) == style
)
2292 /* we have encountered a nested comment of the same style
2293 as the comment sequence which began this comment section */
2295 INC_BOTH (from
, from_byte
);
2296 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2299 if (from
< stop
&& SYNTAX_FLAGS_COMEND_FIRST (syntax
)
2300 && (c1
= FETCH_CHAR_AS_MULTIBYTE (from_byte
),
2301 other_syntax
= SYNTAX_WITH_FLAGS (c1
),
2302 SYNTAX_FLAGS_COMEND_SECOND (other_syntax
))
2303 && SYNTAX_FLAGS_COMMENT_STYLE (syntax
, other_syntax
) == style
2304 && ((SYNTAX_FLAGS_COMMENT_NESTED (syntax
) ||
2305 SYNTAX_FLAGS_COMMENT_NESTED (other_syntax
))
2306 ? nesting
> 0 : nesting
< 0))
2309 /* we have encountered a comment end of the same style
2310 as the comment sequence which began this comment
2315 INC_BOTH (from
, from_byte
);
2316 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2321 && SYNTAX_FLAGS_COMSTART_FIRST (syntax
)
2322 && (c1
= FETCH_CHAR_AS_MULTIBYTE (from_byte
),
2323 other_syntax
= SYNTAX_WITH_FLAGS (c1
),
2324 SYNTAX_FLAGS_COMMENT_STYLE (other_syntax
, syntax
) == style
2325 && SYNTAX_FLAGS_COMSTART_SECOND (other_syntax
))
2326 && (SYNTAX_FLAGS_COMMENT_NESTED (syntax
) ||
2327 SYNTAX_FLAGS_COMMENT_NESTED (other_syntax
)))
2328 /* we have encountered a nested comment of the same style
2329 as the comment sequence which began this comment
2332 INC_BOTH (from
, from_byte
);
2333 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2337 *charpos_ptr
= from
;
2338 *bytepos_ptr
= from_byte
;
2342 DEFUN ("forward-comment", Fforward_comment
, Sforward_comment
, 1, 1, 0,
2344 Move forward across up to COUNT comments. If COUNT is negative, move backward.
2345 Stop scanning if we find something other than a comment or whitespace.
2346 Set point to where scanning stops.
2347 If COUNT comments are found as expected, with nothing except whitespace
2348 between them, return t; otherwise return nil. */)
2351 ptrdiff_t from
, from_byte
, stop
;
2353 enum syntaxcode code
;
2354 int comstyle
= 0; /* style of comment encountered */
2355 bool comnested
= 0; /* whether the comment is nestable or not */
2358 ptrdiff_t out_charpos
, out_bytepos
;
2361 CHECK_NUMBER (count
);
2362 count1
= XINT (count
);
2363 stop
= count1
> 0 ? ZV
: BEGV
;
2369 from_byte
= PT_BYTE
;
2371 SETUP_SYNTAX_TABLE (from
, count1
);
2376 bool comstart_first
;
2377 int syntax
, other_syntax
;
2381 SET_PT_BOTH (from
, from_byte
);
2385 c
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
2386 syntax
= SYNTAX_WITH_FLAGS (c
);
2388 comstart_first
= SYNTAX_FLAGS_COMSTART_FIRST (syntax
);
2389 comnested
= SYNTAX_FLAGS_COMMENT_NESTED (syntax
);
2390 comstyle
= SYNTAX_FLAGS_COMMENT_STYLE (syntax
, 0);
2391 INC_BOTH (from
, from_byte
);
2392 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2393 if (from
< stop
&& comstart_first
2394 && (c1
= FETCH_CHAR_AS_MULTIBYTE (from_byte
),
2395 other_syntax
= SYNTAX_WITH_FLAGS (c1
),
2396 SYNTAX_FLAGS_COMSTART_SECOND (other_syntax
)))
2398 /* We have encountered a comment start sequence and we
2399 are ignoring all text inside comments. We must record
2400 the comment style this sequence begins so that later,
2401 only a comment end of the same style actually ends
2402 the comment section. */
2404 comstyle
= SYNTAX_FLAGS_COMMENT_STYLE (other_syntax
, syntax
);
2405 comnested
|= SYNTAX_FLAGS_COMMENT_NESTED (other_syntax
);
2406 INC_BOTH (from
, from_byte
);
2407 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2410 while (code
== Swhitespace
|| (code
== Sendcomment
&& c
== '\n'));
2412 if (code
== Scomment_fence
)
2413 comstyle
= ST_COMMENT_STYLE
;
2414 else if (code
!= Scomment
)
2417 DEC_BOTH (from
, from_byte
);
2418 SET_PT_BOTH (from
, from_byte
);
2421 /* We're at the start of a comment. */
2422 found
= forw_comment (from
, from_byte
, stop
, comnested
, comstyle
, 0,
2423 &out_charpos
, &out_bytepos
, &dummy
);
2424 from
= out_charpos
; from_byte
= out_bytepos
;
2428 SET_PT_BOTH (from
, from_byte
);
2431 INC_BOTH (from
, from_byte
);
2432 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2433 /* We have skipped one comment. */
2446 SET_PT_BOTH (BEGV
, BEGV_BYTE
);
2451 DEC_BOTH (from
, from_byte
);
2452 /* char_quoted does UPDATE_SYNTAX_TABLE_BACKWARD (from). */
2453 quoted
= char_quoted (from
, from_byte
);
2454 c
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
2455 syntax
= SYNTAX_WITH_FLAGS (c
);
2458 comnested
= SYNTAX_FLAGS_COMMENT_NESTED (syntax
);
2459 if (code
== Sendcomment
)
2460 comstyle
= SYNTAX_FLAGS_COMMENT_STYLE (syntax
, 0);
2461 if (from
> stop
&& SYNTAX_FLAGS_COMEND_SECOND (syntax
)
2462 && prev_char_comend_first (from
, from_byte
)
2463 && !char_quoted (from
- 1, dec_bytepos (from_byte
)))
2466 /* We must record the comment style encountered so that
2467 later, we can match only the proper comment begin
2468 sequence of the same style. */
2469 DEC_BOTH (from
, from_byte
);
2471 /* Calling char_quoted, above, set up global syntax position
2472 at the new value of FROM. */
2473 c1
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
2474 other_syntax
= SYNTAX_WITH_FLAGS (c1
);
2475 comstyle
= SYNTAX_FLAGS_COMMENT_STYLE (other_syntax
, syntax
);
2476 comnested
|= SYNTAX_FLAGS_COMMENT_NESTED (other_syntax
);
2479 if (code
== Scomment_fence
)
2481 /* Skip until first preceding unquoted comment_fence. */
2482 bool fence_found
= 0;
2483 ptrdiff_t ini
= from
, ini_byte
= from_byte
;
2487 DEC_BOTH (from
, from_byte
);
2488 UPDATE_SYNTAX_TABLE_BACKWARD (from
);
2489 c
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
2490 if (SYNTAX (c
) == Scomment_fence
2491 && !char_quoted (from
, from_byte
))
2496 else if (from
== stop
)
2499 if (fence_found
== 0)
2501 from
= ini
; /* Set point to ini + 1. */
2502 from_byte
= ini_byte
;
2506 /* We have skipped one comment. */
2509 else if (code
== Sendcomment
)
2511 found
= back_comment (from
, from_byte
, stop
, comnested
, comstyle
,
2512 &out_charpos
, &out_bytepos
);
2516 /* This end-of-line is not an end-of-comment.
2517 Treat it like a whitespace.
2518 CC-mode (and maybe others) relies on this behavior. */
2522 /* Failure: we should go back to the end of this
2523 not-quite-endcomment. */
2524 if (SYNTAX (c
) != code
)
2525 /* It was a two-char Sendcomment. */
2526 INC_BOTH (from
, from_byte
);
2532 /* We have skipped one comment. */
2533 from
= out_charpos
, from_byte
= out_bytepos
;
2537 else if (code
!= Swhitespace
|| quoted
)
2541 INC_BOTH (from
, from_byte
);
2542 SET_PT_BOTH (from
, from_byte
);
2550 SET_PT_BOTH (from
, from_byte
);
2555 /* Return syntax code of character C if C is an ASCII character
2556 or if MULTIBYTE_SYMBOL_P is false. Otherwise, return Ssymbol. */
2558 static enum syntaxcode
2559 syntax_multibyte (int c
, bool multibyte_symbol_p
)
2561 return ASCII_CHAR_P (c
) || !multibyte_symbol_p
? SYNTAX (c
) : Ssymbol
;
2565 scan_lists (EMACS_INT from
, EMACS_INT count
, EMACS_INT depth
, bool sexpflag
)
2568 ptrdiff_t stop
= count
> 0 ? ZV
: BEGV
;
2573 enum syntaxcode code
;
2574 EMACS_INT min_depth
= depth
; /* Err out if depth gets less than this. */
2575 int comstyle
= 0; /* style of comment encountered */
2576 bool comnested
= 0; /* whether the comment is nestable or not */
2578 EMACS_INT last_good
= from
;
2580 ptrdiff_t from_byte
;
2581 ptrdiff_t out_bytepos
, out_charpos
;
2583 bool multibyte_symbol_p
= sexpflag
&& multibyte_syntax_as_symbol
;
2585 if (depth
> 0) min_depth
= 0;
2587 if (from
> ZV
) from
= ZV
;
2588 if (from
< BEGV
) from
= BEGV
;
2590 from_byte
= CHAR_TO_BYTE (from
);
2595 SETUP_SYNTAX_TABLE (from
, count
);
2600 bool comstart_first
, prefix
;
2601 int syntax
, other_syntax
;
2602 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2603 c
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
2604 syntax
= SYNTAX_WITH_FLAGS (c
);
2605 code
= syntax_multibyte (c
, multibyte_symbol_p
);
2606 comstart_first
= SYNTAX_FLAGS_COMSTART_FIRST (syntax
);
2607 comnested
= SYNTAX_FLAGS_COMMENT_NESTED (syntax
);
2608 comstyle
= SYNTAX_FLAGS_COMMENT_STYLE (syntax
, 0);
2609 prefix
= SYNTAX_FLAGS_PREFIX (syntax
);
2610 if (depth
== min_depth
)
2612 INC_BOTH (from
, from_byte
);
2613 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2614 if (from
< stop
&& comstart_first
2615 && (c
= FETCH_CHAR_AS_MULTIBYTE (from_byte
),
2616 other_syntax
= SYNTAX_WITH_FLAGS (c
),
2617 SYNTAX_FLAGS_COMSTART_SECOND (other_syntax
))
2618 && parse_sexp_ignore_comments
)
2620 /* we have encountered a comment start sequence and we
2621 are ignoring all text inside comments. We must record
2622 the comment style this sequence begins so that later,
2623 only a comment end of the same style actually ends
2624 the comment section */
2626 comstyle
= SYNTAX_FLAGS_COMMENT_STYLE (other_syntax
, syntax
);
2627 comnested
|= SYNTAX_FLAGS_COMMENT_NESTED (other_syntax
);
2628 INC_BOTH (from
, from_byte
);
2629 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2641 INC_BOTH (from
, from_byte
);
2642 /* treat following character as a word constituent */
2645 if (depth
|| !sexpflag
) break;
2646 /* This word counts as a sexp; return at end of it. */
2649 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2651 c
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
2652 switch (syntax_multibyte (c
, multibyte_symbol_p
))
2656 INC_BOTH (from
, from_byte
);
2667 INC_BOTH (from
, from_byte
);
2671 case Scomment_fence
:
2672 comstyle
= ST_COMMENT_STYLE
;
2675 if (!parse_sexp_ignore_comments
) break;
2676 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2677 found
= forw_comment (from
, from_byte
, stop
,
2678 comnested
, comstyle
, 0,
2679 &out_charpos
, &out_bytepos
, &dummy
);
2680 from
= out_charpos
, from_byte
= out_bytepos
;
2687 INC_BOTH (from
, from_byte
);
2688 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2694 if (from
!= stop
&& c
== FETCH_CHAR_AS_MULTIBYTE (from_byte
))
2696 INC_BOTH (from
, from_byte
);
2706 if (!++depth
) goto done
;
2711 if (!--depth
) goto done
;
2712 if (depth
< min_depth
)
2713 xsignal3 (Qscan_error
,
2714 build_string ("Containing expression ends prematurely"),
2715 make_number (last_good
), make_number (from
));
2720 temp_pos
= dec_bytepos (from_byte
);
2721 stringterm
= FETCH_CHAR_AS_MULTIBYTE (temp_pos
);
2724 enum syntaxcode c_code
;
2727 UPDATE_SYNTAX_TABLE_FORWARD (from
);
2728 c
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
2729 c_code
= syntax_multibyte (c
, multibyte_symbol_p
);
2731 ? c
== stringterm
&& c_code
== Sstring
2732 : c_code
== Sstring_fence
)
2739 INC_BOTH (from
, from_byte
);
2741 INC_BOTH (from
, from_byte
);
2743 INC_BOTH (from
, from_byte
);
2744 if (!depth
&& sexpflag
) goto done
;
2747 /* Ignore whitespace, punctuation, quote, endcomment. */
2752 /* Reached end of buffer. Error if within object, return nil if between */
2759 /* End of object reached */
2769 DEC_BOTH (from
, from_byte
);
2770 UPDATE_SYNTAX_TABLE_BACKWARD (from
);
2771 c
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
2772 syntax
= SYNTAX_WITH_FLAGS (c
);
2773 code
= syntax_multibyte (c
, multibyte_symbol_p
);
2774 if (depth
== min_depth
)
2777 comnested
= SYNTAX_FLAGS_COMMENT_NESTED (syntax
);
2778 if (code
== Sendcomment
)
2779 comstyle
= SYNTAX_FLAGS_COMMENT_STYLE (syntax
, 0);
2780 if (from
> stop
&& SYNTAX_FLAGS_COMEND_SECOND (syntax
)
2781 && prev_char_comend_first (from
, from_byte
)
2782 && parse_sexp_ignore_comments
)
2784 /* We must record the comment style encountered so that
2785 later, we can match only the proper comment begin
2786 sequence of the same style. */
2787 int c2
, other_syntax
;
2788 DEC_BOTH (from
, from_byte
);
2789 UPDATE_SYNTAX_TABLE_BACKWARD (from
);
2791 c2
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
2792 other_syntax
= SYNTAX_WITH_FLAGS (c2
);
2793 comstyle
= SYNTAX_FLAGS_COMMENT_STYLE (other_syntax
, syntax
);
2794 comnested
|= SYNTAX_FLAGS_COMMENT_NESTED (other_syntax
);
2797 /* Quoting turns anything except a comment-ender
2798 into a word character. Note that this cannot be true
2799 if we decremented FROM in the if-statement above. */
2800 if (code
!= Sendcomment
&& char_quoted (from
, from_byte
))
2802 DEC_BOTH (from
, from_byte
);
2805 else if (SYNTAX_FLAGS_PREFIX (syntax
))
2814 if (depth
|| !sexpflag
) break;
2815 /* This word counts as a sexp; count object finished
2816 after passing it. */
2819 temp_pos
= from_byte
;
2820 if (! NILP (BVAR (current_buffer
, enable_multibyte_characters
)))
2824 UPDATE_SYNTAX_TABLE_BACKWARD (from
- 1);
2825 c1
= FETCH_CHAR_AS_MULTIBYTE (temp_pos
);
2826 /* Don't allow comment-end to be quoted. */
2827 if (syntax_multibyte (c1
, multibyte_symbol_p
) == Sendcomment
)
2829 quoted
= char_quoted (from
- 1, temp_pos
);
2832 DEC_BOTH (from
, from_byte
);
2833 temp_pos
= dec_bytepos (temp_pos
);
2834 UPDATE_SYNTAX_TABLE_BACKWARD (from
- 1);
2836 c1
= FETCH_CHAR_AS_MULTIBYTE (temp_pos
);
2838 switch (syntax_multibyte (c1
, multibyte_symbol_p
))
2840 case Sword
: case Ssymbol
: case Squote
: break;
2841 default: goto done2
;
2843 DEC_BOTH (from
, from_byte
);
2850 temp_pos
= dec_bytepos (from_byte
);
2851 UPDATE_SYNTAX_TABLE_BACKWARD (from
- 1);
2852 if (from
!= stop
&& c
== FETCH_CHAR_AS_MULTIBYTE (temp_pos
))
2853 DEC_BOTH (from
, from_byte
);
2862 if (!++depth
) goto done2
;
2867 if (!--depth
) goto done2
;
2868 if (depth
< min_depth
)
2869 xsignal3 (Qscan_error
,
2870 build_string ("Containing expression ends prematurely"),
2871 make_number (last_good
), make_number (from
));
2875 if (!parse_sexp_ignore_comments
)
2877 found
= back_comment (from
, from_byte
, stop
, comnested
, comstyle
,
2878 &out_charpos
, &out_bytepos
);
2879 /* FIXME: if !found, it really wasn't a comment-end.
2880 For single-char Sendcomment, we can't do much about it apart
2881 from skipping the char.
2882 For 2-char endcomments, we could try again, taking both
2883 chars as separate entities, but it's a lot of trouble
2884 for very little gain, so we don't bother either. -sm */
2886 from
= out_charpos
, from_byte
= out_bytepos
;
2889 case Scomment_fence
:
2895 DEC_BOTH (from
, from_byte
);
2896 UPDATE_SYNTAX_TABLE_BACKWARD (from
);
2897 if (!char_quoted (from
, from_byte
))
2899 c
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
2900 if (syntax_multibyte (c
, multibyte_symbol_p
) == code
)
2904 if (code
== Sstring_fence
&& !depth
&& sexpflag
) goto done2
;
2908 stringterm
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
2913 DEC_BOTH (from
, from_byte
);
2914 UPDATE_SYNTAX_TABLE_BACKWARD (from
);
2915 if (!char_quoted (from
, from_byte
))
2917 c
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
2919 && (syntax_multibyte (c
, multibyte_symbol_p
)
2924 if (!depth
&& sexpflag
) goto done2
;
2927 /* Ignore whitespace, punctuation, quote, endcomment. */
2932 /* Reached start of buffer. Error if within object, return nil if between */
2945 XSETFASTINT (val
, from
);
2949 xsignal3 (Qscan_error
,
2950 build_string ("Unbalanced parentheses"),
2951 make_number (last_good
), make_number (from
));
2954 DEFUN ("scan-lists", Fscan_lists
, Sscan_lists
, 3, 3, 0,
2955 doc
: /* Scan from character number FROM by COUNT lists.
2956 Scan forward if COUNT is positive, backward if COUNT is negative.
2957 Return the character number of the position thus found.
2959 A \"list", in this context, refers to a balanced parenthetical
2960 grouping, as determined by the syntax table.
2962 If DEPTH is nonzero, treat that as the nesting depth of the starting
2963 point (i.e. the starting point is DEPTH parentheses deep). This
2964 function scans over parentheses until the depth goes to zero COUNT
2965 times. Hence, positive DEPTH moves out that number of levels of
2966 parentheses, while negative DEPTH moves to a deeper level.
2968 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
2970 If we reach the beginning or end of the accessible part of the buffer
2971 before we have scanned over COUNT lists, return nil if the depth at
2972 that point is zero, and signal a error if the depth is nonzero. */)
2973 (Lisp_Object from
, Lisp_Object count
, Lisp_Object depth
)
2975 CHECK_NUMBER (from
);
2976 CHECK_NUMBER (count
);
2977 CHECK_NUMBER (depth
);
2979 return scan_lists (XINT (from
), XINT (count
), XINT (depth
), 0);
2982 DEFUN ("scan-sexps", Fscan_sexps
, Sscan_sexps
, 2, 2, 0,
2983 doc
: /* Scan from character number FROM by COUNT balanced expressions.
2984 If COUNT is negative, scan backwards.
2985 Returns the character number of the position thus found.
2987 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
2989 If the beginning or end of (the accessible part of) the buffer is reached
2990 in the middle of a parenthetical grouping, an error is signaled.
2991 If the beginning or end is reached between groupings
2992 but before count is used up, nil is returned. */)
2993 (Lisp_Object from
, Lisp_Object count
)
2995 CHECK_NUMBER (from
);
2996 CHECK_NUMBER (count
);
2998 return scan_lists (XINT (from
), XINT (count
), 0, 1);
3001 DEFUN ("backward-prefix-chars", Fbackward_prefix_chars
, Sbackward_prefix_chars
,
3003 doc
: /* Move point backward over any number of chars with prefix syntax.
3004 This includes chars with "quote" or "prefix" syntax (' or p). */)
3007 ptrdiff_t beg
= BEGV
;
3008 ptrdiff_t opoint
= PT
;
3009 ptrdiff_t opoint_byte
= PT_BYTE
;
3011 ptrdiff_t pos_byte
= PT_BYTE
;
3016 SET_PT_BOTH (opoint
, opoint_byte
);
3021 SETUP_SYNTAX_TABLE (pos
, -1);
3023 DEC_BOTH (pos
, pos_byte
);
3025 while (!char_quoted (pos
, pos_byte
)
3026 /* Previous statement updates syntax table. */
3027 && ((c
= FETCH_CHAR_AS_MULTIBYTE (pos_byte
), SYNTAX (c
) == Squote
)
3028 || syntax_prefix_flag_p (c
)))
3031 opoint_byte
= pos_byte
;
3034 DEC_BOTH (pos
, pos_byte
);
3037 SET_PT_BOTH (opoint
, opoint_byte
);
3042 /* Parse forward from FROM / FROM_BYTE to END,
3043 assuming that FROM has state OLDSTATE (nil means FROM is start of function),
3044 and return a description of the state of the parse at END.
3045 If STOPBEFORE, stop at the start of an atom.
3046 If COMMENTSTOP is 1, stop at the start of a comment.
3047 If COMMENTSTOP is -1, stop at the start or end of a comment,
3048 after the beginning of a string, or after the end of a string. */
3051 scan_sexps_forward (struct lisp_parse_state
*stateptr
,
3052 ptrdiff_t from
, ptrdiff_t from_byte
, ptrdiff_t end
,
3053 EMACS_INT targetdepth
, bool stopbefore
,
3054 Lisp_Object oldstate
, int commentstop
)
3056 struct lisp_parse_state state
;
3057 enum syntaxcode code
;
3060 struct level
{ ptrdiff_t last
, prev
; };
3061 struct level levelstart
[100];
3062 struct level
*curlevel
= levelstart
;
3063 struct level
*endlevel
= levelstart
+ 100;
3064 EMACS_INT depth
; /* Paren depth of current scanning location.
3065 level - levelstart equals this except
3066 when the depth becomes negative. */
3067 EMACS_INT mindepth
; /* Lowest DEPTH value seen. */
3068 bool start_quoted
= 0; /* True means starting after a char quote. */
3070 ptrdiff_t prev_from
; /* Keep one character before FROM. */
3071 ptrdiff_t prev_from_byte
;
3072 int prev_from_syntax
;
3073 bool boundary_stop
= commentstop
== -1;
3076 ptrdiff_t out_bytepos
, out_charpos
;
3080 prev_from_byte
= from_byte
;
3082 DEC_BOTH (prev_from
, prev_from_byte
);
3084 /* Use this macro instead of `from++'. */
3086 do { prev_from = from; \
3087 prev_from_byte = from_byte; \
3088 temp = FETCH_CHAR_AS_MULTIBYTE (prev_from_byte); \
3089 prev_from_syntax = SYNTAX_WITH_FLAGS (temp); \
3090 INC_BOTH (from, from_byte); \
3092 UPDATE_SYNTAX_TABLE_FORWARD (from); \
3098 if (NILP (oldstate
))
3101 state
.instring
= -1;
3102 state
.incomment
= 0;
3103 state
.comstyle
= 0; /* comment style a by default. */
3104 state
.comstr_start
= -1; /* no comment/string seen. */
3108 tem
= Fcar (oldstate
);
3114 oldstate
= Fcdr (oldstate
);
3115 oldstate
= Fcdr (oldstate
);
3116 oldstate
= Fcdr (oldstate
);
3117 tem
= Fcar (oldstate
);
3118 /* Check whether we are inside string_fence-style string: */
3119 state
.instring
= (!NILP (tem
)
3120 ? (CHARACTERP (tem
) ? XFASTINT (tem
) : ST_STRING_STYLE
)
3123 oldstate
= Fcdr (oldstate
);
3124 tem
= Fcar (oldstate
);
3125 state
.incomment
= (!NILP (tem
)
3126 ? (INTEGERP (tem
) ? XINT (tem
) : -1)
3129 oldstate
= Fcdr (oldstate
);
3130 tem
= Fcar (oldstate
);
3131 start_quoted
= !NILP (tem
);
3133 /* if the eighth element of the list is nil, we are in comment
3134 style a. If it is non-nil, we are in comment style b */
3135 oldstate
= Fcdr (oldstate
);
3136 oldstate
= Fcdr (oldstate
);
3137 tem
= Fcar (oldstate
);
3138 state
.comstyle
= (NILP (tem
)
3140 : (RANGED_INTEGERP (0, tem
, ST_COMMENT_STYLE
)
3142 : ST_COMMENT_STYLE
));
3144 oldstate
= Fcdr (oldstate
);
3145 tem
= Fcar (oldstate
);
3146 state
.comstr_start
=
3147 RANGED_INTEGERP (PTRDIFF_MIN
, tem
, PTRDIFF_MAX
) ? XINT (tem
) : -1;
3148 oldstate
= Fcdr (oldstate
);
3149 tem
= Fcar (oldstate
);
3150 while (!NILP (tem
)) /* >= second enclosing sexps. */
3152 Lisp_Object temhd
= Fcar (tem
);
3153 if (RANGED_INTEGERP (PTRDIFF_MIN
, temhd
, PTRDIFF_MAX
))
3154 curlevel
->last
= XINT (temhd
);
3155 if (++curlevel
== endlevel
)
3156 curlevel
--; /* error ("Nesting too deep for parser"); */
3157 curlevel
->prev
= -1;
3158 curlevel
->last
= -1;
3165 curlevel
->prev
= -1;
3166 curlevel
->last
= -1;
3168 SETUP_SYNTAX_TABLE (prev_from
, 1);
3169 temp
= FETCH_CHAR (prev_from_byte
);
3170 prev_from_syntax
= SYNTAX_WITH_FLAGS (temp
);
3171 UPDATE_SYNTAX_TABLE_FORWARD (from
);
3173 /* Enter the loop at a place appropriate for initial state. */
3175 if (state
.incomment
)
3176 goto startincomment
;
3177 if (state
.instring
>= 0)
3179 nofence
= state
.instring
!= ST_STRING_STYLE
;
3181 goto startquotedinstring
;
3184 else if (start_quoted
)
3191 code
= prev_from_syntax
& 0xff;
3194 && SYNTAX_FLAGS_COMSTART_FIRST (prev_from_syntax
)
3195 && (c1
= FETCH_CHAR (from_byte
),
3196 syntax
= SYNTAX_WITH_FLAGS (c1
),
3197 SYNTAX_FLAGS_COMSTART_SECOND (syntax
)))
3198 /* Duplicate code to avoid a complex if-expression
3199 which causes trouble for the SGI compiler. */
3201 /* Record the comment style we have entered so that only
3202 the comment-end sequence of the same style actually
3203 terminates the comment section. */
3205 = SYNTAX_FLAGS_COMMENT_STYLE (syntax
, prev_from_syntax
);
3206 comnested
= (SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax
)
3207 | SYNTAX_FLAGS_COMMENT_NESTED (syntax
));
3208 state
.incomment
= comnested
? 1 : -1;
3209 state
.comstr_start
= prev_from
;
3213 else if (code
== Scomment_fence
)
3215 /* Record the comment style we have entered so that only
3216 the comment-end sequence of the same style actually
3217 terminates the comment section. */
3218 state
.comstyle
= ST_COMMENT_STYLE
;
3219 state
.incomment
= -1;
3220 state
.comstr_start
= prev_from
;
3223 else if (code
== Scomment
)
3225 state
.comstyle
= SYNTAX_FLAGS_COMMENT_STYLE (prev_from_syntax
, 0);
3226 state
.incomment
= (SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax
) ?
3228 state
.comstr_start
= prev_from
;
3231 if (SYNTAX_FLAGS_PREFIX (prev_from_syntax
))
3237 if (stopbefore
) goto stop
; /* this arg means stop at sexp start */
3238 curlevel
->last
= prev_from
;
3240 if (from
== end
) goto endquoted
;
3243 /* treat following character as a word constituent */
3246 if (stopbefore
) goto stop
; /* this arg means stop at sexp start */
3247 curlevel
->last
= prev_from
;
3251 int symchar
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
3252 switch (SYNTAX (symchar
))
3257 if (from
== end
) goto endquoted
;
3269 curlevel
->prev
= curlevel
->last
;
3272 case Scomment_fence
: /* Can't happen because it's handled above. */
3274 if (commentstop
|| boundary_stop
) goto done
;
3276 /* The (from == BEGV) test was to enter the loop in the middle so
3277 that we find a 2-char comment ender even if we start in the
3278 middle of it. We don't want to do that if we're just at the
3279 beginning of the comment (think of (*) ... (*)). */
3280 found
= forw_comment (from
, from_byte
, end
,
3281 state
.incomment
, state
.comstyle
,
3282 (from
== BEGV
|| from
< state
.comstr_start
+ 3)
3283 ? 0 : prev_from_syntax
,
3284 &out_charpos
, &out_bytepos
, &state
.incomment
);
3285 from
= out_charpos
; from_byte
= out_bytepos
;
3286 /* Beware! prev_from and friends are invalid now.
3287 Luckily, the `done' doesn't use them and the INC_FROM
3288 sets them to a sane value without looking at them. */
3289 if (!found
) goto done
;
3291 state
.incomment
= 0;
3292 state
.comstyle
= 0; /* reset the comment style */
3293 if (boundary_stop
) goto done
;
3297 if (stopbefore
) goto stop
; /* this arg means stop at sexp start */
3299 /* curlevel++->last ran into compiler bug on Apollo */
3300 curlevel
->last
= prev_from
;
3301 if (++curlevel
== endlevel
)
3302 curlevel
--; /* error ("Nesting too deep for parser"); */
3303 curlevel
->prev
= -1;
3304 curlevel
->last
= -1;
3305 if (targetdepth
== depth
) goto done
;
3310 if (depth
< mindepth
)
3312 if (curlevel
!= levelstart
)
3314 curlevel
->prev
= curlevel
->last
;
3315 if (targetdepth
== depth
) goto done
;
3320 state
.comstr_start
= from
- 1;
3321 if (stopbefore
) goto stop
; /* this arg means stop at sexp start */
3322 curlevel
->last
= prev_from
;
3323 state
.instring
= (code
== Sstring
3324 ? (FETCH_CHAR_AS_MULTIBYTE (prev_from_byte
))
3326 if (boundary_stop
) goto done
;
3329 nofence
= state
.instring
!= ST_STRING_STYLE
;
3334 enum syntaxcode c_code
;
3336 if (from
>= end
) goto done
;
3337 c
= FETCH_CHAR_AS_MULTIBYTE (from_byte
);
3338 c_code
= SYNTAX (c
);
3340 /* Check C_CODE here so that if the char has
3341 a syntax-table property which says it is NOT
3342 a string character, it does not end the string. */
3343 if (nofence
&& c
== state
.instring
&& c_code
== Sstring
)
3349 if (!nofence
) goto string_end
;
3354 startquotedinstring
:
3355 if (from
>= end
) goto endquoted
;
3361 state
.instring
= -1;
3362 curlevel
->prev
= curlevel
->last
;
3364 if (boundary_stop
) goto done
;
3368 /* FIXME: We should do something with it. */
3371 /* Ignore whitespace, punctuation, quote, endcomment. */
3377 stop
: /* Here if stopping before start of sexp. */
3378 from
= prev_from
; /* We have just fetched the char that starts it; */
3379 from_byte
= prev_from_byte
;
3380 goto done
; /* but return the position before it. */
3385 state
.depth
= depth
;
3386 state
.mindepth
= mindepth
;
3387 state
.thislevelstart
= curlevel
->prev
;
3388 state
.prevlevelstart
3389 = (curlevel
== levelstart
) ? -1 : (curlevel
- 1)->last
;
3390 state
.location
= from
;
3391 state
.location_byte
= from_byte
;
3392 state
.levelstarts
= Qnil
;
3393 while (curlevel
> levelstart
)
3394 state
.levelstarts
= Fcons (make_number ((--curlevel
)->last
),
3401 DEFUN ("parse-partial-sexp", Fparse_partial_sexp
, Sparse_partial_sexp
, 2, 6, 0,
3402 doc
: /* Parse Lisp syntax starting at FROM until TO; return status of parse at TO.
3403 Parsing stops at TO or when certain criteria are met;
3404 point is set to where parsing stops.
3405 If fifth arg OLDSTATE is omitted or nil,
3406 parsing assumes that FROM is the beginning of a function.
3407 Value is a list of elements describing final state of parsing:
3409 1. character address of start of innermost containing list; nil if none.
3410 2. character address of start of last complete sexp terminated.
3411 3. non-nil if inside a string.
3412 (it is the character that will terminate the string,
3413 or t if the string should be terminated by a generic string delimiter.)
3414 4. nil if outside a comment, t if inside a non-nestable comment,
3415 else an integer (the current comment nesting).
3416 5. t if following a quote character.
3417 6. the minimum paren-depth encountered during this scan.
3418 7. style of comment, if any.
3419 8. character address of start of comment or string; nil if not in one.
3420 9. Intermediate data for continuation of parsing (subject to change).
3421 If third arg TARGETDEPTH is non-nil, parsing stops if the depth
3422 in parentheses becomes equal to TARGETDEPTH.
3423 Fourth arg STOPBEFORE non-nil means stop when come to
3424 any character that starts a sexp.
3425 Fifth arg OLDSTATE is a list like what this function returns.
3426 It is used to initialize the state of the parse. Elements number 1, 2, 6
3428 Sixth arg COMMENTSTOP non-nil means stop at the start of a comment.
3429 If it is symbol `syntax-table', stop after the start of a comment or a
3430 string, or after end of a comment or a string. */)
3431 (Lisp_Object from
, Lisp_Object to
, Lisp_Object targetdepth
,
3432 Lisp_Object stopbefore
, Lisp_Object oldstate
, Lisp_Object commentstop
)
3434 struct lisp_parse_state state
;
3437 if (!NILP (targetdepth
))
3439 CHECK_NUMBER (targetdepth
);
3440 target
= XINT (targetdepth
);
3443 target
= TYPE_MINIMUM (EMACS_INT
); /* We won't reach this depth */
3445 validate_region (&from
, &to
);
3446 scan_sexps_forward (&state
, XINT (from
), CHAR_TO_BYTE (XINT (from
)),
3448 target
, !NILP (stopbefore
), oldstate
,
3450 ? 0 : (EQ (commentstop
, Qsyntax_table
) ? -1 : 1)));
3452 SET_PT_BOTH (state
.location
, state
.location_byte
);
3454 return Fcons (make_number (state
.depth
),
3455 Fcons (state
.prevlevelstart
< 0
3456 ? Qnil
: make_number (state
.prevlevelstart
),
3457 Fcons (state
.thislevelstart
< 0
3458 ? Qnil
: make_number (state
.thislevelstart
),
3459 Fcons (state
.instring
>= 0
3460 ? (state
.instring
== ST_STRING_STYLE
3461 ? Qt
: make_number (state
.instring
)) : Qnil
,
3462 Fcons (state
.incomment
< 0 ? Qt
:
3463 (state
.incomment
== 0 ? Qnil
:
3464 make_number (state
.incomment
)),
3465 Fcons (state
.quoted
? Qt
: Qnil
,
3466 Fcons (make_number (state
.mindepth
),
3467 Fcons ((state
.comstyle
3468 ? (state
.comstyle
== ST_COMMENT_STYLE
3470 : make_number (state
.comstyle
))
3472 Fcons (((state
.incomment
3473 || (state
.instring
>= 0))
3474 ? make_number (state
.comstr_start
)
3476 Fcons (state
.levelstarts
, Qnil
))))))))));
3480 init_syntax_once (void)
3485 /* This has to be done here, before we call Fmake_char_table. */
3486 DEFSYM (Qsyntax_table
, "syntax-table");
3488 /* Intern_C_String this now in case it isn't already done.
3489 Setting this variable twice is harmless.
3490 But don't staticpro it here--that is done in alloc.c. */
3491 Qchar_table_extra_slots
= intern_c_string ("char-table-extra-slots");
3493 /* Create objects which can be shared among syntax tables. */
3494 Vsyntax_code_object
= make_uninit_vector (Smax
);
3495 for (i
= 0; i
< Smax
; i
++)
3496 ASET (Vsyntax_code_object
, i
, Fcons (make_number (i
), Qnil
));
3498 /* Now we are ready to set up this property, so we can
3499 create syntax tables. */
3500 Fput (Qsyntax_table
, Qchar_table_extra_slots
, make_number (0));
3502 temp
= AREF (Vsyntax_code_object
, Swhitespace
);
3504 Vstandard_syntax_table
= Fmake_char_table (Qsyntax_table
, temp
);
3506 /* Control characters should not be whitespace. */
3507 temp
= AREF (Vsyntax_code_object
, Spunct
);
3508 for (i
= 0; i
<= ' ' - 1; i
++)
3509 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, i
, temp
);
3510 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, 0177, temp
);
3512 /* Except that a few really are whitespace. */
3513 temp
= AREF (Vsyntax_code_object
, Swhitespace
);
3514 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, ' ', temp
);
3515 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '\t', temp
);
3516 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '\n', temp
);
3517 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, 015, temp
);
3518 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, 014, temp
);
3520 temp
= AREF (Vsyntax_code_object
, Sword
);
3521 for (i
= 'a'; i
<= 'z'; i
++)
3522 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, i
, temp
);
3523 for (i
= 'A'; i
<= 'Z'; i
++)
3524 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, i
, temp
);
3525 for (i
= '0'; i
<= '9'; i
++)
3526 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, i
, temp
);
3528 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '$', temp
);
3529 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '%', temp
);
3531 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '(',
3532 Fcons (make_number (Sopen
), make_number (')')));
3533 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, ')',
3534 Fcons (make_number (Sclose
), make_number ('(')));
3535 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '[',
3536 Fcons (make_number (Sopen
), make_number (']')));
3537 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, ']',
3538 Fcons (make_number (Sclose
), make_number ('[')));
3539 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '{',
3540 Fcons (make_number (Sopen
), make_number ('}')));
3541 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '}',
3542 Fcons (make_number (Sclose
), make_number ('{')));
3543 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '"',
3544 Fcons (make_number (Sstring
), Qnil
));
3545 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '\\',
3546 Fcons (make_number (Sescape
), Qnil
));
3548 temp
= AREF (Vsyntax_code_object
, Ssymbol
);
3549 for (i
= 0; i
< 10; i
++)
3551 c
= "_-+*/&|<>="[i
];
3552 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, c
, temp
);
3555 temp
= AREF (Vsyntax_code_object
, Spunct
);
3556 for (i
= 0; i
< 12; i
++)
3558 c
= ".,;:?!#@~^'`"[i
];
3559 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, c
, temp
);
3562 /* All multibyte characters have syntax `word' by default. */
3563 temp
= AREF (Vsyntax_code_object
, Sword
);
3564 char_table_set_range (Vstandard_syntax_table
, 0x80, MAX_CHAR
, temp
);
3568 syms_of_syntax (void)
3570 DEFSYM (Qsyntax_table_p
, "syntax-table-p");
3572 staticpro (&Vsyntax_code_object
);
3574 staticpro (&gl_state
.object
);
3575 staticpro (&gl_state
.global_code
);
3576 staticpro (&gl_state
.current_syntax_table
);
3577 staticpro (&gl_state
.old_prop
);
3579 /* Defined in regex.c */
3580 staticpro (&re_match_object
);
3582 DEFSYM (Qscan_error
, "scan-error");
3583 Fput (Qscan_error
, Qerror_conditions
,
3584 listn (CONSTYPE_PURE
, 2, Qscan_error
, Qerror
));
3585 Fput (Qscan_error
, Qerror_message
,
3586 build_pure_c_string ("Scan error"));
3588 DEFVAR_BOOL ("parse-sexp-ignore-comments", parse_sexp_ignore_comments
,
3589 doc
: /* Non-nil means `forward-sexp', etc., should treat comments as whitespace. */);
3591 DEFVAR_BOOL ("parse-sexp-lookup-properties", parse_sexp_lookup_properties
,
3592 doc
: /* Non-nil means `forward-sexp', etc., obey `syntax-table' property.
3593 Otherwise, that text property is simply ignored.
3594 See the info node `(elisp)Syntax Properties' for a description of the
3595 `syntax-table' property. */);
3597 words_include_escapes
= 0;
3598 DEFVAR_BOOL ("words-include-escapes", words_include_escapes
,
3599 doc
: /* Non-nil means `forward-word', etc., should treat escape chars part of words. */);
3601 DEFVAR_BOOL ("multibyte-syntax-as-symbol", multibyte_syntax_as_symbol
,
3602 doc
: /* Non-nil means `scan-sexps' treats all multibyte characters as symbol. */);
3603 multibyte_syntax_as_symbol
= 0;
3605 DEFVAR_BOOL ("open-paren-in-column-0-is-defun-start",
3606 open_paren_in_column_0_is_defun_start
,
3607 doc
: /* Non-nil means an open paren in column 0 denotes the start of a defun. */);
3608 open_paren_in_column_0_is_defun_start
= 1;
3611 DEFVAR_LISP ("find-word-boundary-function-table",
3612 Vfind_word_boundary_function_table
,
3614 Char table of functions to search for the word boundary.
3615 Each function is called with two arguments; POS and LIMIT.
3616 POS and LIMIT are character positions in the current buffer.
3618 If POS is less than LIMIT, POS is at the first character of a word,
3619 and the return value of a function is a position after the last
3620 character of that word.
3622 If POS is not less than LIMIT, POS is at the last character of a word,
3623 and the return value of a function is a position at the first
3624 character of that word.
3626 In both cases, LIMIT bounds the search. */);
3627 Vfind_word_boundary_function_table
= Fmake_char_table (Qnil
, Qnil
);
3629 defsubr (&Ssyntax_table_p
);
3630 defsubr (&Ssyntax_table
);
3631 defsubr (&Sstandard_syntax_table
);
3632 defsubr (&Scopy_syntax_table
);
3633 defsubr (&Sset_syntax_table
);
3634 defsubr (&Schar_syntax
);
3635 defsubr (&Smatching_paren
);
3636 defsubr (&Sstring_to_syntax
);
3637 defsubr (&Smodify_syntax_entry
);
3638 defsubr (&Sinternal_describe_syntax_value
);
3640 defsubr (&Sforward_word
);
3642 defsubr (&Sskip_chars_forward
);
3643 defsubr (&Sskip_chars_backward
);
3644 defsubr (&Sskip_syntax_forward
);
3645 defsubr (&Sskip_syntax_backward
);
3647 defsubr (&Sforward_comment
);
3648 defsubr (&Sscan_lists
);
3649 defsubr (&Sscan_sexps
);
3650 defsubr (&Sbackward_prefix_chars
);
3651 defsubr (&Sparse_partial_sexp
);